From 2c9aac8852d5e1444e839fa96f858c718ecdd82a Mon Sep 17 00:00:00 2001 From: Sicheng Pan Date: Wed, 12 Jul 2023 14:46:54 -0700 Subject: [PATCH 01/19] Implement lazyframe profiling and optimization toggles --- R/extendr-wrappers.R | 18 +++++++++ src/rust/src/lazy/dataframe.rs | 67 ++++++++++++++++++++++++++++++++-- 2 files changed, 81 insertions(+), 4 deletions(-) diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index cde638634..26c7406f3 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -945,6 +945,24 @@ LazyFrame$rename <- function(existing, new) .Call(wrap__LazyFrame__rename, self, LazyFrame$schema <- function() .Call(wrap__LazyFrame__schema, self) +LazyFrame$without_optimization <- function() .Call(wrap__LazyFrame__without_optimization, self) + +LazyFrame$with_projection_pushdown <- function(toggle) .Call(wrap__LazyFrame__with_projection_pushdown, self, toggle) + +LazyFrame$with_predicate_pushdown <- function(toggle) .Call(wrap__LazyFrame__with_predicate_pushdown, self, toggle) + +LazyFrame$with_type_coercion <- function(toggle) .Call(wrap__LazyFrame__with_type_coercion, self, toggle) + +LazyFrame$with_simplify_expr <- function(toggle) .Call(wrap__LazyFrame__with_simplify_expr, self, toggle) + +LazyFrame$with_slice_pushdown <- function(toggle) .Call(wrap__LazyFrame__with_slice_pushdown, self, toggle) + +LazyFrame$with_common_subplan_elimination <- function(toggle) .Call(wrap__LazyFrame__with_common_subplan_elimination, self, toggle) + +LazyFrame$with_streaming <- function(toggle) .Call(wrap__LazyFrame__with_streaming, self, toggle) + +LazyFrame$profile <- function() .Call(wrap__LazyFrame__profile, self) + #' @export `$.LazyFrame` <- function (self, name) { func <- LazyFrame[[name]]; environment(func) <- environment(); func } diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index c8a622788..2cea44d8a 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -1,13 +1,13 @@ use crate::concurrent::{handle_thread_r_requests, PolarsBackgroundHandle}; use crate::conversion::strings_to_smartstrings; use crate::lazy::dsl::*; +use crate::rdataframe::DataFrame as RDF; use crate::rdatatype::new_join_type; use crate::rdatatype::new_quantile_interpolation_option; use crate::rdatatype::new_unique_keep_strategy; use crate::rdatatype::{new_asof_strategy, RPolarsDataType}; use crate::robj_to; -use crate::rpolarserr::RResult; -use crate::rpolarserr::{Rctx, WithRctx}; +use crate::rpolarserr::{polars_to_rpolars_err, RResult, Rctx, WithRctx}; use crate::utils::wrappers::null_to_opt; use crate::utils::{r_result_list, try_f64_into_usize}; use extendr_api::prelude::*; @@ -65,7 +65,7 @@ impl LazyFrame { PolarsBackgroundHandle::new(self) } - pub fn collect(&self) -> Result { + pub fn collect(&self) -> Result { handle_thread_r_requests(self.clone().0).map_err(|err| { //improve err messages let err_string = match err { @@ -79,7 +79,7 @@ impl LazyFrame { }) } - pub fn collect_handled(&self) -> crate::rpolarserr::RResult { + pub fn collect_handled(&self) -> RResult { use crate::rpolarserr::WithRctx; handle_thread_r_requests(self.clone().0).when("calling $collect() on LazyFrame") } @@ -377,6 +377,65 @@ impl LazyFrame { pairs.map(|(name, ty)| (name, RPolarsDataType(ty.clone()))), )) } + + fn without_optimization(&self) -> Self { + self.0.clone().without_optimizations().into() + } + + fn with_projection_pushdown(&self, toggle: Robj) -> RResult { + Ok(Self( + self.0 + .clone() + .with_projection_pushdown(robj_to!(bool, toggle)?), + )) + } + + fn with_predicate_pushdown(&self, toggle: Robj) -> RResult { + Ok(Self( + self.0 + .clone() + .with_predicate_pushdown(robj_to!(bool, toggle)?), + )) + } + + fn with_type_coercion(&self, toggle: Robj) -> RResult { + Ok(Self( + self.0.clone().with_type_coercion(robj_to!(bool, toggle)?), + )) + } + + fn with_simplify_expr(&self, toggle: Robj) -> RResult { + Ok(Self( + self.0.clone().with_simplify_expr(robj_to!(bool, toggle)?), + )) + } + + fn with_slice_pushdown(&self, toggle: Robj) -> RResult { + Ok(Self( + self.0.clone().with_slice_pushdown(robj_to!(bool, toggle)?), + )) + } + + fn with_common_subplan_elimination(&self, toggle: Robj) -> RResult { + Ok(Self( + self.0 + .clone() + .with_common_subplan_elimination(robj_to!(bool, toggle)?), + )) + } + + fn with_streaming(&self, toggle: Robj) -> RResult { + Ok(Self(self.0.clone().with_streaming(robj_to!(bool, toggle)?))) + } + + fn profile(&self) -> RResult { + self.0 + .clone() + .profile() + .map(|(r, p)| pairlist!(result = RDF(r), profile = RDF(p))) + .map_err(polars_to_rpolars_err) + .when("profiling the LazyFrame") + } } #[derive(Clone)] From ec74abbfcf48c1e8f7ba350915f4f2f2007c34bf Mon Sep 17 00:00:00 2001 From: Sicheng Pan Date: Wed, 12 Jul 2023 15:28:57 -0700 Subject: [PATCH 02/19] Implement R wrappers --- R/lazyframe__lazy.R | 126 ++++++++++++++++++ man/LazyFrame_profile.Rd | 21 +++ ...zyFrame_with_common_subplan_elimination.Rd | 23 ++++ man/LazyFrame_with_predicate_pushdown.Rd | 23 ++++ man/LazyFrame_with_projection_pushdown.Rd | 23 ++++ man/LazyFrame_with_simplify_expr.Rd | 23 ++++ man/LazyFrame_with_slice_pushdown.Rd | 23 ++++ man/LazyFrame_with_streaming.Rd | 23 ++++ man/LazyFrame_with_type_coercion.Rd | 23 ++++ man/LazyFrame_without_optimization.Rd | 24 ++++ src/rust/src/lazy/dataframe.rs | 2 +- 11 files changed, 333 insertions(+), 1 deletion(-) create mode 100644 man/LazyFrame_profile.Rd create mode 100644 man/LazyFrame_with_common_subplan_elimination.Rd create mode 100644 man/LazyFrame_with_predicate_pushdown.Rd create mode 100644 man/LazyFrame_with_projection_pushdown.Rd create mode 100644 man/LazyFrame_with_simplify_expr.Rd create mode 100644 man/LazyFrame_with_slice_pushdown.Rd create mode 100644 man/LazyFrame_with_streaming.Rd create mode 100644 man/LazyFrame_with_type_coercion.Rd create mode 100644 man/LazyFrame_without_optimization.Rd diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index e82f8466b..8c51b52e4 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -911,3 +911,129 @@ LazyFrame_dtypes = method_as_property(function() { result() |> unwrap("in $dtypes()") }) + +#' @title Profile +#' @keywords LazyFrame +#' @return A pair of DataFrames, (collected result, profile stats) +#' @examples +#' pl$LazyFrame(mtcars)$ +#' select(pl$col("mpg") * 0.43) +#' profile() +#' +LazyFrame_profile = function() { + .pr$LazyFrame$profile(self) |> unwrap("in $profile()") +} + +#' @title Without_optimization +#' @keywords LazyFrame +#' @return A new LazyFrame with optimizations disabled +#' @examples +#' pl$LazyFrame(mtcars)$ +#' without_optimization() +#' +LazyFrame_without_optimization = "use_extendr_wrapper" + +#' @title With_projection_pushdown +#' @keywords LazyFrame +#' @param toggle whether the optimization is turned on +#' @return A new LazyFrame with specified optimization scheme +#' @examples +#' pl$LazyFrame(mtcars)$ +#' with_projection_pushdown(FALSE) +#' +LazyFrame_with_projection_pushdown = function ( + toggle = TRUE # : bool +) { + .pr$LazyFrame$with_projection_pushdown(self, toggle) |> + unwrap("in $with_projection_pushdown()") +} + +#' @title With_predicate_pushdown +#' @keywords LazyFrame +#' @param toggle whether the optimization is turned on +#' @return A new LazyFrame with specified optimization scheme +#' @examples +#' pl$LazyFrame(mtcars)$ +#' with_predicate_pushdown(FALSE) +#' +LazyFrame_with_predicate_pushdown = function ( + toggle = TRUE # : bool +) { + .pr$LazyFrame$with_predicate_pushdown(self, toggle) |> + unwrap("in $with_predicate_pushdown()") +} + +#' @title With_type_coercion +#' @keywords LazyFrame + #' @param toggle whether the optimization is turned on +#' @return A new LazyFrame with specified optimization scheme +#' @examples +#' pl$LazyFrame(mtcars)$ +#' with_type_coercion(FALSE) +#' +LazyFrame_with_type_coercion = function ( + toggle = TRUE # : bool +) { + .pr$LazyFrame$with_type_coercion(self, toggle) |> + unwrap("in $with_type_coercion()") +} + +#' @title With_simplify_expr +#' @keywords LazyFrame +#' @param toggle whether the optimization is turned on +#' @return A new LazyFrame with specified optimization scheme +#' @examples +#' pl$LazyFrame(mtcars)$ +#' with_simplify_expr(FALSE) +#' +LazyFrame_with_simplify_expr = function ( + toggle = TRUE # : bool +) { + .pr$LazyFrame$with_simplify_expr(self, toggle) |> + unwrap("in $with_simplify_expr()") +} + +#' @title With_slice_pushdown +#' @keywords LazyFrame +#' @param toggle whether the optimization is turned on +#' @return A new LazyFrame with specified optimization scheme +#' @examples +#' pl$LazyFrame(mtcars)$ +#' with_slice_pushdown(FALSE) +#' +LazyFrame_with_slice_pushdown = function ( + toggle = TRUE # : bool +) { + .pr$LazyFrame$with_slice_pushdown(self, toggle) |> + unwrap("in $with_slice_pushdown()") +} + +#' @title With_common_subplan_elimination +#' @keywords LazyFrame +#' @param toggle whether the optimization is turned on +#' @return A new LazyFrame with specified optimization scheme +#' @examples +#' pl$LazyFrame(mtcars)$ +#' with_common_subplan_elimination(FALSE) +#' +LazyFrame_with_common_subplan_elimination = function ( + toggle = TRUE # : bool +) { + .pr$LazyFrame$with_common_subplan_elimination(self, toggle) |> + unwrap("in $with_common_subplan_elimination()") +} + +#' @title With_streaming +#' @keywords LazyFrame +#' @param toggle whether the optimization is turned on +#' @return A new LazyFrame with specified optimization scheme +#' @examples +#' pl$LazyFrame(mtcars)$ +#' with_streaming(FALSE) +#' +LazyFrame_with_streaming = function ( + toggle = TRUE # : bool +) { + .pr$LazyFrame$with_streaming(self, toggle) |> + unwrap("in $with_streaming()") +} diff --git a/man/LazyFrame_profile.Rd b/man/LazyFrame_profile.Rd new file mode 100644 index 000000000..fba60fd17 --- /dev/null +++ b/man/LazyFrame_profile.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_profile} +\alias{LazyFrame_profile} +\title{Profile} +\usage{ +LazyFrame_profile() +} +\value{ +A pair of DataFrames, (collected result, profile stats) +} +\description{ +Profile +} +\examples{ +pl$LazyFrame(mtcars)$ + select(pl$col("mpg") * 0.43) + profile() + +} +\keyword{LazyFrame} diff --git a/man/LazyFrame_with_common_subplan_elimination.Rd b/man/LazyFrame_with_common_subplan_elimination.Rd new file mode 100644 index 000000000..231ac42c5 --- /dev/null +++ b/man/LazyFrame_with_common_subplan_elimination.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_with_common_subplan_elimination} +\alias{LazyFrame_with_common_subplan_elimination} +\title{With_common_subplan_elimination} +\usage{ +LazyFrame_with_common_subplan_elimination(toggle = TRUE) +} +\arguments{ +\item{toggle}{whether the optimization is turned on} +} +\value{ +A new LazyFrame with specified optimization scheme +} +\description{ +With_common_subplan_elimination +} +\examples{ +pl$LazyFrame(mtcars)$ + with_common_subplan_elimination(FALSE) + +} +\keyword{LazyFrame} diff --git a/man/LazyFrame_with_predicate_pushdown.Rd b/man/LazyFrame_with_predicate_pushdown.Rd new file mode 100644 index 000000000..0c8bfa43b --- /dev/null +++ b/man/LazyFrame_with_predicate_pushdown.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_with_predicate_pushdown} +\alias{LazyFrame_with_predicate_pushdown} +\title{With_predicate_pushdown} +\usage{ +LazyFrame_with_predicate_pushdown(toggle = TRUE) +} +\arguments{ +\item{toggle}{whether the optimization is turned on} +} +\value{ +A new LazyFrame with specified optimization scheme +} +\description{ +With_predicate_pushdown +} +\examples{ +pl$LazyFrame(mtcars)$ + with_predicate_pushdown(FALSE) + +} +\keyword{LazyFrame} diff --git a/man/LazyFrame_with_projection_pushdown.Rd b/man/LazyFrame_with_projection_pushdown.Rd new file mode 100644 index 000000000..26e615eef --- /dev/null +++ b/man/LazyFrame_with_projection_pushdown.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_with_projection_pushdown} +\alias{LazyFrame_with_projection_pushdown} +\title{With_projection_pushdown} +\usage{ +LazyFrame_with_projection_pushdown(toggle = TRUE) +} +\arguments{ +\item{toggle}{whether the optimization is turned on} +} +\value{ +A new LazyFrame with specified optimization scheme +} +\description{ +With_projection_pushdown +} +\examples{ +pl$LazyFrame(mtcars)$ + with_projection_pushdown(FALSE) + +} +\keyword{LazyFrame} diff --git a/man/LazyFrame_with_simplify_expr.Rd b/man/LazyFrame_with_simplify_expr.Rd new file mode 100644 index 000000000..b8a2fdde9 --- /dev/null +++ b/man/LazyFrame_with_simplify_expr.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_with_simplify_expr} +\alias{LazyFrame_with_simplify_expr} +\title{With_simplify_expr} +\usage{ +LazyFrame_with_simplify_expr(toggle = TRUE) +} +\arguments{ +\item{toggle}{whether the optimization is turned on} +} +\value{ +A new LazyFrame with specified optimization scheme +} +\description{ +With_simplify_expr +} +\examples{ +pl$LazyFrame(mtcars)$ + with_simplify_expr(FALSE) + +} +\keyword{LazyFrame} diff --git a/man/LazyFrame_with_slice_pushdown.Rd b/man/LazyFrame_with_slice_pushdown.Rd new file mode 100644 index 000000000..258ae6f0f --- /dev/null +++ b/man/LazyFrame_with_slice_pushdown.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_with_slice_pushdown} +\alias{LazyFrame_with_slice_pushdown} +\title{With_slice_pushdown} +\usage{ +LazyFrame_with_slice_pushdown(toggle = TRUE) +} +\arguments{ +\item{toggle}{whether the optimization is turned on} +} +\value{ +A new LazyFrame with specified optimization scheme +} +\description{ +With_slice_pushdown +} +\examples{ +pl$LazyFrame(mtcars)$ + with_slice_pushdown(FALSE) + +} +\keyword{LazyFrame} diff --git a/man/LazyFrame_with_streaming.Rd b/man/LazyFrame_with_streaming.Rd new file mode 100644 index 000000000..c04bd705c --- /dev/null +++ b/man/LazyFrame_with_streaming.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_with_streaming} +\alias{LazyFrame_with_streaming} +\title{With_streaming} +\usage{ +LazyFrame_with_streaming(toggle = TRUE) +} +\arguments{ +\item{toggle}{whether the optimization is turned on} +} +\value{ +A new LazyFrame with specified optimization scheme +} +\description{ +With_streaming +} +\examples{ +pl$LazyFrame(mtcars)$ + with_streaming(FALSE) + +} +\keyword{LazyFrame} diff --git a/man/LazyFrame_with_type_coercion.Rd b/man/LazyFrame_with_type_coercion.Rd new file mode 100644 index 000000000..aef6780a0 --- /dev/null +++ b/man/LazyFrame_with_type_coercion.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_with_type_coercion} +\alias{LazyFrame_with_type_coercion} +\title{With_type_coercion} +\usage{ +LazyFrame_with_type_coercion(toggle = TRUE) +} +\arguments{ +\item{toggle}{whether the optimization is turned on} +} +\value{ +A new LazyFrame with specified optimization scheme +} +\description{ +With_type_coercion +} +\examples{ +pl$LazyFrame(mtcars)$ + with_type_coercion(FALSE) + +} +\keyword{LazyFrame} diff --git a/man/LazyFrame_without_optimization.Rd b/man/LazyFrame_without_optimization.Rd new file mode 100644 index 000000000..580a8f118 --- /dev/null +++ b/man/LazyFrame_without_optimization.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\docType{data} +\name{LazyFrame_without_optimization} +\alias{LazyFrame_without_optimization} +\title{Without_optimization} +\format{ +An object of class \code{character} of length 1. +} +\usage{ +LazyFrame_without_optimization +} +\value{ +A new LazyFrame with optimizations disabled +} +\description{ +Without_optimization +} +\examples{ +pl$LazyFrame(mtcars)$ + without_optimization() + +} +\keyword{LazyFrame} diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 2cea44d8a..78ea49521 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -434,7 +434,7 @@ impl LazyFrame { .profile() .map(|(r, p)| pairlist!(result = RDF(r), profile = RDF(p))) .map_err(polars_to_rpolars_err) - .when("profiling the LazyFrame") + .hint("the data is already available and requires no computation") } } From 8d4ab37de71e291fc2b43c2aaed9ccd9a5d3cc44 Mon Sep 17 00:00:00 2001 From: Sicheng Pan Date: Wed, 12 Jul 2023 15:32:30 -0700 Subject: [PATCH 03/19] Fix typo --- R/lazyframe__lazy.R | 2 +- man/LazyFrame_profile.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 8c51b52e4..6c03d3a33 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -917,7 +917,7 @@ LazyFrame_dtypes = method_as_property(function() { #' @return A pair of DataFrames, (collected result, profile stats) #' @examples #' pl$LazyFrame(mtcars)$ -#' select(pl$col("mpg") * 0.43) +#' select(pl$col("mpg") * 0.43)$ #' profile() #' LazyFrame_profile = function() { diff --git a/man/LazyFrame_profile.Rd b/man/LazyFrame_profile.Rd index fba60fd17..780638718 100644 --- a/man/LazyFrame_profile.Rd +++ b/man/LazyFrame_profile.Rd @@ -14,7 +14,7 @@ Profile } \examples{ pl$LazyFrame(mtcars)$ - select(pl$col("mpg") * 0.43) + select(pl$col("mpg") * 0.43)$ profile() } From f9d2826cf736c61949ef561422e8b6171b0e1423 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Thu, 13 Jul 2023 16:26:07 +0200 Subject: [PATCH 04/19] align with py-polars API --- R/extendr-wrappers.R | 16 +- R/lazyframe__lazy.R | 177 ++++++------------ man/LazyFrame_collect.Rd | 41 +++- man/LazyFrame_collect_background.Rd | 5 +- ...zyFrame_with_common_subplan_elimination.Rd | 23 --- man/LazyFrame_with_predicate_pushdown.Rd | 23 --- man/LazyFrame_with_projection_pushdown.Rd | 23 --- man/LazyFrame_with_simplify_expr.Rd | 23 --- man/LazyFrame_with_slice_pushdown.Rd | 23 --- man/LazyFrame_with_streaming.Rd | 23 --- man/LazyFrame_with_type_coercion.Rd | 23 --- man/LazyFrame_without_optimization.Rd | 24 --- src/rust/src/lazy/dataframe.rs | 77 +++----- tests/testthat/test-lazy_profile.R | 8 + 14 files changed, 140 insertions(+), 369 deletions(-) delete mode 100644 man/LazyFrame_with_common_subplan_elimination.Rd delete mode 100644 man/LazyFrame_with_predicate_pushdown.Rd delete mode 100644 man/LazyFrame_with_projection_pushdown.Rd delete mode 100644 man/LazyFrame_with_simplify_expr.Rd delete mode 100644 man/LazyFrame_with_slice_pushdown.Rd delete mode 100644 man/LazyFrame_with_streaming.Rd delete mode 100644 man/LazyFrame_with_type_coercion.Rd delete mode 100644 man/LazyFrame_without_optimization.Rd create mode 100644 tests/testthat/test-lazy_profile.R diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 26c7406f3..4101eafbf 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -945,21 +945,7 @@ LazyFrame$rename <- function(existing, new) .Call(wrap__LazyFrame__rename, self, LazyFrame$schema <- function() .Call(wrap__LazyFrame__schema, self) -LazyFrame$without_optimization <- function() .Call(wrap__LazyFrame__without_optimization, self) - -LazyFrame$with_projection_pushdown <- function(toggle) .Call(wrap__LazyFrame__with_projection_pushdown, self, toggle) - -LazyFrame$with_predicate_pushdown <- function(toggle) .Call(wrap__LazyFrame__with_predicate_pushdown, self, toggle) - -LazyFrame$with_type_coercion <- function(toggle) .Call(wrap__LazyFrame__with_type_coercion, self, toggle) - -LazyFrame$with_simplify_expr <- function(toggle) .Call(wrap__LazyFrame__with_simplify_expr, self, toggle) - -LazyFrame$with_slice_pushdown <- function(toggle) .Call(wrap__LazyFrame__with_slice_pushdown, self, toggle) - -LazyFrame$with_common_subplan_elimination <- function(toggle) .Call(wrap__LazyFrame__with_common_subplan_elimination, self, toggle) - -LazyFrame$with_streaming <- function(toggle) .Call(wrap__LazyFrame__with_streaming, self, toggle) +LazyFrame$optimization_toggle <- function(type_coercion, predicate_pushdown, projection_pushdown, simplify_expr, slice_pushdown, cse, streaming) .Call(wrap__LazyFrame__optimization_toggle, self, type_coercion, predicate_pushdown, projection_pushdown, simplify_expr, slice_pushdown, cse, streaming) LazyFrame$profile <- function() .Call(wrap__LazyFrame__profile, self) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 6c03d3a33..a439547b4 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -251,15 +251,71 @@ LazyFrame_filter = "use_extendr_wrapper" #' @title New DataFrame from LazyFrame_object$collect() #' @description collect DataFrame by lazy query +#' @param type_coercion Boolean. Do type coercion optimization. +#' @param predicate_pushdown Boolean. Do predicate pushdown optimization. +#' @param projection_pushdown Boolean. Do projection pushdown optimization. +#' @param simplify_expression Boolean. Run simplify expressions optimization. +#' @param no_optimization Boolean. Turn off (certain) optimizations. +#' @param slice_pushdown Boolean. Slice pushdown optimization. +#' @param common_subplan_elimination Boolean. Will try to cache branching subplans that occur on +#' self-joins or unions. +#' @param streaming Boolean. Run parts of the query in a streaming fashion +#' (this is in an alpha state) +#' @param collect_in_background Boolean. Detach this query from R session. Computation will start +#' in background. Get a handle which later can be converted into the resulting DataFrame. Useful +#' in interactive mode to not lock R session. +#' @details +#' Note: use `$fetch()` if you want to run your query on the first `n` rows only. +#' This can be a huge time saver in debugging queries. #' @keywords LazyFrame DataFrame_new -#' @return collected `DataFrame` +#' @return collected `DataFrame` or if colkect #' @examples pl$DataFrame(iris)$lazy()$filter(pl$col("Species") == "setosa")$collect() -LazyFrame_collect = function() { - unwrap(.pr$LazyFrame$collect_handled(self), "in $collect():") +LazyFrame_collect = function( + type_coercion = TRUE, + predicate_pushdown = TRUE, + projection_pushdown = TRUE, + simplify_expression = TRUE, + no_optimization = FALSE, + slice_pushdown = TRUE, + common_subplan_elimination = TRUE, + streaming = FALSE, + collect_in_background = FALSE +) { + + if (isTRUE(no_optimization)) { + predicate_pushdown = FALSE + projection_pushdown = FALSE + slice_pushdown = FALSE + common_subplan_elimination = FALSE + } + + if (isTRUE(streaming)) { + common_subplan_elimination = FALSE + } + + collect_f = if( isTRUE(collect_in_background)) { + .pr$LazyFrame$collect_background + } else { + .pr$LazyFrame$collect_handled + } + + self |> + .pr$LazyFrame$optimization_toggle( + type_coercion, + predicate_pushdown, + projection_pushdown, + simplify_expression, + slice_pushdown, + common_subplan_elimination, + streaming + ) |> + and_then(collect_f) |> + unwrap("in $collect():") } #' @title New DataFrame from LazyFrame_object$collect() -#' @description collect DataFrame by lazy query +#' @description collect DataFrame by lazy query (SOFT DEPRECATED) +#' @details This function is soft deprecated. Use $collect(collect_in_background = TRUE) instead #' @keywords LazyFrame DataFrame_new #' @return collected `DataFrame` #' @examples pl$DataFrame(iris)$lazy()$filter(pl$col("Species") == "setosa")$collect() @@ -924,116 +980,3 @@ LazyFrame_profile = function() { .pr$LazyFrame$profile(self) |> unwrap("in $profile()") } -#' @title Without_optimization -#' @keywords LazyFrame -#' @return A new LazyFrame with optimizations disabled -#' @examples -#' pl$LazyFrame(mtcars)$ -#' without_optimization() -#' -LazyFrame_without_optimization = "use_extendr_wrapper" - -#' @title With_projection_pushdown -#' @keywords LazyFrame -#' @param toggle whether the optimization is turned on -#' @return A new LazyFrame with specified optimization scheme -#' @examples -#' pl$LazyFrame(mtcars)$ -#' with_projection_pushdown(FALSE) -#' -LazyFrame_with_projection_pushdown = function ( - toggle = TRUE # : bool -) { - .pr$LazyFrame$with_projection_pushdown(self, toggle) |> - unwrap("in $with_projection_pushdown()") -} - -#' @title With_predicate_pushdown -#' @keywords LazyFrame -#' @param toggle whether the optimization is turned on -#' @return A new LazyFrame with specified optimization scheme -#' @examples -#' pl$LazyFrame(mtcars)$ -#' with_predicate_pushdown(FALSE) -#' -LazyFrame_with_predicate_pushdown = function ( - toggle = TRUE # : bool -) { - .pr$LazyFrame$with_predicate_pushdown(self, toggle) |> - unwrap("in $with_predicate_pushdown()") -} - -#' @title With_type_coercion -#' @keywords LazyFrame - #' @param toggle whether the optimization is turned on -#' @return A new LazyFrame with specified optimization scheme -#' @examples -#' pl$LazyFrame(mtcars)$ -#' with_type_coercion(FALSE) -#' -LazyFrame_with_type_coercion = function ( - toggle = TRUE # : bool -) { - .pr$LazyFrame$with_type_coercion(self, toggle) |> - unwrap("in $with_type_coercion()") -} - -#' @title With_simplify_expr -#' @keywords LazyFrame -#' @param toggle whether the optimization is turned on -#' @return A new LazyFrame with specified optimization scheme -#' @examples -#' pl$LazyFrame(mtcars)$ -#' with_simplify_expr(FALSE) -#' -LazyFrame_with_simplify_expr = function ( - toggle = TRUE # : bool -) { - .pr$LazyFrame$with_simplify_expr(self, toggle) |> - unwrap("in $with_simplify_expr()") -} - -#' @title With_slice_pushdown -#' @keywords LazyFrame -#' @param toggle whether the optimization is turned on -#' @return A new LazyFrame with specified optimization scheme -#' @examples -#' pl$LazyFrame(mtcars)$ -#' with_slice_pushdown(FALSE) -#' -LazyFrame_with_slice_pushdown = function ( - toggle = TRUE # : bool -) { - .pr$LazyFrame$with_slice_pushdown(self, toggle) |> - unwrap("in $with_slice_pushdown()") -} - -#' @title With_common_subplan_elimination -#' @keywords LazyFrame -#' @param toggle whether the optimization is turned on -#' @return A new LazyFrame with specified optimization scheme -#' @examples -#' pl$LazyFrame(mtcars)$ -#' with_common_subplan_elimination(FALSE) -#' -LazyFrame_with_common_subplan_elimination = function ( - toggle = TRUE # : bool -) { - .pr$LazyFrame$with_common_subplan_elimination(self, toggle) |> - unwrap("in $with_common_subplan_elimination()") -} - -#' @title With_streaming -#' @keywords LazyFrame -#' @param toggle whether the optimization is turned on -#' @return A new LazyFrame with specified optimization scheme -#' @examples -#' pl$LazyFrame(mtcars)$ -#' with_streaming(FALSE) -#' -LazyFrame_with_streaming = function ( - toggle = TRUE # : bool -) { - .pr$LazyFrame$with_streaming(self, toggle) |> - unwrap("in $with_streaming()") -} diff --git a/man/LazyFrame_collect.Rd b/man/LazyFrame_collect.Rd index 1de6d2e41..7ecf73115 100644 --- a/man/LazyFrame_collect.Rd +++ b/man/LazyFrame_collect.Rd @@ -4,14 +4,51 @@ \alias{LazyFrame_collect} \title{New DataFrame from LazyFrame_object$collect()} \usage{ -LazyFrame_collect() +LazyFrame_collect( + type_coercion = TRUE, + predicate_pushdown = TRUE, + projection_pushdown = TRUE, + simplify_expression = TRUE, + no_optimization = FALSE, + slice_pushdown = TRUE, + common_subplan_elimination = TRUE, + streaming = FALSE, + collect_in_background = FALSE +) +} +\arguments{ +\item{type_coercion}{Boolean. Do type coercion optimization.} + +\item{predicate_pushdown}{Boolean. Do predicate pushdown optimization.} + +\item{projection_pushdown}{Boolean. Do projection pushdown optimization.} + +\item{simplify_expression}{Boolean. Run simplify expressions optimization.} + +\item{no_optimization}{Boolean. Turn off (certain) optimizations.} + +\item{slice_pushdown}{Boolean. Slice pushdown optimization.} + +\item{common_subplan_elimination}{Boolean. Will try to cache branching subplans that occur on +self-joins or unions.} + +\item{streaming}{Boolean. Run parts of the query in a streaming fashion +(this is in an alpha state)} + +\item{collect_in_background}{Boolean. Detach this query from R session. Computation will start +in background. Get a handle which later can be converted into the resulting DataFrame. Useful +in interactive mode to not lock R session.} } \value{ -collected \code{DataFrame} +collected \code{DataFrame} or if colkect } \description{ collect DataFrame by lazy query } +\details{ +Note: use \verb{$fetch()} if you want to run your query on the first \code{n} rows only. +This can be a huge time saver in debugging queries. +} \examples{ pl$DataFrame(iris)$lazy()$filter(pl$col("Species") == "setosa")$collect() } diff --git a/man/LazyFrame_collect_background.Rd b/man/LazyFrame_collect_background.Rd index 4506b06fd..9710c97b1 100644 --- a/man/LazyFrame_collect_background.Rd +++ b/man/LazyFrame_collect_background.Rd @@ -10,7 +10,10 @@ LazyFrame_collect_background() collected \code{DataFrame} } \description{ -collect DataFrame by lazy query +collect DataFrame by lazy query (SOFT DEPRECATED) +} +\details{ +This function is soft deprecated. Use $collect(collect_in_background = TRUE) instead } \examples{ pl$DataFrame(iris)$lazy()$filter(pl$col("Species") == "setosa")$collect() diff --git a/man/LazyFrame_with_common_subplan_elimination.Rd b/man/LazyFrame_with_common_subplan_elimination.Rd deleted file mode 100644 index 231ac42c5..000000000 --- a/man/LazyFrame_with_common_subplan_elimination.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lazyframe__lazy.R -\name{LazyFrame_with_common_subplan_elimination} -\alias{LazyFrame_with_common_subplan_elimination} -\title{With_common_subplan_elimination} -\usage{ -LazyFrame_with_common_subplan_elimination(toggle = TRUE) -} -\arguments{ -\item{toggle}{whether the optimization is turned on} -} -\value{ -A new LazyFrame with specified optimization scheme -} -\description{ -With_common_subplan_elimination -} -\examples{ -pl$LazyFrame(mtcars)$ - with_common_subplan_elimination(FALSE) - -} -\keyword{LazyFrame} diff --git a/man/LazyFrame_with_predicate_pushdown.Rd b/man/LazyFrame_with_predicate_pushdown.Rd deleted file mode 100644 index 0c8bfa43b..000000000 --- a/man/LazyFrame_with_predicate_pushdown.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lazyframe__lazy.R -\name{LazyFrame_with_predicate_pushdown} -\alias{LazyFrame_with_predicate_pushdown} -\title{With_predicate_pushdown} -\usage{ -LazyFrame_with_predicate_pushdown(toggle = TRUE) -} -\arguments{ -\item{toggle}{whether the optimization is turned on} -} -\value{ -A new LazyFrame with specified optimization scheme -} -\description{ -With_predicate_pushdown -} -\examples{ -pl$LazyFrame(mtcars)$ - with_predicate_pushdown(FALSE) - -} -\keyword{LazyFrame} diff --git a/man/LazyFrame_with_projection_pushdown.Rd b/man/LazyFrame_with_projection_pushdown.Rd deleted file mode 100644 index 26e615eef..000000000 --- a/man/LazyFrame_with_projection_pushdown.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lazyframe__lazy.R -\name{LazyFrame_with_projection_pushdown} -\alias{LazyFrame_with_projection_pushdown} -\title{With_projection_pushdown} -\usage{ -LazyFrame_with_projection_pushdown(toggle = TRUE) -} -\arguments{ -\item{toggle}{whether the optimization is turned on} -} -\value{ -A new LazyFrame with specified optimization scheme -} -\description{ -With_projection_pushdown -} -\examples{ -pl$LazyFrame(mtcars)$ - with_projection_pushdown(FALSE) - -} -\keyword{LazyFrame} diff --git a/man/LazyFrame_with_simplify_expr.Rd b/man/LazyFrame_with_simplify_expr.Rd deleted file mode 100644 index b8a2fdde9..000000000 --- a/man/LazyFrame_with_simplify_expr.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lazyframe__lazy.R -\name{LazyFrame_with_simplify_expr} -\alias{LazyFrame_with_simplify_expr} -\title{With_simplify_expr} -\usage{ -LazyFrame_with_simplify_expr(toggle = TRUE) -} -\arguments{ -\item{toggle}{whether the optimization is turned on} -} -\value{ -A new LazyFrame with specified optimization scheme -} -\description{ -With_simplify_expr -} -\examples{ -pl$LazyFrame(mtcars)$ - with_simplify_expr(FALSE) - -} -\keyword{LazyFrame} diff --git a/man/LazyFrame_with_slice_pushdown.Rd b/man/LazyFrame_with_slice_pushdown.Rd deleted file mode 100644 index 258ae6f0f..000000000 --- a/man/LazyFrame_with_slice_pushdown.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lazyframe__lazy.R -\name{LazyFrame_with_slice_pushdown} -\alias{LazyFrame_with_slice_pushdown} -\title{With_slice_pushdown} -\usage{ -LazyFrame_with_slice_pushdown(toggle = TRUE) -} -\arguments{ -\item{toggle}{whether the optimization is turned on} -} -\value{ -A new LazyFrame with specified optimization scheme -} -\description{ -With_slice_pushdown -} -\examples{ -pl$LazyFrame(mtcars)$ - with_slice_pushdown(FALSE) - -} -\keyword{LazyFrame} diff --git a/man/LazyFrame_with_streaming.Rd b/man/LazyFrame_with_streaming.Rd deleted file mode 100644 index c04bd705c..000000000 --- a/man/LazyFrame_with_streaming.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lazyframe__lazy.R -\name{LazyFrame_with_streaming} -\alias{LazyFrame_with_streaming} -\title{With_streaming} -\usage{ -LazyFrame_with_streaming(toggle = TRUE) -} -\arguments{ -\item{toggle}{whether the optimization is turned on} -} -\value{ -A new LazyFrame with specified optimization scheme -} -\description{ -With_streaming -} -\examples{ -pl$LazyFrame(mtcars)$ - with_streaming(FALSE) - -} -\keyword{LazyFrame} diff --git a/man/LazyFrame_with_type_coercion.Rd b/man/LazyFrame_with_type_coercion.Rd deleted file mode 100644 index aef6780a0..000000000 --- a/man/LazyFrame_with_type_coercion.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lazyframe__lazy.R -\name{LazyFrame_with_type_coercion} -\alias{LazyFrame_with_type_coercion} -\title{With_type_coercion} -\usage{ -LazyFrame_with_type_coercion(toggle = TRUE) -} -\arguments{ -\item{toggle}{whether the optimization is turned on} -} -\value{ -A new LazyFrame with specified optimization scheme -} -\description{ -With_type_coercion -} -\examples{ -pl$LazyFrame(mtcars)$ - with_type_coercion(FALSE) - -} -\keyword{LazyFrame} diff --git a/man/LazyFrame_without_optimization.Rd b/man/LazyFrame_without_optimization.Rd deleted file mode 100644 index 580a8f118..000000000 --- a/man/LazyFrame_without_optimization.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lazyframe__lazy.R -\docType{data} -\name{LazyFrame_without_optimization} -\alias{LazyFrame_without_optimization} -\title{Without_optimization} -\format{ -An object of class \code{character} of length 1. -} -\usage{ -LazyFrame_without_optimization -} -\value{ -A new LazyFrame with optimizations disabled -} -\description{ -Without_optimization -} -\examples{ -pl$LazyFrame(mtcars)$ - without_optimization() - -} -\keyword{LazyFrame} diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 78ea49521..a2117f8e5 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -378,61 +378,40 @@ impl LazyFrame { )) } - fn without_optimization(&self) -> Self { - self.0.clone().without_optimizations().into() - } - - fn with_projection_pushdown(&self, toggle: Robj) -> RResult { - Ok(Self( - self.0 - .clone() - .with_projection_pushdown(robj_to!(bool, toggle)?), - )) - } - - fn with_predicate_pushdown(&self, toggle: Robj) -> RResult { - Ok(Self( - self.0 - .clone() - .with_predicate_pushdown(robj_to!(bool, toggle)?), - )) - } - - fn with_type_coercion(&self, toggle: Robj) -> RResult { - Ok(Self( - self.0.clone().with_type_coercion(robj_to!(bool, toggle)?), - )) - } - - fn with_simplify_expr(&self, toggle: Robj) -> RResult { - Ok(Self( - self.0.clone().with_simplify_expr(robj_to!(bool, toggle)?), - )) - } - - fn with_slice_pushdown(&self, toggle: Robj) -> RResult { - Ok(Self( - self.0.clone().with_slice_pushdown(robj_to!(bool, toggle)?), - )) - } - - fn with_common_subplan_elimination(&self, toggle: Robj) -> RResult { - Ok(Self( - self.0 - .clone() - .with_common_subplan_elimination(robj_to!(bool, toggle)?), - )) - } + #[allow(clippy::too_many_arguments)] + fn optimization_toggle( + &self, + type_coercion: Robj, + predicate_pushdown: Robj, + projection_pushdown: Robj, + simplify_expr: Robj, + slice_pushdown: Robj, + cse: Robj, + streaming: Robj, + ) -> RResult { + let ldf = self + .0 + .clone() + .with_type_coercion(robj_to!(bool, type_coercion)?) + .with_predicate_pushdown(robj_to!(bool, predicate_pushdown)?) + .with_simplify_expr(robj_to!(bool, simplify_expr)?) + .with_slice_pushdown(robj_to!(bool, slice_pushdown)?) + .with_streaming(robj_to!(bool, streaming)?) + .with_projection_pushdown(robj_to!(bool, projection_pushdown)?); + + #[cfg(feature = "cse")] + { + ldf = ldf.with_common_subplan_elimination(robj_to!(bool, cse)?); + } - fn with_streaming(&self, toggle: Robj) -> RResult { - Ok(Self(self.0.clone().with_streaming(robj_to!(bool, toggle)?))) + Ok(ldf.into()) } - fn profile(&self) -> RResult { + fn profile(&self) -> RResult { self.0 .clone() .profile() - .map(|(r, p)| pairlist!(result = RDF(r), profile = RDF(p))) + .map(|(r, p)| list!(result = RDF(r), profile = RDF(p))) .map_err(polars_to_rpolars_err) .hint("the data is already available and requires no computation") } diff --git a/tests/testthat/test-lazy_profile.R b/tests/testthat/test-lazy_profile.R new file mode 100644 index 000000000..0dd362c6f --- /dev/null +++ b/tests/testthat/test-lazy_profile.R @@ -0,0 +1,8 @@ +test_that("can modify lazy profile settings", { + #some way to check if .pr$LazyFrame$optimization_toggle works + + #toggle settings + #read back settings + #compare + +}) From b64fd8d4b834c93059c3ff2895ded69c07ef2fd4 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Thu, 3 Aug 2023 14:24:11 +0200 Subject: [PATCH 05/19] let profile do R fns, always cse feature, docs --- DESCRIPTION | 2 +- R/expr__expr.R | 3 +- R/extendr-wrappers.R | 2 - R/lazyframe__lazy.R | 31 +++++++++++--- man/LazyFrame_profile.Rd | 29 ++++++++++--- man/nanoarrow.Rd | 8 ++-- src/rust/Cargo.toml | 1 + src/rust/src/concurrent.rs | 69 +++++++++++++++++++++--------- src/rust/src/lazy/dataframe.rs | 39 ++++------------- src/rust/src/rdataframe/mod.rs | 4 +- tests/testthat/test-lazy_profile.R | 33 ++++++++++++++ 11 files changed, 149 insertions(+), 72 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1c8d61c25..522172062 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -97,5 +97,5 @@ Collate: 'translation.R' 'vctrs.R' 'zzz.R' -Config/rextendr/version: 0.3.1 +Config/rextendr/version: 0.3.1.9000 VignetteBuilder: knitr diff --git a/R/expr__expr.R b/R/expr__expr.R index c41f906b6..ed9018cb1 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -1121,8 +1121,7 @@ Expr_map_alias = function(fun) { ) { assign(".warn_map_alias", 1L, envir = runtime_state) # it does not seem map alias is executed multi-threaded but rather immediately during building lazy query - # if ever crashing, any lazy method like select, filter, with_columns must use something like handle_thread_r_requests() - # then handle_thread_r_requests should be rewritten to handle any type. + # if ever crashing, any lazy method like select, filter, with_columns must use something like filter_with_r_func_support() message("map_alias function is experimentally without some thread-safeguards, please report any crashes") # TODO resolve } if (!is.function(fun)) pstop(err = "alias_map fun must be a function") diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index afb378779..02eff7686 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -881,8 +881,6 @@ LazyFrame$collect_background <- function() .Call(wrap__LazyFrame__collect_backgr LazyFrame$collect <- function() .Call(wrap__LazyFrame__collect, self) -LazyFrame$collect_handled <- function() .Call(wrap__LazyFrame__collect_handled, self) - LazyFrame$first <- function() .Call(wrap__LazyFrame__first, self) LazyFrame$last <- function() .Call(wrap__LazyFrame__last, self) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 555992f0d..22668c914 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -310,7 +310,7 @@ LazyFrame_collect = function( collect_f = if( isTRUE(collect_in_background)) { .pr$LazyFrame$collect_background } else { - .pr$LazyFrame$collect_handled + .pr$LazyFrame$collect } self |> @@ -982,14 +982,33 @@ LazyFrame_dtypes = method_as_property(function() { unwrap("in $dtypes()") }) -#' @title Profile +#' @title Collect and profile a lazy query. +#' @description This will run the query and return a tuple containing the materialized DataFrame and +#' a DataFrame that contains profiling information of each node that is executed. +#' @details The units of the timings are microseconds. +#' #' @keywords LazyFrame -#' @return A pair of DataFrames, (collected result, profile stats) +#' @return List of two DataFrames, (collected result, profile stats) #' @examples -#' pl$LazyFrame(mtcars)$ -#' select(pl$col("mpg") * 0.43)$ +#' +#' #Use $profile() to compare two queries +#' +#' # print one '.', take a series convert to r vector, take first value, add 5 +#' r_func = \(s) {cat(".");s$to_r()[1] + 5} +#' +#' # map each Species-group of each numeric column with an R function, takes ~7000us slow ! +#' pl$LazyFrame(iris)$ +#' sort("Sepal.Length")$ #for no specific reason +#' groupby("Species", maintain_order = TRUE)$ +#' agg(pl$col(pl$Float64)$apply(r_func))$ #' profile() #' +#' # map each Species-group with native polars, takes ~120us better +#' pl$LazyFrame(iris)$ +#' sort("Sepal.Length")$ +#' groupby("Species", maintain_order = TRUE)$ +#' agg(pl$col(pl$Float64)$first() + 5 )$ +#' profile() LazyFrame_profile = function() { .pr$LazyFrame$profile(self) |> unwrap("in $profile()") } @@ -1014,4 +1033,4 @@ LazyFrame_explode = function(columns = list(), ...) { dotdotdot_args = list2(...) .pr$LazyFrame$explode(self, columns, dotdotdot_args) |> unwrap("in explode():") -} \ No newline at end of file +} diff --git a/man/LazyFrame_profile.Rd b/man/LazyFrame_profile.Rd index 780638718..aa307e71a 100644 --- a/man/LazyFrame_profile.Rd +++ b/man/LazyFrame_profile.Rd @@ -2,20 +2,39 @@ % Please edit documentation in R/lazyframe__lazy.R \name{LazyFrame_profile} \alias{LazyFrame_profile} -\title{Profile} +\title{Collect and profile a lazy query.} \usage{ LazyFrame_profile() } \value{ -A pair of DataFrames, (collected result, profile stats) +List of two DataFrames, (collected result, profile stats) } \description{ -Profile +This will run the query and return a tuple containing the materialized DataFrame and +a DataFrame that contains profiling information of each node that is executed. +} +\details{ +The units of the timings are microseconds. } \examples{ -pl$LazyFrame(mtcars)$ - select(pl$col("mpg") * 0.43)$ + +#Use $profile() to compare two queries + +# print one '.', take a series convert to r vector, take first value, add 5 +r_func = \(s) {cat(".");s$to_r()[1] + 5} + +# map each Species-group of each numeric column with an R function, takes ~7000us slow ! +pl$LazyFrame(iris)$ + sort("Sepal.Length")$ #for no specific reason + groupby("Species", maintain_order = TRUE)$ + agg(pl$col(pl$Float64)$apply(r_func))$ profile() +# map each Species-group with native polars, takes ~120us better +pl$LazyFrame(iris)$ + sort("Sepal.Length")$ + groupby("Species", maintain_order = TRUE)$ + agg(pl$col(pl$Float64)$first() + 5 )$ + profile() } \keyword{LazyFrame} diff --git a/man/nanoarrow.Rd b/man/nanoarrow.Rd index 7af2018a2..3ecaf02a4 100644 --- a/man/nanoarrow.Rd +++ b/man/nanoarrow.Rd @@ -16,13 +16,13 @@ \alias{as_record_batch_reader.DataFrame} \title{polars to nanoarrow and arrow} \usage{ -as_nanoarrow_array_stream.DataFrame(x, ..., schema = NULL) +\method{as_nanoarrow_array_stream}{DataFrame}(x, ..., schema = NULL) -infer_nanoarrow_schema.DataFrame(x, ...) +\method{infer_nanoarrow_schema}{DataFrame}(x, ...) -as_arrow_table.DataFrame(x, ...) +\method{as_arrow_table}{DataFrame}(x, ...) -as_record_batch_reader.DataFrame(x, ..., schema = NULL) +\method{as_record_batch_reader}{DataFrame}(x, ..., schema = NULL) } \arguments{ \item{x}{a polars DataFrame} diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index 85e6b81e9..5a8ec4e9b 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -80,6 +80,7 @@ features = [ "repeat_by", "interpolate", #"list", + "cse", "ewma", "rank", "diff", diff --git a/src/rust/src/concurrent.rs b/src/rust/src/concurrent.rs index f39c538a9..3f9dae67f 100644 --- a/src/rust/src/concurrent.rs +++ b/src/rust/src/concurrent.rs @@ -9,11 +9,36 @@ use crate::CONFIG; use polars::prelude as pl; use crate::rdataframe::Series; +use crate::rpolarserr::*; use extendr_api::prelude::*; use extendr_api::Conversions; use std::result::Result; use std::thread::JoinHandle; +// create the standard closure to serve R maps. This is how main thread which can do +// R calls should process a request from a polars thread worker to run an R function +fn make_closure_serve_r() -> impl Fn( + (ParRObj, polars::prelude::Series), +) -> Result> { + |(probj, s): (ParRObj, pl::Series)| -> Result> { + //unpack user-R-function + let f = probj.0.as_function().ok_or_else(|| { + extendr_api::error::Error::Other(format!( + "provided input is not an R function but a: {:?}", + probj.0 + )) + })?; + + // call user-R-function with Series as input, return Robj (likeliy as Series) + let rseries_robj = f.call(pairlist!(Series(s)))?; + + // return of user-R-function may not be Series, return Err if so + let s = Series::any_robj_to_pl_series_result(rseries_robj)?; + + Ok(s) + } +} + // This functions allows to call .collect() on polars lazy frame. A lazy frame may contain user defined functions // which could call R from any spawned thread by polars. This function is a bridge between multithraedded polars // and mostly single threaded only R @@ -23,12 +48,10 @@ use std::thread::JoinHandle; //1: What to run (polars collect), 2: what handler to execute code //3: Expr.map or such, passes an R user defined function in a closure to the lazyframe. This closure describes //how to request R functions run on the mainthread with a ThreadCom object. - -pub fn handle_thread_r_requests(lazy_df: pl::LazyFrame) -> pl::PolarsResult { +pub fn collect_with_r_func_support(lazy_df: pl::LazyFrame) -> pl::PolarsResult { let res_res_df = concurrent_handler( // Closure 1: start concurrent handler and get final result // This what we want to do in the first place ... to run LazyFrame.collect() thread-safe - //this closure gets spawned in a child of main thred, tc is a ThreadCom struct move |tc| { //start polars .collect, which it self can spawn many new threads @@ -45,23 +68,7 @@ pub fn handle_thread_r_requests(lazy_df: pl::LazyFrame) -> pl::PolarsResult Result> { - //unpack user-R-function - let f = probj.0.as_function().ok_or_else(|| { - extendr_api::error::Error::Other(format!( - "provided input is not an R function but a: {:?}", - probj.0 - )) - })?; - - // call user-R-function with Series as input, return Robj (likeliy as Series) - let rseries_robj = f.call(pairlist!(Series(s)))?; - - // return of user-R-function may not be Series, return Err if so - let s = Series::any_robj_to_pl_series_result(rseries_robj)?; - - Ok(s) - }, + make_closure_serve_r(), //CONFIG is "global variable" where threads can request a new ThreadCom &CONFIG, ); @@ -81,6 +88,28 @@ pub fn handle_thread_r_requests(lazy_df: pl::LazyFrame) -> pl::PolarsResult RResult<(pl::DataFrame, pl::DataFrame)> { + let res_res_df: Result< + Result<(pl::DataFrame, pl::DataFrame), pl::PolarsError>, + Box, + > = concurrent_handler( + move |tc| { + let retval = lazy_df.profile(); + ThreadCom::kill_global(&CONFIG); + drop(tc); + retval + }, + make_closure_serve_r(), + &CONFIG, + ); + + res_res_df + .map_err(|err| RPolarsErr::new().plain(err.to_string()))? + .map_err(polars_to_rpolars_err) +} + #[derive(Debug, Default)] pub struct PolarsBackgroundHandle(Option>>); diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index fbcc00191..62c95cf34 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -1,4 +1,6 @@ -use crate::concurrent::{handle_thread_r_requests, PolarsBackgroundHandle}; +use crate::concurrent::{ + collect_with_r_func_support, profile_with_r_func_support, PolarsBackgroundHandle, +}; use crate::conversion::strings_to_smartstrings; use crate::lazy::dsl::*; use crate::rdataframe::DataFrame as RDF; @@ -7,7 +9,7 @@ use crate::rdatatype::new_quantile_interpolation_option; use crate::rdatatype::new_unique_keep_strategy; use crate::rdatatype::{new_asof_strategy, RPolarsDataType}; use crate::robj_to; -use crate::rpolarserr::{rerr, polars_to_rpolars_err, RResult, Rctx, WithRctx}; +use crate::rpolarserr::{rerr, RResult, Rctx, WithRctx}; use crate::utils::wrappers::null_to_opt; use crate::utils::{r_result_list, try_f64_into_usize}; use extendr_api::prelude::*; @@ -65,23 +67,8 @@ impl LazyFrame { PolarsBackgroundHandle::new(self) } - pub fn collect(&self) -> Result { - handle_thread_r_requests(self.clone().0).map_err(|err| { - //improve err messages - let err_string = match err { - pl::PolarsError::InvalidOperation(x) => { - format!("Something (Likely a Column) named {:?} was not found", x) - } - x => format!("{:?}", x), - }; - - format!("when calling $collect() on LazyFrame:\n{}", err_string) - }) - } - - pub fn collect_handled(&self) -> RResult { - use crate::rpolarserr::WithRctx; - handle_thread_r_requests(self.clone().0).when("calling $collect() on LazyFrame") + pub fn collect(&self) -> RResult { + collect_with_r_func_support(self.clone().0).when("calling $collect() on LazyFrame") } fn first(&self) -> Self { @@ -408,23 +395,15 @@ impl LazyFrame { .with_simplify_expr(robj_to!(bool, simplify_expr)?) .with_slice_pushdown(robj_to!(bool, slice_pushdown)?) .with_streaming(robj_to!(bool, streaming)?) - .with_projection_pushdown(robj_to!(bool, projection_pushdown)?); - - #[cfg(feature = "cse")] - { - ldf = ldf.with_common_subplan_elimination(robj_to!(bool, cse)?); - } + .with_projection_pushdown(robj_to!(bool, projection_pushdown)?) + .with_common_subplan_elimination(robj_to!(bool, cse)?); Ok(ldf.into()) } fn profile(&self) -> RResult { - self.0 - .clone() - .profile() + profile_with_r_func_support(self.0.clone()) .map(|(r, p)| list!(result = RDF(r), profile = RDF(p))) - .map_err(polars_to_rpolars_err) - .hint("the data is already available and requires no computation") } fn explode(&self, columns: Robj, dotdotdot_args: Robj) -> RResult { diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index b9090fa52..bb509f04b 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -279,7 +279,7 @@ impl DataFrame { } pub fn select(&mut self, exprs: Robj) -> RResult { - self.lazy().select(exprs)?.collect_handled() + self.lazy().select(exprs)?.collect() } //used in GroupBy, not DataFrame @@ -288,7 +288,7 @@ impl DataFrame { group_exprs: Robj, agg_exprs: Robj, maintain_order: Robj, - ) -> Result { + ) -> RResult { let group_exprs: Vec = robj_to!(VecPLExprCol, group_exprs)?; let agg_exprs: Vec = robj_to!(VecPLExprCol, agg_exprs)?; let maintain_order = robj_to!(Option, bool, maintain_order)?.unwrap_or(false); diff --git a/tests/testthat/test-lazy_profile.R b/tests/testthat/test-lazy_profile.R index 0dd362c6f..202092da7 100644 --- a/tests/testthat/test-lazy_profile.R +++ b/tests/testthat/test-lazy_profile.R @@ -4,5 +4,38 @@ test_that("can modify lazy profile settings", { #toggle settings #read back settings #compare + expect_identical(1,1) }) + +test_that("$profile", { + + #profile minimal test + p0 = pl$LazyFrame()$select(pl$lit(1:3)$alias("x"))$profile() + expect_true(inherits(p0,"list")) + expect_identical(p0$result$to_list(),list(x = 1:3)) + expect_identical(p0$profile$columns,c("node","start","end")) + + + #profile supports with and without R functions + p1 = pl$LazyFrame(iris)$ + sort("Sepal.Length")$ + groupby("Species", maintain_order = TRUE)$ + agg(pl$col(pl$Float64)$first()$add(5)$suffix("_apply"))$ + profile() + + r_func = \(s) s$to_r()[1] + 5 + p2 = pl$LazyFrame(iris)$ + sort("Sepal.Length")$ #for no specific reason + groupby("Species", maintain_order = TRUE)$ + agg(pl$col(pl$Float64)$apply(r_func))$ + profile() + + # map each Species-group with native polars, takes ~120us better + expect_identical( + p2$result$as_data_frame(), + p1$result$as_data_frame() + ) + +}) + From ec01a7ed3523e4b6b9589151dd7e5039b262a07a Mon Sep 17 00:00:00 2001 From: sorhawell Date: Thu, 3 Aug 2023 14:51:41 +0200 Subject: [PATCH 06/19] tidy up collect+profile --- src/rust/src/concurrent.rs | 106 ++++++++++++--------------------- src/rust/src/lazy/dataframe.rs | 5 +- 2 files changed, 40 insertions(+), 71 deletions(-) diff --git a/src/rust/src/concurrent.rs b/src/rust/src/concurrent.rs index 3f9dae67f..2c90e5d11 100644 --- a/src/rust/src/concurrent.rs +++ b/src/rust/src/concurrent.rs @@ -15,99 +15,69 @@ use extendr_api::Conversions; use std::result::Result; use std::thread::JoinHandle; -// create the standard closure to serve R maps. This is how main thread which can do -// R calls should process a request from a polars thread worker to run an R function -fn make_closure_serve_r() -> impl Fn( - (ParRObj, polars::prelude::Series), -) -> Result> { - |(probj, s): (ParRObj, pl::Series)| -> Result> { - //unpack user-R-function - let f = probj.0.as_function().ok_or_else(|| { - extendr_api::error::Error::Other(format!( - "provided input is not an R function but a: {:?}", - probj.0 - )) - })?; - - // call user-R-function with Series as input, return Robj (likeliy as Series) - let rseries_robj = f.call(pairlist!(Series(s)))?; - - // return of user-R-function may not be Series, return Err if so - let s = Series::any_robj_to_pl_series_result(rseries_robj)?; - - Ok(s) - } +// This is the standard way the main thread which can call the R session, +// should process a request from a polars thread worker to run an R function +fn serve_r((probj, s): (ParRObj, pl::Series)) -> Result> { + //unpack user-R-function + let f = probj.0.as_function().ok_or_else(|| { + extendr_api::error::Error::Other(format!( + "provided input is not an R function but a: {:?}", + probj.0 + )) + })?; + + // call user-R-function with Series as input, return Robj (likeliy as Series) + let rseries_robj = f.call(pairlist!(Series(s)))?; + + // return of user-R-function may not be Series, return Err if so + let s = Series::any_robj_to_pl_series_result(rseries_robj)?; + + Ok(s) } // This functions allows to call .collect() on polars lazy frame. A lazy frame may contain user defined functions // which could call R from any spawned thread by polars. This function is a bridge between multithraedded polars // and mostly single threaded only R - -//handle_thread_r_request() is a special case of the concurrent_handler() -//handle_thread_r_request passes 2 closures to concurrent_handler() + 1 closure to lazyframe -//1: What to run (polars collect), 2: what handler to execute code -//3: Expr.map or such, passes an R user defined function in a closure to the lazyframe. This closure describes -//how to request R functions run on the mainthread with a ThreadCom object. -pub fn collect_with_r_func_support(lazy_df: pl::LazyFrame) -> pl::PolarsResult { - let res_res_df = concurrent_handler( - // Closure 1: start concurrent handler and get final result - // This what we want to do in the first place ... to run LazyFrame.collect() thread-safe - //this closure gets spawned in a child of main thred, tc is a ThreadCom struct +pub fn collect_with_r_func_support(lazy_df: pl::LazyFrame) -> RResult { + let new_df = concurrent_handler( + // closure 1: spawned by main thread + // tc is a ThreadCom which any child thread can use to submit R jobs to main thread move |tc| { - //start polars .collect, which it self can spawn many new threads + // get return value let retval = lazy_df.collect(); - //collect done, we're all done know - //drop local and global ThreadCom clone, signals to main/R-serving thread to shut down. + // drop the last two ThreadCom clones, signals to main/R-serving thread to shut down. ThreadCom::kill_global(&CONFIG); drop(tc); - //taadaaa return value retval }, - //closure 2 - //how should the R-serving mainthread handle a user function requst? - //synopsis: run the user-R-function, input/ouput is a Series - make_closure_serve_r(), - //CONFIG is "global variable" where threads can request a new ThreadCom + // closure 2: how to serve polars worker R job request in main thread + serve_r, + //CONFIG is "global variable" where any new thread can request a clone of ThreadCom to establish contact with main thread &CONFIG, - ); - //concurrent handling complete - - //bubble on concurrent errors - let res_df = res_res_df.map_err(|err| { - pl::polars_err!( - ComputeError: "error via polars concurrent R handler {:?}", err, - ) - })?; - - //bubble polars errors - let new_df = res_df?; + ) + .map_err(|err| RPolarsErr::new().plain(err.to_string()))? + .map_err(polars_to_rpolars_err); //wrap ok - Ok(DataFrame(new_df)) + Ok(DataFrame(new_df?)) } -pub fn profile_with_r_func_support( - lazy_df: pl::LazyFrame, -) -> RResult<(pl::DataFrame, pl::DataFrame)> { - let res_res_df: Result< - Result<(pl::DataFrame, pl::DataFrame), pl::PolarsError>, - Box, - > = concurrent_handler( +pub fn profile_with_r_func_support(lazy_df: pl::LazyFrame) -> RResult<(DataFrame, DataFrame)> { + concurrent_handler( move |tc| { let retval = lazy_df.profile(); ThreadCom::kill_global(&CONFIG); drop(tc); retval }, - make_closure_serve_r(), + serve_r, &CONFIG, - ); - - res_res_df - .map_err(|err| RPolarsErr::new().plain(err.to_string()))? - .map_err(polars_to_rpolars_err) + ) + .map_err(|err| RPolarsErr::new().plain(err.to_string()))? + .map_err(polars_to_rpolars_err) + .map(|(result_df, profile_df)| (DataFrame(result_df), DataFrame(profile_df))) } #[derive(Debug, Default)] diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 62c95cf34..977642b75 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -68,7 +68,7 @@ impl LazyFrame { } pub fn collect(&self) -> RResult { - collect_with_r_func_support(self.clone().0).when("calling $collect() on LazyFrame") + collect_with_r_func_support(self.clone().0) } fn first(&self) -> Self { @@ -402,8 +402,7 @@ impl LazyFrame { } fn profile(&self) -> RResult { - profile_with_r_func_support(self.0.clone()) - .map(|(r, p)| list!(result = RDF(r), profile = RDF(p))) + profile_with_r_func_support(self.0.clone()).map(|(r, p)| list!(result = r, profile = p)) } fn explode(&self, columns: Robj, dotdotdot_args: Robj) -> RResult { From 17d15a5ec82a5877e15986e697dd6db1cfbd7be7 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Fri, 4 Aug 2023 01:30:20 +0200 Subject: [PATCH 07/19] add robj_to!(i32 + f64 ) + improve err msg --- R/extendr-wrappers.R | 4 ++ src/rust/src/rlib.rs | 13 ++++ src/rust/src/series.rs | 16 ++--- src/rust/src/utils/mod.rs | 140 +++++++++++++++++++++++++++++------- tests/testthat/test-bit64.R | 52 +++++++++++--- 5 files changed, 181 insertions(+), 44 deletions(-) diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 02eff7686..27fe32153 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -53,10 +53,14 @@ dtype_str_repr <- function(dtype) .Call(wrap__dtype_str_repr, dtype) test_robj_to_usize <- function(robj) .Call(wrap__test_robj_to_usize, robj) +test_robj_to_f64 <- function(robj) .Call(wrap__test_robj_to_f64, robj) + test_robj_to_i64 <- function(robj) .Call(wrap__test_robj_to_i64, robj) test_robj_to_u32 <- function(robj) .Call(wrap__test_robj_to_u32, robj) +test_robj_to_i32 <- function(robj) .Call(wrap__test_robj_to_i32, robj) + test_print_string <- function(s) invisible(.Call(wrap__test_print_string, s)) RPolarsErr <- new.env(parent = emptyenv()) diff --git a/src/rust/src/rlib.rs b/src/rust/src/rlib.rs index 40e81102c..fe995be36 100644 --- a/src/rust/src/rlib.rs +++ b/src/rust/src/rlib.rs @@ -261,6 +261,12 @@ pub fn dtype_str_repr(dtype: Robj) -> RResult { fn test_robj_to_usize(robj: Robj) -> RResult { robj_to!(usize, robj).map(rdbg) } + +#[extendr] +fn test_robj_to_f64(robj: Robj) -> RResult { + robj_to!(f64, robj).map(rdbg) +} + #[extendr] fn test_robj_to_i64(robj: Robj) -> RResult { robj_to!(i64, robj).map(rdbg) @@ -271,6 +277,11 @@ fn test_robj_to_u32(robj: Robj) -> RResult { robj_to!(u32, robj).map(rdbg) } +#[extendr] +fn test_robj_to_i32(robj: Robj) -> RResult { + robj_to!(i32, robj).map(rdbg) +} + #[extendr] fn test_print_string(s: String) { rprintln!("{}", s); @@ -299,7 +310,9 @@ extendr_module! { fn dtype_str_repr; fn test_robj_to_usize; + fn test_robj_to_f64; fn test_robj_to_i64; fn test_robj_to_u32; + fn test_robj_to_i32; fn test_print_string; } diff --git a/src/rust/src/series.rs b/src/rust/src/series.rs index e7b3ede74..6489a2333 100644 --- a/src/rust/src/series.rs +++ b/src/rust/src/series.rs @@ -5,17 +5,17 @@ /// Therefore there annoyingly exists pl::Series and Series use crate::apply_input; use crate::apply_output; +use crate::conversion_r_to_s::robjname2series; +use crate::conversion_s_to_r::pl_series_to_list; use crate::handle_type; use crate::make_r_na_fun; +use crate::rdataframe::DataFrame; use crate::rdatatype::RPolarsDataType; use crate::robj_to; -use crate::utils::{r_error_list, r_result_list}; - -use crate::conversion_r_to_s::robjname2series; -use crate::conversion_s_to_r::pl_series_to_list; -use crate::rdataframe::DataFrame; +use crate::rpolarserr::*; use crate::utils::extendr_concurrent::ParRObj; use crate::utils::wrappers::null_to_opt; +use crate::utils::{r_error_list, r_result_list}; use crate::lazy::dsl::Expr; use extendr_api::{extendr, prelude::*, rprintln, Rinternals}; @@ -57,9 +57,9 @@ impl From<&Expr> for pl::PolarsResult { #[extendr] impl Series { //utility methods - pub fn new(x: Robj, name: &str) -> std::result::Result { - robjname2series(&ParRObj(x), name) - .map_err(|err| format!("in Series.new: {:?}", err)) + pub fn new(x: Robj, name: Robj) -> RResult { + robjname2series(&ParRObj(x), robj_to!(Option, str, name)?.unwrap_or("")) + .map_err(polars_to_rpolars_err) .map(Series) } diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs index 4709ce7af..6302c91c1 100644 --- a/src/rust/src/utils/mod.rs +++ b/src/rust/src/utils/mod.rs @@ -248,6 +248,8 @@ const R_MIN_INTEGERISH: f64 = -4503599627370496.0; //const I64_MAX_INTO_F64: f64 = i64::MAX as f64; const USIZE_MAX_INTO_F64: f64 = usize::MAX as f64; const U32_MAX_INTO_F64: f64 = u32::MAX as f64; +const I32_MIN_INTO_F64: f64 = i32::MIN as f64; +const I32_MAX_INTO_F64: f64 = i32::MAX as f64; pub const BIT64_NA_ECODING: i64 = -9223372036854775808i64; const WITHIN_INT_MAX: &str = @@ -256,6 +258,8 @@ const WITHIN_INT_MIN: &str = "cannot exceeds double->integer unambigious conversion bound of -(2^52)= -4503599627370496.0"; const WITHIN_USIZE_MAX: &str = "cannot exceed the upper bound for usize"; const WITHIN_U32_MAX: &str = "cannot exceed the upper bound for u32 of 4294967295"; +const WITHIN_I32_MIN: &str = "cannot exceed the upper bound for i32 of 2147483647"; +const WITHIN_I32_MAX: &str = "cannot exceed the upper lower for i32 of -2147483648"; const WITHIN_U8_MAX: &str = "cannot exceed the upper bound for u8 of 255"; const NOT_NAN: &str = "cannot be NaN"; const NO_LESS_THAN_ONE: &str = "cannot be less than one"; @@ -299,17 +303,6 @@ pub fn try_f64_into_i64(x: f64) -> RResult { _ if x.is_nan() => base_err.misvalued(NOT_NAN), _ if x < R_MIN_INTEGERISH => base_err.misvalued(WITHIN_INT_MIN), _ if x > R_MAX_INTEGERISH => base_err.misvalued(WITHIN_INT_MAX), - // should not matter - // _ if x > I64_MAX_INTO_F64 => Err(format!( - // "the value {} cannot exceed i64::MAX {}", - // x, - // i64::MAX - // )), - // _ if x < I64_MIN_INTO_F64 => Err(format!( - // "the value {} cannot exceed i64::MIN {}", - // x, - // i64::MIN - // )) _ => Ok(x as i64), } } @@ -324,6 +317,16 @@ pub fn try_f64_into_u32(x: f64) -> RResult { } } +pub fn try_f64_into_i32(x: f64) -> RResult { + let f_base_err = || rerr().bad_val(rdbg(x)); + match x { + _ if x.is_nan() => f_base_err().misvalued(NOT_NAN), + _ if x < I32_MIN_INTO_F64 => f_base_err().misvalued(WITHIN_I32_MIN), + _ if x > I32_MAX_INTO_F64 => f_base_err().misvalued(WITHIN_I32_MAX), + _ => Ok(x as i32), + } +} + pub fn try_i64_into_u64(x: i64) -> RResult { let base_err = rerr().bad_val(rdbg(x)); match x { @@ -349,6 +352,15 @@ pub fn try_i64_into_u32(x: i64) -> RResult { } } +pub fn try_i64_into_i32(x: i64) -> RResult { + let f_base_err = || rerr().bad_val(rdbg(x)); + match x { + _ if x < i32::MIN as i64 => f_base_err().misvalued(WITHIN_I32_MIN), + _ if x > i32::MAX as i64 => f_base_err().misvalued(WITHIN_I32_MAX), + _ => Ok(x as i32), + } +} + pub fn try_i64_into_u8(x: i64) -> RResult { let base_err = rerr().bad_val(rdbg(x)); match x { @@ -466,6 +478,21 @@ pub fn unpack_r_result_list(robj: extendr_api::Robj) -> RResult { res } +//None if not real or Na. +pub fn robj_bit64_to_opt_i64(robj: Robj) -> Option { + robj.as_real() + .and_then(|v| i64::try_from(v.to_bits()).ok()) + .filter(|val| *val != crate::utils::BIT64_NA_ECODING) +} + +pub fn robj_parse_str_to_t(robj: Robj) -> RResult +where + T: std::str::FromStr, + ::Err: std::error::Error, +{ + Ok(robj.as_str().unwrap_or("").parse::()?) +} + pub fn robj_to_char(robj: extendr_api::Robj) -> RResult { let robj = unpack_r_result_list(robj)?; let mut fchar_iter = if let Some(char_str) = robj.as_str() { @@ -501,28 +528,79 @@ pub fn robj_to_usize(robj: extendr_api::Robj) -> RResult { robj_to_u64(robj).and_then(try_u64_into_usize) } +fn err_no_nan() -> RResult { + rerr().plain("any NA value is not allowed here".to_string()) +} + +fn err_no_scalar() -> RResult { + rerr().plain("only a scalar value is allowed here (length = 1)".to_string()) +} + pub fn robj_to_i64(robj: extendr_api::Robj) -> RResult { let robj = unpack_r_result_list(robj)?; use extendr_api::*; + + return match (robj.rtype(), robj.len()) { + (_, 0 | 2..) => Some(err_no_scalar()), + (Rtype::Strings, 1) => Some(robj_parse_str_to_t(robj.clone())), + (Rtype::Doubles, 1) if robj.inherits("integer64") => { + robj_bit64_to_opt_i64(robj.clone()).map(Ok) + } + (Rtype::Doubles, 1) => robj.as_real().map(try_f64_into_i64), + (Rtype::Integers, 1) => robj.as_integer().map(i64::from).map(Ok), + + (_, _) => { + Some(rerr().plain("does not support this R type for this conversion".to_string())) + } + } + .unwrap_or_else(err_no_nan) + .bad_robj(&robj) + .mistyped(tn::()) + .when("converting into type"); +} + +pub fn robj_to_i32(robj: extendr_api::Robj) -> RResult { + let robj = unpack_r_result_list(robj)?; + use extendr_api::*; + return match (robj.rtype(), robj.len()) { - (Rtype::Strings, 1) => robj - .as_str() - .unwrap_or("") - .parse::() - .ok(), - //specialized integer64 conversion - (Rtype::Doubles, 1) if robj.inherits("integer64") => robj - .as_real() - .and_then(|v| i64::try_from(v.to_bits()).ok()) - .filter(|val| *val != crate::utils::BIT64_NA_ECODING), - //from R doubles or integers - (Rtype::Doubles, 1) => robj.as_real().and_then(|v| try_f64_into_i64(v).ok()), - (Rtype::Integers, 1) => robj.as_integer().map(i64::from), - (_, _) => None, + (_, 0 | 2..) => Some(err_no_scalar()), + (Rtype::Strings, 1) => Some(robj_parse_str_to_t(robj.clone())), + (Rtype::Doubles, 1) if robj.inherits("integer64") => { + robj_bit64_to_opt_i64(robj.clone()).map(try_i64_into_i32) + } + (Rtype::Doubles, 1) => robj.as_real().map(try_f64_into_i32), + (Rtype::Integers, 1) => robj.as_integer().map(i32::from).map(Ok), + (_, _) => { + Some(rerr().plain("does not support this R type for this conversion".to_string())) + } } - .ok_or(RPolarsErr::new()) + .unwrap_or_else(err_no_nan) .bad_robj(&robj) - .mistyped(tn::()); + .mistyped(tn::()) + .when("converting into type"); +} + +pub fn robj_to_f64(robj: extendr_api::Robj) -> RResult { + let robj = unpack_r_result_list(robj)?; + use extendr_api::*; + + return match (robj.rtype(), robj.len()) { + (_, 0 | 2..) => Some(err_no_scalar()), + (Rtype::Strings, 1) => Some(robj_parse_str_to_t(robj.clone())), + (Rtype::Doubles, 1) if robj.inherits("integer64") => { + robj_bit64_to_opt_i64(robj.clone()).map(|v| Ok(v as f64)) + } + (Rtype::Doubles, 1) => robj.as_real().map(Ok), + (Rtype::Integers, 1) => robj.as_integer().map(|v| Ok(v as f64)), + (_, _) => { + Some(rerr().plain("does not support this R type for this conversion".to_string())) + } + } + .unwrap_or_else(err_no_nan) + .bad_robj(&robj) + .mistyped(tn::()) + .when("converting into type"); } pub fn robj_to_u64(robj: extendr_api::Robj) -> RResult { @@ -671,10 +749,18 @@ macro_rules! robj_to_inner { $crate::utils::robj_to_usize($a) }; + (f64, $a:ident) => { + $crate::utils::robj_to_f64($a) + }; + (i64, $a:ident) => { $crate::utils::robj_to_i64($a) }; + (i32, $a:ident) => { + $crate::utils::robj_to_i32($a) + }; + (u64, $a:ident) => { $crate::utils::robj_to_u64($a) }; diff --git a/tests/testthat/test-bit64.R b/tests/testthat/test-bit64.R index 26b704ba7..9e6e7e415 100644 --- a/tests/testthat/test-bit64.R +++ b/tests/testthat/test-bit64.R @@ -23,11 +23,22 @@ test_that("from r to series and reverse", { test_that("robj_to! from bit64", { testthat::skip_if_not_installed("bit64") + + expect_identical( + unwrap(test_robj_to_f64(bit64::as.integer64(1))), + "1.0" + ) + expect_identical( unwrap(test_robj_to_i64(bit64::as.integer64(1))), as.character(bit64::as.integer64(1)) ) + expect_identical( + unwrap(test_robj_to_i32(bit64::as.integer64(2^27))), + as.character(2^27) + ) + expect_identical( unwrap(test_robj_to_u32(bit64::as.integer64(2^27))), as.character(2^27) @@ -44,34 +55,57 @@ test_that("robj_to! from bit64", { ) # NO NA + expect_rpolarserr( - unwrap(test_robj_to_i64(bit64::as.integer64(NA)), call = NULL), - c("BadArgument", "TypeMismatch", "BadValue") + unwrap(test_robj_to_f64(bit64::NA_integer64_), call = NULL), + c("BadArgument", "When", "TypeMismatch", "BadValue", "PlainErrorMessage") ) + expect_rpolarserr( - unwrap(test_robj_to_usize(bit64::as.integer64(NA)), call = NULL), - c("BadArgument", "TypeMismatch", "BadValue") + unwrap(test_robj_to_i64(bit64::NA_integer64_), call = NULL), + c("BadArgument", "When", "TypeMismatch", "BadValue", "PlainErrorMessage") + ) + + expect_rpolarserr( + unwrap(test_robj_to_i32(bit64::NA_integer64_), call = NULL), + c("BadArgument", "When", "TypeMismatch", "BadValue", "PlainErrorMessage") + ) + + expect_rpolarserr( + unwrap(test_robj_to_usize(bit64::NA_integer64_), call = NULL), + c("BadArgument", "When", "TypeMismatch", "BadValue", "PlainErrorMessage") ) # NO OVERFLOW expect_rpolarserr( - unwrap(test_robj_to_u32(2^57), call = NULL), - c("BadArgument", "TypeMismatch", "BadValue") + unwrap(test_robj_to_u32(2^57)), + c("BadArgument", "When", "TypeMismatch", "BadValue", "ValueOutOfScope", "BadValue") + ) + + expect_rpolarserr( + unwrap(test_robj_to_i32(2^37), call = NULL), + c("BadArgument", "When", "TypeMismatch", "BadValue", "ValueOutOfScope", "BadValue") ) # NO NEGATIVE expect_rpolarserr( unwrap(test_robj_to_usize(bit64::as.integer64(-1)), call = NULL), - c("BadArgument", "TypeMismatch", "BadValue") + c("BadArgument", "When", "TypeMismatch", "BadValue", "PlainErrorMessage") ) expect_rpolarserr( unwrap(test_robj_to_u32(bit64::as.integer64(-1)), call = NULL), - c("BadArgument", "TypeMismatch", "BadValue") + c("BadArgument", "When", "TypeMismatch", "BadValue", "PlainErrorMessage") ) # NO length>1 expect_rpolarserr( unwrap(test_robj_to_usize(bit64::as.integer64(c(1:2))), call = NULL), - c("BadArgument", "TypeMismatch", "BadValue") + c("BadArgument", "When", "TypeMismatch", "BadValue", "PlainErrorMessage") + ) + + # NO length<1 + expect_rpolarserr( + unwrap(test_robj_to_usize(bit64::as.integer64(numeric(0))), call = NULL), + c("BadArgument", "When", "TypeMismatch", "BadValue", "PlainErrorMessage") ) }) From 3ada33f642297958f8effef872e10344333ba95b Mon Sep 17 00:00:00 2001 From: sorhawell Date: Sat, 5 Aug 2023 01:08:21 +0200 Subject: [PATCH 08/19] update param explanations --- R/lazyframe__lazy.R | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 22668c914..4409e41c7 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -265,14 +265,21 @@ LazyFrame_filter = "use_extendr_wrapper" #' @title New DataFrame from LazyFrame_object$collect() #' @description collect DataFrame by lazy query -#' @param type_coercion Boolean. Do type coercion optimization. -#' @param predicate_pushdown Boolean. Do predicate pushdown optimization. -#' @param projection_pushdown Boolean. Do projection pushdown optimization. -#' @param simplify_expression Boolean. Run simplify expressions optimization. -#' @param no_optimization Boolean. Turn off (certain) optimizations. -#' @param slice_pushdown Boolean. Slice pushdown optimization. -#' @param common_subplan_elimination Boolean. Will try to cache branching subplans that occur on -#' self-joins or unions. +#' @param type_coercion Boolean. Coerce types such that operations succeed and run on minimal +#' required memory. +#' @param predicate_pushdown Boolean. Applies filters as early as possible/ at scan level. +#' @param projection_pushdown Boolean. Applies filters as early as possible/ at scan level. +#' @param simplify_expression Boolean. Cache subtrees/file scans that are used by multiple subtrees +#' in the query plan. +#' @param no_optimization Boolean. Turn of the following: +#' predicate_pushdown = FALSE +#' projection_pushdown = FALSE +#' slice_pushdown = FALSE +#' common_subplan_elimination = FALSE +#' @param slice_pushdown Boolean. Only load the required slice from the scan level. Don't +#' materialize sliced outputs (e.g. join.head(10)). +#' @param common_subplan_elimination Boolean. Cache subtrees/file scans that are used by multiple +#' subtrees in the query plan. #' @param streaming Boolean. Run parts of the query in a streaming fashion #' (this is in an alpha state) #' @param collect_in_background Boolean. Detach this query from R session. Computation will start From cb914ba44ca7d84c39259d9cc6bba940858b3690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Havelund=20Welling?= Date: Sat, 5 Aug 2023 01:09:07 +0200 Subject: [PATCH 09/19] Update R/lazyframe__lazy.R Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> --- R/lazyframe__lazy.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 4409e41c7..0e0b0316f 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -314,7 +314,7 @@ LazyFrame_collect = function( common_subplan_elimination = FALSE } - collect_f = if( isTRUE(collect_in_background)) { + collect_f = if(isTRUE(collect_in_background)) { .pr$LazyFrame$collect_background } else { .pr$LazyFrame$collect From ccdafbaa40901b28f3d08536bce2b42131b6b951 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Havelund=20Welling?= Date: Sat, 5 Aug 2023 01:09:42 +0200 Subject: [PATCH 10/19] Update R/lazyframe__lazy.R Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> --- R/lazyframe__lazy.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 0e0b0316f..24eaa092b 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -286,7 +286,7 @@ LazyFrame_filter = "use_extendr_wrapper" #' in background. Get a handle which later can be converted into the resulting DataFrame. Useful #' in interactive mode to not lock R session. #' @details -#' Note: use `$fetch()` if you want to run your query on the first `n` rows only. +#' Note: use `$fetch(n)` if you want to run your query on the first `n` rows only. #' This can be a huge time saver in debugging queries. #' @keywords LazyFrame DataFrame_new #' @return collected `DataFrame` or if colkect From c66702c49f038e5d7b1e8bb348e3d92d63c63211 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Havelund=20Welling?= Date: Sat, 5 Aug 2023 01:20:06 +0200 Subject: [PATCH 11/19] Update tests/testthat/test-lazy_profile.R Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> --- tests/testthat/test-lazy_profile.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-lazy_profile.R b/tests/testthat/test-lazy_profile.R index 202092da7..cc96e676e 100644 --- a/tests/testthat/test-lazy_profile.R +++ b/tests/testthat/test-lazy_profile.R @@ -1,5 +1,5 @@ test_that("can modify lazy profile settings", { - #some way to check if .pr$LazyFrame$optimization_toggle works + # TODO: some way to check if .pr$LazyFrame$optimization_toggle works #toggle settings #read back settings From 0115b96f2f905f03d441b2ad96a2530b2c74b287 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Sat, 5 Aug 2023 01:29:55 +0200 Subject: [PATCH 12/19] refine collect profile docs --- R/lazyframe__lazy.R | 23 ++++++++++++++--------- man/LazyFrame_collect.Rd | 23 +++++++++++++++-------- man/LazyFrame_profile.Rd | 23 ++++++++++++++--------- 3 files changed, 43 insertions(+), 26 deletions(-) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 24eaa092b..8dee0a2e4 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -998,24 +998,29 @@ LazyFrame_dtypes = method_as_property(function() { #' @return List of two DataFrames, (collected result, profile stats) #' @examples #' -#' #Use $profile() to compare two queries +#' ## Simplest use case +#' pl$LazyFrame()$select(pl$lit(2) + 2)$profile() #' -#' # print one '.', take a series convert to r vector, take first value, add 5 -#' r_func = \(s) {cat(".");s$to_r()[1] + 5} +#' ## Use $profile() to compare two queries #' -#' # map each Species-group of each numeric column with an R function, takes ~7000us slow ! +#' # -1- map each Species-group with native polars, takes ~120us only #' pl$LazyFrame(iris)$ -#' sort("Sepal.Length")$ #for no specific reason +#' sort("Sepal.Length")$ #' groupby("Species", maintain_order = TRUE)$ -#' agg(pl$col(pl$Float64)$apply(r_func))$ +#' agg(pl$col(pl$Float64)$first() + 5 )$ #' profile() #' -#' # map each Species-group with native polars, takes ~120us better +#' # -2- map each Species-group of each numeric column with an R function, takes ~7000us (slow!) +#' +#' # some R function, prints `.` for each time called by polars +#' r_func = \(s) {cat(".");s$to_r()[1] + 5} +#' #' pl$LazyFrame(iris)$ -#' sort("Sepal.Length")$ +#' sort("Sepal.Length")$ #for no specific reason #' groupby("Species", maintain_order = TRUE)$ -#' agg(pl$col(pl$Float64)$first() + 5 )$ +#' agg(pl$col(pl$Float64)$apply(r_func))$ #' profile() +#' LazyFrame_profile = function() { .pr$LazyFrame$profile(self) |> unwrap("in $profile()") } diff --git a/man/LazyFrame_collect.Rd b/man/LazyFrame_collect.Rd index 7ecf73115..47e5a2ecd 100644 --- a/man/LazyFrame_collect.Rd +++ b/man/LazyFrame_collect.Rd @@ -17,20 +17,27 @@ LazyFrame_collect( ) } \arguments{ -\item{type_coercion}{Boolean. Do type coercion optimization.} +\item{type_coercion}{Boolean. Coerce types such that operations succeed and run on minimal +required memory.} -\item{predicate_pushdown}{Boolean. Do predicate pushdown optimization.} +\item{predicate_pushdown}{Boolean. Applies filters as early as possible/ at scan level.} -\item{projection_pushdown}{Boolean. Do projection pushdown optimization.} +\item{projection_pushdown}{Boolean. Applies filters as early as possible/ at scan level.} -\item{simplify_expression}{Boolean. Run simplify expressions optimization.} +\item{simplify_expression}{Boolean. Cache subtrees/file scans that are used by multiple subtrees +in the query plan.} -\item{no_optimization}{Boolean. Turn off (certain) optimizations.} +\item{no_optimization}{Boolean. Turn of the following: +predicate_pushdown = FALSE +projection_pushdown = FALSE +slice_pushdown = FALSE +common_subplan_elimination = FALSE} -\item{slice_pushdown}{Boolean. Slice pushdown optimization.} +\item{slice_pushdown}{Boolean. Only load the required slice from the scan level. Don't +materialize sliced outputs (e.g. join.head(10)).} -\item{common_subplan_elimination}{Boolean. Will try to cache branching subplans that occur on -self-joins or unions.} +\item{common_subplan_elimination}{Boolean. Cache subtrees/file scans that are used by multiple +subtrees in the query plan.} \item{streaming}{Boolean. Run parts of the query in a streaming fashion (this is in an alpha state)} diff --git a/man/LazyFrame_profile.Rd b/man/LazyFrame_profile.Rd index aa307e71a..45b35e58c 100644 --- a/man/LazyFrame_profile.Rd +++ b/man/LazyFrame_profile.Rd @@ -18,23 +18,28 @@ The units of the timings are microseconds. } \examples{ -#Use $profile() to compare two queries +## Simplest use case +pl$LazyFrame()$select(pl$lit(2) + 2)$profile() -# print one '.', take a series convert to r vector, take first value, add 5 -r_func = \(s) {cat(".");s$to_r()[1] + 5} +## Use $profile() to compare two queries -# map each Species-group of each numeric column with an R function, takes ~7000us slow ! +# -1- map each Species-group with native polars, takes ~120us only pl$LazyFrame(iris)$ - sort("Sepal.Length")$ #for no specific reason + sort("Sepal.Length")$ groupby("Species", maintain_order = TRUE)$ - agg(pl$col(pl$Float64)$apply(r_func))$ + agg(pl$col(pl$Float64)$first() + 5 )$ profile() -# map each Species-group with native polars, takes ~120us better +# -2- map each Species-group of each numeric column with an R function, takes ~7000us (slow!) + +# some R function, prints `.` for each time called by polars +r_func = \(s) {cat(".");s$to_r()[1] + 5} + pl$LazyFrame(iris)$ - sort("Sepal.Length")$ + sort("Sepal.Length")$ #for no specific reason groupby("Species", maintain_order = TRUE)$ - agg(pl$col(pl$Float64)$first() + 5 )$ + agg(pl$col(pl$Float64)$apply(r_func))$ profile() + } \keyword{LazyFrame} From ca80fe57dbb2d584e847960dc3c74d46f793fe88 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Sat, 5 Aug 2023 01:30:04 +0200 Subject: [PATCH 13/19] skip to_string() --- src/rust/src/utils/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs index 6302c91c1..8907c0364 100644 --- a/src/rust/src/utils/mod.rs +++ b/src/rust/src/utils/mod.rs @@ -533,7 +533,7 @@ fn err_no_nan() -> RResult { } fn err_no_scalar() -> RResult { - rerr().plain("only a scalar value is allowed here (length = 1)".to_string()) + rerr().plain("only a scalar value is allowed here (length = 1)") } pub fn robj_to_i64(robj: extendr_api::Robj) -> RResult { From cff0b73347de6ae713051e0c6175fd7c2bb7e2ca Mon Sep 17 00:00:00 2001 From: sorhawell Date: Sat, 5 Aug 2023 01:35:07 +0200 Subject: [PATCH 14/19] remove for no spec reason --- R/lazyframe__lazy.R | 2 +- man/LazyFrame_collect.Rd | 2 +- man/LazyFrame_profile.Rd | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 8dee0a2e4..d2d6e7ea5 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -1016,7 +1016,7 @@ LazyFrame_dtypes = method_as_property(function() { #' r_func = \(s) {cat(".");s$to_r()[1] + 5} #' #' pl$LazyFrame(iris)$ -#' sort("Sepal.Length")$ #for no specific reason +#' sort("Sepal.Length")$ #' groupby("Species", maintain_order = TRUE)$ #' agg(pl$col(pl$Float64)$apply(r_func))$ #' profile() diff --git a/man/LazyFrame_collect.Rd b/man/LazyFrame_collect.Rd index 47e5a2ecd..e96646d39 100644 --- a/man/LazyFrame_collect.Rd +++ b/man/LazyFrame_collect.Rd @@ -53,7 +53,7 @@ collected \code{DataFrame} or if colkect collect DataFrame by lazy query } \details{ -Note: use \verb{$fetch()} if you want to run your query on the first \code{n} rows only. +Note: use \verb{$fetch(n)} if you want to run your query on the first \code{n} rows only. This can be a huge time saver in debugging queries. } \examples{ diff --git a/man/LazyFrame_profile.Rd b/man/LazyFrame_profile.Rd index 45b35e58c..3e929acf9 100644 --- a/man/LazyFrame_profile.Rd +++ b/man/LazyFrame_profile.Rd @@ -36,7 +36,7 @@ pl$LazyFrame(iris)$ r_func = \(s) {cat(".");s$to_r()[1] + 5} pl$LazyFrame(iris)$ - sort("Sepal.Length")$ #for no specific reason + sort("Sepal.Length")$ groupby("Species", maintain_order = TRUE)$ agg(pl$col(pl$Float64)$apply(r_func))$ profile() From 92d6be6fa015ca2c403449525b5ec4903a4bf284 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Sat, 5 Aug 2023 01:38:32 +0200 Subject: [PATCH 15/19] export GIT_DIF_TARGET=main make fmt --- R/lazyframe__lazy.R | 29 +++++++++++++++-------------- man/LazyFrame_profile.Rd | 7 +++++-- tests/testthat/test-bit64.R | 4 ++-- tests/testthat/test-lazy.R | 2 +- tests/testthat/test-lazy_profile.R | 24 ++++++++++-------------- 5 files changed, 33 insertions(+), 33 deletions(-) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index d2d6e7ea5..9c18a02cc 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -292,17 +292,15 @@ LazyFrame_filter = "use_extendr_wrapper" #' @return collected `DataFrame` or if colkect #' @examples pl$DataFrame(iris)$lazy()$filter(pl$col("Species") == "setosa")$collect() LazyFrame_collect = function( - type_coercion = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - no_optimization = FALSE, - slice_pushdown = TRUE, - common_subplan_elimination = TRUE, - streaming = FALSE, - collect_in_background = FALSE -) { - + type_coercion = TRUE, + predicate_pushdown = TRUE, + projection_pushdown = TRUE, + simplify_expression = TRUE, + no_optimization = FALSE, + slice_pushdown = TRUE, + common_subplan_elimination = TRUE, + streaming = FALSE, + collect_in_background = FALSE) { if (isTRUE(no_optimization)) { predicate_pushdown = FALSE projection_pushdown = FALSE @@ -314,7 +312,7 @@ LazyFrame_collect = function( common_subplan_elimination = FALSE } - collect_f = if(isTRUE(collect_in_background)) { + collect_f = if (isTRUE(collect_in_background)) { .pr$LazyFrame$collect_background } else { .pr$LazyFrame$collect @@ -1007,13 +1005,16 @@ LazyFrame_dtypes = method_as_property(function() { #' pl$LazyFrame(iris)$ #' sort("Sepal.Length")$ #' groupby("Species", maintain_order = TRUE)$ -#' agg(pl$col(pl$Float64)$first() + 5 )$ +#' agg(pl$col(pl$Float64)$first() + 5)$ #' profile() #' #' # -2- map each Species-group of each numeric column with an R function, takes ~7000us (slow!) #' #' # some R function, prints `.` for each time called by polars -#' r_func = \(s) {cat(".");s$to_r()[1] + 5} +#' r_func = \(s) { +#' cat(".") +#' s$to_r()[1] + 5 +#' } #' #' pl$LazyFrame(iris)$ #' sort("Sepal.Length")$ diff --git a/man/LazyFrame_profile.Rd b/man/LazyFrame_profile.Rd index 3e929acf9..4b30477aa 100644 --- a/man/LazyFrame_profile.Rd +++ b/man/LazyFrame_profile.Rd @@ -27,13 +27,16 @@ pl$LazyFrame()$select(pl$lit(2) + 2)$profile() pl$LazyFrame(iris)$ sort("Sepal.Length")$ groupby("Species", maintain_order = TRUE)$ - agg(pl$col(pl$Float64)$first() + 5 )$ + agg(pl$col(pl$Float64)$first() + 5)$ profile() # -2- map each Species-group of each numeric column with an R function, takes ~7000us (slow!) # some R function, prints `.` for each time called by polars -r_func = \(s) {cat(".");s$to_r()[1] + 5} +r_func = \(s) { + cat(".") + s$to_r()[1] + 5 +} pl$LazyFrame(iris)$ sort("Sepal.Length")$ diff --git a/tests/testthat/test-bit64.R b/tests/testthat/test-bit64.R index 9e6e7e415..cf32654bc 100644 --- a/tests/testthat/test-bit64.R +++ b/tests/testthat/test-bit64.R @@ -82,7 +82,7 @@ test_that("robj_to! from bit64", { c("BadArgument", "When", "TypeMismatch", "BadValue", "ValueOutOfScope", "BadValue") ) - expect_rpolarserr( + expect_rpolarserr( unwrap(test_robj_to_i32(2^37), call = NULL), c("BadArgument", "When", "TypeMismatch", "BadValue", "ValueOutOfScope", "BadValue") ) @@ -94,7 +94,7 @@ test_that("robj_to! from bit64", { ) expect_rpolarserr( unwrap(test_robj_to_u32(bit64::as.integer64(-1)), call = NULL), - c("BadArgument", "When", "TypeMismatch", "BadValue", "PlainErrorMessage") + c("BadArgument", "When", "TypeMismatch", "BadValue", "PlainErrorMessage") ) # NO length>1 diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index 2afd5da37..70d355769 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -644,5 +644,5 @@ test_that("width", { test_that("with_row_count", { lf = pl$LazyFrame(mtcars) - expect_identical(lf$with_row_count("idx", 42)$select(pl$col("idx"))$collect()$to_data_frame()$idx, as.double(42:(41+nrow(mtcars)))) + expect_identical(lf$with_row_count("idx", 42)$select(pl$col("idx"))$collect()$to_data_frame()$idx, as.double(42:(41 + nrow(mtcars)))) }) diff --git a/tests/testthat/test-lazy_profile.R b/tests/testthat/test-lazy_profile.R index cc96e676e..90a53e5a7 100644 --- a/tests/testthat/test-lazy_profile.R +++ b/tests/testthat/test-lazy_profile.R @@ -1,23 +1,21 @@ test_that("can modify lazy profile settings", { # TODO: some way to check if .pr$LazyFrame$optimization_toggle works - #toggle settings - #read back settings - #compare - expect_identical(1,1) - + # toggle settings + # read back settings + # compare + expect_identical(1, 1) }) test_that("$profile", { - - #profile minimal test + # profile minimal test p0 = pl$LazyFrame()$select(pl$lit(1:3)$alias("x"))$profile() - expect_true(inherits(p0,"list")) - expect_identical(p0$result$to_list(),list(x = 1:3)) - expect_identical(p0$profile$columns,c("node","start","end")) + expect_true(inherits(p0, "list")) + expect_identical(p0$result$to_list(), list(x = 1:3)) + expect_identical(p0$profile$columns, c("node", "start", "end")) - #profile supports with and without R functions + # profile supports with and without R functions p1 = pl$LazyFrame(iris)$ sort("Sepal.Length")$ groupby("Species", maintain_order = TRUE)$ @@ -26,7 +24,7 @@ test_that("$profile", { r_func = \(s) s$to_r()[1] + 5 p2 = pl$LazyFrame(iris)$ - sort("Sepal.Length")$ #for no specific reason + sort("Sepal.Length")$ # for no specific reason groupby("Species", maintain_order = TRUE)$ agg(pl$col(pl$Float64)$apply(r_func))$ profile() @@ -36,6 +34,4 @@ test_that("$profile", { p2$result$as_data_frame(), p1$result$as_data_frame() ) - }) - From 0d0d0d4e8c4e59e81b8c2bfb475bf5192150cbd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Havelund=20Welling?= Date: Tue, 8 Aug 2023 15:07:22 +0200 Subject: [PATCH 16/19] Apply suggestions from code review Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> --- R/lazyframe__lazy.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index d1c20a925..2a2f7e3a8 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -289,8 +289,8 @@ LazyFrame_filter = "use_extendr_wrapper" #' Note: use `$fetch(n)` if you want to run your query on the first `n` rows only. #' This can be a huge time saver in debugging queries. #' @keywords LazyFrame DataFrame_new -#' @return collected `DataFrame` or if colkect -#' @examples pl$DataFrame(iris)$lazy()$filter(pl$col("Species") == "setosa")$collect() +#' @return A `DataFrame` +#' @examples pl$LazyFrame(iris)$filter(pl$col("Species") == "setosa")$collect() LazyFrame_collect = function( type_coercion = TRUE, predicate_pushdown = TRUE, @@ -988,12 +988,12 @@ LazyFrame_dtypes = method_as_property(function() { }) #' @title Collect and profile a lazy query. -#' @description This will run the query and return a tuple containing the materialized DataFrame and +#' @description This will run the query and return a list containing the materialized DataFrame and #' a DataFrame that contains profiling information of each node that is executed. #' @details The units of the timings are microseconds. #' #' @keywords LazyFrame -#' @return List of two DataFrames, (collected result, profile stats) +#' @return List of two `DataFrame`s: one with the collected result, the other with the timings of each step. #' @examples #' #' ## Simplest use case From 78b777c1decf54246e68ca391ae2aa998ec1ec47 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Tue, 8 Aug 2023 15:11:32 +0200 Subject: [PATCH 17/19] news + fmt + roxygen --- NEWS.md | 1 + R/expr__expr.R | 8 ++++---- R/lazyframe__lazy.R | 1 - man/Expr_pow.Rd | 8 ++++---- man/LazyFrame_collect.Rd | 4 ++-- man/LazyFrame_profile.Rd | 4 ++-- man/pl_corr.Rd | 2 +- man/pl_cov.Rd | 2 +- man/pl_rolling_corr.Rd | 2 +- man/pl_rolling_cov.Rd | 2 +- 10 files changed, 17 insertions(+), 17 deletions(-) diff --git a/NEWS.md b/NEWS.md index d31b3d584..c1ee62742 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,7 @@ - New lazy function translated: `concat_str()` to concatenate several columns into one (#349). - New stat functions `pl$cov()`, `pl$rolling_cov()` `pl$corr()`, `pl$rolling_corr()` (#351). +- New methods `$optimization_toggle()` + `$profile()` (#323). # polars 0.7.0 diff --git a/R/expr__expr.R b/R/expr__expr.R index e1b93486c..ff4858d3f 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -2424,16 +2424,16 @@ Expr_limit = function(n = 10) { #' pl$DataFrame(a = -1:3)$select( #' pl$lit(2)$pow(pl$col("a"))$alias("with $pow()"), #' 2^pl$lit(-2:2), # brief use -#' pl$lit(2)$alias("left hand side name") ^ pl$lit(-3:1)$alias("right hand side name") +#' pl$lit(2)$alias("left hand side name")^pl$lit(-3:1)$alias("right hand side name") #' ) #' #' #' # Example on the R behavior of the `**`-'quasi operator' -#' 2^1 # normal use +#' 2^1 # normal use #' 2**1 # this works because ** is converted to the `^`-operator by the R interpreter -#' get("^")(2,1) #this works because there exists a function called "^" +#' get("^")(2, 1) # this works because there exists a function called "^" #' # the R interpreter will not convert "**" to "^" and there is no function named "**" -#' tryCatch(get("**")(2,1), error = as.character) +#' tryCatch(get("**")(2, 1), error = as.character) Expr_pow = function(exponent) { .pr$Expr$pow(self, exponent) |> unwrap("in $pow()") } diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 2a2f7e3a8..27d7d0b9c 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -1054,7 +1054,6 @@ LazyFrame_explode = function(columns = list(), ...) { #' @return A LazyFrame #' @examples #' pl$LazyFrame(mtcars)$clone() - LazyFrame_clone = function() { .pr$LazyFrame$clone_see_me_macro(self) } diff --git a/man/Expr_pow.Rd b/man/Expr_pow.Rd index b41b32a85..b818b4fbf 100644 --- a/man/Expr_pow.Rd +++ b/man/Expr_pow.Rd @@ -29,15 +29,15 @@ exponentiation operator. pl$DataFrame(a = -1:3)$select( pl$lit(2)$pow(pl$col("a"))$alias("with $pow()"), 2^pl$lit(-2:2), # brief use - pl$lit(2)$alias("left hand side name") ^ pl$lit(-3:1)$alias("right hand side name") + pl$lit(2)$alias("left hand side name")^pl$lit(-3:1)$alias("right hand side name") ) # Example on the R behavior of the `**`-'quasi operator' -2^1 # normal use +2^1 # normal use 2**1 # this works because ** is converted to the `^`-operator by the R interpreter -get("^")(2,1) #this works because there exists a function called "^" +get("^")(2, 1) # this works because there exists a function called "^" # the R interpreter will not convert "**" to "^" and there is no function named "**" -tryCatch(get("**")(2,1), error = as.character) +tryCatch(get("**")(2, 1), error = as.character) } \keyword{Expr} diff --git a/man/LazyFrame_collect.Rd b/man/LazyFrame_collect.Rd index e96646d39..b7afb558a 100644 --- a/man/LazyFrame_collect.Rd +++ b/man/LazyFrame_collect.Rd @@ -47,7 +47,7 @@ in background. Get a handle which later can be converted into the resulting Data in interactive mode to not lock R session.} } \value{ -collected \code{DataFrame} or if colkect +A \code{DataFrame} } \description{ collect DataFrame by lazy query @@ -57,7 +57,7 @@ Note: use \verb{$fetch(n)} if you want to run your query on the first \code{n} r This can be a huge time saver in debugging queries. } \examples{ -pl$DataFrame(iris)$lazy()$filter(pl$col("Species") == "setosa")$collect() +pl$LazyFrame(iris)$filter(pl$col("Species") == "setosa")$collect() } \keyword{DataFrame_new} \keyword{LazyFrame} diff --git a/man/LazyFrame_profile.Rd b/man/LazyFrame_profile.Rd index 4b30477aa..ae4c6a0aa 100644 --- a/man/LazyFrame_profile.Rd +++ b/man/LazyFrame_profile.Rd @@ -7,10 +7,10 @@ LazyFrame_profile() } \value{ -List of two DataFrames, (collected result, profile stats) +List of two \code{DataFrame}s: one with the collected result, the other with the timings of each step. } \description{ -This will run the query and return a tuple containing the materialized DataFrame and +This will run the query and return a list containing the materialized DataFrame and a DataFrame that contains profiling information of each node that is executed. } \details{ diff --git a/man/pl_corr.Rd b/man/pl_corr.Rd index 48ca21163..8c3a011fa 100644 --- a/man/pl_corr.Rd +++ b/man/pl_corr.Rd @@ -23,6 +23,6 @@ Expr for the computed correlation Calculates the correlation between two columns } \examples{ -lf <- pl$LazyFrame(data.frame(a = c(1, 8, 3), b = c(4, 5, 2))) +lf = pl$LazyFrame(data.frame(a = c(1, 8, 3), b = c(4, 5, 2))) lf$select(pl$corr("a", "b", method = "spearman"))$collect() } diff --git a/man/pl_cov.Rd b/man/pl_cov.Rd index b7ba29486..402c7c324 100644 --- a/man/pl_cov.Rd +++ b/man/pl_cov.Rd @@ -15,7 +15,7 @@ Expr for the computed covariance Calculates the covariance between two columns / expressions. } \examples{ -lf <- pl$LazyFrame(data.frame(a = c(1, 8, 3), b = c(4, 5, 2))) +lf = pl$LazyFrame(data.frame(a = c(1, 8, 3), b = c(4, 5, 2))) lf$select(pl$cov("a", "b"))$collect() pl$cov(c(1, 8, 3), c(4, 5, 2))$to_r() } diff --git a/man/pl_rolling_corr.Rd b/man/pl_rolling_corr.Rd index 0a76e3248..79ceec9f8 100644 --- a/man/pl_rolling_corr.Rd +++ b/man/pl_rolling_corr.Rd @@ -22,6 +22,6 @@ Expr for the computed rolling correlation Calculates the rolling correlation between two columns } \examples{ -lf <- pl$LazyFrame(data.frame(a = c(1, 8, 3), b = c(4, 5, 2))) +lf = pl$LazyFrame(data.frame(a = c(1, 8, 3), b = c(4, 5, 2))) lf$select(pl$rolling_corr("a", "b", window_size = 2))$collect() } diff --git a/man/pl_rolling_cov.Rd b/man/pl_rolling_cov.Rd index ac7b5d520..98de07f68 100644 --- a/man/pl_rolling_cov.Rd +++ b/man/pl_rolling_cov.Rd @@ -22,6 +22,6 @@ Expr for the computed rolling covariance Calculates the rolling covariance between two columns } \examples{ -lf <- pl$LazyFrame(data.frame(a = c(1, 8, 3), b = c(4, 5, 2))) +lf = pl$LazyFrame(data.frame(a = c(1, 8, 3), b = c(4, 5, 2))) lf$select(pl$rolling_cov("a", "b", window_size = 2))$collect() } From 131daee90e3afecad95396cf4b6234419eb61100 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Tue, 8 Aug 2023 15:16:13 +0200 Subject: [PATCH 18/19] mention CSE in news --- NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index c1ee62742..96db2c757 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,7 +12,8 @@ - New lazy function translated: `concat_str()` to concatenate several columns into one (#349). - New stat functions `pl$cov()`, `pl$rolling_cov()` `pl$corr()`, `pl$rolling_corr()` (#351). -- New methods `$optimization_toggle()` + `$profile()` (#323). +- New methods `$optimization_toggle()` + `$profile()` and enable rust-polars feature + CSE: "Activate common subplan elimination optimization" (#323) # polars 0.7.0 From 267c1a30bbd3faab62806908106aba4ca545ad46 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Tue, 8 Aug 2023 18:21:46 +0200 Subject: [PATCH 19/19] minor doc fixes --- R/lazyframe__lazy.R | 35 +++++++++++++++++++---------------- man/LazyFrame_collect.Rd | 38 ++++++++++++++++++++------------------ 2 files changed, 39 insertions(+), 34 deletions(-) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 27d7d0b9c..13cb2905d 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -265,26 +265,28 @@ LazyFrame_filter = "use_extendr_wrapper" #' @title New DataFrame from LazyFrame_object$collect() #' @description collect DataFrame by lazy query -#' @param type_coercion Boolean. Coerce types such that operations succeed and run on minimal -#' required memory. -#' @param predicate_pushdown Boolean. Applies filters as early as possible/ at scan level. -#' @param projection_pushdown Boolean. Applies filters as early as possible/ at scan level. -#' @param simplify_expression Boolean. Cache subtrees/file scans that are used by multiple subtrees -#' in the query plan. -#' @param no_optimization Boolean. Turn of the following: +#' @param type_coercion Boolean. Coerce types such that operations succeed and +#' run on minimal required memory. +#' @param predicate_pushdown Boolean. Applies filters as early as possible / at +#' scan level. +#' @param projection_pushdown Boolean. Applies filters as early as possible / at +#' scan level. +#' @param simplify_expression Boolean. Cache subtrees/file scans that are used +#' by multiple subtrees in the query plan. +#' @param slice_pushdown Boolean. Only load the required slice from the scan +#' level. Don't materialize sliced outputs (e.g. `join$head(10)`). +#' @param common_subplan_elimination Boolean. Cache subtrees/file scans that +#' are used by multiple subtrees in the query plan. +#' @param no_optimization Boolean. Turn off the following optimizations: #' predicate_pushdown = FALSE #' projection_pushdown = FALSE #' slice_pushdown = FALSE #' common_subplan_elimination = FALSE -#' @param slice_pushdown Boolean. Only load the required slice from the scan level. Don't -#' materialize sliced outputs (e.g. join.head(10)). -#' @param common_subplan_elimination Boolean. Cache subtrees/file scans that are used by multiple -#' subtrees in the query plan. #' @param streaming Boolean. Run parts of the query in a streaming fashion -#' (this is in an alpha state) -#' @param collect_in_background Boolean. Detach this query from R session. Computation will start -#' in background. Get a handle which later can be converted into the resulting DataFrame. Useful -#' in interactive mode to not lock R session. +#' (this is in an alpha state). +#' @param collect_in_background Boolean. Detach this query from R session. +#' Computation will start in background. Get a handle which later can be converted +#' into the resulting DataFrame. Useful in interactive mode to not lock R session. #' @details #' Note: use `$fetch(n)` if you want to run your query on the first `n` rows only. #' This can be a huge time saver in debugging queries. @@ -296,11 +298,12 @@ LazyFrame_collect = function( predicate_pushdown = TRUE, projection_pushdown = TRUE, simplify_expression = TRUE, - no_optimization = FALSE, slice_pushdown = TRUE, common_subplan_elimination = TRUE, + no_optimization = FALSE, streaming = FALSE, collect_in_background = FALSE) { + if (isTRUE(no_optimization)) { predicate_pushdown = FALSE projection_pushdown = FALSE diff --git a/man/LazyFrame_collect.Rd b/man/LazyFrame_collect.Rd index b7afb558a..1ef627a86 100644 --- a/man/LazyFrame_collect.Rd +++ b/man/LazyFrame_collect.Rd @@ -9,42 +9,44 @@ LazyFrame_collect( predicate_pushdown = TRUE, projection_pushdown = TRUE, simplify_expression = TRUE, - no_optimization = FALSE, slice_pushdown = TRUE, common_subplan_elimination = TRUE, + no_optimization = FALSE, streaming = FALSE, collect_in_background = FALSE ) } \arguments{ -\item{type_coercion}{Boolean. Coerce types such that operations succeed and run on minimal -required memory.} +\item{type_coercion}{Boolean. Coerce types such that operations succeed and +run on minimal required memory.} + +\item{predicate_pushdown}{Boolean. Applies filters as early as possible / at +scan level.} -\item{predicate_pushdown}{Boolean. Applies filters as early as possible/ at scan level.} +\item{projection_pushdown}{Boolean. Applies filters as early as possible / at +scan level.} -\item{projection_pushdown}{Boolean. Applies filters as early as possible/ at scan level.} +\item{simplify_expression}{Boolean. Cache subtrees/file scans that are used +by multiple subtrees in the query plan.} -\item{simplify_expression}{Boolean. Cache subtrees/file scans that are used by multiple subtrees -in the query plan.} +\item{slice_pushdown}{Boolean. Only load the required slice from the scan +level. Don't materialize sliced outputs (e.g. \code{join$head(10)}).} -\item{no_optimization}{Boolean. Turn of the following: +\item{common_subplan_elimination}{Boolean. Cache subtrees/file scans that +are used by multiple subtrees in the query plan.} + +\item{no_optimization}{Boolean. Turn off the following optimizations: predicate_pushdown = FALSE projection_pushdown = FALSE slice_pushdown = FALSE common_subplan_elimination = FALSE} -\item{slice_pushdown}{Boolean. Only load the required slice from the scan level. Don't -materialize sliced outputs (e.g. join.head(10)).} - -\item{common_subplan_elimination}{Boolean. Cache subtrees/file scans that are used by multiple -subtrees in the query plan.} - \item{streaming}{Boolean. Run parts of the query in a streaming fashion -(this is in an alpha state)} +(this is in an alpha state).} -\item{collect_in_background}{Boolean. Detach this query from R session. Computation will start -in background. Get a handle which later can be converted into the resulting DataFrame. Useful -in interactive mode to not lock R session.} +\item{collect_in_background}{Boolean. Detach this query from R session. +Computation will start in background. Get a handle which later can be converted +into the resulting DataFrame. Useful in interactive mode to not lock R session.} } \value{ A \code{DataFrame}