progress on docs

JuliaData · Dec 20, 2023 · 8807032 · 8807032
1 parent 9db9123
commit 8807032
Show file tree

Hide file tree

Showing 3 changed files with 206 additions and 2 deletions.
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -24,6 +24,7 @@ In addition, DataFramesMeta provides
 * `@byrow` for applying functions to each row of a data frame (only supported inside other macros).
 * `@passmissing` for propagating missing values inside row-wise DataFramesMeta.jl transformations.
 * `@astable` to create multiple columns within a single transformation.
+* `@when` to non-destructively work with a subset of observations (similar to Stata's `if`).
 * `@chain`, from [Chain.jl](https://github.com/jkrumbiegel/Chain.jl) for piping the above macros together, similar to [magrittr](https://cran.r-project.org/web/packages/magrittr/vignettes/magrittr.html)'s
   `%>%` in R. 
 
@@ -47,7 +48,133 @@ Use `passmissing`  to propagate `missing` values more easily. See `?passmissing`
 details. `passmissing` is defined in [Missings.jl](https://github.com/JuliaData/Missings.jl)
 but exported by DataFramesMeta for convenience. 
 
-# Provided macros
+# Provided macros
+
+p = graph_mean_outp = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", p = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+p = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+p = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+p = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+p = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+p = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+p = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+p = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+p = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+p = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+p = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+p = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+p = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+cp = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+p = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+op = graph_mean_outcomes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+mes_bins(
+  "dev_status", 
+  "income_pctile",  
+  "distance_bin", 
+  "hilly", 
+  "pop_weight_country", 
+  long_shapes)
+
 
 !!! note 
 

diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl
@@ -20,7 +20,7 @@ export @with,
        @rtransform, @rselect, @rtransform!, @rselect!,
        @distinct, @rdistinct, @distinct!, @rdistinct!,
        @eachrow, @eachrow!,
-       @byrow, @passmissing, @astable, @kwarg,
+       @byrow, @passmissing, @astable, @kwarg, @when,
        @based_on, @where # deprecated
 
 const DOLLAR = raw"$"

diff --git a/src/macros.jl b/src/macros.jl
@@ -685,6 +685,83 @@ macro with(d, body)
     esc(with_helper(d, body))
 end
 
+"""
+    @when(args...)
+
+Perform operations on a subset of `df`, but still
+return a data frame with the same number of rows as `df`. `@when` can be used
+with the `@transform` macros, `@select` macros, and `@with`.
+
+`@when` is not a "real" macro. It is only functional inside DataFramesMeta.jl macros.
+A motivating example:
+
+```
+@rtransform df begin
+    @when :a == 1
+    :y = :y - mean(:y)
+end
+```
+
+The above block generates the column `:y` which is de-meaned with respect to observations where
+`:a == 1`. If `:y` already exists in `df`, then new values over-write old values only
+when `:a == 1`. If `:y` does not already exist in `df`, then new values are written
+when `:a == 1`, and remaining values are filled with `missing`.
+
+Only one `@when` statement is allowed per transformation macro and it must be the
+first argument in the transformation.
+
+`@when` inherits `@byrow` and `@passmissing` from the transformation. As an example:
+
+```
+@transform df @byrow begin
+    @when :a == 1
+    ...
+end
+```
+
+In the above, the condition inside `@when` operates row-wise. However, `@byrow` and `@passmissing` can
+also be passed independently, such as `@byrow @when :a == 1`.
+
+Like `@subset`, `@when` drops rows where `missing` values are returned. Unlike `@subset`,
+there is currently no way to control this behavior.
+
+## Details
+
+`@when` operates by calling `subset` with the `view = true` keyword argument,
+followed by a `transform!` call. See `?subset` and `?transform!` for more details. Roughly,
+the expression
+
+```
+@transform df begin
+    @when :a .== 1
+    :y = 5
+end
+```
+
+translates to
+
+```
+df1 = @subset(copy(df), :a .== 1; view = true)
+df2 = @transform! df1 :y = 5
+parent(df2)
+```
+
+Unlike the other macro-flags, such as `@passmissing` and `@byrow`, `@when` cannot be
+used at the top-level. For example,
+```
+@transform df @byrow @when(:a == 1) begin
+    :x = 1
+    :y = 2
+end
+```
+is not supported.
+
+"""
+macro when(args...)
+    # `@when` is only a flag that the enclosing DataFramesMeta macro interprets;
+    # expanding it on its own is always a usage error, so report it under its
+    # own name.
+    throw(ArgumentError("@when only works inside DataFramesMeta macros."))
+end
+
+
 ASTABLE_RHS_ORDERBY_DOCS = """
 In operations, it is also allowed to use `AsTable(cols)` to work with
 multiple columns at once, where the columns are grouped together in a