From a8701c895d18bcc5d48cffeb6244171e88b04b0e Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Tue, 14 Sep 2021 12:52:11 -0400
Subject: [PATCH 01/27] initial attempt
---
src/DataFramesMeta.jl | 1 +
src/parsing_astable.jl | 70 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 71 insertions(+)
create mode 100644 src/parsing_astable.jl
diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl
index fec20a35..3730bf84 100644
--- a/src/DataFramesMeta.jl
+++ b/src/DataFramesMeta.jl
@@ -22,6 +22,7 @@ export @with,
const DOLLAR = raw"$"
include("parsing.jl")
+include("parsing_astable.jl")
include("macros.jl")
include("linqmacro.jl")
include("eachrow.jl")
diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl
new file mode 100644
index 00000000..e978688a
--- /dev/null
+++ b/src/parsing_astable.jl
@@ -0,0 +1,70 @@
+function conditionally_add_symbols!(inputs_to_function, lhs_assignments, col)
+ # if it's already been assigned at top-level,
+ # don't add it to the inputs
+ if haskey(lhs_assignments, col)
+ return lhs_assignments[col]
+ else
+ return addkey!(inputs_to_function, col)
+ end
+end
+
+replace_syms_astable!(inputs_to_function, lhs_assignments, x) = x
+replace_syms_astable!(inputs_to_function, lhs_assignments, q::QuoteNode) =
+ conditionally_add_symbols!(inputs_to_function, lhs_assignments, q)
+
+function replace_syms_astable!(inputs_to_function, lhs_assignments, e::Expr)
+ if onearg(e, :^)
+ return e.args[2]
+ end
+
+ col = get_column_expr(e)
+ if col !== nothing
+ return conditionally_add_symbols!(inputs_to_function, lhs_assignments, col)
+ elseif e.head == :.
+ return replace_dotted_astable!(inputs_to_function, lhs_assignments, e)
+ else
+ return mapexpr(x -> replace_syms_astable!(inputs_to_function, lhs_assignments, x), e)
+ end
+end
+
+protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e) = e
+protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e) =
+ replace_syms!(inputs_to_function, lhs_assignments, e)
+
+function replace_dotted_astable!(inputs_to_function, lhs_assignments, e)
+ x_new = replace_syms_astable!(inputs_to_function, lhs_assignments, e.args[1])
+ y_new = protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e.args[2])
+ Expr(:., x_new, y_new)
+end
+
+is_column_assigment(ex) = false
+function is_column_assigment(ex::Expr)
+ ex.head == :(=) && (get_column_expr(ex.args[1]) !== nothing)
+end
+
+function collect_top_level_column_assignments(ex)
+ inputs_to_function = Dict{Any, Symbol}()
+ lhs_assignments = Dict{Any, Symbol}()
+
+ ex = MacroTools.flatten(ex)
+ exprs = map(ex.args) do arg
+ @show arg
+ @show is_column_assigment(arg)
+ if is_column_assigment(arg)
+ lhs = arg.args[1]
+ rhs = arg.args[2]
+ new_ex = replace_syms_astable!(inputs_to_function, lhs_assignments, arg.args[2])
+ if haskey(inputs_to_function, lhs)
+ new_lhs = inputs_to_function[lhs]
+ else
+ new_lhs = addkey!(lhs_assignments, lhs)
+ end
+
+ Expr(:(=), new_lhs, new_ex)
+ else
+ replace_syms_astable!(inputs_to_function, lhs_assignments, arg)
+ end
+ end
+ cols_to_add = collect(keys(inputs_to_function))
+ new_ex = Expr(:block, exprs...)
+end
\ No newline at end of file
From 9b997a6b7a1a2a314a2978240b68a124abd3ace1 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Wed, 15 Sep 2021 06:13:27 -0400
Subject: [PATCH 02/27] finally working
---
src/parsing.jl | 13 ++++++++++---
src/parsing_astable.jl | 41 +++++++++++++++++++++++++++++++++--------
2 files changed, 43 insertions(+), 11 deletions(-)
diff --git a/src/parsing.jl b/src/parsing.jl
index 3a250138..c2a25252 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -91,7 +91,8 @@ is_macro_head(ex::Expr, name) = ex.head == :macrocall && ex.args[1] == Symbol(na
const BYROW_SYM = Symbol("@byrow")
const PASSMISSING_SYM = Symbol("@passmissing")
-const DEFAULT_FLAGS = (;BYROW_SYM => Ref(false), PASSMISSING_SYM => Ref(false))
+const ASTABLE_SYM = Symbol("@astable")
+const DEFAULT_FLAGS = (;BYROW_SYM => Ref(false), PASSMISSING_SYM => Ref(false), ASTABLE_SYM => Ref(false))
extract_macro_flags(ex, exprflags = deepcopy(DEFAULT_FLAGS)) = (ex, exprflags)
function extract_macro_flags(ex::Expr, exprflags = deepcopy(DEFAULT_FLAGS))
@@ -269,7 +270,13 @@ function fun_to_vec(ex::Expr;
return ex_col
end
- if no_dest
+ if final_flags[ASTABLE_SYM][]
+ src, fun = get_source_fun_astable(ex; exprflags = final_flags)
+
+ return :($src => $fun => AsTable)
+ end
+
+ if no_dest # subet and with
src, fun = get_source_fun(ex, exprflags = final_flags)
return quote
$src => $fun
@@ -359,7 +366,7 @@ function create_args_vector(arg; wrap_byrow::Bool=false)
outer_flags[BYROW_SYM][] = true
end
- if arg isa Expr && arg.head == :block
+ if arg isa Expr && arg.head == :block && !outer_flags[ASTABLE_SYM][]
x = MacroTools.rmlines(arg).args
else
x = Any[arg]
diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl
index e978688a..c6f8d890 100644
--- a/src/parsing_astable.jl
+++ b/src/parsing_astable.jl
@@ -28,7 +28,7 @@ function replace_syms_astable!(inputs_to_function, lhs_assignments, e::Expr)
end
protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e) = e
-protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e) =
+protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e::Expr) =
replace_syms!(inputs_to_function, lhs_assignments, e)
function replace_dotted_astable!(inputs_to_function, lhs_assignments, e)
@@ -42,20 +42,25 @@ function is_column_assigment(ex::Expr)
ex.head == :(=) && (get_column_expr(ex.args[1]) !== nothing)
end
-function collect_top_level_column_assignments(ex)
+# Taken from MacroTools.jl
+# No docstring so assumed untable
+block(ex) = isexpr(ex, :block) ? ex : :($ex;)
+
+function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS))
inputs_to_function = Dict{Any, Symbol}()
lhs_assignments = Dict{Any, Symbol}()
- ex = MacroTools.flatten(ex)
+ # Make sure all top-level assignments are
+ # in the args vector
+ ex = block(MacroTools.flatten(ex))
exprs = map(ex.args) do arg
- @show arg
- @show is_column_assigment(arg)
if is_column_assigment(arg)
- lhs = arg.args[1]
+ lhs = get_column_expr(arg.args[1])
rhs = arg.args[2]
new_ex = replace_syms_astable!(inputs_to_function, lhs_assignments, arg.args[2])
if haskey(inputs_to_function, lhs)
new_lhs = inputs_to_function[lhs]
+ lhs_assignments[lhs] = new_lhs
else
new_lhs = addkey!(lhs_assignments, lhs)
end
@@ -65,6 +70,26 @@ function collect_top_level_column_assignments(ex)
replace_syms_astable!(inputs_to_function, lhs_assignments, arg)
end
end
- cols_to_add = collect(keys(inputs_to_function))
- new_ex = Expr(:block, exprs...)
+ source = :(DataFramesMeta.make_source_concrete($(Expr(:vect, keys(inputs_to_function)...))))
+
+ inputargs = Expr(:tuple, values(inputs_to_function)...)
+ nt_iterator = (:(Symbol($k) => $v) for (k, v) in lhs_assignments)
+ nt_expr = Expr(:tuple, Expr(:parameters, nt_iterator...))
+ body = Expr(:block, Expr(:block, exprs...), nt_expr)
+
+ fun = quote
+ $inputargs -> begin
+ $body
+ end
+ end
+
+ # TODO: Add passmissing support by
+ # checking if any input arguments missing,
+ # and if-so, making a named tuple with
+ # missing values
+ if exprflags[BYROW_SYM][]
+ fun = :(ByRow($fun))
+ end
+
+ return source, fun
end
\ No newline at end of file
From d63956067a705dab4e1a5925ef51e0e2d6177b7a Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Wed, 15 Sep 2021 06:16:25 -0400
Subject: [PATCH 03/27] start adding tests
---
test/astable_flag.jl | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
create mode 100644 test/astable_flag.jl
diff --git a/test/astable_flag.jl b/test/astable_flag.jl
new file mode 100644
index 00000000..542bcfb5
--- /dev/null
+++ b/test/astable_flag.jl
@@ -0,0 +1,23 @@
+module TestAsTableFlag
+
+using Test
+using DataFrames
+using DataFramesMeta
+using Statistics
+
+const ≅ = isequal
+
+@testset "@astable macro flag" begin
+ df = DataFrame(a = 1, b = 2)
+
+ d = @rtransform df @astable begin
+ :x = 1
+ y = 50
+ :a = :x + y
+ end
+
+ @test d == DataFrame(a = 51, b = 2, x = 1)
+end
+
+
+end # module
\ No newline at end of file
From b77e8ca9161ae308a3092e1ce24d03184b7b646c Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 16 Sep 2021 04:58:56 -0400
Subject: [PATCH 04/27] more tests
---
test/astable_flag.jl | 111 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 106 insertions(+), 5 deletions(-)
diff --git a/test/astable_flag.jl b/test/astable_flag.jl
index 542bcfb5..fcdc88b3 100644
--- a/test/astable_flag.jl
+++ b/test/astable_flag.jl
@@ -1,22 +1,123 @@
module TestAsTableFlag
using Test
-using DataFrames
using DataFramesMeta
using Statistics
const ≅ = isequal
-@testset "@astable macro flag" begin
+@testset "@astable with just assignments" begin
df = DataFrame(a = 1, b = 2)
d = @rtransform df @astable begin
:x = 1
- y = 50
- :a = :x + y
+
+ nothing
+ end
+
+ @test d == DataFrame(a = 1, b = 2, x = 1)
+
+ d = @rselect df @astable begin
+ :x = 1
+ y = 100
+ nothing
+ end
+
+ @test d == DataFrame(x = 1)
+
+ d = @transform df @astable begin
+ :x = [5]
+ y = 100
+ nothing
+ end
+
+ @test d == DataFrame(a = 1, b = 2, x = 5)
+
+ d = @select df @astable begin
+ :x = [5]
+ y = 100
+ nothing
+ end
+
+ @test d == DataFrame(x = 5)
+end
+
+@testset "@astable with just assignments, mutating" begin
+ # After finalizing above testset
+end
+
+@testset "@astable with strings" begin
+ df = DataFrame(a = 1, b = 2)
+
+ x_str = "x"
+ d = @rtransform df @astable begin
+ $x_str = 1
+ y = 100
+ nothing
+ end
+
+ @test d == DataFrame(a = 1, b = 2, x = 1)
+
+ d = @rselect df @astable begin
+ $x_str = 1
+ y = 100
+ nothing
+ end
+
+ @test d == DataFrame(x = 1)
+
+ d = @transform df @astable begin
+ $x_str = [5]
+ y = 100
+ nothing
+ end
+
+ @test d == DataFrame(a = 1, b = 2, x = 5)
+
+ d = @select df @astable begin
+ $x_str = [5]
+ y = 100
+ nothing
+ end
+
+ @test d == DataFrame(x = 5)
+end
+
+@testset "Re-using variables" begin
+ df = DataFrame(a = 1, b = 2)
+
+ d = @rtransform df @astable begin
+ :x = 1
+ y = 5
+ :z = :x + y
+ end
+
+ @test d == DataFrame(a = 1, b = 2, x = 1, z = 6)
+
+ d = @rselect df @astable begin
+ :x = 1
+ y = 5
+ :z = :x + y
+ end
+
+ @test d == DataFrame(x = 1, z = 6)
+
+ x_str = "x"
+ d = @rtransform df @astable begin
+ $x_str = 1
+ y = 5
+ :z = $x_str + y
+ end
+
+ @test d == DataFrame(a = 1, b = 2, x = 1, z = 6)
+
+ d = @rselect df @astable begin
+ $x_str = 1
+ y = 5
+ :z = $x_str + y
end
- @test d == DataFrame(a = 51, b = 2, x = 1)
+ @test d == DataFrame(x = 1, z = 6)
end
From 3cdf0d5b4f1ddc79ba6cb992c2ba567b468fe617 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 16 Sep 2021 05:08:59 -0400
Subject: [PATCH 05/27] more tests
---
Project.toml | 3 ++-
src/DataFramesMeta.jl | 2 ++
src/parsing_astable.jl | 2 +-
3 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/Project.toml b/Project.toml
index fbb357d4..fbebc46e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,14 +6,15 @@ version = "0.9.1"
Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
+OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
[compat]
+Chain = "0.4"
DataFrames = "1"
MacroTools = "0.5"
Reexport = "0.2, 1"
julia = "1"
-Chain = "0.4"
[extras]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl
index 3730bf84..1c684842 100644
--- a/src/DataFramesMeta.jl
+++ b/src/DataFramesMeta.jl
@@ -4,6 +4,8 @@ using Reexport
using MacroTools
+using OrderedCollections: OrderedCollections
+
@reexport using DataFrames
@reexport using Chain
diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl
index c6f8d890..becbac3c 100644
--- a/src/parsing_astable.jl
+++ b/src/parsing_astable.jl
@@ -48,7 +48,7 @@ block(ex) = isexpr(ex, :block) ? ex : :($ex;)
function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS))
inputs_to_function = Dict{Any, Symbol}()
- lhs_assignments = Dict{Any, Symbol}()
+ lhs_assignments = OrderedCollections.OrderedDict{Any, Symbol}()
# Make sure all top-level assignments are
# in the args vector
From b878fbb68365045b3408d4a108d45cd8d2eee3f5 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 16 Sep 2021 05:33:08 -0400
Subject: [PATCH 06/27] add docstring
---
src/DataFramesMeta.jl | 2 +-
src/macros.jl | 114 ++++++++++++++++++++++++++++++++++--------
2 files changed, 94 insertions(+), 22 deletions(-)
diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl
index 1c684842..56914e42 100644
--- a/src/DataFramesMeta.jl
+++ b/src/DataFramesMeta.jl
@@ -18,7 +18,7 @@ export @with,
@transform, @select, @transform!, @select!,
@rtransform, @rselect, @rtransform!, @rselect!,
@eachrow, @eachrow!,
- @byrow, @passmissing,
+ @byrow, @passmissing, @astable,
@based_on, @where # deprecated
const DOLLAR = raw"$"
diff --git a/src/macros.jl b/src/macros.jl
index e954a371..21dfab1f 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -350,6 +350,99 @@ macro passmissing(args...)
throw(ArgumentError("@passmissing only works inside DataFramesMeta macros."))
end
+"""
+ astable(args...)
+
+Return a `NamedTuple` from a transformation inside DataFramesMeta.jl macros.
+
+`@astable` acts on a single block. It works through all top-level expressions
+and collects all such expressions of the form `:y = x`, i.e. assignments to a
+`Symbol`, which is a syntax error outside of the macro. At the end of the
+expression, all assignments are collected into a `NamedTuple` to be used
+with the `AsTable` destination in the DataFrames.jl transformation
+mini-language.
+
+Concretely, the expressions
+
+```
+df = DataFrame(a = 1)
+
+@rtransform df @astable begin
+ :x = 1
+ y = 50
+ :z = :x + y + :a
+end
+```
+
+becomes the pair
+
+```
+function f(a)
+ x_t = 1
+ y = 50
+ z_t = x_t + y + a
+
+ (; x = x_t, z = z_t)
+end
+
+transform(df, [:a] => f => AsTable)
+```
+
+`@astable` is useful when performing intermediate calculations
+yet store their results in new columns. For example, the following fails.
+
+```
+@rtransform df begin
+ :new_col_1 = :x + :y
+ :new_col_2 = :new_col_1 + :z
+end
+```
+
+This is because DataFrames.jl does not guarantee sequential evaluation of
+transformations. `@astable` solves this problem
+
+@rtransform df @astable begin
+ :new_col_1 = :x + :y
+ :new_col_2 = :new_col_1 + :z
+end
+
+### Examples
+
+```
+julia> df = DataFrame(a = [1, 2, 3], b = [4, 5, 6]);
+
+julia> d = @rtransform df @astable begin
+ :x = 1
+ y = 5
+ :z = :x + y
+ end
+3×4 DataFrame
+ Row │ a b x z
+ │ Int64 Int64 Int64 Int64
+─────┼────────────────────────────
+ 1 │ 1 4 1 6
+ 2 │ 2 5 1 6
+ 3 │ 3 6 1 6
+
+julia> df = DataFrame(a = [1, 1, 2, 2], b = [5, 6, 70, 80]);
+
+julia> @by df :a @astable begin
+ $(DOLLAR)"Mean of b" = mean(:b)
+ $(DOLLAR)"Standard deviation of b" = std(:b)
+ end
+2×3 DataFrame
+ Row │ a Mean of b Standard deviation of b
+ │ Int64 Float64 Float64
+─────┼───────────────────────────────────────────
+ 1 │ 1 5.5 0.707107
+ 2 │ 2 75.0 7.07107
+```
+
+"""
+macro astable(args...)
+ throw(ArgumentError("@astable only works inside DataFramesMeta macros."))
+end
+
##############################################################################
##
## @with
@@ -1546,17 +1639,6 @@ function combine_helper(x, args...; deprecation_warning = false)
exprs, outer_flags = create_args_vector(args...)
- fe = first(exprs)
- if length(exprs) == 1 &&
- get_column_expr(fe) === nothing &&
- !(fe.head == :(=) || fe.head == :kw)
-
- @warn "Returning a Table object from @by and @combine now requires `$(DOLLAR)AsTable` on the LHS."
-
- lhs = Expr(:$, :AsTable)
- exprs = ((:($lhs = $fe)),)
- end
-
t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
quote
@@ -1666,16 +1748,6 @@ end
function by_helper(x, what, args...)
# Only allow one argument when returning a Table object
exprs, outer_flags = create_args_vector(args...)
- fe = first(exprs)
- if length(exprs) == 1 &&
- get_column_expr(fe) === nothing &&
- !(fe.head == :(=) || fe.head == :kw)
-
- @warn "Returning a Table object from @by and @combine now requires `\$AsTable` on the LHS."
-
- lhs = Expr(:$, :AsTable)
- exprs = ((:($lhs = $fe)),)
- end
t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
From 2344a2e459d379d2994a8e905982a3eaa165068e Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 16 Sep 2021 05:53:42 -0400
Subject: [PATCH 07/27] tests pass
---
test/deprecated.jl | 3 ---
test/function_compilation.jl | 4 ++--
test/grouping.jl | 6 ------
test/runtests.jl | 1 +
4 files changed, 3 insertions(+), 11 deletions(-)
diff --git a/test/deprecated.jl b/test/deprecated.jl
index b76c8cbc..126ec441 100644
--- a/test/deprecated.jl
+++ b/test/deprecated.jl
@@ -42,7 +42,6 @@ const ≅ = isequal
@test @based_on(gd, n = first(Symbol.(:y, ^(:body)))).n == [:vbody, :ybody]
@test @based_on(gd, body = :i).body == df.i
@test @based_on(gd, transform = :i).transform == df.i
- @test @based_on(gd, (n1 = [first(:i)], n2 = [first(:y)])).n1 == [1, 4]
@test @based_on(gd, n = mean(cols(iq))).n == [2.0, 4.5]
@test @based_on(gd, n = mean(cols(iq)) + mean(cols(gq))).n == [3.0, 6.5]
@@ -51,7 +50,6 @@ const ≅ = isequal
@test @based_on(gd, n = first(Symbol.(cols(yq), ^(:body)))).n == [:vbody, :ybody]
@test @based_on(gd, body = cols(iq)).body == df.i
@test @based_on(gd, transform = cols(iq)).transform == df.i
- @test @based_on(gd, (n1 = [first(cols(iq))], n2 = [first(cols(yq))])).n1 == [1, 4]
@test @based_on(gd, n = mean(cols(ir))).n == [2.0, 4.5]
@test @based_on(gd, n = mean(cols(ir)) + mean(cols(gr))).n == [3.0, 6.5]
@@ -60,7 +58,6 @@ const ≅ = isequal
@test @based_on(gd, n = first(Symbol.(cols(yr), ^(:body)))).n == [:vbody, :ybody]
@test @based_on(gd, body = cols(ir)).body == df.i
@test @based_on(gd, transform = cols(ir)).transform == df.i
- @test @based_on(gd, (n1 = [first(cols(ir))], n2 = [first(cols(yr))])).n1 == [1, 4]
@test @based_on(gd, n = mean(cols("i")) + 0 * first(cols(:g))).n == [2.0, 4.5]
@test @based_on(gd, n = mean(cols(2)) + first(cols(1))).n == [3.0, 6.5]
diff --git a/test/function_compilation.jl b/test/function_compilation.jl
index 4c411f61..5921942d 100644
--- a/test/function_compilation.jl
+++ b/test/function_compilation.jl
@@ -154,9 +154,9 @@ using DataFramesMeta
gd = groupby(df, :a)
- @test @combine(gd, testnt(:b)) == DataFrame(a = [1], c = [2])
+ @test @combine(gd, cols(AsTable) = testnt(:b)) == DataFrame(a = [1], c = [2])
- fasttime = @timed @combine(gd, testnt(:b))
+ fasttime = @timed @combine(gd, cols(AsTable) = testnt(:b))
slowtime = @timed combine(gd, :b => (b -> testnt(b)) => AsTable)
(slowtime[2] > fasttime[2]) || @warn("Slow compilation")
diff --git a/test/grouping.jl b/test/grouping.jl
index a998c8a0..e4ea60b0 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -49,7 +49,6 @@ g = groupby(d, :x, sort=true)
@test @combine(gd, :n = first(Symbol.(:y, ^(:body)))).n == [:vbody, :ybody]
@test @combine(gd, :body = :i).body == df.i
@test @combine(gd, :transform = :i).transform == df.i
- @test @combine(gd, (n1 = [first(:i)], n2 = [first(:y)])).n1 == [1, 4]
@test @combine(gd, :n = mean($iq)).n == [2.0, 4.5]
@test @combine(gd, :n = mean($iq) + mean($gq)).n == [3.0, 6.5]
@@ -59,7 +58,6 @@ g = groupby(d, :x, sort=true)
@test @combine(gd, $:n = mean($:i)).n == [2.0, 4.5]
@test @combine(gd, :body = $iq).body == df.i
@test @combine(gd, :transform = $iq).transform == df.i
- @test @combine(gd, (n1 = [first($iq)], n2 = [first($yq)])).n1 == [1, 4]
@test @combine(gd, :n = mean($ir)).n == [2.0, 4.5]
@test @combine(gd, :n = mean($ir) + mean($gr)).n == [3.0, 6.5]
@@ -68,7 +66,6 @@ g = groupby(d, :x, sort=true)
@test @combine(gd, :n = first(Symbol.($yr, ^(:body)))).n == [:vbody, :ybody]
@test @combine(gd, :body = $ir).body == df.i
@test @combine(gd, :transform = $ir).transform == df.i
- @test @combine(gd, (n1 = [first($ir)], n2 = [first($yr)])).n1 == [1, 4]
@test @combine(gd, :n = mean($"i") + 0 * first($:g)).n == [2.0, 4.5]
@test @combine(gd, :n = mean($2) + first($1)).n == [3.0, 6.5]
@@ -192,7 +189,6 @@ end
@test @by(df, :g, :n = first(Symbol.(:y, ^(:body)))).n == [:vbody, :ybody]
@test @by(df, :g, :body = :i).body == df.i
@test @by(df, :g, :transform = :i).transform == df.i
- @test @by(df, :g, (n1 = [first(:i)], n2 = [first(:y)])).n1 == [1, 4]
@test @by(df, :g, :n = mean($iq)).n == [2.0, 4.5]
@test @by(df, :g, :n = mean($iq) + mean($gq)).n == [3.0, 6.5]
@@ -202,7 +198,6 @@ end
@test @by(df, :g, $:n = mean($:i)).n == [2.0, 4.5]
@test @by(df, :g, :body = $iq).body == df.i
@test @by(df, :g, :transform = $iq).transform == df.i
- @test @by(df, :g, (n1 = [first($iq)], n2 = [first($yq)])).n1 == [1, 4]
@test @by(df, "g", :n = mean($ir)).n == [2.0, 4.5]
@test @by(df, "g", :n = mean($ir) + mean($gr)).n == [3.0, 6.5]
@@ -211,7 +206,6 @@ end
@test @by(df, "g", :n = first(Symbol.($yr, ^(:body)))).n == [:vbody, :ybody]
@test @by(df, "g", :body = $ir).body == df.i
@test @by(df, "g", :transform = $ir).transform == df.i
- @test @by(df, "g", (n1 = [first($ir)], n2 = [first($yr)])).n1 == [1, 4]
@test @by(df, "g", :n = mean($"i") + 0 * first($:g)).n == [2.0, 4.5]
@test @by(df, "g", :n = mean($2) + first($1)).n == [3.0, 6.5]
diff --git a/test/runtests.jl b/test/runtests.jl
index 3218556f..5ab6f363 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -12,6 +12,7 @@ my_tests = ["dataframes.jl",
"deprecated.jl",
"byrow.jl",
"astable.jl",
+ "astable_flag.jl",
"passmissing.jl"]
println("Running tests:")
From 6557def332c47069753fe84e2871ae3d558d571b Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 16 Sep 2021 06:19:07 -0400
Subject: [PATCH 08/27] add ByRow in docstring
---
src/macros.jl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/macros.jl b/src/macros.jl
index 21dfab1f..d876cae2 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -385,7 +385,7 @@ function f(a)
(; x = x_t, z = z_t)
end
-transform(df, [:a] => f => AsTable)
+transform(df, [:a] => ByRow(f) => AsTable)
```
`@astable` is useful when performing intermediate calculations
From 6002defe9c6d284f78155dd14506f93e1c136ce4 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Tue, 21 Sep 2021 05:33:32 -0400
Subject: [PATCH 09/27] add type annotation
---
src/macros.jl | 15 ++++++++-------
src/parsing_astable.jl | 23 +++++++++++++++--------
2 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/src/macros.jl b/src/macros.jl
index d876cae2..074f8571 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -427,15 +427,16 @@ julia> d = @rtransform df @astable begin
julia> df = DataFrame(a = [1, 1, 2, 2], b = [5, 6, 70, 80]);
julia> @by df :a @astable begin
- $(DOLLAR)"Mean of b" = mean(:b)
- $(DOLLAR)"Standard deviation of b" = std(:b)
+ ex = extrema(:b)
+ :min_b = first(ex)
+ :max_b = last(ex)
end
2×3 DataFrame
- Row │ a Mean of b Standard deviation of b
- │ Int64 Float64 Float64
-─────┼───────────────────────────────────────────
- 1 │ 1 5.5 0.707107
- 2 │ 2 75.0 7.07107
+ Row │ a min_b max_b
+ │ Int64 Int64 Int64
+─────┼─────────────────────
+ 1 │ 1 5 6
+ 2 │ 2 70 80
```
"""
diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl
index becbac3c..09d46995 100644
--- a/src/parsing_astable.jl
+++ b/src/parsing_astable.jl
@@ -1,4 +1,5 @@
-function conditionally_add_symbols!(inputs_to_function, lhs_assignments, col)
+function conditionally_add_symbols!(inputs_to_function::AbstractDict,
+ lhs_assignments::OrderedCollections.OrderedDict, col)
# if it's already been assigned at top-level,
# don't add it to the inputs
if haskey(lhs_assignments, col)
@@ -8,11 +9,14 @@ function conditionally_add_symbols!(inputs_to_function, lhs_assignments, col)
end
end
-replace_syms_astable!(inputs_to_function, lhs_assignments, x) = x
-replace_syms_astable!(inputs_to_function, lhs_assignments, q::QuoteNode) =
+replace_syms_astable!(inputs_to_function::AbstractDict,
+ lhs_assignments::OrderedCollections.OrderedDict, x) = x
+replace_syms_astable!(inputs_to_function::AbstractDict,
+ lhs_assignments::OrderedCollections.OrderedDict, q::QuoteNode) =
conditionally_add_symbols!(inputs_to_function, lhs_assignments, q)
-function replace_syms_astable!(inputs_to_function, lhs_assignments, e::Expr)
+function replace_syms_astable!(inputs_to_function::AbstractDict,
+ lhs_assignments::OrderedCollections.OrderedDict, e::Expr)
if onearg(e, :^)
return e.args[2]
end
@@ -27,11 +31,14 @@ function replace_syms_astable!(inputs_to_function, lhs_assignments, e::Expr)
end
end
-protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e) = e
-protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e::Expr) =
+protect_replace_syms_astable!(inputs_to_function::AbstractDict,
+ lhs_assignments::OrderedCollections.OrderedDict, e) = e
+protect_replace_syms_astable!(inputs_to_function::AbstractDict,
+ lhs_assignments::OrderedCollections.OrderedDict, e::Expr) =
replace_syms!(inputs_to_function, lhs_assignments, e)
-function replace_dotted_astable!(inputs_to_function, lhs_assignments, e)
+function replace_dotted_astable!(inputs_to_function::AbstractDict,
+ lhs_assignments::OrderedCollections.OrderedDict, e)
x_new = replace_syms_astable!(inputs_to_function, lhs_assignments, e.args[1])
y_new = protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e.args[2])
Expr(:., x_new, y_new)
@@ -43,7 +50,7 @@ function is_column_assigment(ex::Expr)
end
# Taken from MacroTools.jl
-# No docstring so assumed untable
+# No docstring so assumed unstable
block(ex) = isexpr(ex, :block) ? ex : :($ex;)
function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS))
From 0eca67d1ba2cfd703f101a3f198c64506d7a2ec0 Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Tue, 21 Sep 2021 09:38:20 -0400
Subject: [PATCH 10/27] Apply suggestions from code review
Co-authored-by: Milan Bouchet-Valat
---
src/macros.jl | 4 ++--
src/parsing.jl | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/macros.jl b/src/macros.jl
index d876cae2..3a1bc282 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -356,7 +356,7 @@ end
Return a `NamedTuple` from a transformation inside DataFramesMeta.jl macros.
`@astable` acts on a single block. It works through all top-level expressions
-and collects all such expressions of the form `:y = x`, i.e. assignments to a
+and collects all such expressions of the form `:y = ...`, i.e. assignments to a
`Symbol`, which is a syntax error outside of the macro. At the end of the
expression, all assignments are collected into a `NamedTuple` to be used
with the `AsTable` destination in the DataFrames.jl transformation
@@ -374,7 +374,7 @@ df = DataFrame(a = 1)
end
```
-becomes the pair
+become the pair
```
function f(a)
diff --git a/src/parsing.jl b/src/parsing.jl
index c2a25252..a4ced7fd 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -276,7 +276,7 @@ function fun_to_vec(ex::Expr;
return :($src => $fun => AsTable)
end
- if no_dest # subet and with
+ if no_dest # subset and with
src, fun = get_source_fun(ex, exprflags = final_flags)
return quote
$src => $fun
From 08a1c4bcc9e9655c0cf18141acd4b6d29397834e Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Tue, 21 Sep 2021 05:42:54 -0400
Subject: [PATCH 11/27] better docs
---
src/macros.jl | 22 +++++++++++++++++++---
src/parsing_astable.jl | 9 ++++-----
2 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/src/macros.jl b/src/macros.jl
index 82f56c29..21d3fca8 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -353,7 +353,7 @@ end
"""
astable(args...)
-Return a `NamedTuple` from a transformation inside DataFramesMeta.jl macros.
+Return a `NamedTuple` from a single transformation inside DataFramesMeta.jl macros.
`@astable` acts on a single block. It works through all top-level expressions
and collects all such expressions of the form `:y = ...`, i.e. assignments to a
@@ -388,8 +388,24 @@ end
transform(df, [:a] => ByRow(f) => AsTable)
```
-`@astable` is useful when performing intermediate calculations
-yet store their results in new columns. For example, the following fails.
+`@astable` has two major advantages at the cost of increasing complexity.
+First, `@astable` makes it easy to create multiple columns from a single
+transformation, which share a scope. For example, `@astable` allows
+for the following
+
+```
+@transform df @astable begin
+ m = mean(:x)
+ :x_demeaned = :x .- m
+ :x2_demeaned = :x2 .- m
+end
+```
+
+The creation of `:x_demeaned` and `:x2_demeaned` both share the variable `m`,
+which does not need to be calculated twice.
+
+Second, `@astable` is useful when performing intermediate calculations
+and storing their results in new columns. For example, the following fails.
```
@rtransform df begin
diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl
index 09d46995..6149dc9f 100644
--- a/src/parsing_astable.jl
+++ b/src/parsing_astable.jl
@@ -2,10 +2,8 @@ function conditionally_add_symbols!(inputs_to_function::AbstractDict,
lhs_assignments::OrderedCollections.OrderedDict, col)
# if it's already been assigned at top-level,
# don't add it to the inputs
- if haskey(lhs_assignments, col)
- return lhs_assignments[col]
- else
- return addkey!(inputs_to_function, col)
+ return get!(lhs_assignments, col) do
+ gensym()
end
end
@@ -69,7 +67,8 @@ function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS))
new_lhs = inputs_to_function[lhs]
lhs_assignments[lhs] = new_lhs
else
- new_lhs = addkey!(lhs_assignments, lhs)
+ new_lhs = gensym()
+ lhs_assignments[lhs] = new_lhs
end
Expr(:(=), new_lhs, new_ex)
From 581b2cfcf2762782de0797fd6674d8185b8b4d37 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Tue, 21 Sep 2021 05:43:42 -0400
Subject: [PATCH 12/27] more docs fixes
---
src/macros.jl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/macros.jl b/src/macros.jl
index 21d3fca8..41f2bb46 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -357,7 +357,7 @@ Return a `NamedTuple` from a single transformation inside DataFramesMeta.jl macr
`@astable` acts on a single block. It works through all top-level expressions
and collects all such expressions of the form `:y = ...`, i.e. assignments to a
-`Symbol`, which is a syntax error outside of the macro. At the end of the
+`Symbol`, which is a syntax error outside of DataFramesMeta.jl macros. At the end of the
expression, all assignments are collected into a `NamedTuple` to be used
with the `AsTable` destination in the DataFrames.jl transformation
mini-language.
From 7cc8947b38aabaeda9f6be6615fadf7f2285036e Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Tue, 21 Sep 2021 06:58:53 -0400
Subject: [PATCH 13/27] update index.md
---
docs/src/index.md | 31 +++++++++++++++++++++++++++++--
src/macros.jl | 4 ++++
src/parsing_astable.jl | 9 +++++----
3 files changed, 38 insertions(+), 6 deletions(-)
diff --git a/docs/src/index.md b/docs/src/index.md
index b110d01e..e643c638 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -22,6 +22,7 @@ In addition, DataFramesMeta provides
convenient syntax.
* `@byrow` for applying functions to each row of a data frame (only supported inside other macros).
* `@passmissing` for propagating missing values inside row-wise DataFramesMeta.jl transformations.
+* `@astable` to create multiple columns within a single transformation.
* `@chain`, from [Chain.jl](https://github.com/jkrumbiegel/Chain.jl) for piping the above macros together, similar to [magrittr](https://cran.r-project.org/web/packages/magrittr/vignettes/magrittr.html)'s
`%>%` in R.
@@ -396,11 +397,37 @@ julia> @rtransform df @passmissing x = parse(Int, :x_str)
3 │ missing missing
```
+## Creating multiple columns at once with `@astable`
+
+Often new variables may depend on the same intermediate calculations. `@astable` makes it easy to create multiple
+new variables in the same operation, yet have them share
+information.
+
+In a single block, all assignments of the form `:y = f(:x)`
+or `$y = f(:x)` at the top-level are generate new columns.
+
+```
+julia> df = DataFrame(a = [1, 2, 3], b = [400, 500, 600]);
+
+julia> @transform df @astable begin
+ ex = extrema(:b)
+ :b_first = :b .- first(ex)
+ :b_last = :b .- last(ex)
+ end
+3×4 DataFrame
+ Row │ a b b_first b_last
+ │ Int64 Int64 Int64 Int64
+─────┼───────────────────────────────
+ 1 │ 1 400 0 -200
+ 2 │ 2 500 100 -100
+ 3 │ 3 600 200 0
+```
+
+
## [Working with column names programmatically with `$`](@id dollar)
DataFramesMeta provides the special syntax `$` for referring to
-columns in a data frame via a `Symbol`, string, or column position as either
-a literal or a variable.
+columns in a data frame via a `Symbol`, string, or column position as either a literal or a variable.
```julia
df = DataFrame(A = 1:3, B = [2, 1, 2])
diff --git a/src/macros.jl b/src/macros.jl
index 41f2bb46..9780f620 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -422,6 +422,10 @@ transformations. `@astable` solves this problem
:new_col_2 = :new_col_1 + :z
end
+Column assignment in `@astable` follows the same rules as
+column assignment more generally. Construct a new column
+from a string by escaping it with `$DOLLAR`.
+
### Examples
```
diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl
index 6149dc9f..09d46995 100644
--- a/src/parsing_astable.jl
+++ b/src/parsing_astable.jl
@@ -2,8 +2,10 @@ function conditionally_add_symbols!(inputs_to_function::AbstractDict,
lhs_assignments::OrderedCollections.OrderedDict, col)
# if it's already been assigned at top-level,
# don't add it to the inputs
- return get!(lhs_assignments, col) do
- gensym()
+ if haskey(lhs_assignments, col)
+ return lhs_assignments[col]
+ else
+ return addkey!(inputs_to_function, col)
end
end
@@ -67,8 +69,7 @@ function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS))
new_lhs = inputs_to_function[lhs]
lhs_assignments[lhs] = new_lhs
else
- new_lhs = gensym()
- lhs_assignments[lhs] = new_lhs
+ new_lhs = addkey!(lhs_assignments, lhs)
end
Expr(:(=), new_lhs, new_ex)
From ab9bae47ae3a17d7070724f5c2a8f5f248f595f5 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Wed, 22 Sep 2021 06:29:25 -0400
Subject: [PATCH 14/27] clean named tuple creation
---
docs/src/index.md | 3 ++-
src/parsing_astable.jl | 8 +++++++-
2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/docs/src/index.md b/docs/src/index.md
index e643c638..69d6db28 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -404,7 +404,8 @@ new variables in the same operation, yet have them share
information.
In a single block, all assignments of the form `:y = f(:x)`
-or `$y = f(:x)` at the top-level are generate new columns.
+or `$y = f(:x)` at the top-level generate new columns. In the 2nd example, `y`
+must be a string, `Symbol`.
```
julia> df = DataFrame(a = [1, 2, 3], b = [400, 500, 600]);
diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl
index 09d46995..d6f62035 100644
--- a/src/parsing_astable.jl
+++ b/src/parsing_astable.jl
@@ -53,6 +53,12 @@ end
# No docstring so assumed unstable
block(ex) = isexpr(ex, :block) ? ex : :($ex;)
+sym_or_str_to_sym(x::Union{AbstractString, Symbol}) = Symbol(x)
+function sym_or_str_to_sym(x)
+ e = "New columns created inside @astable must be Symbols or AbstractStrings"
+ throw(ArgumentError(e))
+end
+
function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS))
inputs_to_function = Dict{Any, Symbol}()
lhs_assignments = OrderedCollections.OrderedDict{Any, Symbol}()
@@ -80,7 +86,7 @@ function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS))
source = :(DataFramesMeta.make_source_concrete($(Expr(:vect, keys(inputs_to_function)...))))
inputargs = Expr(:tuple, values(inputs_to_function)...)
- nt_iterator = (:(Symbol($k) => $v) for (k, v) in lhs_assignments)
+ nt_iterator = (:(DataFramesMeta.sym_or_str_to_sym($k) => $v) for (k, v) in lhs_assignments)
nt_expr = Expr(:tuple, Expr(:parameters, nt_iterator...))
body = Expr(:block, Expr(:block, exprs...), nt_expr)
From 495f08ac763e41f7fc8e2ee18761e94cf0b54635 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Wed, 22 Sep 2021 07:19:53 -0400
Subject: [PATCH 15/27] add example with string
---
docs/src/index.md | 2 +-
src/macros.jl | 18 ++++++++++++++++--
test/astable_flag.jl | 7 +++++++
3 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/docs/src/index.md b/docs/src/index.md
index 69d6db28..6e6dda6e 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -405,7 +405,7 @@ information.
In a single block, all assignments of the form `:y = f(:x)`
or `$y = f(:x)` at the top-level generate new columns. In the 2nd example, `y`
-must be a string, `Symbol`.
+must be a string or `Symbol`.
```
julia> df = DataFrame(a = [1, 2, 3], b = [400, 500, 600]);
diff --git a/src/macros.jl b/src/macros.jl
index 9780f620..56a68291 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -351,7 +351,7 @@ macro passmissing(args...)
end
"""
- astable(args...)
+ @astable(args...)
Return a `NamedTuple` from a single transformation inside DataFramesMeta.jl macros.
@@ -391,7 +391,7 @@ transform(df, [:a] => ByRow(f) => AsTable)
`@astable` has two major advantages at the cost of increasing complexity.
First, `@astable` makes it easy to create multiple columns from a single
transformation, which share a scope. For example, `@astable` allows
-for the following
+for the following (where `:x` and `:x_2` exist in the `DataFrame` already).
```
@transform df @astable begin
@@ -457,6 +457,20 @@ julia> @by df :a @astable begin
─────┼─────────────────────
1 │ 1 5 6
2 │ 2 70 80
+
+julia> @rtransform df @astable begin
+ f_a = first(:a)
+ $(DOLLAR)new_col = :a + :b + f_a
+ :y = :a * :b
+ end
+4×4 DataFrame
+ Row │ a b New Column y
+ │ Int64 Int64 Int64 Int64
+─────┼─────────────────────────────────
+ 1 │ 1 5 7 5
+ 2 │ 1 6 8 6
+ 3 │ 2 70 74 140
+ 4 │ 2 80 84 160
```
"""
diff --git a/test/astable_flag.jl b/test/astable_flag.jl
index fcdc88b3..1ea1d2b4 100644
--- a/test/astable_flag.jl
+++ b/test/astable_flag.jl
@@ -121,4 +121,11 @@ end
end
+
+@testset "bad assignments" begin
+ @eval df = DataFrame(y = 1)
+ @test_throws ArgumentError @eval @transform df @astable cols(1) = :y
+ @test_throws ArgumentError @eval @transform df @astable cols(AsTable) = :y
+end
+
end # module
\ No newline at end of file
From 01cb5e7b884eeb283e8c816c8a822277f566c9d8 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Wed, 22 Sep 2021 10:43:48 -0400
Subject: [PATCH 16/27] grouping tests
---
src/macros.jl | 10 +++++++---
test/astable_flag.jl | 38 ++++++++++++++++++++++++++++++++++++++
2 files changed, 45 insertions(+), 3 deletions(-)
diff --git a/src/macros.jl b/src/macros.jl
index 56a68291..5462032f 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -284,7 +284,7 @@ end
"""
- passmissing(args...)
+ @passmissing(args...)
Propograte missing values inside DataFramesMeta.jl macros.
@@ -353,7 +353,8 @@ end
"""
@astable(args...)
-Return a `NamedTuple` from a single transformation inside DataFramesMeta.jl macros.
+Return a `NamedTuple` from a single transformation inside the DataFramesMeta.jl
+macros, `@select`, `@transform`, and their mutating and row-wise equivalents.
`@astable` acts on a single block. It works through all top-level expressions
and collects all such expressions of the form `:y = ...`, i.e. assignments to a
@@ -424,7 +425,10 @@ end
Column assignment in `@astable` follows the same rules as
column assignment more generally. Construct a new column
-from a string by escaping it with `$DOLLAR`.
+from a string by escaping it with `$DOLLAR`, which can be a
+`Symbol` or an `AbstractString`. References to existing
+columns may be a `Symbol`, `AbstractString`, or an
+integer.
### Examples
diff --git a/test/astable_flag.jl b/test/astable_flag.jl
index 1ea1d2b4..1ca9b6fd 100644
--- a/test/astable_flag.jl
+++ b/test/astable_flag.jl
@@ -120,6 +120,44 @@ end
@test d == DataFrame(x = 1, z = 6)
end
+@testset "grouping astable flag" begin
+ df = DataFrame(a = [1, 1, 2, 2], b = [5, 6, 7, 8])
+
+ gd = groupby(df, :a)
+
+ d = @combine gd @astable begin
+ ex = extrema(:b)
+ :b_min = ex[1]
+ :b_max = ex[2]
+ end
+
+ @test sort(d.b_min) == [5, 7]
+
+ d = @combine gd @astable begin
+ ex = extrema(:b)
+ $"b_min" = ex[1]
+ $"b_max" = ex[2]
+ end
+
+ @test sort(d.b_min) == [5, 7]
+
+ d = @by df :a @astable begin
+ ex = extrema(:b)
+ :b_min = ex[1]
+ :b_max = ex[2]
+ end
+
+ @test sort(d.b_min) == [5, 7]
+
+ d = @by df :a @astable begin
+ ex = extrema(:b)
+ $"b_min" = ex[1]
+ $"b_max" = ex[2]
+ end
+
+ @test sort(d.b_min) == [5, 7]
+end
+
@testset "bad assignments" begin
From 01fb3b72f5d390cc1d36445549e9999f59baf696 Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Wed, 22 Sep 2021 16:23:13 -0400
Subject: [PATCH 17/27] Update src/macros.jl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Bogumił Kamiński
---
src/macros.jl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/macros.jl b/src/macros.jl
index 5462032f..02f86211 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -392,7 +392,7 @@ transform(df, [:a] => ByRow(f) => AsTable)
`@astable` has two major advantages at the cost of increasing complexity.
First, `@astable` makes it easy to create multiple columns from a single
transformation, which share a scope. For example, `@astable` allows
-for the following (where `:x` and `:x_2` exist in the `DataFrame` already).
+for the following (where `:x` and `:x_2` exist in the data frame already).
```
@transform df @astable begin
From 915191c20d4de25b7efcf1deca030bb1fd7e8372 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 23 Sep 2021 06:08:40 -0400
Subject: [PATCH 18/27] changes
---
src/macros.jl | 34 +++++++++++++++++++++++-----------
1 file changed, 23 insertions(+), 11 deletions(-)
diff --git a/src/macros.jl b/src/macros.jl
index 5462032f..cbee5d3c 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -357,11 +357,11 @@ Return a `NamedTuple` from a single transformation inside the DataFramesMeta.jl
macros, `@select`, `@transform`, and their mutating and row-wise equivalents.
`@astable` acts on a single block. It works through all top-level expressions
-and collects all such expressions of the form `:y = ...`, i.e. assignments to a
-`Symbol`, which is a syntax error outside of DataFramesMeta.jl macros. At the end of the
-expression, all assignments are collected into a `NamedTuple` to be used
-with the `AsTable` destination in the DataFrames.jl transformation
-mini-language.
+and collects all such expressions of the form `:y = ...` or `$y = ...`, i.e. assignments to a
+`Symbol` or an escaped column identifier, which is a syntax error outside of
+DataFramesMeta.jl macros. At the end of the expression, all assignments are collected
+into a `NamedTuple` to be used with the `AsTable` destination in the DataFrames.jl
+transformation mini-language.
Concretely, the expressions
@@ -423,12 +423,22 @@ transformations. `@astable` solves this problem
:new_col_2 = :new_col_1 + :z
end
-Column assignment in `@astable` follows the same rules as
-column assignment more generally. Construct a new column
-from a string by escaping it with `$DOLLAR`, which can be a
-`Symbol` or an `AbstractString`. References to existing
-columns may be a `Symbol`, `AbstractString`, or an
-integer.
+Column assignment in `@astable` follows similar rules as
+column assignment in other DataFramesMeta.jl macros. The left-
+-hand-side of a column assignment can be either a `Symbol` or any
+expression which evaluates to a `Symbol` or `AbstractString`. For example
+`:y = ...`, and `$y = ...` are both valid ways of assigning a new column.
+However unlike other DataFramesMeta.jl macros, multi-column assignments via
+`AsTable` are disallowed. The following will fail.
+
+```
+@transform df @astable begin
+ $AsTable = :x
+end
+```
+
+References to existing columns also follow the same
+rules as other DataFramesMeta.jl macros.
### Examples
@@ -462,6 +472,8 @@ julia> @by df :a @astable begin
1 │ 1 5 6
2 │ 2 70 80
+julia> new_col = "New Column";
+
julia> @rtransform df @astable begin
f_a = first(:a)
$(DOLLAR)new_col = :a + :b + f_a
From 2ce4d9ed803398746b58887c083193d04eb6fe0b Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 23 Sep 2021 06:17:22 -0400
Subject: [PATCH 19/27] fix some errors
---
src/macros.jl | 7 +++----
src/parsing_astable.jl | 1 +
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/macros.jl b/src/macros.jl
index e38c24d9..6198349c 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -282,11 +282,10 @@ macro byrow(args...)
throw(ArgumentError("@byrow is deprecated outside of DataFramesMeta macros."))
end
-
"""
@passmissing(args...)
-Propograte missing values inside DataFramesMeta.jl macros.
+Propagrate missing values inside DataFramesMeta.jl macros.
`@passmissing` is not a "real" Julia macro but rather serves as a "flag"
@@ -357,7 +356,7 @@ Return a `NamedTuple` from a single transformation inside the DataFramesMeta.jl
macros, `@select`, `@transform`, and their mutating and row-wise equivalents.
`@astable` acts on a single block. It works through all top-level expressions
-and collects all such expressions of the form `:y = ...` or `$y = ...`, i.e. assignments to a
+and collects all such expressions of the form `:y = ...` or `$(DOLLAR)y = ...`, i.e. assignments to a
`Symbol` or an escaped column identifier, which is a syntax error outside of
DataFramesMeta.jl macros. At the end of the expression, all assignments are collected
into a `NamedTuple` to be used with the `AsTable` destination in the DataFrames.jl
@@ -427,7 +426,7 @@ Column assignment in `@astable` follows similar rules as
column assignment in other DataFramesMeta.jl macros. The left-
-hand-side of a column assignment can be either a `Symbol` or any
expression which evaluates to a `Symbol` or `AbstractString`. For example
-`:y = ...`, and `$y = ...` are both valid ways of assigning a new column.
+`:y = ...`, and `$(DOLLAR)y = ...` are both valid ways of assigning a new column.
However unlike other DataFramesMeta.jl macros, multi-column assignments via
`AsTable` are disallowed. The following will fail.
diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl
index d6f62035..d78138d4 100644
--- a/src/parsing_astable.jl
+++ b/src/parsing_astable.jl
@@ -88,6 +88,7 @@ function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS))
inputargs = Expr(:tuple, values(inputs_to_function)...)
nt_iterator = (:(DataFramesMeta.sym_or_str_to_sym($k) => $v) for (k, v) in lhs_assignments)
nt_expr = Expr(:tuple, Expr(:parameters, nt_iterator...))
+
body = Expr(:block, Expr(:block, exprs...), nt_expr)
fun = quote
From 713eaf08608afcd6dccc9228a7a0c53defa9ed93 Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Thu, 23 Sep 2021 10:18:05 -0400
Subject: [PATCH 20/27] Update src/parsing_astable.jl
Co-authored-by: Milan Bouchet-Valat
---
src/parsing_astable.jl | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl
index d78138d4..16987d69 100644
--- a/src/parsing_astable.jl
+++ b/src/parsing_astable.jl
@@ -54,10 +54,8 @@ end
block(ex) = isexpr(ex, :block) ? ex : :($ex;)
sym_or_str_to_sym(x::Union{AbstractString, Symbol}) = Symbol(x)
-function sym_or_str_to_sym(x)
- e = "New columns created inside @astable must be Symbols or AbstractStrings"
- throw(ArgumentError(e))
-end
+sym_or_str_to_sym(x) =
+ throw(ArgumentError("New columns created inside @astable must be Symbols or AbstractStrings"))
function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS))
inputs_to_function = Dict{Any, Symbol}()
From 4e01c4ac92889bd35b823bf145a4c724dd8806e3 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 23 Sep 2021 06:30:24 -0400
Subject: [PATCH 21/27] add snippet to transform, select, combine, by
---
src/macros.jl | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/src/macros.jl b/src/macros.jl
index 6198349c..41e6b728 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -349,6 +349,12 @@ macro passmissing(args...)
throw(ArgumentError("@passmissing only works inside DataFramesMeta macros."))
end
+global astable_docstring_snippet = """
+ Transformations can also use the macro-flag `@astable` for creating multiple
+ new columns at once and letting transformations share the same name-space.
+ See `? @astable` for more details.
+ """
+
"""
@astable(args...)
@@ -1240,6 +1246,8 @@ transformations by row, `@transform` allows `@byrow` at the
beginning of a block of transformations (i.e. `@byrow begin... end`).
All transformations in the block will operate by row.
+$astable_docstring_snippet
+
### Examples
```jldoctest
@@ -1376,6 +1384,8 @@ transform!ations by row, `@transform!` allows `@byrow` at the
beginning of a block of transform!ations (i.e. `@byrow begin... end`).
All transform!ations in the block will operate by row.
+$astable_docstring_snippet
+
### Examples
```jldoctest
@@ -1488,6 +1498,8 @@ transformations by row, `@select` allows `@byrow` at the
beginning of a block of selectations (i.e. `@byrow begin... end`).
All transformations in the block will operate by row.
+$astable_docstring_snippet
+
### Examples
```jldoctest
@@ -1608,6 +1620,8 @@ transformations by row, `@select!` allows `@byrow` at the
beginning of a block of select!ations (i.e. `@byrow begin... end`).
All transformations in the block will operate by row.
+$astable_docstring_snippet
+
### Examples
```jldoctest
@@ -1724,6 +1738,8 @@ and
@combine(df, :mx = mean(:x), :sx = std(:x))
```
+$astable_docstring_snippet
+
### Examples
```julia
@@ -1840,6 +1856,8 @@ and
@by(df, :g, mx = mean(:x), sx = std(:x))
```
+$astable_docstring_snippet
+
### Examples
```julia
From 57b40512c519d893d717c68a2d5c6809491cee25 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 23 Sep 2021 06:34:29 -0400
Subject: [PATCH 22/27] add macro check
---
src/parsing.jl | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/parsing.jl b/src/parsing.jl
index a4ced7fd..a8a263f0 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -127,6 +127,9 @@ function check_macro_flags_consistency(exprflags)
if !exprflags[BYROW_SYM][]
s = "The `@passmissing` flag is currently only allowed with the `@byrow` flag"
throw(ArgumentError(s))
+ elseif exprflags[ASTABLE_SYM][]
+ s = "The `@passmissing` flag is currently not allowed with the `@astable` flag"
+ throw(ArgumentError(s))
end
end
end
From da7674d9b281b7c28be9eb981daa76aac3e26e38 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 23 Sep 2021 06:41:43 -0400
Subject: [PATCH 23/27] add errors for bad flag combo
---
src/parsing.jl | 1 -
test/astable_flag.jl | 9 ++++++++-
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/src/parsing.jl b/src/parsing.jl
index a8a263f0..c83df4c2 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -109,7 +109,6 @@ function extract_macro_flags(ex::Expr, exprflags = deepcopy(DEFAULT_FLAGS))
return (ex, exprflags)
end
end
-
return (ex, exprflags)
end
diff --git a/test/astable_flag.jl b/test/astable_flag.jl
index 1ca9b6fd..012b7a4f 100644
--- a/test/astable_flag.jl
+++ b/test/astable_flag.jl
@@ -158,7 +158,14 @@ end
@test sort(d.b_min) == [5, 7]
end
-
+@testset "errors with passmissing" begin
+ @eval df = DataFrame(y = 1)
+ @test_throws LoadError @eval @transform df @passmissing @byrow @astable :x = 2
+ @test_throws LoadError @eval @transform df @byrow @astable @passmissing :x = 2
+ @test_throws LoadError @eval @transform df @astable @passmissing @byrow :x = 2
+ @test_throws LoadError @eval @rtransform df @astable @passmissing :x = 2
+ @test_throws LoadError @eval @rtransform df @passmissing @astable :x = 2
+end
@testset "bad assignments" begin
@eval df = DataFrame(y = 1)
From 285e3acf17e20c15394540e5afac2bfdaa8ec8a7 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 23 Sep 2021 09:58:25 -0400
Subject: [PATCH 24/27] better grouping tests
---
test/astable_flag.jl | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/test/astable_flag.jl b/test/astable_flag.jl
index 012b7a4f..6c063840 100644
--- a/test/astable_flag.jl
+++ b/test/astable_flag.jl
@@ -131,7 +131,9 @@ end
:b_max = ex[2]
end
- @test sort(d.b_min) == [5, 7]
+ res_sorted = DataFrame(a = [1, 2], b_min = [5, 7], b_max = [6, 8])
+
+ @test sort(d, :b_min) == res_sorted
d = @combine gd @astable begin
ex = extrema(:b)
@@ -139,7 +141,7 @@ end
$"b_max" = ex[2]
end
- @test sort(d.b_min) == [5, 7]
+ @test sort(d, :b_min) == res_sorted
d = @by df :a @astable begin
ex = extrema(:b)
@@ -147,7 +149,7 @@ end
:b_max = ex[2]
end
- @test sort(d.b_min) == [5, 7]
+ @test sort(d, :b_min) == res_sorted
d = @by df :a @astable begin
ex = extrema(:b)
@@ -155,7 +157,7 @@ end
$"b_max" = ex[2]
end
- @test sort(d.b_min) == [5, 7]
+ @test sort(d, :b_min) == res_sorted
end
@testset "errors with passmissing" begin
From 09c692a3e2467ebab86cca9bb89986745c998e36 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 23 Sep 2021 12:56:37 -0400
Subject: [PATCH 25/27] add mutating tests
---
src/parsing.jl | 2 ++
test/astable_flag.jl | 48 +++++++++++++++++++++++++++++++++++++++-----
2 files changed, 45 insertions(+), 5 deletions(-)
diff --git a/src/parsing.jl b/src/parsing.jl
index c83df4c2..4708011f 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -226,6 +226,8 @@ function get_source_fun(function_expr; exprflags = deepcopy(DEFAULT_FLAGS))
end
end
+ println(MacroTools.prettify(fun))
+
return source, fun
end
diff --git a/test/astable_flag.jl b/test/astable_flag.jl
index 6c063840..01a2b32b 100644
--- a/test/astable_flag.jl
+++ b/test/astable_flag.jl
@@ -11,7 +11,6 @@ const ≅ = isequal
d = @rtransform df @astable begin
:x = 1
-
nothing
end
@@ -42,10 +41,6 @@ const ≅ = isequal
@test d == DataFrame(x = 5)
end
-@testset "@astable with just assignments, mutating" begin
- # After finalizing above testset
-end
-
@testset "@astable with strings" begin
df = DataFrame(a = 1, b = 2)
@@ -120,6 +115,49 @@ end
@test d == DataFrame(x = 1, z = 6)
end
+@testset "@astable with mutation" begin
+ df = DataFrame(a = 1, b = 2)
+
+ df2 = copy(df)
+ d = @rtransform! df2 @astable begin
+ :x = 1
+ nothing
+ end
+
+ @test d == DataFrame(a = 1, b = 2, x = 1)
+ @test d === df2
+
+ df2 = copy(df)
+ d = @rselect! df2 @astable begin
+ :x = 1
+ y = 100
+ nothing
+ end
+
+ @test d == DataFrame(x = 1)
+ @test d === df2
+
+ df2 = copy(df)
+ d = @transform! df2 @astable begin
+ :x = [5]
+ y = 100
+ nothing
+ end
+
+ @test d == DataFrame(a = 1, b = 2, x = 5)
+ @test d === df2
+
+ df2 = copy(df)
+ d = @select! df2 @astable begin
+ :x = [5]
+ y = 100
+ nothing
+ end
+
+ @test d == DataFrame(x = 5)
+ @test d === df2
+end
+
@testset "grouping astable flag" begin
df = DataFrame(a = [1, 1, 2, 2], b = [5, 6, 7, 8])
From ae26da89baa20392bad764bb0601ea3bac44e97f Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Fri, 24 Sep 2021 05:03:59 -0400
Subject: [PATCH 26/27] get rid of debugging printing
---
src/parsing.jl | 2 --
1 file changed, 2 deletions(-)
diff --git a/src/parsing.jl b/src/parsing.jl
index 4708011f..c83df4c2 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -226,8 +226,6 @@ function get_source_fun(function_expr; exprflags = deepcopy(DEFAULT_FLAGS))
end
end
- println(MacroTools.prettify(fun))
-
return source, fun
end
From a7fd1a29badf3ae305c117217b38f03993deedb1 Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Fri, 24 Sep 2021 09:11:04 -0400
Subject: [PATCH 27/27] Apply suggestions from code review
Co-authored-by: Milan Bouchet-Valat
---
docs/src/index.md | 2 +-
src/macros.jl | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/docs/src/index.md b/docs/src/index.md
index 6e6dda6e..2ac92866 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -404,7 +404,7 @@ new variables in the same operation, yet have them share
information.
In a single block, all assignments of the form `:y = f(:x)`
-or `$y = f(:x)` at the top-level generate new columns. In the 2nd example, `y`
+or `$y = f(:x)` at the top-level generate new columns. In the second example, `y`
must be a string or `Symbol`.
```
diff --git a/src/macros.jl b/src/macros.jl
index 41e6b728..7fbfad8f 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -285,7 +285,7 @@ end
"""
@passmissing(args...)
-Propagrate missing values inside DataFramesMeta.jl macros.
+Propagate missing values inside DataFramesMeta.jl macros.
`@passmissing` is not a "real" Julia macro but rather serves as a "flag"
@@ -349,8 +349,8 @@ macro passmissing(args...)
throw(ArgumentError("@passmissing only works inside DataFramesMeta macros."))
end
-global astable_docstring_snippet = """
- Transformations can also use the macro-flag `@astable` for creating multiple
+const astable_docstring_snippet = """
+ Transformations can also use the macro-flag [`@astable`](@ref) for creating multiple
new columns at once and letting transformations share the same name-space.
See `? @astable` for more details.
"""