From 00e42b12bd1fb1f3dce59a48fcf7b4398f55ede3 Mon Sep 17 00:00:00 2001
From: Takafumi Arakaki
Date: Fri, 14 Feb 2020 03:49:42 -0800
Subject: [PATCH] RFC: Add append!(::StructVector, iterator::Any) using Tables.isrowtable (#117)

* Implement append!(::StructVector, ::Any) using Tables.isrowtable

* Directly append columns; add some tests
---
 src/tables.jl    | 42 ++++++++++++++++++++++++++++++++++++++++++
 src/utils.jl     | 16 ++++++++++++++++
 test/runtests.jl |  7 +++++++
 3 files changed, 65 insertions(+)

diff --git a/src/tables.jl b/src/tables.jl
index 18fbf06d..f8ce4da6 100644
--- a/src/tables.jl
+++ b/src/tables.jl
@@ -6,3 +6,45 @@ Tables.columnaccess(::Type{<:StructVector}) = true
 
 Tables.columns(s::StructVector) = fieldarrays(s)
 Tables.schema(s::StructVector) = Tables.Schema(staticschema(eltype(s)))
+
+"""
+    append!(s::StructVector, rows)
+
+Append the rows of `rows` to `s`.
+
+When `rows` is a row table that also supports column access (as reported by
+`Tables.isrowtable` and `Tables.columnaccess`), the data is appended
+column-by-column and `s` is returned. Otherwise `rows` is treated as a plain
+iterator and the result of folding `push!` over it, starting from `s`, is returned.
+
+Throw an `ArgumentError` on the column-wise path if `rows` lacks a column that
+`s` has.
+"""
+function Base.append!(s::StructVector, rows)
+    if Tables.isrowtable(rows) && Tables.columnaccess(rows)
+        # Input `rows` is a container of rows _and_ satisfies column
+        # table interface.  Thus, we can add the input column-by-column.
+        table = Tables.columns(rows)
+        isempty(_setdiff(propertynames(s), Tables.columnnames(rows))) ||
+            _invalid_columns_error(s, rows)
+        _foreach(propertynames(s)) do name
+            append!(getproperty(s, name), Tables.getcolumn(table, name))
+        end
+        return s
+    else
+        # Otherwise, fallback to a generic implementation expecting
+        # that `rows` is an iterator:
+        return foldl(push!, rows; init = s)
+    end
+end
+
+# Report the columns of `s` that are absent from `rows`, i.e. exactly the set
+# whose non-emptiness made `append!` reject the input.
+@noinline function _invalid_columns_error(s, rows)
+    missingnames = setdiff!(collect(propertynames(s)), Tables.columnnames(rows))
+    throw(ArgumentError(string(
+        "Cannot append rows from `$(typeof(rows))` to `$(typeof(s))` due to ",
+        "missing column(s):\n",
+        join(missingnames, ", "),
+    )))
+end
diff --git a/src/utils.jl b/src/utils.jl
index a23723c9..355d3168 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -134,3 +134,19 @@ hasfields(::Type{<:NTuple{N, Any}}) where {N} = true
 hasfields(::Type{<:NamedTuple{names}}) where {names} = true
 hasfields(::Type{T}) where {T} = !isabstracttype(T)
 hasfields(::Union) = false
+
+# Set difference that stays in the tuple domain when both arguments are tuples,
+# and defers to `setdiff` for everything else.
+_setdiff(a, b) = setdiff(a, b)
+
+@inline _setdiff(::Tuple{}, ::Tuple{}) = ()
+@inline _setdiff(::Tuple{}, ::Tuple) = ()
+@inline _setdiff(a::Tuple, ::Tuple{}) = a
+@inline _setdiff(a::Tuple, b::Tuple) = _setdiff(_exclude(a, b[1]), Base.tail(b))
+# Drop every element of `a` that is `==` to `b`, keeping the remaining order.
+@inline _exclude(a, b) = foldl((ys, x) -> x == b ? ys : (ys..., x), a; init = ())
+
+# _foreach(f, xs) = foreach(f, xs)
+_foreach(f, xs::Tuple) = foldl((_, x) -> (f(x); nothing), xs; init = nothing)
+# Note `foreach` is not optimized for tuples yet.
+# See: https://github.com/JuliaLang/julia/pull/31901
diff --git a/test/runtests.jl b/test/runtests.jl
index db46924c..e9610602 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -339,6 +339,13 @@ end
     @test Tables.rowaccess(typeof(s))
     @test Tables.columnaccess(s)
     @test Tables.columnaccess(typeof(s))
+    @test append!(StructArray([1im]), [(re = 111, im = 222)]) ==
+        StructArray([1im, 111 + 222im])
+    @test append!(StructArray([1im]), (x for x in [(re = 111, im = 222)])) ==
+        StructArray([1im, 111 + 222im])
+    # Testing integer column "names":
+    @test invoke(append!, Tuple{StructVector,Any}, StructArray(([0],)), StructArray(([1],))) ==
+        StructArray(([0, 1],))
 end
 
 struct S