From b2a6b538f6e7b511894d143aab5d134816577793 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Fri, 7 Jun 2024 16:06:06 -0300 Subject: [PATCH] Fix concat_rows with not aligned dataframes (#920) Closes https://github.com/elixir-explorer/explorer/issues/902 --- lib/explorer/polars_backend/lazy_frame.ex | 2 +- test/explorer/data_frame_test.exs | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/explorer/polars_backend/lazy_frame.ex b/lib/explorer/polars_backend/lazy_frame.ex index 186931272..4302a0657 100644 --- a/lib/explorer/polars_backend/lazy_frame.ex +++ b/lib/explorer/polars_backend/lazy_frame.ex @@ -564,7 +564,7 @@ defmodule Explorer.PolarsBackend.LazyFrame do @impl true def concat_rows([%DF{} | _tail] = dfs, %DF{} = out_df) do - polars_dfs = Enum.map(dfs, & &1.data) + polars_dfs = Enum.map(dfs, fn df -> select(df, out_df).data end) %__MODULE__{} = polars_df = Shared.apply(:lf_concat_rows, [polars_dfs]) %{out_df | data: polars_df} diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index 0bda80d6d..b513c00ae 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -2759,6 +2759,17 @@ defmodule Explorer.DataFrameTest do assert Series.to_list(df4["y"]) == ~w(a b c d e f g h) ++ [nil] end + test "same dtype columns but columns not aligned" do + df1 = DF.new(x: [1, 2, 3, 4], y: ["a", "b", nil, "c"]) + df2 = DF.new(y: ["d", "e", "f", nil], x: [4, 5, 6, 7]) + df3 = DF.concat_rows(df1, df2) + + assert DF.dtypes(df3) == %{"x" => {:s, 64}, "y" => :string} + + assert Series.to_list(df3["x"]) == [1, 2, 3, 4, 4, 5, 6, 7] + assert Series.to_list(df3["y"]) == ["a", "b", nil] ++ ~w(c d e f) ++ [nil] + end + test "with incompatible columns" do df1 = DF.new(x: [1, 2, 3], y: ["a", "b", "c"])