Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stop inference of type if dtype is given and manually decode terms where needed #928

Merged
merged 5 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions lib/explorer/polars_backend/shared.ex
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ defmodule Explorer.PolarsBackend.Shared do
alias Explorer.PolarsBackend.Native
alias Explorer.PolarsBackend.Series, as: PolarsSeries
alias Explorer.Series, as: Series
import Kernel, except: [apply: 2]

@polars_df [PolarsDataFrame, PolarsLazyFrame]

def apply(fun, args \\ []) do
case apply(Native, fun, args) do
case Kernel.apply(Native, fun, args) do
{:ok, value} -> value
{:error, error} -> raise runtime_error(error)
end
Expand Down Expand Up @@ -185,11 +186,11 @@ defmodule Explorer.PolarsBackend.Shared do
:boolean -> Native.s_from_list_bool(name, list)
:string -> Native.s_from_list_str(name, list)
:category -> Native.s_from_list_categories(name, list)
:date -> Native.s_from_list_date(name, list)
:time -> Native.s_from_list_time(name, list)
{:naive_datetime, precision} -> Native.s_from_list_naive_datetime(name, list, precision)
{:datetime, precision, tz} -> Native.s_from_list_datetime(name, list, precision, tz)
{:duration, precision} -> Native.s_from_list_duration(name, list, precision)
:date -> apply(:s_from_list_date, [name, list])
:time -> apply(:s_from_list_time, [name, list])
{:naive_datetime, precision} -> apply(:s_from_list_naive_datetime, [name, list, precision])
{:datetime, precision, tz} -> apply(:s_from_list_datetime, [name, list, precision, tz])
{:duration, precision} -> apply(:s_from_list_duration, [name, list, precision])
:binary -> Native.s_from_list_binary(name, list)
:null -> Native.s_from_list_null(name, length(list))
end
Expand Down
33 changes: 19 additions & 14 deletions lib/explorer/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ defmodule Explorer.Series do
## Options

* `:backend` - The backend to allocate the series on.
* `:dtype` - Cast the series to a given `:dtype`. By default this is `nil`, which means
* `:dtype` - Create a series of a given `:dtype`. By default this is `nil`, which means
that Explorer will infer the type from the values in the list.
See the module docs for the list of valid dtypes and aliases.

Expand Down Expand Up @@ -387,12 +387,6 @@ defmodule Explorer.Series do
s64 [nil, nil]
>

iex> Explorer.Series.from_list([1, nil], dtype: :string)
#Explorer.Series<
Polars[2]
string ["1", nil]
>

iex> Explorer.Series.from_list([1, 2], dtype: :f32)
#Explorer.Series<
Polars[2]
Expand Down Expand Up @@ -431,6 +425,14 @@ defmodule Explorer.Series do
category ["EUA", "Brazil", "Poland"]
>

It is possible to create a series of `:date` from a list of days since Unix Epoch.

iex> Explorer.Series.from_list([1, nil], dtype: :date)
#Explorer.Series<
Polars[2]
date [1970-01-02, nil]
>

It is possible to create a series of `:naive_datetime` from a list of microseconds since Unix Epoch.

iex> Explorer.Series.from_list([1649883642 * 1_000 * 1_000], dtype: {:naive_datetime, :microsecond})
Expand All @@ -451,6 +453,15 @@ defmodule Explorer.Series do

iex> Explorer.Series.from_list([1, "a"])
** (ArgumentError) the value "a" does not match the inferred dtype {:s, 64}

But mixing integers with values of the `:date`, `:datetime`, `:time`, or `:duration` types
will work if the desired dtype is given:

iex> Explorer.Series.from_list([1, nil, ~D[2024-06-13]], dtype: :date)
#Explorer.Series<
Polars[3]
date [1970-01-02, nil, 2024-06-13]
>
"""
@doc type: :conversion
@spec from_list(list :: list(), opts :: Keyword.t()) :: Series.t()
Expand All @@ -462,13 +473,7 @@ defmodule Explorer.Series do

type = Shared.dtype_from_list!(list, normalised_dtype)

series = backend.from_list(list, type)

case normalised_dtype do
nil -> series
^type -> series
other -> cast(series, other)
end
backend.from_list(list, type)
end

defp from_same_value(%{data: %backend{}}, value) do
Expand Down
41 changes: 1 addition & 40 deletions lib/explorer/shared.ex
Original file line number Diff line number Diff line change
Expand Up @@ -295,15 +295,8 @@ defmodule Explorer.Shared do

If no preferred type is given (nil), then the inferred type is returned.
"""
def dtype_from_list!(_list, :null), do: :null

def dtype_from_list!(list, nil), do: dtype_from_list!(list)

def dtype_from_list!(list, preferred_type) do
list
|> dtype_from_list!()
|> merge_preferred(preferred_type)
end
def dtype_from_list!(_list, preferred_type), do: preferred_type

@non_finite [:nan, :infinity, :neg_infinity]

Expand Down Expand Up @@ -363,38 +356,6 @@ defmodule Explorer.Shared do
{:struct, Enum.sort(types)}
end

defp merge_preferred(type, type), do: type
defp merge_preferred(:null, type), do: type
defp merge_preferred({:s, 64}, {:u, _} = type), do: type
defp merge_preferred({:s, 64}, {:s, _} = type), do: type
defp merge_preferred({:s, 64}, {:f, _} = type), do: type
defp merge_preferred({:f, 64}, {:f, _} = type), do: type
defp merge_preferred(:string, type) when type in [:binary, :string, :category], do: type

defp merge_preferred({:list, inferred}, {:list, preferred}) do
{:list, merge_preferred(inferred, preferred)}
end

defp merge_preferred({:struct, inferred}, {:struct, preferred}) do
{remaining, all_merged} =
Enum.reduce(preferred, {inferred, []}, fn {col, dtype}, {inferred_rest, merged} ->
case List.keytake(inferred_rest, col, 0) do
{{^col, inferred_dtype}, rest} ->
solved = merge_preferred(inferred_dtype, dtype)
{rest, List.keystore(merged, col, 0, {col, solved})}

nil ->
{inferred, List.keystore(merged, col, 0, {col, dtype})}
end
end)

{:struct, all_merged ++ remaining}
end

defp merge_preferred(inferred, _preferred) do
inferred
end

@doc """
Returns the leaf dtype from a {:list, _} dtype, or itself.
"""
Expand Down
1 change: 1 addition & 0 deletions native/explorer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ pub use error::ExplorerError;
use expressions::*;
use lazyframe::io::*;
use lazyframe::*;
use series::from_list::*;
use series::log::*;
use series::*;

Expand Down
Loading
Loading