Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix decimal empty list bug #1015

Merged
merged 4 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions native/explorer/src/series/from_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ pub fn s_from_list_decimal(
.map(|ex_decimal| AnyValue::Decimal(ex_decimal.signed_coef(), ex_decimal.scale()))
.map_err(|error| {
ExplorerError::Other(format!(
"cannot decode a valid decimal from term. error: {error:?}"
"cannot decode a valid decimal from term; check that `coef` fits into an `i128`. error: {error:?}"
))
}),
TermType::Atom => Ok(AnyValue::Null),
Expand All @@ -264,13 +264,22 @@ pub fn s_from_list_decimal(

let mut series = Series::from_any_values(name.into(), &values, true)?;

if let DataType::Decimal(result_precision, result_scale) = series.dtype() {
let p: Option<usize> = Some(precision.unwrap_or(result_precision.unwrap_or(38)));
let s: Option<usize> = Some(scale.unwrap_or(result_scale.unwrap_or(0)));
match series.dtype() {
DataType::Decimal(result_precision, result_scale) => {
let p: Option<usize> = Some(precision.unwrap_or(result_precision.unwrap_or(38)));
let s: Option<usize> = Some(scale.unwrap_or(result_scale.unwrap_or(0)));

if *result_precision != p || *result_scale != s {
if *result_precision != p || *result_scale != s {
series = series.cast(&DataType::Decimal(p, s))?;
}
}
// An empty list will result in the `Null` dtype.
DataType::Null => {
let p = Some(precision.unwrap_or(38));
let s = Some(scale.unwrap_or(0));
series = series.cast(&DataType::Decimal(p, s))?;
}
other_dtype => panic!("expected dtype to be Decimal. found: {other_dtype:?}"),
}

Ok(ExSeries::new(series))
Expand Down
24 changes: 6 additions & 18 deletions test/explorer/data_frame_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -4730,9 +4730,7 @@ defmodule Explorer.DataFrameTest do
describe "properties" do
property "should be able to create a DataFrame from valid rows" do
check all(
# TODO: remove `exclude: :decimal` once we fix whatever bug(s)
# this is finding.
dtypes <- Explorer.Generator.dtypes(exclude: :decimal),
dtypes <- Explorer.Generator.dtypes(),
rows <- Explorer.Generator.rows(dtypes),
max_runs: 1_000
) do
Expand All @@ -4742,9 +4740,7 @@ defmodule Explorer.DataFrameTest do

property "should be able to create a DataFrame from valid columns" do
check all(
# TODO: remove `exclude: :decimal` once we fix whatever bug(s)
# this is finding.
dtypes <- Explorer.Generator.dtypes(exclude: :decimal),
dtypes <- Explorer.Generator.dtypes(),
cols <- Explorer.Generator.columns(dtypes),
max_runs: 1_000
) do
Expand All @@ -4770,9 +4766,7 @@ defmodule Explorer.DataFrameTest do
@tag :skip
property "can dump any DataFrame (without duration) to CSV" do
check all(
# TODO: remove `:decimal` once we fix whatever bug(s) this is
# finding.
dtypes <- Explorer.Generator.dtypes(exclude: [:decimal, :duration]),
dtypes <- Explorer.Generator.dtypes(exclude: :duration),
rows <- Explorer.Generator.rows(dtypes),
max_runs: 1_000
) do
Expand All @@ -4785,9 +4779,7 @@ defmodule Explorer.DataFrameTest do
@tag :skip
property "can dump any DataFrame to IPC" do
check all(
# TODO: remove `exclude: :decimal` once we fix whatever bug(s)
# this is finding.
dtypes <- Explorer.Generator.dtypes(exclude: :decimal),
dtypes <- Explorer.Generator.dtypes(),
rows <- Explorer.Generator.rows(dtypes),
max_runs: 1_000
) do
Expand All @@ -4800,9 +4792,7 @@ defmodule Explorer.DataFrameTest do
@tag :skip
property "can dump any DataFrame to NDJSON" do
check all(
# TODO: remove `exclude: :decimal` once we fix whatever bug(s)
# this is finding.
dtypes <- Explorer.Generator.dtypes(exclude: :decimal),
dtypes <- Explorer.Generator.dtypes(),
rows <- Explorer.Generator.rows(dtypes),
max_runs: 1_000
) do
Expand All @@ -4815,9 +4805,7 @@ defmodule Explorer.DataFrameTest do
@tag :skip
property "can dump any DataFrame to PARQUET" do
check all(
# TODO: remove `exclude: :decimal` once we fix whatever bug(s)
# this is finding.
dtypes <- Explorer.Generator.dtypes(exclude: :decimal),
dtypes <- Explorer.Generator.dtypes(),
rows <- Explorer.Generator.rows(dtypes),
max_runs: 1_000
) do
Expand Down
4 changes: 4 additions & 0 deletions test/explorer/series_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,10 @@ defmodule Explorer.SeriesTest do
assert Series.dtype(s) == {:decimal, 38, 5}
end

test "{:list, {:decimal, _, _}} works with empty lists" do
Series.from_list([[Decimal.new("3.21")], []], dtype: {:list, {:decimal, 3, 2}})
end

test "mixing dates and integers with `:date` dtype" do
s = Series.from_list([1, nil, ~D[2024-06-13]], dtype: :date)

Expand Down
19 changes: 14 additions & 5 deletions test/support/generator.ex
Original file line number Diff line number Diff line change
Expand Up @@ -295,8 +295,8 @@ defmodule Explorer.Generator do
date: constant(:date),
datetime: tuple({constant(:datetime), time_unit(), constant("Etc/UTC")}),
decimal:
bind(integer(0..37), fn scale ->
bind(integer((scale + 1)..38), fn precision ->
bind(integer(0..19), fn scale ->
bind(integer((scale + 1)..20), fn precision ->
tuple({constant(:decimal), constant(precision), constant(scale)})
end)
end),
Expand Down Expand Up @@ -397,14 +397,23 @@ defmodule Explorer.Generator do
|> map(&elem(&1, 1))
end

@max_u64 2 ** 64 - 1
@spec datetime(pos_integer(), pos_integer()) :: gen(Decimal.t())
defp decimal(precision, scale) do
# Smallest integer with `(scale + 1)` digits.
min_scale = 10 ** scale
# Largest integer with `precision` digits.
max_precision = 10 ** precision - 1

tuple({
one_of([constant("-"), constant("+")]),
string(?0..?9, min_length: precision - scale, max_length: precision - scale),
string(?0..?9, min_length: scale, max_length: scale)
integer(min_scale..min(max_precision, @max_u64))
})
|> map(fn {sign, integer_part, fractional_part} ->
|> map(fn {sign, coef} ->
# By construction, we are guaranteed that precision > scale.
{integer_part, fractional_part} =
coef |> Integer.to_string() |> String.split_at(precision - scale)

Decimal.new(sign <> integer_part <> "." <> fractional_part)
end)
end
Expand Down
Loading