Skip to content

Commit

Permalink
Fix decimal empty list bug (#1015)
Browse files Browse the repository at this point in the history
* Restrict decimal generation

Before we were generating values that the
Rust side couldn't handle.

* Add a decode failure hint

Sometimes this decode failure happens because
Elixir Decimals can hold a wider range of values
than Rust (Arrow) Decimals. Here, we're giving a
hint about why the decode may have failed.

* Fix `{:list, {:decimal, ...}}` for empty lists

The bug turned out to be that in the presence of
empty lists, we weren't casting when we should
have been.

* Remove decimal-related TODOs

After restricting the decimal generator such that
it only produced valid Decimals and fixing the
empty list bug, it now seems like it's safe to
include decimals in the property tests.
  • Loading branch information
billylanchantin authored Nov 14, 2024
1 parent 7c5a087 commit 5608c27
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 28 deletions.
19 changes: 14 additions & 5 deletions native/explorer/src/series/from_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ pub fn s_from_list_decimal(
.map(|ex_decimal| AnyValue::Decimal(ex_decimal.signed_coef(), ex_decimal.scale()))
.map_err(|error| {
ExplorerError::Other(format!(
"cannot decode a valid decimal from term. error: {error:?}"
"cannot decode a valid decimal from term; check that `coef` fits into an `i128`. error: {error:?}"
))
}),
TermType::Atom => Ok(AnyValue::Null),
Expand All @@ -264,13 +264,22 @@ pub fn s_from_list_decimal(

let mut series = Series::from_any_values(name.into(), &values, true)?;

if let DataType::Decimal(result_precision, result_scale) = series.dtype() {
let p: Option<usize> = Some(precision.unwrap_or(result_precision.unwrap_or(38)));
let s: Option<usize> = Some(scale.unwrap_or(result_scale.unwrap_or(0)));
match series.dtype() {
DataType::Decimal(result_precision, result_scale) => {
let p: Option<usize> = Some(precision.unwrap_or(result_precision.unwrap_or(38)));
let s: Option<usize> = Some(scale.unwrap_or(result_scale.unwrap_or(0)));

if *result_precision != p || *result_scale != s {
if *result_precision != p || *result_scale != s {
series = series.cast(&DataType::Decimal(p, s))?;
}
}
// An empty list will result in the `Null` dtype.
DataType::Null => {
let p = Some(precision.unwrap_or(38));
let s = Some(scale.unwrap_or(0));
series = series.cast(&DataType::Decimal(p, s))?;
}
other_dtype => panic!("expected dtype to be Decimal. found: {other_dtype:?}"),
}

Ok(ExSeries::new(series))
Expand Down
24 changes: 6 additions & 18 deletions test/explorer/data_frame_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -4730,9 +4730,7 @@ defmodule Explorer.DataFrameTest do
describe "properties" do
property "should be able to create a DataFrame from valid rows" do
check all(
# TODO: remove `exclude: :decimal` once we fix whatever bug(s)
# this is finding.
dtypes <- Explorer.Generator.dtypes(exclude: :decimal),
dtypes <- Explorer.Generator.dtypes(),
rows <- Explorer.Generator.rows(dtypes),
max_runs: 1_000
) do
Expand All @@ -4742,9 +4740,7 @@ defmodule Explorer.DataFrameTest do

property "should be able to create a DataFrame from valid columns" do
check all(
# TODO: remove `exclude: :decimal` once we fix whatever bug(s)
# this is finding.
dtypes <- Explorer.Generator.dtypes(exclude: :decimal),
dtypes <- Explorer.Generator.dtypes(),
cols <- Explorer.Generator.columns(dtypes),
max_runs: 1_000
) do
Expand All @@ -4770,9 +4766,7 @@ defmodule Explorer.DataFrameTest do
@tag :skip
property "can dump any DataFrame (without duration) to CSV" do
check all(
# TODO: remove `:decimal` once we fix whatever bug(s) this is
# finding.
dtypes <- Explorer.Generator.dtypes(exclude: [:decimal, :duration]),
dtypes <- Explorer.Generator.dtypes(exclude: :duration),
rows <- Explorer.Generator.rows(dtypes),
max_runs: 1_000
) do
Expand All @@ -4785,9 +4779,7 @@ defmodule Explorer.DataFrameTest do
@tag :skip
property "can dump any DataFrame to IPC" do
check all(
# TODO: remove `exclude: :decimal` once we fix whatever bug(s)
# this is finding.
dtypes <- Explorer.Generator.dtypes(exclude: :decimal),
dtypes <- Explorer.Generator.dtypes(),
rows <- Explorer.Generator.rows(dtypes),
max_runs: 1_000
) do
Expand All @@ -4800,9 +4792,7 @@ defmodule Explorer.DataFrameTest do
@tag :skip
property "can dump any DataFrame to NDJSON" do
check all(
# TODO: remove `exclude: :decimal` once we fix whatever bug(s)
# this is finding.
dtypes <- Explorer.Generator.dtypes(exclude: :decimal),
dtypes <- Explorer.Generator.dtypes(),
rows <- Explorer.Generator.rows(dtypes),
max_runs: 1_000
) do
Expand All @@ -4815,9 +4805,7 @@ defmodule Explorer.DataFrameTest do
@tag :skip
property "can dump any DataFrame to PARQUET" do
check all(
# TODO: remove `exclude: :decimal` once we fix whatever bug(s)
# this is finding.
dtypes <- Explorer.Generator.dtypes(exclude: :decimal),
dtypes <- Explorer.Generator.dtypes(),
rows <- Explorer.Generator.rows(dtypes),
max_runs: 1_000
) do
Expand Down
4 changes: 4 additions & 0 deletions test/explorer/series_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,10 @@ defmodule Explorer.SeriesTest do
assert Series.dtype(s) == {:decimal, 38, 5}
end

test "{:list, {:decimal, _, _}} works with empty lists" do
Series.from_list([[Decimal.new("3.21")], []], dtype: {:list, {:decimal, 3, 2}})
end

test "mixing dates and integers with `:date` dtype" do
s = Series.from_list([1, nil, ~D[2024-06-13]], dtype: :date)

Expand Down
19 changes: 14 additions & 5 deletions test/support/generator.ex
Original file line number Diff line number Diff line change
Expand Up @@ -295,8 +295,8 @@ defmodule Explorer.Generator do
date: constant(:date),
datetime: tuple({constant(:datetime), time_unit(), constant("Etc/UTC")}),
decimal:
bind(integer(0..37), fn scale ->
bind(integer((scale + 1)..38), fn precision ->
bind(integer(0..19), fn scale ->
bind(integer((scale + 1)..20), fn precision ->
tuple({constant(:decimal), constant(precision), constant(scale)})
end)
end),
Expand Down Expand Up @@ -397,14 +397,23 @@ defmodule Explorer.Generator do
|> map(&elem(&1, 1))
end

@max_u64 2 ** 64 - 1
@spec datetime(pos_integer(), pos_integer()) :: gen(Decimal.t())
defp decimal(precision, scale) do
# Smallest integer with `(scale + 1)` digits.
min_scale = 10 ** scale
# Largest integer with `precision` digits.
max_precision = 10 ** precision - 1

tuple({
one_of([constant("-"), constant("+")]),
string(?0..?9, min_length: precision - scale, max_length: precision - scale),
string(?0..?9, min_length: scale, max_length: scale)
integer(min_scale..min(max_precision, @max_u64))
})
|> map(fn {sign, integer_part, fractional_part} ->
|> map(fn {sign, coef} ->
# By construction, we are guaranteed that precision > scale.
{integer_part, fractional_part} =
coef |> Integer.to_string() |> String.split_at(precision - scale)

Decimal.new(sign <> integer_part <> "." <> fractional_part)
end)
end
Expand Down

0 comments on commit 5608c27

Please sign in to comment.