Apply stricter .eval_time checking for dynamic survival metrics #468

EmilHvitfeldt · 2023-12-13T23:24:10Z

to close #462

library(yardstick)

brier_survival(lung_surv, truth = surv_obj, .pred)
#> # A tibble: 5 × 4
#>   .metric        .estimator .eval_time .estimate
#>   <chr>          <chr>           <dbl>     <dbl>
#> 1 brier_survival standard          100     0.109
#> 2 brier_survival standard          200     0.194
#> 3 brier_survival standard          300     0.219
#> 4 brier_survival standard          400     0.222
#> 5 brier_survival standard          500     0.197

roc_auc_survival(lung_surv, truth = surv_obj, .pred)
#> # A tibble: 5 × 4
#>   .metric          .estimator .eval_time .estimate
#>   <chr>            <chr>           <dbl>     <dbl>
#> 1 roc_auc_survival standard          100     0.659
#> 2 roc_auc_survival standard          200     0.679
#> 3 roc_auc_survival standard          300     0.688
#> 4 roc_auc_survival standard          400     0.648
#> 5 roc_auc_survival standard          500     0.662

roc_curve_survival(lung_surv, truth = surv_obj, .pred)
#> # A tibble: 650 × 4
#>    .threshold sensitivity specificity .eval_time
#>         <dbl>       <dbl>       <dbl>      <dbl>
#>  1   -Inf          0            1            100
#>  2      0.615      0            0.995        100
#>  3      0.723      0.0334       0.985        100
#>  4      0.727      0.133        0.985        100
#>  5      0.730      0.167        0.985        100
#>  6      0.732      0.233        0.980        100
#>  7      0.739      0.233        0.969        100
#>  8      0.741      0.267        0.964        100
#>  9      0.746      0.267        0.959        100
#> 10      0.748      0.267        0.954        100
#> # ℹ 640 more rows

brier_survival_integrated(lung_surv, truth = surv_obj, .pred)
#> # A tibble: 1 × 3
#>   .metric                   .estimator .estimate
#>   <chr>                     <chr>          <dbl>
#> 1 brier_survival_integrated standard       0.158

lung_surv_neg <- lung_surv
lung_surv_neg$.pred[[1]]$.eval_time[1] <- -100

brier_survival(lung_surv_neg, truth = surv_obj, .pred)
#> Error in `brier_survival()`:
#> ✖ Negative values of .eval_time are not allowed.
#> ℹ The following negative values were found: -100.

roc_auc_survival(lung_surv_neg, truth = surv_obj, .pred)
#> Error in `roc_auc_survival()`:
#> ✖ Negative values of .eval_time are not allowed.
#> ℹ The following negative values were found: -100.

roc_curve_survival(lung_surv_neg, truth = surv_obj, .pred)
#> Error in `roc_curve_survival()`:
#> ✖ Negative values of .eval_time are not allowed.
#> ℹ The following negative values were found: -100.

brier_survival_integrated(lung_surv_neg, truth = surv_obj, .pred)
#> Error in `brier_survival_integrated()`:
#> ✖ Negative values of .eval_time are not allowed.
#> ℹ The following negative values were found: -100.

lung_surv_na <- lung_surv
lung_surv_na$.pred[[1]]$.eval_time[1] <- NA

brier_survival(lung_surv_na, truth = surv_obj, .pred)
#> Error in `brier_survival()`:
#> ✖ Missing values in .eval_time are not allowed.

roc_auc_survival(lung_surv_na, truth = surv_obj, .pred)
#> Error in `roc_auc_survival()`:
#> ✖ Missing values in .eval_time are not allowed.

roc_curve_survival(lung_surv_na, truth = surv_obj, .pred)
#> Error in `roc_curve_survival()`:
#> ✖ Missing values in .eval_time are not allowed.

brier_survival_integrated(lung_surv_na, truth = surv_obj, .pred)
#> Error in `brier_survival_integrated()`:
#> ✖ Missing values in .eval_time are not allowed.

lung_surv_inf <- lung_surv
lung_surv_inf$.pred[[1]]$.eval_time[1] <- Inf

brier_survival(lung_surv_inf, truth = surv_obj, .pred)
#> Error in `brier_survival()`:
#> ✖ Infinite values of .eval_time are not allowed.

roc_auc_survival(lung_surv_inf, truth = surv_obj, .pred)
#> Error in `roc_auc_survival()`:
#> ✖ Infinite values of .eval_time are not allowed.

roc_curve_survival(lung_surv_inf, truth = surv_obj, .pred)
#> Error in `roc_curve_survival()`:
#> ✖ Infinite values of .eval_time are not allowed.

brier_survival_integrated(lung_surv_inf, truth = surv_obj, .pred)
#> Error in `brier_survival_integrated()`:
#> ✖ Infinite values of .eval_time are not allowed.

lung_surv_duplicate <- lung_surv
lung_surv_duplicate$.pred[[1]]$.eval_time[1] <- 200

brier_survival(lung_surv_duplicate, truth = surv_obj, .pred)
#> Error in `brier_survival()`:
#> ✖ Duplicate values of .eval_time are not allowed.

roc_auc_survival(lung_surv_duplicate, truth = surv_obj, .pred)
#> Error in `roc_auc_survival()`:
#> ✖ Duplicate values of .eval_time are not allowed.

roc_curve_survival(lung_surv_duplicate, truth = surv_obj, .pred)
#> Error in `roc_curve_survival()`:
#> ✖ Duplicate values of .eval_time are not allowed.

brier_survival_integrated(lung_surv_duplicate, truth = surv_obj, .pred)
#> Error in `brier_survival_integrated()`:
#> ✖ Duplicate values of .eval_time are not allowed.

lung_surv_order <- lung_surv
lung_surv_order$.pred <- purrr::map(lung_surv_order$.pred, dplyr::arrange, desc(.eval_time))

brier_survival(lung_surv_order, truth = surv_obj, .pred)
#> Error in `brier_survival()`:
#> ✖ Values of .eval_time must be in increasing order.

roc_auc_survival(lung_surv_order, truth = surv_obj, .pred)
#> Error in `roc_auc_survival()`:
#> ✖ Values of .eval_time must be in increasing order.

roc_curve_survival(lung_surv_order, truth = surv_obj, .pred)
#> Error in `roc_curve_survival()`:
#> ✖ Values of .eval_time must be in increasing order.

brier_survival_integrated(lung_surv_order, truth = surv_obj, .pred)
#> Error in `brier_survival_integrated()`:
#> ✖ Values of .eval_time must be in increasing order.

<sup>Created on 2023-12-13 with [reprex v2.0.2](https://reprex.tidyverse.org)</sup>

EmilHvitfeldt · 2023-12-13T23:33:36Z

There is no NEWS bullet since these functions are not official yet.

hfrick

Looks good! One important question about the ordering of eval_time though, see the comments 🙌

hfrick · 2023-12-14T15:34:51Z

R/validation.R

+    cli::cli_abort(
+      c(
+        x = "Negative values of {.field .eval_time} are not allowed.",
+        i = "The following negative values were found: {.val {offenders}}."


hfrick · 2023-12-14T15:40:51Z

R/validation.R

+  any_not_in_order <- any(
+    vapply(all_eval_times_list, function(x) is.unsorted(x), logical(1))
+  )
+  if (any_not_in_order) {
+    cli::cli_abort(
+      c(
+        x = "Values of {.field .eval_time} must be in increasing order."
+      ),
+      call = call
+    )
+  }


In tune, we explicitly make use of the order of time points in eval_time: for Bayesian optimization and the tuning functions in finetune (racing, etc.), we can only optimize for one time point, so the functions use the first one. If you want to optimize for 10 but also want the metrics to be calculated for 5, you would set eval_time = c(10, 5). Is the ordering a requirement of yardstick? If so, we need to decide where the re-ordering should happen. If not, we can just remove this check.

Let's remove the check and add tests to make sure that out-of-order eval_time works in {yardstick}.

github-actions · 2023-12-29T00:47:59Z

This pull request has been automatically locked. If you believe you have found a related problem, please file a new issue (with a reprex: https://reprex.tidyverse.org) and link to this issue.

EmilHvitfeldt added 5 commits December 13, 2023 14:32

validate that .eval_time can't be negative

56b5b45

validate that .eval_time can't be NA

cfd53d0

validate that .eval_time can't be infinite

08db6eb

validate that .eval_time can't have duplicate values

f1708f2

validate that .eval_time are in order

feda12b

EmilHvitfeldt requested a review from hfrick December 13, 2023 23:28

hfrick reviewed Dec 14, 2023

View reviewed changes

remove out of order validation of eval_time

b833b52

EmilHvitfeldt merged commit ce9da1d into main Dec 14, 2023
12 checks passed

EmilHvitfeldt deleted the fix462 branch December 14, 2023 19:03

github-actions bot locked and limited conversation to collaborators Dec 29, 2023

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Apply stricter .eval_time checking for dynamic survival metrics #468

Apply stricter .eval_time checking for dynamic survival metrics #468

EmilHvitfeldt commented Dec 13, 2023

EmilHvitfeldt commented Dec 13, 2023

hfrick left a comment

hfrick Dec 14, 2023

hfrick Dec 14, 2023

EmilHvitfeldt Dec 14, 2023

github-actions bot commented Dec 29, 2023

Apply stricter .eval_time checking for dynamic survival metrics #468

Apply stricter .eval_time checking for dynamic survival metrics #468

Conversation

EmilHvitfeldt commented Dec 13, 2023

EmilHvitfeldt commented Dec 13, 2023

hfrick left a comment

Choose a reason for hiding this comment

hfrick Dec 14, 2023

Choose a reason for hiding this comment

hfrick Dec 14, 2023

Choose a reason for hiding this comment

EmilHvitfeldt Dec 14, 2023

Choose a reason for hiding this comment

github-actions bot commented Dec 29, 2023