output/results.Rmd

---
title: "Characterising information loss due to aggregating epidemic model outputs"
output: html_document
---

```{r set-up, include=FALSE}
# Set up Rmarkdown and workspace -----
library(here)
library(dplyr)
library(tidyr)
library(purrr)
library(scoringutils)
library(ggplot2)
library(cowplot)
library(patchwork)
# Chunk defaults: evaluate every chunk, but keep code, messages and warnings
# out of the rendered document
# NOTE(review): knitr documents `eval.after` as a package option
# (`knitr::opts_knit`) rather than a chunk option - confirm it takes effect here
knitr::opts_chunk$set(
  eval = TRUE,
  echo = FALSE,
  message = FALSE,
  warning = FALSE,
  eval.after = "fig.cap"
)
options(digits = 2)

# Data source: TRUE = use the copy in this repo,
# FALSE = download data from the hub git remote
local <- TRUE

# Import project functions
source(here("code", "import-results.R"))
source(here("code", "create-ensembles.R"))

# Quantile levels for vincent & LOP ensembles (standard hub submission format)
quantiles <- c(
  0.01, 0.025,
  seq(0.05, 0.95, by = 0.05),
  0.975, 0.99
)

# Prettier formatting -----
# Targets: each vector is named by the other, so both can be passed to
# `ordered()` as levels/labels or used as a lookup in either direction
target_levels <- c(
  "Belgium cases" = "BE inc case",
  "Netherlands cases" = "NL inc case",
  "Spain cases" = "ES inc case",
  "Belgium deaths" = "BE inc death",
  "Netherlands deaths" = "NL inc death"
)
target_labels <- c(
  "BE inc case" = "Belgium cases",
  "NL inc case" = "Netherlands cases",
  "ES inc case" = "Spain cases",
  "BE inc death" = "Belgium deaths",
  "NL inc death" = "Netherlands deaths"
)

# Quantile names used when plotting
quantile_plotting <- paste0("q", c(0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99))

# Colours for scenarios
scenario_colours <- c(
  A = "#e66101",
  B = "#ca0020",
  C = "#0571b0",
  D = "#5e3c99",
  Weighted = "grey50"
)

# Default plot theme: cowplot, with every relative text size set to 1
theme_set(
  theme_cowplot(
    font_size = 10,
    rel_small = 1,
    rel_tiny = 1,
    rel_large = 1
  )
)
```


## Results

```{r load-samples}
# Import the samples from all models, together with observed data
results <- import_projections(round = 2, n_model_min = 3, local = local)

# Label each location/outcome pair as an ordered target with a readable name
results <- mutate(
  results,
  target = ordered(
    x = paste(location, target_variable),
    levels = target_levels,
    labels = target_labels
  )
)
```

```{r create-simple-ensembles, warning=FALSE}
# Two simple ensembles ("Sample", "Quantile")
simple_ensembles <- create_simple_ensembles(
  results = results,
  quantiles = quantiles
)

# Linear pool ensemble
linear_pool_ensemble <- create_lop_ensemble(
  results = results,
  quantiles = quantiles
)

# Stack all ensembles, add the ordered target label, and drop the columns
# that are not used downstream
ensembles <- bind_rows(simple_ensembles, linear_pool_ensemble)
ensembles <- mutate(
  ensembles,
  target = ordered(
    x = paste(location, target_variable),
    levels = target_levels,
    labels = target_labels
  )
)
ensembles <- select(ensembles, -c(n, output_type))
```

#### Comparison of all ensembles

```{r plot-simple, warning=FALSE, fig.dim=c(12,8)}
# Reshape data for plotting -----
# Ensembles: keep the plotted quantiles and spread them into columns, with
# the 0.5 quantile stored as `median` so it gets its own (thicker) line
plot_ensembles <- ensembles |>
  filter(quantile %in% quantile_plotting) |>
  pivot_wider(names_from = quantile) |>
  mutate(median = q0.5) |>
  select(-q0.5)

# Samples: every trajectory is drawn as a thin line from the `q0.5` column,
# grouped by a unique id per location/outcome/scenario/model/sample
plot_samples <- results |>
  mutate(model_sample = paste(location, target_variable, scenario_id,
                              model, sample,
                              sep = "-"),
         model = "None") |> # relabel model to None
  rename(q0.5 = value_100k) |>
  select(location, target_variable,
         target_end_date, scenario_id,
         model, model_sample, q0.5)

# Combine data and observations
ensembles_samples_plot <- bind_rows(plot_ensembles, plot_samples) |>
  left_join(distinct(results,
                     location, target_variable,
                     target_end_date, obs_100k),
            by = c("location", "target_variable", "target_end_date"))

# Format for plotting -----------------------------------------------------
ensembles_samples_plot <- ensembles_samples_plot |>
  mutate(
    # Avoid overplotting observed data: keep it only alongside the raw
    # trajectories. This has to run BEFORE `model` is relabelled below,
    # because mutate() evaluates sequentially and the relabelled factor no
    # longer contains the names "Quantiles" / "Trajectories".
    obs_100k = ifelse(model %in% c("Quantiles",
                                   "Trajectories", "Linear pool"),
                      NA, obs_100k),
    # set order (and display labels) for facet columns
    model = ordered(model,
                    levels = c("None",
                               "Trajectories",
                               "Linear pool",
                               "Quantiles"),
                    labels = c("i. All models' simulated trajectories",
                               "ii. Ensemble from trajectories",
                               "Linear pool",
                               "iii. Ensemble from models' quantiles")),
    target = ordered(x = paste(location, target_variable),
                     levels = target_levels, labels = target_labels)) |>
  # remove LOP ensemble to reduce size of figure (identical to (ii))
  filter(model != "Linear pool")

# Plot --------------------------------------------------------------
plot_ensembles_samples <- ensembles_samples_plot |>
  ggplot(aes(x = target_end_date,
             fill = scenario_id, col = scenario_id)) +
  # ----- Geoms
  # ensembles: 98% and 50% intervals plus the median
  geom_ribbon(aes(ymin = q0.01, ymax = q0.99),
              alpha = 0.1, col = NA) +
  geom_ribbon(aes(ymin = q0.25, ymax = q0.75),
              alpha = 0.4, col = NA) +
  # `linewidth` replaces the deprecated `size` for lines (ggplot2 >= 3.4.0)
  geom_line(aes(y = median), linewidth = 1) +
  # model samples
  geom_line(aes(y = q0.5, group = model_sample),
            alpha = 0.1) +
  # observed data as points
  geom_point(aes(y = obs_100k),
             colour = "grey20", size = 0.6, show.legend = FALSE) +
  # ----- Structure
  # facets
  facet_grid(rows = vars(target), cols = vars(model),
             scales = "free", switch = "y") +
  # labels
  labs(x = NULL, y = "Incidence per 100k",
       colour = "Scenario", fill = "Scenario") +
  # colours and scales
  scale_colour_manual(values = scenario_colours,
                      aesthetics = c("colour", "fill")) +
  scale_x_date(breaks = "3 month", date_labels = "%b '%y") +
  # theme
  theme(legend.position = "bottom",
        strip.background = element_blank(),
        strip.placement = "outside")

ggsave(filename = here("output", "figures", "figure-1.jpg"),
       plot = plot_ensembles_samples,
       width = 8, height = 8)

plot_ensembles_samples
```

#### Conditioning samples on an increasing amount of data

```{r weekly-ensembles, warning=FALSE}
# Build the set of weekly ensembles, each using progressively more observed data
weekly <- create_weekly_ensembles(results)

# Keep non-negative horizons, then describe how each ensemble was weighted
# and add the ordered target label
weekly_ensembles <- filter(weekly$ensembles, horizon >= 0)
weekly_ensembles <- mutate(
  weekly_ensembles,
  weighting = ifelse(scenario_id == "Unweighted",
                     "Unweighted",
                     paste0(horizon, " weeks ago")),
  target = ordered(
    x = paste(location, target_variable),
    levels = target_levels,
    labels = target_labels
  )
)

# Include observed data (a full join, so observation-only rows are kept too)
weekly_ensembles <- full_join(
  weekly_ensembles,
  distinct(results, target, target_end_date, obs_100k),
  by = c("target", "target_end_date")
)
```


```{r plot-weekly-ensembles, warning=FALSE, fig.dim=c(8,8)}
# Colours -------------------------------------------
# One colour per plotted conditioning horizon, plus grey for the unweighted
# ensemble; the names also define which `weighting` values are plotted
horizon_cols <- c(
  "16 weeks ago" = "#a1dab4",
  "8 weeks ago" = "#41b6c4",
  "4 weeks ago" = "#225ea8",
  "Unweighted" = "grey"
)

# Plot data -----------------------------------------
# Restrict `weighting` to the named horizons (anything else becomes NA and is
# dropped at the end), then spread the plotted quantiles into columns
weekly_ensemble_plotdata <- weekly_ensembles |>
  mutate(
    target = ordered(
      x = paste(location, target_variable),
      levels = target_levels,
      labels = target_labels
    ),
    weighting = ordered(x = weighting, levels = names(horizon_cols))
  ) |>
  select(-c(location, target_variable)) |>
  group_by(target, forecast_date, target_end_date) |>
  filter(quantile %in% c("q0.01", "q0.25", "q0.5", "q0.75", "q0.99")) |>
  pivot_wider(names_from = quantile, values_from = value) |>
  rename(median = q0.5) |>
  filter(!is.na(weighting))

# Weighted and unweighted ensembles are split so each can have its own geoms
weekly_ensemble_weighted <- filter(
  weekly_ensemble_plotdata,
  weighting %in% c("16 weeks ago", "8 weeks ago", "4 weeks ago")
)
weekly_ensemble_unweighted <- filter(
  weekly_ensemble_plotdata,
  scenario_id == "Unweighted"
)

# Plot ----------------------------------------------
weekly_ensemble_plot <- ggplot(
  weekly_ensemble_weighted,
  aes(
    x = target_end_date,
    group = weighting,
    colour = weighting,
    fill = weighting
  )
) +
  # ----- Geoms
  # unweighted ensemble: 98% interval, drawn first so it sits underneath
  geom_ribbon(
    data = weekly_ensemble_unweighted,
    aes(ymin = q0.01, ymax = q0.99),
    colour = NA, alpha = 0.3
  ) +
  # weighted ensembles: 98% and 50% intervals
  geom_ribbon(
    aes(ymin = q0.01, ymax = q0.99),
    colour = NA, alpha = 0.3
  ) +
  geom_ribbon(
    aes(ymin = q0.25, ymax = q0.75),
    colour = NA, alpha = 0.6
  ) +
  #geom_line(aes(y = median), alpha = 0.8, lwd = 1) +
  # observed data as points
  geom_point(
    aes(y = obs_100k),
    colour = "grey20", size = 0.6, show.legend = FALSE
  ) +
  # dashed line at the start date of weighted forecasting
  geom_vline(xintercept = as.Date("2022-08-27"), linetype = 2) +
  # ----- Structure
  # one facet row per target, strip labels on the left
  facet_grid(rows = vars(target), scales = "free", switch = "y") +
  labs(
    x = NULL, y = "Incidence per 100k",
    fill = "Conditioned on data up to",
    col = "Conditioned on data up to"
  ) +
  # x axis runs from the first projected date to 11 March 2023
  scale_x_date(
    limits = c(min(results$target_end_date), as.Date("2023-03-11")),
    breaks = "1 month",
    date_labels = "%b '%y"
  ) +
  scale_fill_discrete(breaks = names(horizon_cols), type = horizon_cols) +
  theme(
    legend.position = "bottom",
    strip.background = element_blank(),
    strip.placement = "outside"
  )

ggsave(
  filename = here("output", "figures", "figure-2.jpg"),
  plot = weekly_ensemble_plot,
  width = 8, height = 7
)

weekly_ensemble_plot
```

```{r score-weekly-ensembles}
# Format forecasts and observations for scoring
# (numeric quantile levels; `prediction` / `true_value` column names)
forecasts <- weekly_ensembles |>
  mutate(model = scenario_id,
         quantile = as.numeric(gsub(pattern = "q", replacement = "",
                                    x = quantile)),
         forecast_date = as.Date(forecast_date)) |>
  rename(prediction = value,
         true_value = obs_100k) |>
  # remove where no data (after March 2023), and observation-only rows
  filter(target_end_date <= as.Date("2023-03-04") &
           !is.na(model))

# Score forecasts on log scale
scores <- forecasts |>
  mutate(
    scale = "log",
    # offset of 1e-05 (1/100000) keeps log() finite at zero;
    # negative predictions are truncated to zero first
    true_value = log(true_value + 1e-05),
    prediction = log(pmax(prediction, 0) + 1e-05)) |>
  score(metrics = c("interval_score")) |>
  summarise_scores(by = c("model", "n_weeks_scored",
                          "target"),
                   na.rm = TRUE)

# Score all forecasts relative to each other (pairwise) -----
score_pairwise_raw <- pairwise_comparison(scores = scores,
                                          metric = "interval_score",
                                          baseline = "Unweighted",
                                          by = c("n_weeks_scored",
                                                 "target"))

# Keep the weighted-vs-unweighted comparison only; `target_label` is filled
# only at 31 weeks so each line can be labelled once when plotted
score_pairwise <- score_pairwise_raw |>
  filter(compare_against == "Unweighted" &
           model == "Weighted") |>
  select(model, n_weeks_scored, target,
         rel_wis = scaled_rel_skill) |>
  mutate(target_label = ifelse(n_weeks_scored == 31, as.character(target), NA))

# Summary stats -----
print("Summary of all weighted forecasts' relative WIS")
print("At 4, 8, 16, and 31 weeks' data")
score_pairwise_subset <- score_pairwise |>
  filter(n_weeks_scored %in% c(4, 8, 16, 31))

# Median relative WIS across targets at each number of weeks
score_pairwise_subset |>
  group_by(n_weeks_scored) |>
  summarise(median = median(rel_wis))

# Relative WIS per target, one column per number of weeks.
# `target_label` is dropped first: it differs between the 31-week rows and
# the rest, so keeping it as an id column would split each target in two.
score_pairwise_subset |>
  select(model, target, n_weeks_scored, rel_wis) |>
  pivot_wider(names_from = n_weeks_scored,
              values_from = rel_wis) |>
  knitr::kable()

print("Summary across time")
print(summary(score_pairwise$rel_wis))
tapply(score_pairwise$rel_wis, score_pairwise$target, summary)
```

```{r plot-scored-weekly-ensembles, fig.height=6, fig.width=8, warning=FALSE}
# Plot --------------------------------------------------------------------
# Relative WIS of the weighted ensemble for each target, against the number
# of weeks of data used for weighting
scores_plot <- score_pairwise |>
  ggplot(aes(x = n_weeks_scored, y = rel_wis, col = target)) +
  geom_line(lwd = 0.8) +
  # reference lines: parity with the unweighted ensemble (y = 1) and the
  # 4 / 8 / 16 week horizons, coloured to match `horizon_cols`
  geom_hline(yintercept = 1, lty = 2, col = horizon_cols["Unweighted"]) +
  geom_vline(xintercept = 4, lty = 2, col = horizon_cols["4 weeks ago"]) +
  geom_vline(xintercept = 8, lty = 2, col = horizon_cols["8 weeks ago"]) +
  geom_vline(xintercept = 16, lty = 2, col = horizon_cols["16 weeks ago"]) +
  scale_x_continuous(breaks = seq(0, 32, by = 4)) +
  scale_color_brewer(type = "qual", palette = "Dark2") +
  # Direct labels at the end of each line. `target_label` is NA except at
  # 31 weeks; `na.rm = TRUE` is a layer parameter (not an aesthetic) that
  # drops those rows silently.
  ggrepel::geom_label_repel(aes(label = target_label),
                            na.rm = TRUE,
                            segment.colour = NA,
                            nudge_x = 7, direction = "y",
                            size = 4) +
  labs(y = "Relative WIS performance",
       x = "Number of weeks' known trajectory accuracy used in weighting an ensemble forecast",
       col = "Target") +
  # lines are labelled directly, so no legend is needed
  theme(legend.position = "none")

# title = "Performance of weighted forecasts by weeks' data used",
# caption = "Forecasting performance of ensembles, forecasting weekly over the future period (to 22 July 2023) \n using increasing weeks of data to weight each trajectory. Performance measured by the weighted interval score (WIS) of each weighted ensemble, relative to an equivalent ensemble with equal weights for each trajectory (reference line at 1)"

ggsave(filename = here("output", "figures", "figure-3.jpg"),
       plot = scores_plot,
       width = 8, height = 6)

scores_plot
```

# Supplementary Information

```{r width-ensembles, fig.width = 8}
# Mean lower and upper bound of each central prediction interval.
#
# `ensemble`: data frame with columns `quantile` (labels such as "q0.25"),
#   `value`, `location`, `target_variable` and `model`.
# Returns an ungrouped data frame with one row per target / model / interval
#   and columns `upper` and `lower`, each the mean over all remaining rows
#   (i.e. averaged across all scenarios and dates).
get_interval_widths <- function(ensemble) {
  # Parse the quantile labels to numbers, pair quantiles into central
  # intervals (0.25 and 0.75 both map to the 0.5 interval) and tag each as
  # the interval's lower or upper bound
  by_interval <- mutate(
    ensemble,
    quantile = as.numeric(as.character(sub("q0", "", quantile))),
    interval = round(2 * abs(0.5 - quantile), 2),
    type = if_else(quantile <= 0.5, "lower", "upper"),
    target = paste(location, gsub("inc ", "", target_variable))
  )

  # The median is tagged "lower" above; copy it as "upper" so the
  # zero-width interval has both bounds
  median_as_upper <- mutate(
    filter(by_interval, quantile == 0.5),
    type = "upper"
  )

  bind_rows(by_interval, median_as_upper) |>
    select(-quantile) |>
    pivot_wider(names_from = "type") |>
    group_by(target, model, interval) |>
    summarise(
      upper = mean(upper),
      lower = mean(lower),
      .groups = "drop"
    )
}

# Panel A: interval bounds of the unweighted ensembles, by ensemble source
width <- get_interval_widths(ensembles)
width_plot <- ggplot(
  width,
  aes(
    x = interval,
    ymin = lower, ymax = upper,
    group = model,
    colour = model, fill = model
  )
) +
  geom_ribbon(alpha = 0.25) +
  geom_linerange(alpha = 0.25) +
  geom_point(aes(y = lower), alpha = 0.5) +
  geom_point(aes(y = upper), alpha = 0.5) +
  scale_colour_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") +
  labs(
    y = "Mean lower and upper incidence per 100k",
    x = "Interval width around median",
    fill = "Ensemble source:",
    colour = "Ensemble source:"
  ) +
  facet_grid(rows = vars(target), scales = "free_y", switch = "y") +
  theme(
    legend.position = "bottom",
    strip.background = element_blank(),
    strip.placement = "outside"
  )

# Panel B: the same comparison for the weighted ensembles, restricted to the
# window from the earliest 16-week-horizon target date to the latest
# 4-week-horizon target date
earliest_target <- weekly_ensembles |>
  filter(horizon == 16) |>
  pull(target_end_date) |>
  min()
latest_target <- weekly_ensembles |>
  filter(horizon == 4) |>
  pull(target_end_date) |>
  max()

weekly_widths <- weekly_ensembles |>
  mutate(model = weighting) |>
  filter(between(target_end_date, earliest_target, latest_target)) |>
  get_interval_widths()

# Relative difference in the upper bound between 4 and 16 weeks of data
weekly_width_upper <- weekly_widths |>
  select(-lower) |>
  group_by(target, interval) |>
  pivot_wider(names_from = model, values_from = upper) |>
  mutate(diff = (`4 weeks ago` - `16 weeks ago`) / `16 weeks ago`)

weekly_width_plot <- weekly_widths |>
  filter(model %in% c("4 weeks ago", "8 weeks ago", "16 weeks ago")) |>
  ggplot(
    aes(
      x = interval,
      ymin = lower, ymax = upper,
      group = model,
      colour = model, fill = model
    )
  ) +
  geom_ribbon(alpha = 0.25) +
  geom_linerange(alpha = 0.25) +
  geom_point(aes(y = lower), alpha = 0.5) +
  geom_point(aes(y = upper), alpha = 0.5) +
  scale_colour_manual(
    values = horizon_cols,
    aesthetics = c("fill", "colour")
  ) +
  labs(
    x = "Interval width around median",
    y = NULL,
    fill = "Conditioned on data:",
    colour = "Conditioned on data:"
  ) +
  facet_grid(rows = vars(target), scales = "free_y", switch = "y") +
  theme(
    legend.position = "bottom",
    strip.background = element_blank(),
    strip.placement = "outside"
  )

# Place the two panels side by side, tagged A and B
combined_width_plot <- width_plot +
  weekly_width_plot +
  plot_annotation(tag_levels = "A")

# Save/print
ggsave(
  filename = here("output", "figures", "SI-figure-1.jpg"),
  plot = combined_width_plot,
  width = 12, height = 8
)

combined_width_plot
```


```{r boxplot-scored-weekly-ensembles, warning=FALSE}
# Distribution of relative WIS for each target, drawn as horizontal boxplots
# with a dashed reference line at 1
scores_boxplot <- ggplot(
  score_pairwise,
  aes(x = target, y = rel_wis, colour = target)
) +
  geom_boxplot() +
  geom_hline(yintercept = 1, linetype = 2) +
  labs(x = NULL, y = "Relative WIS") +
  coord_flip() +
  scale_color_brewer(palette = "Dark2") +
  theme(legend.position = "none")

ggsave(
  filename = here("output", "figures", "SI-figure-2.jpg"),
  plot = scores_boxplot,
  width = 7, height = 5
)
scores_boxplot
```

```{r create-supplement}
# Render the information on contributing teams as part 1 of the supplement
rmarkdown::render(
  input = here("output", "supplement.Rmd"),
  output_file = here("output", "supplement", "supplement-part-1.pdf")
)

# Combine part 1 with the Round 2 realtime report into one supplement PDF
pdftools::pdf_combine(
  input = c(
    here("output", "supplement", "supplement-part-1.pdf"),
    here("output", "supplement", "Round 2 realtime report.pdf")
  ),
  output = here("output", "supplement.pdf")
)
```