Skip to content

Commit

Permalink
add support2 application
Browse files Browse the repository at this point in the history
  • Loading branch information
LucasKook committed Oct 6, 2023
1 parent 4e81968 commit af26e5b
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,5 @@ inst/ignore
inst/results*
*.pdf
*.sh
*.sav

5 changes: 4 additions & 1 deletion inst/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ identifiability:
invariance-example:
(cd .. && Rscript --vanilla inst/code/invariance-example.R)

case-study:
(cd .. && R CMD BATCH --vanilla inst/code/case-study.R)

sim-correct:
(cd .. && Rscript --vanilla inst/code/run-simulation.R 1 10 10 100 0 $(ROW))

Expand Down Expand Up @@ -59,7 +62,7 @@ vis-wald:
vis-larger:
(cd .. && Rscript --vanilla inst/code/vis-simulation.R 6)

all: intro faithfulness gcm-smooth gcm-penalized identifiability invariance-example
all: intro faithfulness gcm-smooth gcm-penalized identifiability invariance-example case-study

vis: vis-main vis-app vis-hidden vis-link vis-larger

Expand Down
165 changes: 165 additions & 0 deletions inst/code/case-study.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# Case study: SUPPORT2 data
# LK 2023

# Dependencies ------------------------------------------------------------

library("tramicp")
library("tidyverse")
library("forcats")
library("survival")
library("tram")

bpath <- file.path("inst", "results", "case-study")

# Options -----------------------------------------------------------------

### Don't fit/test interactions for TRAMICP-Wald
options("wald_test_interactions" = FALSE)

# Load data ---------------------------------------------------------------

load("inst/data/support2.sav")

### Preprocessing
nrow(support2)
dat <- support2 |>
mutate(surv = Surv(d.time, death)) |>
select(all_of(c("surv", "num.co", "sex", "race", "scoma", "dzgroup", "ca",
"diabetes", "dementia", "age"))) %>%
mutate(num.co = fct_lump_min(factor(num.co), 100),
scoma = factor(scoma), age = sqrt(age)) %>%
mutate_if(is.factor, ~ factor(.x, levels = levels(.x),
labels = make.names(levels(.x)),
ordered = FALSE)) %>%
na.omit()
nrow(dat) # 43 patients removed due to missingness

# The set of all predictors is not invariant ------------------------------

summary(coxph(surv ~ scoma + dzgroup + ca + sex + race + num.co +
age + diabetes + dementia, data = dat))

# Evidence of age and cancer being direct causes of time-to-death ---------

fm <- surv ~ scoma + dzgroup + ca + age + diabetes + dementia + sex + race
fe <- ~ num.co
if (file.exists(file.path(bpath, "gcm.rds"))) {
gcm <- readRDS(file.path(bpath, "gcm.rds"))
} else {
gcm <- coxphICP(formula = fm, data = dat, env = fe, test = "gcm.test")
saveRDS(gcm, file.path(bpath, "gcm.rds"))
}
gcm
(wald <- coxphICP(formula = fm, data = dat, env = fe, type = "wald"))

# Multiple environments ---------------------------------------------------

tfe <- ~ num.co + race
tfm <- surv ~ scoma + dzgroup + ca + age + diabetes + dementia + sex
if (file.exists(file.path(bpath, "tgcm.rds"))) {
tgcm <- readRDS(file.path(bpath, "tgcm.rds"))
} else {
tgcm <- coxphICP(formula = tfm, data = dat, env = tfe, test = "gcm.test")
saveRDS(tgcm, file.path(bpath, "tgcm.rds"))
}
tgcm
(twald <- coxphICP(formula = tfm, data = dat, env = tfe, type = "wald"))


# Incorporating prior knowledge about direct causes -----------------------

ffm <- surv ~ scoma + dzgroup + ca + sex + race
fmand <- ~ age + diabetes + dementia
if (file.exists(file.path(bpath, "fgcm.rds"))) {
fgcm <- readRDS(file.path(bpath, "fgcm.rds"))
} else {
fgcm <- coxphICP(formula = ffm, data = dat, env = fe, test = "gcm.test",
mandatory = fmand)
saveRDS(fgcm, file.path(bpath, "fgcm.rds"))
}
fgcm
(fwald <- coxphICP(formula = ffm, data = dat, env = fe, type = "wald",
mandatory = fmand))

# Create Table ------------------------------------------------------------

tab <- lapply(list(gcm, wald, tgcm, twald, fgcm, fwald),
\(mod) {
tmp <- pvalues(gcm)
tmp[] <- NA
pvals <- pvalues(mod)
tmp[names(pvals)] <- pvals
tmp
}) |> bind_rows()
tab
knitr::kable(tab, booktabs = TRUE, format = "latex", digits = 3L)

# Create Figure -----------------------------------------------------------

pdat <- data.frame(set = names(pvalues(gcm, "set")),
gcm = pvalues(gcm, "set"),
wald = pvalues(wald, "set")) |>
mutate(caage = grepl("age", set) & grepl("ca", set),
invariant = gcm > 0.05 | wald > 0.05)

p1 <- ggplot(pdat, aes(x = gcm, y = wald, shape = caage)) +
geom_point(size = 2, alpha = 0.8) +
annotate("rect", xmin = 0, xmax = 0.05, ymin = 0, ymax = 1, fill = "darkblue", alpha = 0.1) +
annotate("rect", ymin = 0, ymax = 0.05, xmin = 0, xmax = 1, fill = "darkred", alpha = 0.1) +
theme_bw() +
labs(x = "TRAM-GCM p-value", y = "TRAM-Wald p-value", shape = "Set contains both ca and age") +
scale_shape_manual(values = c("TRUE" = 1, "FALSE" = 4),
labels = c("TRUE" = "yes", "FALSE" = "no")) +
theme(legend.position = "top", text = element_text(size = 13.5)) +
geom_abline(intercept = 0, slope = 1, lty = 2) +
coord_cartesian(xlim = c(0, 0.25), ylim = c(0, 0.25))

p2 <- ggplot(pdat, aes(x = gcm, y = wald, shape = caage)) +
geom_point(size = 2, alpha = 0.4) +
theme_bw() +
annotate("rect", xmin = 0, xmax = 0.05, ymin = 0, ymax = 1, fill = "darkblue", alpha = 0.1) +
annotate("rect", ymin = 0, ymax = 0.05, xmin = 0, xmax = 1, fill = "darkred", alpha = 0.1) +
labs(x = "TRAM-GCM p-value", y = "TRAM-Wald p-value", shape = "Set contains both ca and age") +
scale_shape_manual(values = c("TRUE" = 1, "FALSE" = 4),
labels = c("TRUE" = "yes", "FALSE" = "no")) +
theme(legend.position = "top", text = element_text(size = 12.5)) +
geom_abline(intercept = 0, slope = 1, lty = 2) +
scale_x_log10() + scale_y_log10()

library("ggpubr")
ggarrange(p1, p2, common.legend = TRUE)
ggsave("inst/figures/casestudy.pdf", height = 4, width = 7.5)

# Appendix: Informative censoring -----------------------------------------

### Informative right-censoring in random fractions
if (!file.exists(file.path(bpath, "censoring.csv"))) {
probs <- seq(1, 9, 1)/10
eventobs <- which(dat$surv[, 2] == 1)
nsim <- 10
set.seed(241068)
out <- lapply(seq_along(probs), \(idx) {
lapply(1:nsim, \(iter) {
dat$nd <- dat$surv[, 2]
to_censor <- sample(eventobs, floor(probs[idx] * length(eventobs)))
dat$nd[to_censor] <- 0
dat$nsurv <- Surv(dat$surv[, 1], dat$nd)
cfm <- nsurv ~ scoma + dzgroup + ca + age + diabetes + dementia + sex + race
cat(paste0("\nTotal fraction of censored observations is ", round(mean(dat$nd == 0) * 100, 2), "%\n"))
cicp <- coxphICP(formula = cfm, data = dat, env = fe, type = "wald")
tibble(frac = probs[idx], output = paste0(cicp$cand, collapse = "+"),
bind_rows(pvalues(cicp, "set")))
}) |> bind_rows()
}) |> bind_rows()
write_csv(out, file.path(bpath, "censoring.csv"))
} else {
out <- read_csv(file.path(bpath, "censoring.csv"))
}

### Generate tables
o2 <- out |> group_by(frac) |> count(output) |>
pivot_wider(names_from = output, values_from = n, values_fill = 0)
o2

o2 |> select(frac, Empty, ca, `ca+age`) |>
knitr::kable(booktabs = TRUE, format = "latex", digits = 3L)
2 changes: 2 additions & 0 deletions inst/data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The SUPPORT2 dataset `support2.sav` can be downloaded
[at this link](https://hbiostat.org/data/repo/support2.sav).

0 comments on commit af26e5b

Please sign in to comment.