diff --git a/_extensions/rostools/r3-theme/_extension.yml b/_extensions/rostools/r3-theme/_extension.yml index f81318c..cd893d1 100644 --- a/_extensions/rostools/r3-theme/_extension.yml +++ b/_extensions/rostools/r3-theme/_extension.yml @@ -40,7 +40,6 @@ contributes: csl: vancouver.csl - callout-appearance: minimal reference-location: margin crossref: chapters: true diff --git a/preamble/pre-course.qmd b/preamble/pre-course.qmd index 0af1ab7..5856d86 100644 --- a/preamble/pre-course.qmd +++ b/preamble/pre-course.qmd @@ -102,7 +102,6 @@ use: #| results: asis install.packages("name-of-package") - ``` If you have issues with the CMA package, you could also try this: @@ -115,8 +114,6 @@ If you have issues with the CMA package, you could also try this: install.packages("devtools") devtools::install_github("BS1125/CMAverse") - - ``` ## Code of conduct {#sec-code-of-conduct} diff --git a/sessions/NetCoupler-example.qmd b/sessions/NetCoupler-example.qmd index 0b0d951..033377d 100644 --- a/sessions/NetCoupler-example.qmd +++ b/sessions/NetCoupler-example.qmd @@ -1,6 +1,6 @@ # NetCoupler -```{r} +```{r setup} #| include: false library(here) library(DiagrammeR) diff --git a/sessions/causal-mediation-analysis-estimation.qmd b/sessions/causal-mediation-analysis-estimation.qmd index 6315e3d..72fbcee 100644 --- a/sessions/causal-mediation-analysis-estimation.qmd +++ b/sessions/causal-mediation-analysis-estimation.qmd @@ -1,8 +1,12 @@ # Estimation of effects using causal mediation analysis +<<<<<<< style/apply-some-style-fixes +```{r setup} +======= ```{r} +>>>>>>> main #| include: false library(here) library(DiagrammeR) @@ -91,6 +95,22 @@ explain the intuition behind the estimands. Similar equations exist for other types and combinations of variables. ```{r} +<<<<<<< style/apply-some-style-fixes +nhanes <- read_csv(here::here("data/nhanes_dataset.csv")) +# only keeping the variables we need for the example and removing missing variables +nhanes <- nhanes %>% + select( + id = seqn, + w1 = age, + w2 = gender, + w3 = education_clean, + w4 = smoke, + a = total_redmeat, # this is the exposure + m = magic_biomarker, # this is the mediator + y = blood_glucose # this is the outcome + ) %>% + na.omit() +======= # first load the dataset nhanes <- read.csv(here::here("data/nhanes_dataset.csv")) @@ -105,6 +125,7 @@ nhanes <- nhanes %>% m = magic_biomarker, #this is the mediator y = blood_glucose) %>% #this is the outcome na.omit() +>>>>>>> main ``` We can now run a model for the mediator only adjusting for a single @@ -122,8 +143,12 @@ inflammatory biomarker. Now, we run the model for the outcome, including an interaction. ```{r} +<<<<<<< style/apply-some-style-fixes +lm_y <- lm(y ~ a + m + a:m + w1 + w2 + w3 + w4, data = nhanes) +======= lm_y <- lm(y ~ a + m + a:m + w1, data = nhanes) +>>>>>>> main summary(lm_y) ``` @@ -426,26 +451,25 @@ and Y. ```{r echo=FALSE } # Creating The causal diagram for a mediation model - library(DiagrammeR) grViz(" digraph { graph [] node [shape = plaintext] - X - M + X + M L U [fontcolor = red] - Y + Y edge [minlen = 1.5] - X->Y + X->Y X->L L->Y L->M U->L U->Y - X->M - M->Y + X->M + M->Y W->X W->M W->Y @@ -508,35 +532,35 @@ digraph { L1 L2 Lt - Y + Y edge [minlen = 1.5] X0 -> X1 X1 -> X2 X2 -> Xt - Xt -> Y + Xt -> Y M0 -> M1 M1 -> M2 M2 -> Mt Mt -> Y L0 -> L1 - L1 -> L2 - L2 -> Lt + L1 -> L2 + L2 -> Lt X0 -> M0 X1 -> M1 X2 -> M2 Xt -> Mt L0 -> X0 - L1 -> X1 - L2 -> X2 - Lt -> Xt + L1 -> X1 + L2 -> X2 + Lt -> Xt Lt -> Y X0 -> L1 X1 -> L2 X2 -> Lt - W -> L0 - W -> L1 - W -> L2 - W -> Lt + W -> L0 + W -> L1 + W -> L2 + W -> Lt W -> Y { rank = min; W;} { rank = same; L0; L1; L2; Lt } @@ -581,30 +605,33 @@ We will now demonstrate how to conduct g-computation in a simulated dataset. ```{r} -datasim <- function(n) { - x1 <- rbinom(n, size = 1, prob = 0.5) +datasim <- function(n) { + x1 <- rbinom(n, size = 1, prob = 0.5) x2 <- rbinom(n, size = 1, prob = 0.65) x3 <- rnorm(n, 0, 1) x4 <- rnorm(n, 0, 1) - A <- rbinom(n, size = 1, prob = plogis(-1.6 + 2.5*x2 + 2.2*x3 + 0.6*x4 + 0.4*x2*x4)) - Y.1 <- rbinom(n, size = 1, prob = plogis(-0.7 + 1 - 0.15*x1 + 0.45*x2 + 0.20*x4 + 0.4*x2*x4)) - Y.0 <- rbinom(n, size = 1, prob = plogis(-0.7 + 0 - 0.15*x1 + 0.45*x2 + 0.20*x4 + 0.4*x2*x4)) - Y <- Y.1*A + Y.0*(1 - A) - data.frame(x1, x2, x3, x4, A, Y, Y.1, Y.0) -} + A <- rbinom(n, size = 1, prob = plogis(-1.6 + 2.5 * x2 + 2.2 * x3 + 0.6 * x4 + 0.4 * x2 * x4)) + Y.1 <- rbinom(n, size = 1, prob = plogis(-0.7 + 1 - 0.15 * x1 + 0.45 * x2 + 0.20 * x4 + 0.4 * x2 * x4)) + Y.0 <- rbinom(n, size = 1, prob = plogis(-0.7 + 0 - 0.15 * x1 + 0.45 * x2 + 0.20 * x4 + 0.4 * x2 * x4)) + Y <- Y.1 * A + Y.0 * (1 - A) + data.frame(x1, x2, x3, x4, A, Y, Y.1, Y.0) +} set.seed(120110) # for reproducibility ObsData <- datasim(n = 10000) # really large sample -TRUE_EY.1 <- mean(ObsData$Y.1); TRUE_EY.1 # mean outcome under A = 1 -TRUE_EY.0 <- mean(ObsData$Y.0); TRUE_EY.0 -TRUE_MOR <- (TRUE_EY.1*(1 - TRUE_EY.0))/((1 - TRUE_EY.1)*TRUE_EY.0); TRUE_MOR # true marginal OR +TRUE_EY.1 <- mean(ObsData$Y.1) +TRUE_EY.1 # mean outcome under A = 1 +TRUE_EY.0 <- mean(ObsData$Y.0) +TRUE_EY.0 +TRUE_MOR <- (TRUE_EY.1 * (1 - TRUE_EY.0)) / ((1 - TRUE_EY.1) * TRUE_EY.0) +TRUE_MOR # true marginal OR ``` The Total effect if we use traditional model: ```{r} # Fit a logistic regression model predicting Y from the relevant confounders -Q <- glm(Y ~ A + x2 + x4 + x2*x4, data = ObsData, family=binomial) +Q <- glm(Y ~ A + x2 + x4 + x2 * x4, data = ObsData, family = binomial) exp(Q$coef[2]) ``` @@ -621,20 +648,20 @@ A1Data$A <- 1 A0Data$A <- 0 # Predict Y if A=1 -Y_A1 <- predict(Q, A1Data, type="response") +Y_A1 <- predict(Q, A1Data, type = "response") # Predict Y if A=0 -Y_A0 <- predict(Q, A0Data, type="response") +Y_A0 <- predict(Q, A0Data, type = "response") # Taking a look at the predictions -data.frame(Y_A1=head(Y_A1), Y_A0=head(Y_A0), TRUE_Y=head(ObsData$Y)) |> round(digits = 2) +data.frame(Y_A1 = head(Y_A1), Y_A0 = head(Y_A0), TRUE_Y = head(ObsData$Y)) |> round(digits = 2) # Mean outcomes in the two worlds pred_A1 <- mean(Y_A1) pred_A0 <- mean(Y_A0) # Marginal odds ratio -MOR_gcomp <- (pred_A1*(1 - pred_A0))/((1 - pred_A1)*pred_A0) +MOR_gcomp <- (pred_A1 * (1 - pred_A0)) / ((1 - pred_A1) * pred_A0) # ATE (risk difference) RD_gcomp <- pred_A1 - pred_A0 -c(MOR_gcomp, RD_gcomp) |> round(digits=2) +c(MOR_gcomp, RD_gcomp) |> round(digits = 2) ``` The demo shows how to estimate total effect using g-computation. If you @@ -646,16 +673,26 @@ We recommend take the advantage of R packages to do the work. ```{r} set.seed(1) -expit <- function(x) exp(x)/(1+exp(x)) +expit <- function(x) exp(x) / (1 + exp(x)) n <- 10000 w <- rnorm(n, mean = 1, sd = 0.2) -a <- rbinom(n, 1, expit(0.3 + 0.5*w)) -l <- rnorm(n, mean = 1 + a + 0.5*w, sd = 0.5) -m <- rbinom(n, 1, expit(1 + 2*a - l + 2*w)) -y <- rbinom(n, 1, expit(0.3*a + 0.8*m + 0.5*a*m - 0.3*l + 0.4*w)) +a <- rbinom(n, 1, expit(0.3 + 0.5 * w)) +l <- rnorm(n, mean = 1 + a + 0.5 * w, sd = 0.5) +m <- rbinom(n, 1, expit(1 + 2 * a - l + 2 * w)) +y <- rbinom(n, 1, expit(0.3 * a + 0.8 * m + 0.5 * a * m - 0.3 * l + 0.4 * w)) data <- data.frame(a, m, y, w, l) library(CMAverse) +<<<<<<< style/apply-some-style-fixes +res_gformula <- cmest( + data = data, model = "gformula", outcome = "y", exposure = "a", + mediator = "m", basec = c("w"), postc = "l", EMint = TRUE, + mreg = list("logistic"), yreg = "logistic", + postcreg = list("linear"), + astar = 0, a = 1, mval = list(1), + estimation = "imputation", inference = "bootstrap", nboot = 2 +) +======= res_gformula <- cmest(data = data, model = "gformula", outcome = "y", exposure = "a", mediator = "m", basec = c("w"), postc = "l", EMint = TRUE, @@ -663,6 +700,7 @@ res_gformula <- cmest(data = data, model = "gformula", outcome = "y", exposure = postcreg = list("linear"), astar = 0, a = 1, mval = list(1), estimation = "imputation", inference = "bootstrap", nboot = 2) +>>>>>>> main summary(res_gformula) ``` diff --git a/sessions/causal-mediation-analysis-introduction.qmd b/sessions/causal-mediation-analysis-introduction.qmd index 4eccec8..ef7f202 100644 --- a/sessions/causal-mediation-analysis-introduction.qmd +++ b/sessions/causal-mediation-analysis-introduction.qmd @@ -1,10 +1,13 @@ # Introduction to causal mediation analysis +<<<<<<< style/apply-some-style-fixes +```{r setup} +======= ```{r} +>>>>>>> main #| include: false - library(here) library(DiagrammeR) ``` diff --git a/sessions/causal-mediation-analysis-sensitivity-analysis.qmd b/sessions/causal-mediation-analysis-sensitivity-analysis.qmd index 8b771c7..0b9bebd 100644 --- a/sessions/causal-mediation-analysis-sensitivity-analysis.qmd +++ b/sessions/causal-mediation-analysis-sensitivity-analysis.qmd @@ -209,7 +209,6 @@ available on *U* and that *U* confounds the mediator--outcome relationship. ```{r, echo = FALSE} - library(DiagrammeR) grViz(" digraph { @@ -336,7 +335,6 @@ next figure, we can use these same techniques and the same parameters to do sensitivity analysis for *NDE* as well. ```{r, echo = FALSE} - library(DiagrammeR) grViz(" digraph { diff --git a/sessions/causal-mediation-analysis-survival-outcomes.qmd b/sessions/causal-mediation-analysis-survival-outcomes.qmd index 1dce62a..e2e8851 100644 --- a/sessions/causal-mediation-analysis-survival-outcomes.qmd +++ b/sessions/causal-mediation-analysis-survival-outcomes.qmd @@ -136,7 +136,7 @@ impact of obesity on CVD-related death (measured in years). ```{r} library(tidyverse) -framingham <- read.csv(here::here("data/framingham_dataset.csv")) +framingham <- read_csv(here::here("data/framingham_dataset.csv")) framingham <- framingham %>% select( @@ -156,7 +156,7 @@ framingham <- framingham %>% TRUE ~ 0 ), y_time = y_time / 365.25 - ) + ) ``` The new thing here is that we need to calculate the time to event. This diff --git a/sessions/group_work_day1.qmd b/sessions/group_work_day1.qmd index a767741..8d4fc06 100644 --- a/sessions/group_work_day1.qmd +++ b/sessions/group_work_day1.qmd @@ -12,16 +12,15 @@ First, load the dataset and take a few minutes to explore the variables in the dataset. -```{r} +```{r setup} library(here) load(here::here("data/frmghamdata.RData")) - ``` ## Variables -The dataset includes 'r nrow(frmgham)´ participants and 'r -ncol(frmgham)´. +The dataset includes `r nrow(frmgham)` participants and +`r ncol(frmgham)`. ```{r} str(frmgham) diff --git a/sessions/motivation-for-mediation-analysis.qmd b/sessions/motivation-for-mediation-analysis.qmd index 4f9bf81..426b6ea 100644 --- a/sessions/motivation-for-mediation-analysis.qmd +++ b/sessions/motivation-for-mediation-analysis.qmd @@ -2,9 +2,8 @@ -```{r} +```{r setup} #| include: false - # Creating The causal diagram for a mediation model library(DiagrammeR) ``` @@ -99,7 +98,6 @@ effectiveness of these prevention efforts ```{r} #| echo: false - # Creating The causal diagram for a mediation model library(DiagrammeR) grViz(" @@ -108,7 +106,7 @@ digraph { node [shape = plaintext] X [label = 'Lifestyle intervention'] M [label = 'Weight loss'] - N [label = '?'] + N [label = '?'] Y [label = 'Type 2 diabetes'] edge [minlen = 2] X->M diff --git a/sessions/reporting-guidelines.qmd b/sessions/reporting-guidelines.qmd index 4c9e09a..585a4a4 100644 --- a/sessions/reporting-guidelines.qmd +++ b/sessions/reporting-guidelines.qmd @@ -4,7 +4,7 @@ In this session we will use some time to understand the reporting guidelines for mediation analysis as these are seldom used in practice. ::: callout-note -## The overall course objective +## The overall session objectives - Present results from mediation analysis according to the current mediation analysis reporting guidelines diff --git a/sessions/traditional-mediation-analysis.qmd b/sessions/traditional-mediation-analysis.qmd index 703969d..32a6e62 100644 --- a/sessions/traditional-mediation-analysis.qmd +++ b/sessions/traditional-mediation-analysis.qmd @@ -1,8 +1,6 @@ # What is Mediation analysis? - - -```{r} +```{r setup} #| include: false library(here) library(DiagrammeR) @@ -169,15 +167,16 @@ session. ```{r} # first load the dataset -nhanes <- read.csv(here::here("data/nhanes_dataset.csv")) +nhanes <- read_csv(here::here("data/nhanes_dataset.csv")) nhanes <- nhanes %>% select( - id = seqn, w1 = age, w2 = gender, w3 = education_clean, w4 = smoke, - a=total_redmeat, #this is the exposure - m=magic_biomarker, #this is the mediator - y=blood_glucose) %>% #this is the outcome - na.omit() + id = seqn, w1 = age, w2 = gender, w3 = education_clean, w4 = smoke, + a = total_redmeat, # this is the exposure + m = magic_biomarker, # this is the mediator + y = blood_glucose + ) %>% # this is the outcome + na.omit() ``` ## Method 1: Baron & Kenny (the product method)