From 5b5d8f5ca31b8d4b68c2219e227bd01705c5f6c0 Mon Sep 17 00:00:00 2001 From: Daniel Ibsen Date: Mon, 27 May 2024 20:09:34 +0200 Subject: [PATCH 1/7] added refs for motivation section --- includes/papers.bib | 77 +++++++++++++++++++ .../motivation-for-mediation-analysis.qmd | 25 +++--- 2 files changed, 87 insertions(+), 15 deletions(-) diff --git a/includes/papers.bib b/includes/papers.bib index 61e5140..b1b4a0b 100644 --- a/includes/papers.bib +++ b/includes/papers.bib @@ -56,6 +56,21 @@ @article{wittenbecher_dihydroceramide-_2022 file = {Full Text PDF:C\:\\Users\\au302898\\Zotero\\storage\\T8FPAUX7\\Wittenbecher et al. - 2022 - Dihydroceramide- and ceramide-profiling provides i.pdf:application/pdf}, } +@article{colombo_order-independent_2014, + title = {Order-independent constraint-based causal structure learning}, + volume = {15}, + issn = {1532-4435}, + abstract = {We consider constraint-based methods for causal structure learning, such as the PC-, FCI-, RFCI- and CCD- algorithms (Spirtes et al., 1993, 2000; Richardson, 1996; Colombo et al., 2012; Claassen et al., 2013). The first step of all these algorithms consists of the adjacency search of the PC-algorithm. The PC-algorithm is known to be order-dependent, in the sense that the output can depend on the order in which the variables are given. This order-dependence is a minor issue in low-dimensional settings. We show, however, that it can be very pronounced in high-dimensional settings, where it can lead to highly variable results. We propose several modifications of the PC-algorithm (and hence also of the other algorithms) that remove part or all of this order-dependence. All proposed modifications are consistent in high-dimensional settings under the same conditions as their original counterparts. We compare the PC-, FCI-, and RFCI-algorithms and their modifications in simulation studies and on a yeast gene expression data set. 
We show that our modifications yield similar performance in low-dimensional settings and improved performance in high-dimensional settings. All software is implemented in the R-package pcalg.}, + number = {1}, + journal = {The Journal of Machine Learning Research}, + author = {Colombo, Diego and Maathuis, Marloes H.}, + month = jan, + year = {2014}, + keywords = {CCD-algorithm, consistency, directed acyclic graph, FCI-algorithm, high-dimensional data, order-dependence, PC-algorithm}, + pages = {3741--3782}, + file = {Full Text PDF:C\:\\Users\\au302898\\Zotero\\storage\\STAZTLVC\\Colombo and Maathuis - 2014 - Order-independent constraint-based causal structur.pdf:application/pdf}, +} + @misc{johnston_netcoupler_2022, title = {{NetCoupler}}, url = {https://github.com/NetCoupler/NetCoupler}, @@ -356,3 +371,65 @@ @article{shi_cmaverse_2021 pages = {e20}, file = {Snapshot:C\:\\Users\\au302898\\Zotero\\storage\\F2IBK8FH\\cmaverse__a_suite_of_functions_for_reproducible.23.html:text/html}, } + +@article{knowler_reduction_2002, + title = {Reduction in the incidence of type 2 diabetes with lifestyle intervention or metformin}, + volume = {346}, + issn = {1533-4406}, + doi = {10.1056/NEJMoa012512}, + abstract = {BACKGROUND: Type 2 diabetes affects approximately 8 percent of adults in the United States. Some risk factors--elevated plasma glucose concentrations in the fasting state and after an oral glucose load, overweight, and a sedentary lifestyle--are potentially reversible. We hypothesized that modifying these factors with a lifestyle-intervention program or the administration of metformin would prevent or delay the development of diabetes. +METHODS: We randomly assigned 3234 nondiabetic persons with elevated fasting and post-load plasma glucose concentrations to placebo, metformin (850 mg twice daily), or a lifestyle-modification program with the goals of at least a 7 percent weight loss and at least 150 minutes of physical activity per week. 
The mean age of the participants was 51 years, and the mean body-mass index (the weight in kilograms divided by the square of the height in meters) was 34.0; 68 percent were women, and 45 percent were members of minority groups. +RESULTS: The average follow-up was 2.8 years. The incidence of diabetes was 11.0, 7.8, and 4.8 cases per 100 person-years in the placebo, metformin, and lifestyle groups, respectively. The lifestyle intervention reduced the incidence by 58 percent (95 percent confidence interval, 48 to 66 percent) and metformin by 31 percent (95 percent confidence interval, 17 to 43 percent), as compared with placebo; the lifestyle intervention was significantly more effective than metformin. To prevent one case of diabetes during a period of three years, 6.9 persons would have to participate in the lifestyle-intervention program, and 13.9 would have to receive metformin. +CONCLUSIONS: Lifestyle changes and treatment with metformin both reduced the incidence of diabetes in persons at high risk. The lifestyle intervention was more effective than metformin.}, + language = {eng}, + number = {6}, + journal = {The New England Journal of Medicine}, + author = {Knowler, William C. and Barrett-Connor, Elizabeth and Fowler, Sarah E. and Hamman, Richard F. and Lachin, John M. and Walker, Elizabeth A. and Nathan, David M. and {Diabetes Prevention Program Research Group}}, + month = feb, + year = {2002}, + pmid = {11832527}, + pmcid = {PMC1370926}, + keywords = {Adult, Blood Glucose, Body Mass Index, Diabetes Mellitus, Type 2, Double-Blind Method, Energy Intake, Exercise, Female, Humans, Hypoglycemic Agents, Incidence, Life Style, Male, Metformin, Middle Aged, Patient Compliance, Risk Factors, Weight Loss}, + pages = {393--403}, + file = {Full Text:C\:\\Users\\au302898\\Zotero\\storage\\PQA8KYS7\\Knowler et al. 
- 2002 - Reduction in the incidence of type 2 diabetes with.pdf:application/pdf}, +} + +@article{pan_effects_1997, + title = {Effects of diet and exercise in preventing {NIDDM} in people with impaired glucose tolerance. {The} {Da} {Qing} {IGT} and {Diabetes} {Study}}, + volume = {20}, + issn = {0149-5992}, + doi = {10.2337/diacare.20.4.537}, + abstract = {OBJECTIVE: Individuals with impaired glucose tolerance (IGT) have a high risk of developing NIDDM. The purpose of this study was to determine whether diet and exercise interventions in those with IGT may delay the development of NIDDM, i.e., reduce the incidence of NIDDM, and thereby reduce the overall incidence of diabetic complications, such as cardiovascular, renal, and retinal disease, and the excess mortality attributable to these complications. +RESEARCH DESIGN AND METHODS: In 1986, 110,660 men and women from 33 health care clinics in the city of Da Qing, China, were screened for IGT and NIDDM. Of these individuals, 577 were classified (using World Health Organization criteria) as having IGT. Subjects were randomized by clinic into a clinical trial, either to a control group or to one of three active treatment groups: diet only, exercise only, or diet plus exercise. Follow-up evaluation examinations were conducted at 2-year intervals over a 6-year period to identify subjects who developed NIDDM. Cox's proportional hazard analysis was used to determine if the incidence of NIDDM varied by treatment assignment. +RESULTS: The cumulative incidence of diabetes at 6 years was 67.7\% (95\% CI, 59.8-75.2) in the control group compared with 43.8\% (95\% CI, 35.5-52.3) in the diet group, 41.1\% (95\% CI, 33.4-49.4) in the exercise group, and 46.0\% (95\% CI, 37.3-54.7) in the diet-plus-exercise group (P {\textless} 0.05). When analyzed by clinic, each of the active intervention groups differed significantly from the control clinics (P {\textless} 0.05). 
The relative decrease in rate of development of diabetes in the active treatment groups was similar when subjects were stratified as lean or overweight (BMI {\textless} or {\textgreater} or = 25 kg/m2). In a proportional hazards analysis adjusted for differences in baseline BMI and fasting glucose, the diet, exercise, and diet-plus-exercise interventions were associated with 31\% (P {\textless} 0.03), 46\% (P {\textless} 0.0005), and 42\% (P {\textless} 0.005) reductions in risk of developing diabetes, respectively. +CONCLUSIONS: Diet and/or exercise interventions led to a significant decrease in the incidence of diabetes over a 6-year period among those with IGT.}, + language = {eng}, + number = {4}, + journal = {Diabetes Care}, + author = {Pan, X. R. and Li, G. W. and Hu, Y. H. and Wang, J. X. and Yang, W. Y. and An, Z. X. and Hu, Z. X. and Lin, J. and Xiao, J. Z. and Cao, H. B. and Liu, P. A. and Jiang, X. G. and Jiang, Y. Y. and Wang, J. P. and Zheng, H. and Zhang, H. and Bennett, P. H. and Howard, B. V.}, + month = apr, + year = {1997}, + pmid = {9096977}, + keywords = {Adult, Blood Glucose, Body Mass Index, China, Combined Modality Therapy, Diabetes Mellitus, Diabetes Mellitus, Type 2, Exercise, Female, Follow-Up Studies, Glucose Intolerance, Humans, Incidence, Male, Mass Screening, Middle Aged, Obesity, Proportional Hazards Models, Risk Factors, Time Factors}, + pages = {537--544}, +} + +@article{tuomilehto_prevention_2001, + title = {Prevention of type 2 diabetes mellitus by changes in lifestyle among subjects with impaired glucose tolerance}, + volume = {344}, + issn = {0028-4793}, + doi = {10.1056/NEJM200105033441801}, + abstract = {BACKGROUND: Type 2 diabetes mellitus is increasingly common, primarily because of increases in the prevalence of a sedentary lifestyle and obesity. Whether type 2 diabetes can be prevented by interventions that affect the lifestyles of subjects at high risk for the disease is not known. 
+METHODS: We randomly assigned 522 middle-aged, overweight subjects (172 men and 350 women; mean age, 55 years; mean body-mass index [weight in kilograms divided by the square of the height in meters], 31) with impaired glucose tolerance to either the intervention group or the control group. Each subject in the intervention group received individualized counseling aimed at reducing weight, total intake of fat, and intake of saturated fat and increasing intake of fiber and physical activity. An oral glucose-tolerance test was performed annually; the diagnosis of diabetes was confirmed by a second test. The mean duration of follow-up was 3.2 years. +RESULTS: The mean (+/-SD) amount of weight lost between base line and the end of year 1 was 4.2+/-5.1 kg in the intervention group and 0.8+/-3.7 kg in the control group; the net loss by the end of year 2 was 3.5+/-5.5 kg in the intervention group and 0.8+/-4.4 kg in the control group (P{\textless}0.001 for both comparisons between the groups). The cumulative incidence of diabetes after four years was 11 percent (95 percent confidence interval, 6 to 15 percent) in the intervention group and 23 percent (95 percent confidence interval, 17 to 29 percent) in the control group. During the trial, the risk of diabetes was reduced by 58 percent (P{\textless}0.001) in the intervention group. The reduction in the incidence of diabetes was directly associated with changes in lifestyle. +CONCLUSIONS: Type 2 diabetes can be prevented by changes in the lifestyles of high-risk subjects.}, + language = {eng}, + number = {18}, + journal = {The New England Journal of Medicine}, + author = {Tuomilehto, J. and Lindström, J. and Eriksson, J. G. and Valle, T. T. and Hämäläinen, H. and Ilanne-Parikka, P. and Keinänen-Kiukaanniemi, S. and Laakso, M. and Louheranta, A. and Rastas, M. and Salminen, V. and Uusitupa, M. 
and {Finnish Diabetes Prevention Study Group}}, + month = may, + year = {2001}, + pmid = {11333990}, + keywords = {Diabetes Mellitus, Type 2, Diet, Fat-Restricted, Dietary Fiber, Exercise, Female, Glucose Intolerance, Glucose Tolerance Test, Humans, Incidence, Life Style, Male, Middle Aged, Obesity, Risk, Weight Loss}, + pages = {1343--1350}, +} diff --git a/sessions/motivation-for-mediation-analysis.qmd b/sessions/motivation-for-mediation-analysis.qmd index 426b6ea..2c1b389 100644 --- a/sessions/motivation-for-mediation-analysis.qmd +++ b/sessions/motivation-for-mediation-analysis.qmd @@ -4,7 +4,7 @@ ```{r setup} #| include: false -# Creating The causal diagram for a mediation model + library(DiagrammeR) ``` @@ -23,7 +23,7 @@ uncovering the underlying pathways, and mechanisms. ```{r} #| echo: false -library(DiagrammeR) + grViz(" digraph { graph [] @@ -46,8 +46,6 @@ digraph { ### Confirmation and refutation of theory - - Let's assume we estimate a familial risk of diabetes and that risk estimates follow a normal distribution at the population level. Then we select the extremes of the distribution (2.5%) and assess the relative @@ -57,19 +55,17 @@ individuals with a strong/weak familial risk of type 2 diabetes. ### To refine interventions Let's consider the evidence from the landmark prevention trials. The -Diabetes Prevention Programme (DPP, USA), The Diabetes Prevention Study -(DPS, Finland), and The Da Quin study (DPS, China). These studies -randomized high risk individuals to either metformin or a lifestyle -intervention consisting of a physical activity or physical activity + -diet. The main finding was a \~58% lower risk of incidence type 2 -diabetes in the lifestyle intervention group compared to the metformin -group +Diabetes Prevention Programme @knowler_reduction_2002, The Diabetes +Prevention Study @tuomilehto_prevention_2001, and The Da Qing study +@pan_effects_1997.
These studies randomized high risk individuals to +a control group, metformin, or a lifestyle intervention consisting of +physical activity or physical activity + diet. The main finding was a \~58% lower +risk of incident type 2 diabetes in the lifestyle intervention group +compared to the control group. ```{r} #| echo: false -# Creating The causal diagram for a mediation model -library(DiagrammeR) grViz(" digraph { graph [] @@ -98,8 +94,7 @@ effectiveness of these prevention efforts ```{r} #| echo: false -# Creating The causal diagram for a mediation model -library(DiagrammeR) + grViz(" digraph { graph [] From 17b947e11942e070c6cfbb7ef2acee0afee0fdc6 Mon Sep 17 00:00:00 2001 From: Daniel Ibsen Date: Mon, 27 May 2024 20:21:55 +0200 Subject: [PATCH 2/7] updated refs for traditional mediation session --- includes/papers.bib | 101 ++++++++++++++++++++ sessions/traditional-mediation-analysis.qmd | 59 ++++++------ 2 files changed, 131 insertions(+), 29 deletions(-) diff --git a/includes/papers.bib b/includes/papers.bib index b1b4a0b..c6751e3 100644 --- a/includes/papers.bib +++ b/includes/papers.bib @@ -433,3 +433,104 @@ @article{tuomilehto_prevention_2001 keywords = {Diabetes Mellitus, Type 2, Diet, Fat-Restricted, Dietary Fiber, Exercise, Female, Glucose Intolerance, Glucose Tolerance Test, Humans, Incidence, Life Style, Male, Middle Aged, Obesity, Risk, Weight Loss}, pages = {1343--1350}, } + +@article{robins_new_1986, + title = {A new approach to causal inference in mortality studies with a sustained exposure period—application to control of the healthy worker survivor effect}, + volume = {7}, + issn = {0270-0255}, + url = {https://www.sciencedirect.com/science/article/pii/0270025586900886}, + doi = {10.1016/0270-0255(86)90088-6}, + abstract = {In observational cohort mortality studies with prolonged periods of exposure to the agent under study, it is not uncommon for risk factors for death to be determinants of subsequent exposure.
For instance, in occupational mortality studies date of termination of employment is both a determinant of future exposure (since terminated individuals receive no further exposure) and an independent risk factor for death (since disabled individuals tend to leave employment). When current risk factor status determines subsequent exposure and is determined by previous exposure, standard analyses that estimate age-specific mortality rates as a function of cumulative exposure may underestimate the true effect of exposure on mortality whether or not one adjusts for the risk factor in the analysis. This observation raises the question, which if any population parameters can be given a causal interpretation in observational mortality studies? In answer, we offer a graphical approach to the identification and computation of causal parameters in mortality studies with sustained exposure periods. This approach is shown to be equivalent to an approach in which the observational study is identified with a hypothetical double-blind randomized trial in which data on each subject's assigned treatment protocol has been erased from the data file. Causal inferences can then be made by comparing mortality as a function of treatment protocol, since, in a double-blind randomized trial missing data on treatment protocol, the association of mortality with treatment protocol can still be estimated. We reanalyze the mortality experience of a cohort of arsenic-exposed copper smelter workers with our method and compare our results with those obtained using standard methods. 
We find an adverse effect of arsenic exposure on all-cause and lung cancer mortality which standard methods fail to detect.}, + number = {9}, + urldate = {2024-05-27}, + journal = {Mathematical Modelling}, + author = {Robins, James}, + month = jan, + year = {1986}, + pages = {1393--1512}, + file = {ScienceDirect Snapshot:C\:\\Users\\au302898\\Zotero\\storage\\DAUS4LA9\\0270025586900886.html:text/html}, +} + +@article{baron_moderatormediator_1986, + title = {The moderator–mediator variable distinction in social psychological research: {Conceptual}, strategic, and statistical considerations}, + volume = {51}, + issn = {1939-1315}, + shorttitle = {The moderator–mediator variable distinction in social psychological research}, + doi = {10.1037/0022-3514.51.6.1173}, + abstract = {In this article, we attempt to distinguish between the properties of moderator and mediator variables at a number of levels. First, we seek to make theorists and researchers aware of the importance of not using the terms moderator and mediator interchangeably by carefully elaborating, both conceptually and strategically, the many ways in which moderators and mediators differ. We then go beyond this largely pedagogical function and delineate the conceptual and strategic implications of making use of such distinctions with regard to a wide range of phenomena, including control and stress, attitudes, and personality traits. We also provide a specific compendium of analytic procedures appropriate for making the most effective use of the moderator and mediator distinction, both separately and in terms of a broader causal system that includes both moderators and mediators. (46 ref) (PsycINFO Database Record (c) 2016 APA, all rights reserved)}, + number = {6}, + journal = {Journal of Personality and Social Psychology}, + author = {Baron, Reuben M. 
and Kenny, David A.}, + year = {1986}, + note = {Place: US +Publisher: American Psychological Association}, + keywords = {Experimentation, Independent Variables, Social Psychology, Statistical Analysis}, + pages = {1173--1182}, + file = {Full Text:C\:\\Users\\au302898\\Zotero\\storage\\JKBB6Q8Q\\Baron and Kenny - 1986 - The moderator–mediator variable distinction in soc.pdf:application/pdf;Snapshot:C\:\\Users\\au302898\\Zotero\\storage\\XV842DRZ\\doiLanding.html:text/html}, +} + +@inproceedings{tein_estimating_2003, + address = {Tokyo}, + title = {Estimating {Mediated} {Effects} with {Survival} {Data}}, + isbn = {978-4-431-66996-8}, + doi = {10.1007/978-4-431-66996-8_46}, + abstract = {Mediation analyses help identify variables in the causal sequence relating predictor variables to outcome variables. In many studies, outcomes are time until an event occurs and survival analyses are applied. This study examines the point and interval estimates of the mediated effect using two methods of survival analyses: the log-survival time and log-hazard time models. The results show that, under the condition of no censored data, the assumption that mediated effects calculated by the product of coefficients method (αß) and those calculated by the difference in coefficients method (τ − τ’) are identical does apply to log-survival time survival analyses but not to log-hazard time survival analyses. The standard error of the mediated effect can be calculated with the delta formula, the second order Taylor series formula, and the unbiased formula. Consistent with ordinary least squares regression, the three formulas yield similar results. Although the log-survival time model and the log-hazard time model utilize different estimation methods, the results of the significant tests, using the ratio of αß to seαß, were comparable between the two methods. 
However, the significance tests based on the empirical standard error appear to be more conservative than those from the three analytical standard errors.}, + language = {en}, + booktitle = {New {Developments} in {Psychometrics}}, + publisher = {Springer Japan}, + author = {Tein, Jenn-Yun and MacKinnon, David P.}, + editor = {Yanai, H. and Okada, A. and Shigemasu, K. and Kano, Y. and Meulman, J. J.}, + year = {2003}, + pages = {405--412}, +} + +@article{sobel_asymptotic_1982, + title = {Asymptotic {Confidence} {Intervals} for {Indirect} {Effects} in {Structural} {Equation} {Models}}, + volume = {13}, + issn = {0081-1750}, + url = {https://www.jstor.org/stable/270723}, + doi = {10.2307/270723}, + urldate = {2024-05-27}, + journal = {Sociological Methodology}, + author = {Sobel, Michael E.}, + year = {1982}, + note = {Publisher: [American Sociological Association, Wiley, Sage Publications, Inc.]}, + pages = {290--312}, + file = {JSTOR Full Text PDF:C\:\\Users\\au302898\\Zotero\\storage\\3D66SJ4H\\Sobel - 1982 - Asymptotic Confidence Intervals for Indirect Effec.pdf:application/pdf}, +} + +@article{preacher_spss_2004, + title = {{SPSS} and {SAS} procedures for estimating indirect effects in simple mediation models}, + volume = {36}, + issn = {1532-5970}, + url = {https://doi.org/10.3758/BF03206553}, + doi = {10.3758/BF03206553}, + abstract = {Researchers often conduct mediation analysis in order to indirectly assess the effect of a proposed cause on some outcome through a proposed mediator. The utility of mediation analysis stems from its ability to go beyond the merely descriptive to a more functional understanding of the relationships among variables. A necessary component of mediation is a statistically and practically significant indirect effect. Although mediation hypotheses are frequently explored in psychological research, formal significance tests of indirect effects are rarely conducted. 
After a brief overview of mediation, we argue the importance of directly testing the significance of indirect effects and provide SPSS and SAS macros that facilitate estimation of the indirect effect with a normal theory approach and a bootstrap approach to obtaining confidence intervals, as well as the traditional approach advocated by Baron and Kenny (1986). We hope that this discussion and the macros will enhance the frequency of formal mediation tests in the psychology literature. Electronic copies of these macros may be downloaded from the Psychonomic Society’s Web archive at www.psychonomic.org/archive/.}, + language = {en}, + number = {4}, + urldate = {2024-05-27}, + journal = {Behavior Research Methods, Instruments, \& Computers}, + author = {Preacher, Kristopher J. and Hayes, Andrew F.}, + month = nov, + year = {2004}, + keywords = {Cognitive Therapy, Indirect Effect, Life Satisfaction, Mediation Analysis, Sobel Test}, + pages = {717--731}, + file = {Full Text PDF:C\:\\Users\\au302898\\Zotero\\storage\\668GJEXG\\Preacher and Hayes - 2004 - SPSS and SAS procedures for estimating indirect ef.pdf:application/pdf}, +} + +@article{tingley_mediation_2014, + title = {{mediation}: {R} {Package} for {Causal} {Mediation} {Analysis}}, + volume = {59}, + issn = {1548-7660}, + shorttitle = {mediation}, + url = {http://www.jstatsoft.org/v59/i05/}, + doi = {10.18637/jss.v059.i05}, + abstract = {In this paper, we describe the R package mediation for conducting causal mediation analysis in applied empirical research. In many scientific disciplines, the goal of researchers is not only estimating causal effects of a treatment but also understanding the process in which the treatment causally affects the outcome. Causal mediation analysis is frequently used to assess potential causal mechanisms. The mediation package implements a comprehensive suite of statistical tools for conducting such an analysis.
The package is organized into two distinct approaches. Using the model-based approach, researchers can estimate causal mediation effects and conduct sensitivity analysis under the standard research design. Furthermore, the design-based approach provides several analysis tools that are applicable under different experimental designs. This approach requires weaker assumptions than the model-based approach. We also implement a statistical method for dealing with multiple (causally dependent) mediators, which are often encountered in practice. Finally, the package also offers a methodology for assessing causal mediation in the presence of treatment noncompliance, a common problem in randomized trials.}, + language = {en}, + number = {5}, + urldate = {2024-05-27}, + journal = {Journal of Statistical Software}, + author = {Tingley, Dustin and Yamamoto, Teppei and Hirose, Kentaro and Keele, Luke and Imai, Kosuke}, + year = {2014}, + file = {Tingley et al. - 2014 - mediation R Package for Causal Med.pdf:C\:\\Users\\au302898\\Zotero\\storage\\AQ6Z2QNW\\Tingley et al. - 2014 - mediation R Package for Causal Med.pdf:application/pdf}, +} diff --git a/sessions/traditional-mediation-analysis.qmd b/sessions/traditional-mediation-analysis.qmd index 32a6e62..a1d72d4 100644 --- a/sessions/traditional-mediation-analysis.qmd +++ b/sessions/traditional-mediation-analysis.qmd @@ -11,9 +11,9 @@ library(dplyr) ::: callout-note ## Objective of this session -To learn how to conduct mediation analysis using the traditional -approaches, along with understanding the assumptions and limitations of -the methods. +- To learn how to conduct mediation analysis using the traditional + approaches. +- Understanding the assumptions and limitations of the methods. ::: The purpose of mediation analysis is to determine if the effect of a @@ -25,9 +25,9 @@ This can be visualized in the following figures. 
Overall relationship between A and Y: -```{r echo=FALSE} -# Creating The causal diagram for a mediation model -library(DiagrammeR) +```{r} +#| echo: false + grViz(" digraph { graph [] @@ -43,8 +43,8 @@ digraph { Relationship between X and Y through M: ```{r echo=FALSE} -# Creating The causal diagram for a mediation model -library(DiagrammeR) +#| echo: false + grViz(" digraph { graph [] @@ -64,8 +64,8 @@ digraph { ## Traditional approaches for mediation analysis The two traditional approaches to mediation analysis are **the -difference method** and **the product method** (also known as the Baron -& Kenny-method). +difference method** and **the product method**; also known as the Baron +& Kenny-method @baron_moderatormediator_1986. The traditional approach use these equations for mediator and outcome (for the case of a continuous mediator and a continuous outcome) @@ -114,8 +114,8 @@ depicted by below figure. > glucose by mediating specific inflammation biomarkers? ```{r echo=FALSE} -# Creating The causal diagram for a mediation model -library(DiagrammeR) +#| echo: false + grViz(" digraph { graph [] @@ -166,7 +166,7 @@ session. ::: ```{r} -# first load the dataset + nhanes <- read_csv(here::here("data/nhanes_dataset.csv")) nhanes <- nhanes %>% @@ -174,15 +174,16 @@ nhanes <- nhanes %>% id = seqn, w1 = age, w2 = gender, w3 = education_clean, w4 = smoke, a = total_redmeat, # this is the exposure m = magic_biomarker, # this is the mediator - y = blood_glucose - ) %>% # this is the outcome + y = blood_glucose # this is the outcome + ) %>% na.omit() ``` ## Method 1: Baron & Kenny (the product method) -According to Baron and Kenny (1986), the following criteria need to be -satisfied for a variable to be considered a mediator: +According to Baron and Kenny (1986) @baron_moderatormediator_1986, the +following criteria need to be satisfied for a variable to be considered +a mediator: 1. The exposure should be associated with the mediator. 2. 
The model for the outcome that includes both the exposure and @@ -209,7 +210,7 @@ As the mediator is a continuous variable, we can build a linear regression model, adjusting for confounders on the pathway between A and M. -\$ (E(M\|A=a, C=c) = \beta\_0 + \beta\_1a + \beta\_2c\$ (3.1) +$E(M|A=a, C=c) = \beta_0 + \beta_1a + \beta_2c$ (3.1) ```{r} fita <- lm(m ~ a + w1 + w2 + w3 + w4, data = nhanes) @@ -247,7 +248,7 @@ That is to build the model between red meat intake and glucose levels, without adjusting for the inflammatory biomarker. The only difference between step 2 and step 3 is that mediator is not included in the model. -$(E(Y|A=a, C=c)$ = $\theta_0'$ + $\theta_1'a$ + $\theta4'c$ (1.3) +$E(Y|A=a, C=c) = \theta_0' + \theta_1'a + \theta_4'c$ (1.3) ```{r} fitc <- lm(y ~ a + w1 + w2 + w3 + w4, data = nhanes) @@ -340,13 +341,14 @@ and the difference methods.You might have noticed what you got so far are only point estimates, but what if you want to see if this mediation effect is statistically significant (different from zero or not)? -To do so, there are two main approaches: the Sobel test (Sobel, 1982) -and bootstrapping (Preacher & Hayes, 2004). +To do so, there are two main approaches: the Sobel test +@sobel_asymptotic_1982 and bootstrapping @preacher_spss_2004. Here we will demonstrate that we can use the mediate() function in -'mediation' package (Tingley et al. 2019) to conduct mediation analysis. -One of the very good options to conduct mediation analysis using the -mediation package is we can get confidence interval by bootstrapping. +'mediation' package @tingley_mediation_2014 to conduct mediation +analysis. One of the very good options to conduct mediation analysis +using the mediation package is we can get confidence interval by +bootstrapping. Let's load up the R packages. @@ -402,7 +404,7 @@ plot(results) ``` For details of the mediate() package, please refer to -(mediation_rpackage 2014). +@tingley_mediation_2014.
## Limitations of the traditional approach @@ -410,9 +412,9 @@ For details of the mediate() package, please refer to - Interactions - Multiple mediators -```{r echo=FALSE} -# Creating The causal diagram for a mediation model -library(DiagrammeR) +```{r} +#| echo: false + grViz(" digraph { graph [] @@ -446,4 +448,3 @@ estimate a causal mediation effect: - linear relationship - no interactions - rare binary outcome - From eb9c2dc9c64a0dd51afae4ae7c81f7b121024c7a Mon Sep 17 00:00:00 2001 From: Daniel Ibsen Date: Mon, 27 May 2024 20:36:43 +0200 Subject: [PATCH 3/7] updated DAGs in causal mediation session --- sessions/causal-mediation-analysis-introduction.qmd | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sessions/causal-mediation-analysis-introduction.qmd b/sessions/causal-mediation-analysis-introduction.qmd index 7b1005e..f65354f 100644 --- a/sessions/causal-mediation-analysis-introduction.qmd +++ b/sessions/causal-mediation-analysis-introduction.qmd @@ -270,6 +270,7 @@ digraph { edge [minlen = 2] A->M M->Y + A->Y W->A W->Y W->M @@ -316,6 +317,7 @@ digraph { edge [minlen = 2] A->M M->Y + A->Y W1->A W1->Y W1->M @@ -330,9 +332,10 @@ digraph { ``` From the DAG rules, we have a special problem that we cannot solve with -traditional regression approaches. If we adjust for W2 we open the -backdoor path from $W1 \leftarrow W2 \rightarrow Y$. We will work on how -to solve this problem later in the course. +traditional regression approaches. If we adjust for W2 we open a +backdoor path by adjusting for the collider +$W1 \rightarrow W2 \leftarrow A$. We will work on how to solve this +problem later in the course. ## Non-linearity and interactions @@ -575,3 +578,4 @@ In addition to consistency there is the assumption of no carry-over effects. 
## References + From d2edd5a17a9382230fcb38f5fd921e0066b86ce9 Mon Sep 17 00:00:00 2001 From: Daniel Ibsen Date: Mon, 27 May 2024 21:20:36 +0200 Subject: [PATCH 4/7] cleaned up survival and group work for day 1 --- ...l-mediation-analysis-survival-outcomes.qmd | 60 ++++++------------- sessions/group_work_day1.qmd | 1 + 2 files changed, 19 insertions(+), 42 deletions(-) diff --git a/sessions/causal-mediation-analysis-survival-outcomes.qmd b/sessions/causal-mediation-analysis-survival-outcomes.qmd index e2e8851..b83ebbf 100644 --- a/sessions/causal-mediation-analysis-survival-outcomes.qmd +++ b/sessions/causal-mediation-analysis-survival-outcomes.qmd @@ -1,6 +1,6 @@ # Survival outcomes - + ::: callout-note ## Learning outcomes @@ -9,6 +9,8 @@ software ::: +## Time-to-event outcomes + There are many studies conducting mediation analyses with **time-to-event outcomes**. Survival analysis allows investigators to study these important outcomes with appropriate consideration for @@ -27,8 +29,9 @@ effect. They found the effect of SES on survival time was partially mediated by stage diagnosis, explaining 12% for lung cancer. ```{r echo=FALSE} -# Creating The causal diagram for a mediation model + library(DiagrammeR) + grViz(" digraph { graph [] @@ -44,23 +47,14 @@ digraph { }") ``` -### Conduct causal mediation analysis for time-to-event outcomes - -How to conduct causal mediation analysis for time-to-event outcomes? We -introduced the difference and product methods for continuous and binary -outcomes in previous session. It is tempting to run a linear regression -model for the mediator and proportional hazard model for the outcome, -then use product or difference method to estimate the direct effect and -indirect effect. - -#### Product method for time-to-event outcomes - The Cox proportional hazards model is commonly used for dealing with survival data in medical literature. 
Cox regression estimates the **hazard ratios** and the values are then used to determine the effect of the mediator variable between the exposure and the survival time of outcome. +## Cox model for common outcomes + Could we use the traditional approach for time-to-event outcomes? We have introduced the difference and product methods for continuous and binary outcomes in previous session. It is tempting to run a linear @@ -69,17 +63,17 @@ outcome, then use product or difference method to estimate the direct effect and indirect effect. However, 'non-collaspsibility' is a problem of the hazard ratio as odds -ratio (VanderWeele). Therefore, use of Cox PH regression to +ratio [@vanderweele_mediation_2016]. Therefore, use of Cox regression to approximately estimate indirect effects via difference or product of coefficients rests on the assumption that the outcome is **rare** -(VanderWeele). +[@vanderweele_mediation_2016]. Where the outcome is common, measures of the indirect effect or -proportion mediated will be incorrect. Tein and Mackinnon (2003) -considered whether the product method and difference method yield -comparable results with respect to time-to-event outcomes. They found -that the methods coincides for the accelerated failure time model but -not for the proportional hazards model. +proportion mediated will be incorrect. Tein and MacKinnon considered +whether the product method and difference method yield comparable +results with respect to time-to-event outcomes [@tein_estimating_2003]. +They found that the methods coincide for the accelerated failure time +model but not for the proportional hazards model. To sum up, we can only use the traditional approaches for rare outcomes. Otherwise, we can use the product method to get an indication of whether @@ -87,25 +81,6 @@ there is mediation, but be aware that the estimate is not accurate.
### Causal mediation for time-to-event outcomes -::: callout-note -In earlier session,we have been familiar with the counterfactual -concepts in causal mediation: - -- $Yi(a, m)$ is the outcome achieved for person i if, possibly - contrary to fact, exposure had been set to a and mediator to m. - -- $Mi(a)$ is the mediator achieved for person i if, possibly contrary - to fact, exposure had been set to a. - -One can combine the two counterfactuals, yielding so-called nested -counterfactuals defined as - -- $Y(a, M (a\*)$. - -By introducing the nested counterfactual for a ≠ a\* we can give a -precise mathematical definition of mediation. -::: - For a **survival outcome**, the outcome of interest will be survival time (SV). @@ -118,12 +93,12 @@ time (SV). - $λV (t\|c)$: conditional hazard at time t -### Examples +### Example ::: callout-note We will continue working on the obesity-CVD example in the Framingham dataset. -The outcome of interest is death from cardiovascular diseases (cvd). The +The outcome of interest is death from cardiovascular diseases (CVD). The underlying time scale is in days, starting at participants entered the cohort. The exposure of interest is obesity status at baseline, where a=1 indicates obese, a=0 indicates non-obese. The mediator is blood @@ -136,6 +111,7 @@ impact of obesity on CVD-related death (measured in years). ```{r} library(tidyverse) + framingham <- read_csv(here::here("data/framingham_dataset.csv")) framingham <- framingham %>% @@ -155,7 +131,7 @@ framingham <- framingham %>% bmi >= 25 ~ 1, TRUE ~ 0 ), - y_time = y_time / 365.25 + y_time = y_time / 365.25 #change time-scale to years ) ``` diff --git a/sessions/group_work_day1.qmd b/sessions/group_work_day1.qmd index 8d4fc06..26379a7 100644 --- a/sessions/group_work_day1.qmd +++ b/sessions/group_work_day1.qmd @@ -14,6 +14,7 @@ in the dataset. 
```{r setup} library(here) + load(here::here("data/frmghamdata.RData")) ``` From d726244e27a9ed2dbbc9da950a90022b1a4c8f66 Mon Sep 17 00:00:00 2001 From: Daniel Ibsen Date: Mon, 27 May 2024 21:21:08 +0200 Subject: [PATCH 5/7] added refs to survival --- sessions/causal-mediation-analysis-survival-outcomes.qmd | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sessions/causal-mediation-analysis-survival-outcomes.qmd b/sessions/causal-mediation-analysis-survival-outcomes.qmd index b83ebbf..e606e7c 100644 --- a/sessions/causal-mediation-analysis-survival-outcomes.qmd +++ b/sessions/causal-mediation-analysis-survival-outcomes.qmd @@ -156,3 +156,6 @@ res_rb_coxph <- cmest( summary(res_rb_coxph) ``` + +## References + From 9c3eb84948bc0dd519521206251e3b6436fd9092 Mon Sep 17 00:00:00 2001 From: Daniel Ibsen Date: Mon, 27 May 2024 21:30:53 +0200 Subject: [PATCH 6/7] g-comp ref added --- sessions/causal-mediation-analysis-estimation.qmd | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sessions/causal-mediation-analysis-estimation.qmd b/sessions/causal-mediation-analysis-estimation.qmd index 4dc14d3..2590473 100644 --- a/sessions/causal-mediation-analysis-estimation.qmd +++ b/sessions/causal-mediation-analysis-estimation.qmd @@ -548,10 +548,10 @@ digraph { ### G-computation -The G-computation algorithm was first introduced by Robins 1986 (refer) -for estimating time-varying exposure causal in the presence of -time-varying confounders of exposure effects. When estimating total -effect, g-computation is generally equivalent to +The G-computation algorithm was first introduced by +@robins_new_1986 for estimating the causal effect of a time-varying +exposure in the presence of time-varying confounders of exposure effects. When +estimating total effect, g-computation is generally equivalent to inverse-probability-of-treatment weighting (IPTW). But in high-dimensional settings, g-computation is more powerful.
G-computation (using g-formula) could also provide an intuitive method for decomposing @@ -670,4 +670,3 @@ res_gformula <- cmest(data = data, model = "gformula", outcome = "y", exposure = summary(res_gformula) ``` - From 5eb5707a7c3872d816a5ed1b26e604c087142cb1 Mon Sep 17 00:00:00 2001 From: Daniel Ibsen Date: Mon, 27 May 2024 21:33:35 +0200 Subject: [PATCH 7/7] deleted text bit not needed --- sessions/causal-mediation-analysis-estimation.qmd | 2 -- 1 file changed, 2 deletions(-) diff --git a/sessions/causal-mediation-analysis-estimation.qmd b/sessions/causal-mediation-analysis-estimation.qmd index 2590473..8e4ad3c 100644 --- a/sessions/causal-mediation-analysis-estimation.qmd +++ b/sessions/causal-mediation-analysis-estimation.qmd @@ -1,7 +1,5 @@ # Estimation of effects using causal mediation analysis -\<\<\<\<\<\<\< style/apply-some-style-fixes - ```{r setup} #| include: false