-
Notifications
You must be signed in to change notification settings - Fork 0
/
tmle_itt_analysis_censoring.R
204 lines (158 loc) · 9.26 KB
/
tmle_itt_analysis_censoring.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
####################################################################
# Censoring analysis (ITT) for initial cohort of n=64120 patients #
####################################################################
# Load required libraries
library(survival)
library(survRM2)
library(survminer)
library(weights)
library(ggplot2)
# output directory
output_dir <- './outputs/'
simulation_version <- paste0(format(Sys.time(), "%Y%m%d"),"/")
if(!dir.exists(output_dir)){
print(paste0('create folder for outputs at: ', output_dir))
dir.create(output_dir)
}
output_dir = paste0(output_dir, simulation_version)
if(!dir.exists(output_dir)){
print(paste0('create folder for outputs at: ', output_dir))
dir.create(output_dir)
}
use.simulated <- FALSE
# load data
if(use.simulated){
load("simdata_from_basevars.RData")
# make censoring variable name consistent with CMS dataset
names(simdata_from_basevars)[names(simdata_from_basevars) == 'days_to_censored_or_outcome'] <- 'days_to_censored'
fup3yr_episode_months_deid <- simdata_from_basevars
}else{
load("/data/MedicaidAP_associate/poulos/fup3yr_episode_months_deid_fixmonthout.RData") # run on Argos
print(paste0("Original data dimension: ", dim(fup3yr_episode_months_deid)))
}
# covariates
L.unscaled <- cbind(dummify(factor(fup3yr_episode_months_deid $state_character),show.na = FALSE)[,-c(6)],
dummify(factor(fup3yr_episode_months_deid $race_defined_character),show.na = FALSE)[,-c(3)],
dummify(factor(fup3yr_episode_months_deid $smi_condition))[,-c(1)], # omit bipolar
"payer_index_mdcr"=dummify(factor(fup3yr_episode_months_deid $payer_index))[,2],
"year"=fup3yr_episode_months_deid $year,
"female"=fup3yr_episode_months_deid $female,
"preperiod_ever_psych"=fup3yr_episode_months_deid $preperiod_ever_psych,
"preperiod_ever_metabolic"=fup3yr_episode_months_deid $preperiod_ever_metabolic,
"preperiod_ever_other"=fup3yr_episode_months_deid $preperiod_ever_other,
"preperiod_drug_use_days"=fup3yr_episode_months_deid $preperiod_drug_use_days,
"preperiod_ever_mt_gluc_or_lip"=fup3yr_episode_months_deid $preperiod_ever_mt_gluc_or_lip,
"preperiod_ever_rx_antidiab"=fup3yr_episode_months_deid $preperiod_ever_rx_antidiab,
"preperiod_ever_rx_cm_nondiab"=fup3yr_episode_months_deid $preperiod_ever_rx_cm_nondiab,
"preperiod_ever_rx_other"=fup3yr_episode_months_deid $preperiod_ever_rx_other,
"calculated_age"=fup3yr_episode_months_deid $calculated_age,
"preperiod_er_mhsa"=fup3yr_episode_months_deid $preperiod_er_mhsa,
"preperiod_er_nonmhsa"=fup3yr_episode_months_deid $preperiod_er_nonmhsa,
"preperiod_er_injury"=fup3yr_episode_months_deid $preperiod_er_injury,
"preperiod_cond_mhsa"=fup3yr_episode_months_deid $preperiod_cond_mhsa,
"preperiod_cond_nonmhsa"=fup3yr_episode_months_deid $preperiod_cond_nonmhsa,
"preperiod_cond_injury"=fup3yr_episode_months_deid $preperiod_cond_injury,
"preperiod_los_mhsa"=fup3yr_episode_months_deid $preperiod_los_mhsa,
"preperiod_los_nonmhsa"=fup3yr_episode_months_deid $preperiod_los_nonmhsa,
"preperiod_los_injury"=fup3yr_episode_months_deid $preperiod_los_injury)
colnames(L.unscaled)[which(colnames(L.unscaled)=="West Virginia")] <- "West_Virginia"
L <- L.unscaled
scale.continuous <- TRUE
if(scale.continuous){
continuous.vars <- c("calculated_age","preperiod_drug_use_days","preperiod_er_mhsa","preperiod_er_nonmhsa","preperiod_er_injury","preperiod_cond_mhsa",
"preperiod_cond_nonmhsa","preperiod_cond_injury","preperiod_los_mhsa","preperiod_los_nonmhsa","preperiod_los_injury")
L[,continuous.vars] <- scale(L[,continuous.vars]) # scale continuous vars
}
use.SL <- FALSE
if(use.SL==FALSE){ # exclude multicolinear variables for GLM (VIF)
glm.exlude <- c("California")
L <- L[,!colnames(L)%in%glm.exlude]
}
# Censoring and median follow-up time (Table A2)
A <- factor(fup3yr_episode_months_deid$drug_group) # categorical
if(use.simulated==FALSE){
C <- dummify(factor(fup3yr_episode_months_deid$censoring_cause_4cats)) # Overall cause of censoring - 4 categories,
print(aggregate(C, by=list(A), FUN=mean))
print(summary(C))
days <- fup3yr_episode_months_deid$days_to_end_of_followup # Days to end of followup (min (dod_both_sources,censored_date,days_to_index_plus_3yrs))
print(aggregate(days, by=list(A), FUN=mean))
print(summary(days))
}
##Kaplan-Meier Survival Curves
## plot Kaplan-Meier survival curves for each treatment group
fup3yr_episode_months_deid$month_number <- fup3yr_episode_months_deid$month_number+1 # baseline starts at t=1
# Censoring dummy
fup3yr_episode_months_deid$censor <- ifelse(fup3yr_episode_months_deid$days_to_censored <= (1095/36)*fup3yr_episode_months_deid$month_number,1,0)
print(aggregate(fup3yr_episode_months_deid$censor, by=list(A), FUN=mean))
print(summary(fup3yr_episode_months_deid$censor))
# Create the survival object
surv_obj <- Surv(time = fup3yr_episode_months_deid$month_number, event = fup3yr_episode_months_deid$censor)
# fit Kaplan-Meier survival curves for each drug group
km_fits <- survfit(surv_obj ~ factor(fup3yr_episode_months_deid$drug_group))
# Plot Kaplan-Meier survival curves
g <- ggsurvplot(
fit = km_fits,
data = fup3yr_episode_months_deid,
palette="lancet",
fun = "pct",
ylab= "Share of patients uncensored (%)",
xlab= "Month",
legend="bottom",
legend.title="Antipsychotic",
legend.labs=c("Reference","A","B","C","D","E"),
)
png(paste0(output_dir,"censoring_KM_plot.png"),width = 780, height = 780)
print(g)
dev.off() # Close the file
# KZ - change Y-axis scale for KM curve plot
g$plot <- g$plot + ylim(c(0.90, 1))
png(paste0(output_dir,"censoring_KM_plot_yaxis_90_to_100.png"),width = 780, height = 780)
print(g)
dev.off() # Close the file
## Restricted mean survival time (RMST)
# Identify the unique drug groups
proper <- function(x) paste0(toupper(substr(x, 1, 1)), tolower(substring(x, 2)))
fup3yr_episode_months_deid$drug_group <- proper(fup3yr_episode_months_deid$drug_group)
unique_drug_groups_withref <- unique(factor(fup3yr_episode_months_deid$drug_group))
unique_drug_groups <- unique_drug_groups_withref[unique_drug_groups_withref != "Aripiprazole"] # omit Reference
unique_drug_groups <- droplevels(unique_drug_groups)
# Initialize an empty vector to hold RMST values
rmst_values_nocovars <- rmst_values_covars <- numeric(length(unique_drug_groups))
# Calculate RMST difference between reference drug and each comparator
# comparing each treatment drug vs. Reference
# status =1: censoring occurred
rmst_values_nocovars <- lapply(1:length(unique_drug_groups), function(i)
rmst2(time = fup3yr_episode_months_deid[fup3yr_episode_months_deid$drug_group%in%c(as.character(unique_drug_groups[i]),"Aripiprazole"),]$month_number,
status = fup3yr_episode_months_deid[fup3yr_episode_months_deid$drug_group%in%c(as.character(unique_drug_groups[i]),"Aripiprazole"),]$censor,
arm = ifelse(fup3yr_episode_months_deid[fup3yr_episode_months_deid$drug_group%in%c(as.character(unique_drug_groups[i]),"Aripiprazole"),]$drug_group==unique_drug_groups[i],1,0)))
rmst_values_covars <- lapply(1:length(unique_drug_groups), function(i)
rmst2(time = fup3yr_episode_months_deid[fup3yr_episode_months_deid$drug_group%in%c(as.character(unique_drug_groups[i]),"Aripiprazole"),]$month_number,
status = fup3yr_episode_months_deid[fup3yr_episode_months_deid$drug_group%in%c(as.character(unique_drug_groups[i]),"Aripiprazole"),]$censor,
arm = ifelse(fup3yr_episode_months_deid[fup3yr_episode_months_deid$drug_group%in%c(as.character(unique_drug_groups[i]),"Aripiprazole"),]$drug_group==unique_drug_groups[i],1,0),
covariates=L[which(fup3yr_episode_months_deid$drug_group%in%c(as.character(unique_drug_groups[i]),"Aripiprazole")),]))
names(rmst_values_covars) <- names(rmst_values_nocovars) <- unique_drug_groups
for(i in 1:length(unique_drug_groups)){
print(paste("RMST differences between Aripiprazole (arm=0) and comparator (arm=1)", unique_drug_groups[i]))
print(rmst_values_nocovars[[i]])
print(rmst_values_covars[[i]])
}
## Cox Proportional Hazards Models
## assess the impact of treatment assignment and other covariates on the hazard of being censored
# Fit the Cox proportional hazards model with and without covars
cox_model_covars <- coxph(surv_obj ~ fup3yr_episode_months_deid$drug_group + L)
cox_model_no_covars <- coxph(surv_obj ~ fup3yr_episode_months_deid$drug_group)
# Summary of the model
summary(cox_model_covars)
summary(cox_model_no_covars)
# Test the assumption of proportional hazards
cox_zph_covars <- cox.zph(cox_model_covars)
print(cox_zph_covars)
cox_zph_no_covars <- cox.zph(cox_model_no_covars)
print(cox_zph_no_covars)
# Plot the scaled Schoenfeld residuals to visualize the assumption
png(paste0(output_dir,"schoenfeld_plot_covars.png"),width = 780, height = 780)
plot(cox_zph_covars, xlab="Month")
dev.off() # Close the file
png(paste0(output_dir,"schoenfeld_plot_no_covars.png"),width = 780, height = 780)
plot(cox_zph_no_covars, xlab="Month")
dev.off() # Close the file