-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmakefile
182 lines (135 loc) · 5.82 KB
/
makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# Makefile for running the analysis
#
# Tool locations and command prefixes. Any of these can be overridden on the
# command line, e.g. `make PYTHON=python3.11`.

# Python executable
PYTHON := python3

# R executable. R installs its script launcher as `Rscript`; the all-lower-case
# spelling only resolves on case-insensitive file systems.
R := Rscript

# R scripts
RAW_TO_RDS := $(R) R/construct-features.R
MK_TRAINING_VALIDATION := $(R) R/make-training-and-validation.R
RCMD := $(R) R/rds-to-questions-cli.R

# Python command utilities
XLSX_TO_CSV := $(PYTHON) python/xlsx_to_single_csv.py
PLOT_CV_ROC := $(PYTHON) python/plot_cv_roc.py
PLOT_VAL_ROC := $(PYTHON) python/plot_val_roc.py
RUN_CV := $(PYTHON) python/run_cv_analysis.py
RUN_VAL := $(PYTHON) python/run_val_analysis.py

# .rds data files
RTRAINING := rds-data/training.rds
RVALIDATION := rds-data/validation.rds
# Lists

# Every report PDF: for each kind (cross-validation ROC, validation), each
# question q1-q3 and each numeric setting 2-5 (the number the pickle rules pass
# to the analysis scripts), 24 files in total. The loops expand in the order
# kind, then question, then setting.
REPORT_ROCS := $(foreach kind,roc_cv validation,\
  $(foreach q,1 2 3,\
    $(foreach p,2 3 4 5,reports/$(kind)_q$(q)_p$(p).pdf)))

# The "clean" figures used in the paper.
# The last entry deliberately has no trailing backslash, so the assignment ends
# here and cannot absorb whatever line follows it.
PUBLISHED_FIGURES := \
  reports/roc_cv_q1_p5_clean.pdf \
  reports/roc_cv_q3_p4_clean.pdf \
  reports/validation_q1_p5_clean.pdf \
  reports/validation_q3_p4_clean.pdf
# Goals that name a task, not a file on disk. Declaring all of them keeps a
# stray file with the same name from making the goal look up to date.
.PHONY: all all-clean-data analysis-reports published-figures \
        plain-clean-data imputed-clean-data

# The pickled results are produced by pattern rules. Listing the same target
# patterns here stops make from deleting them as intermediate files once the
# PDFs that depend on them have been built.
.PRECIOUS: \
  python-results/roc_cv_q%_p2.pkl \
  python-results/roc_cv_q%_p3.pkl \
  python-results/roc_cv_q%_p4.pkl \
  python-results/roc_cv_q%_p5.pkl \
  python-results/validation_q%_p2.pkl \
  python-results/validation_q%_p3.pkl \
  python-results/validation_q%_p4.pkl \
  python-results/validation_q%_p5.pkl
# Default goal: every report plus the figures used in the paper.
all: analysis-reports published-figures

analysis-reports: $(REPORT_ROCS)

published-figures: $(PUBLISHED_FIGURES)

# Convenience goals that only export the per-question CSV files.
# The prerequisites carry the clean-data/ prefix because that is where the
# CSV rules in this file write their output; without it make has no rule
# that can build them.
all-clean-data: plain-clean-data imputed-clean-data

plain-clean-data: \
  clean-data/q1-training.csv \
  clean-data/q2-training.csv \
  clean-data/q3-training.csv \
  clean-data/q1-validation.csv \
  clean-data/q2-validation.csv \
  clean-data/q3-validation.csv

imputed-clean-data: \
  clean-data/q1-training-imputed.csv \
  clean-data/q2-training-imputed.csv \
  clean-data/q3-training-imputed.csv \
  clean-data/q1-validation-imputed.csv \
  clean-data/q2-validation-imputed.csv \
  clean-data/q3-validation-imputed.csv
# CV pickle files
#
# One pattern rule per numeric setting (2-5); the stem % is the question number.
# The recipe passes the training CSV, the setting, and the output path to
# run_cv_analysis.py, in that order.
#
# python-results is an order-only prerequisite (after the |): the directory
# must exist, but its timestamp, which changes whenever a file is added to it,
# must not force the pickles to be rebuilt.
python-results/roc_cv_q%_p2.pkl: clean-data/q%-training.csv | python-results
	$(RUN_CV) $< 2 $@

python-results/roc_cv_q%_p3.pkl: clean-data/q%-training.csv | python-results
	$(RUN_CV) $< 3 $@

python-results/roc_cv_q%_p4.pkl: clean-data/q%-training.csv | python-results
	$(RUN_CV) $< 4 $@

python-results/roc_cv_q%_p5.pkl: clean-data/q%-training.csv | python-results
	$(RUN_CV) $< 5 $@
# Validation pickle files
#
# One pattern rule per numeric setting (2-5); the stem % is the question number.
# run_val_analysis.py receives, in order: the training CSV, the validation CSV,
# the setting, and the output path. The output directory is created by the
# recipe itself before the script runs.
python-results/validation_q%_p2.pkl: clean-data/q%-training.csv clean-data/q%-validation.csv
	mkdir -p $(@D)
	$(RUN_VAL) $< $(word 2,$^) 2 $@

python-results/validation_q%_p3.pkl: clean-data/q%-training.csv clean-data/q%-validation.csv
	mkdir -p $(@D)
	$(RUN_VAL) $< $(word 2,$^) 3 $@

python-results/validation_q%_p4.pkl: clean-data/q%-training.csv clean-data/q%-validation.csv
	mkdir -p $(@D)
	$(RUN_VAL) $< $(word 2,$^) 4 $@

python-results/validation_q%_p5.pkl: clean-data/q%-training.csv clean-data/q%-validation.csv
	mkdir -p $(@D)
	$(RUN_VAL) $< $(word 2,$^) 5 $@
# PDF figures in paper
#
# Explicit rules for the four "_clean" figures. They call the same plotting
# scripts as the generic report rules but without the trailing `t` argument.
#
# Each figure is written into reports/, so that directory is required as an
# order-only prerequisite (after the |): it must exist, but its timestamp never
# triggers a rebuild. The pickle's own rule takes care of python-results/.
reports/roc_cv_q1_p5_clean.pdf: python-results/roc_cv_q1_p5.pkl | reports
	$(PLOT_CV_ROC) $< $@

reports/validation_q1_p5_clean.pdf: python-results/validation_q1_p5.pkl | reports
	$(PLOT_VAL_ROC) $< $@

reports/roc_cv_q3_p4_clean.pdf: python-results/roc_cv_q3_p4.pkl | reports
	$(PLOT_CV_ROC) $< $@

reports/validation_q3_p4_clean.pdf: python-results/validation_q3_p4.pkl | reports
	$(PLOT_VAL_ROC) $< $@
# PDF reports recipes
#
# Generic rules: each report PDF is plotted from the pickle with the same stem.
# The plotting script gets the pickle, the output path, and a literal `t`.
#
# reports is order-only (after the |) so that adding files to the directory,
# which updates its timestamp, does not mark every report as out of date.
reports/roc_%.pdf: python-results/roc_%.pkl | reports
	$(PLOT_CV_ROC) $< $@ t

reports/validation_%.pdf: python-results/validation_%.pkl | reports
	$(PLOT_VAL_ROC) $< $@ t
# Directory creation recipes
# A single recipe shared by every output directory; $@ is whichever directory
# make is currently asked to create.
clean-data \
python-results \
reports \
rds-data:
	mkdir -p $@
# Clean data file recipes
#
# Each rule calls rds-to-questions-cli.R with, in order: the output CSV, the
# question (q1-q3), the data set (training or validation), the imputation mode
# (impute or none), and the .rds input file(s).
#
# Every input is named through RTRAINING / RVALIDATION, the only .rds path
# variables this makefile defines. clean-data is an order-only prerequisite
# (after the |) on every rule: the directory must exist before the script
# writes into it, but its timestamp must not trigger rebuilds.

# Imputed training sets.
clean-data/q1-training-imputed.csv: $(RTRAINING) | clean-data
	$(RCMD) $@ q1 training impute $(RTRAINING)

clean-data/q2-training-imputed.csv: $(RTRAINING) | clean-data
	$(RCMD) $@ q2 training impute $(RTRAINING)

clean-data/q3-training-imputed.csv: $(RTRAINING) | clean-data
	$(RCMD) $@ q3 training impute $(RTRAINING)

# Imputed validation sets; the script is given the training data as well as
# the validation data.
clean-data/q1-validation-imputed.csv: $(RVALIDATION) $(RTRAINING) | clean-data
	$(RCMD) $@ q1 validation impute $(RTRAINING) $(RVALIDATION)

clean-data/q2-validation-imputed.csv: $(RVALIDATION) $(RTRAINING) | clean-data
	$(RCMD) $@ q2 validation impute $(RTRAINING) $(RVALIDATION)

clean-data/q3-validation-imputed.csv: $(RVALIDATION) $(RTRAINING) | clean-data
	$(RCMD) $@ q3 validation impute $(RTRAINING) $(RVALIDATION)

# Training sets without imputation.
clean-data/q1-training.csv: $(RTRAINING) | clean-data
	$(RCMD) $@ q1 training none $(RTRAINING)

clean-data/q2-training.csv: $(RTRAINING) | clean-data
	$(RCMD) $@ q2 training none $(RTRAINING)

clean-data/q3-training.csv: $(RTRAINING) | clean-data
	$(RCMD) $@ q3 training none $(RTRAINING)

# Validation sets without imputation.
clean-data/q1-validation.csv: $(RVALIDATION) | clean-data
	$(RCMD) $@ q1 validation none $(RVALIDATION)

clean-data/q2-validation.csv: $(RVALIDATION) | clean-data
	$(RCMD) $@ q2 validation none $(RVALIDATION)

clean-data/q3-validation.csv: $(RVALIDATION) | clean-data
	$(RCMD) $@ q3 validation none $(RVALIDATION)
# Training / validation .rds files, built from the labelled feature table and
# the list of training ids.
# NOTE(review): a header with two targets declares two independent rules that
# share one recipe, so a parallel build may run the script once per target. If
# the script writes both files in a single run (not visible from this file),
# consider a grouped target (`&:`, GNU Make 4.3+) or a stamp file.
$(RTRAINING) $(RVALIDATION): rds-data/labeled.features.rds raw-data/csv/training-ids.csv
	$(MK_TRAINING_VALIDATION)

# Labelled feature table. rds-data is order-only (after the |): it must exist,
# but its timestamp, which changes whenever a file is written into it, must
# not force the features to be rebuilt.
rds-data/labeled.features.rds: raw-data/csv/all.csv raw-data/csv/labels.csv | rds-data
	$(RAW_TO_RDS)

# Combined CSV converted from the two published spreadsheets. $^ expands to
# both workbooks in the order listed and $@ to the output file.
raw-data/csv/all.csv: raw-data/NewMMDataALL_ForPublication.xlsx raw-data/All_OldMasterMixData_For_Publication.xlsx
	$(XLSX_TO_CSV) $^ -o $@