-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdvc.lock
789 lines (789 loc) · 25.9 KB
/
dvc.lock
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
schema: '2.0'
stages:
normalize_weather_data:
cmd: python ../../spoke/data_processing/01_normalizing_weather_data.py 2019-01-01
2019-12-31
deps:
- path: ../raw_data/weather/
md5: f67235592662850f4b0856767095f685.dir
size: 24203989
nfiles: 4
outs:
- path: weather_data_normalized.pkl.gz
md5: 10cebf0be330be5ba61c9ad936958650
size: 10916
normalize_citibike_data:
cmd: python ../../spoke/data_processing/04_normalizing_citibike_data.py 2019-01-01
2019-12-31 100 10000
deps:
- path: ../raw_data/citibike/
md5: b6ae53a779ce6556f3a4587ebcab0f06.dir
size: 2464917000
nfiles: 33
- path: target_map.graphml
md5: 27031ccc65677d649ac69a0194c338e2
size: 2696141
outs:
- path: trip_data_normalized.pkl.gz
md5: f344408426ebf34a790d4d51fb6d0061
size: 1702594
normalize_crash_data:
cmd: python ../../spoke/data_processing/02_normalizing_crash_data.py 2019-01-01
2019-12-31
deps:
- path: ../raw_data/Motor_Vehicle_Collisions_-_Crashes.csv
md5: 84bd8d0baf1943f1f3bfe319f420a176
size: 395108006
outs:
- path: crash_data_normalized.pkl.gz
md5: 69fcc12ea38e758c632b9e8ab6e1f21b
size: 591322
join_crash_data_to_nodes:
cmd: python ../../spoke/data_processing/03_joining_crash_data_to_nodes.py 100
deps:
- path: crash_data_normalized.pkl.gz
md5: 69fcc12ea38e758c632b9e8ab6e1f21b
size: 591322
- path: target_map.graphml
md5: 27031ccc65677d649ac69a0194c338e2
size: 2696141
outs:
- path: crash_data_normalized_with_node_graph.pkl.gz
md5: a4c9995592fce27e1baf5c24c64cedf3
size: 88377
join_all_data:
cmd: python ../../spoke/data_processing/05_joining_all_data.py
deps:
- path: crash_data_normalized_with_node_graph.pkl.gz
md5: a4c9995592fce27e1baf5c24c64cedf3
size: 88377
- path: trip_data_normalized.pkl.gz
md5: f344408426ebf34a790d4d51fb6d0061
size: 1702594
- path: weather_data_normalized.pkl.gz
md5: 10cebf0be330be5ba61c9ad936958650
size: 10916
outs:
- path: unified_dataset.pkl.gz
md5: 12c43e67600d01b0ce87d9c186414172
size: 1357913
create_map:
cmd: python ../../spoke/data_processing/00_create_map.py mnh_below_34th
outs:
- path: target_map.graphml
md5: 27031ccc65677d649ac69a0194c338e2
size: 2696141
associate_ctas_to_nodes:
cmd: python ../../spoke/data_processing/06_associate_ctas_to_nodes.py
deps:
- path: ../raw_data/2010_Census_Tracts/geo_export_85c202c5-6ec9-493e-b0ec-a13efa26758d.shp
md5: 0f6d2a4aaedc2e12f3db25f99d3f759e
size: 2802360
- path: target_map.graphml
md5: 27031ccc65677d649ac69a0194c338e2
size: 2696141
- path: unified_dataset.pkl.gz
md5: 12c43e67600d01b0ce87d9c186414172
size: 1357913
outs:
- path: node_id_census_tract_key.pkl.gz
md5: da9edc12e1ae2290a462c9b1d413e571
size: 8948
compute_danger:
cmd: python ../../spoke/data_processing/07_compute_danger.py
deps:
- path: node_id_census_tract_key.pkl.gz
md5: 54ee140b974a83495a4aa4e5e6b219ef
size: 8948
- path: target_map.graphml
md5: 27031ccc65677d649ac69a0194c338e2
size: 2696141
- path: unified_dataset.pkl.gz
md5: 12c43e67600d01b0ce87d9c186414172
size: 1357913
outs:
- path: danger_by_node_id.pkl.gz
md5: 5fdd2e436dfe56d19ece49070180d439
size: 21428
- path: target_map_consolidated.graphml
md5: d0e771646b98acd7c0cbf288207778df
size: 2396203
generate_heatmaps:
cmd: python ../../spoke/data_processing/08_generate_heatmaps.py
deps:
- path: danger_by_node_id.pkl.gz
md5: 5fdd2e436dfe56d19ece49070180d439
size: 21428
- path: target_map_consolidated.graphml
md5: d0e771646b98acd7c0cbf288207778df
size: 2396203
normalize_weather_data@2020:
cmd: python ../../spoke/data_processing/01_normalizing_weather_data.py 2020-01-01
2020-12-31
deps:
- path: ../raw_data/weather/
md5: f67235592662850f4b0856767095f685.dir
size: 24203989
nfiles: 4
outs:
- path: weather_data_normalized.pkl.gz
md5: dad271c85f8d28dd2f944e4512e44b9e
size: 10423
normalize_weather_data@2021:
cmd: python ../../spoke/data_processing/01_normalizing_weather_data.py 2021-01-01
2021-11-01
deps:
- path: ../raw_data/weather/
md5: f67235592662850f4b0856767095f685.dir
size: 24203989
nfiles: 4
outs:
- path: weather_data_normalized.pkl.gz
md5: e308056fc894bb6ccbfe0f414e391520
size: 9130
create_map@2021:
cmd: python ../../spoke/data_processing/00_create_map.py mnh_below_34th
outs:
- path: target_map.graphml
md5: cdd5e1c1aad89aece16a33f9670c5aed
size: 2695878
normalize_crash_data@2021:
cmd: python ../../spoke/data_processing/02_normalizing_crash_data.py 2021-01-01
2021-11-01
deps:
- path: ../raw_data/Motor_Vehicle_Collisions_-_Crashes.csv
md5: 84bd8d0baf1943f1f3bfe319f420a176
size: 395108006
outs:
- path: crash_data_normalized.pkl.gz
md5: 8d3c1c697f24a9331158464f2a9eec06
size: 412062
join_crash_data_to_nodes@2021:
cmd: python ../../spoke/data_processing/03_joining_crash_data_to_nodes.py 100
deps:
- path: crash_data_normalized.pkl.gz
md5: 8d3c1c697f24a9331158464f2a9eec06
size: 412062
- path: target_map.graphml
md5: cdd5e1c1aad89aece16a33f9670c5aed
size: 2695878
outs:
- path: crash_data_normalized_with_node_graph.pkl.gz
md5: 75331b9b4c78690d70c1f496f031983d
size: 59155
normalize_citibike_data@2021:
cmd: python ../../spoke/data_processing/04_normalizing_citibike_data.py 2021-01-01
2021-11-01 100 10000
deps:
- path: ../raw_data/citibike/
md5: b6ae53a779ce6556f3a4587ebcab0f06.dir
size: 2464917000
nfiles: 33
- path: target_map.graphml
md5: cdd5e1c1aad89aece16a33f9670c5aed
size: 2695878
outs:
- path: trip_data_normalized.pkl.gz
md5: 99c4b246d9f59426798835a0d4780b7c
size: 1611793
join_all_data@2021:
cmd: python ../../spoke/data_processing/05_joining_all_data.py
deps:
- path: crash_data_normalized_with_node_graph.pkl.gz
md5: 75331b9b4c78690d70c1f496f031983d
size: 59155
- path: target_map.graphml
md5: cdd5e1c1aad89aece16a33f9670c5aed
size: 2695878
- path: trip_data_normalized.pkl.gz
md5: 99c4b246d9f59426798835a0d4780b7c
size: 1611793
- path: weather_data_normalized.pkl.gz
md5: e308056fc894bb6ccbfe0f414e391520
size: 9130
outs:
- path: unified_dataset.parquet
md5: f77502d56ca25bb481aaccc644c214d1
size: 2373881
associate_ctas_to_nodes@2021:
cmd: python ../../spoke/data_processing/06_associate_ctas_to_nodes.py
deps:
- path: ../raw_data/2010_Census_Tracts/geo_export_85c202c5-6ec9-493e-b0ec-a13efa26758d.shp
md5: 0f6d2a4aaedc2e12f3db25f99d3f759e
size: 2802360
- path: target_map.graphml
md5: cdd5e1c1aad89aece16a33f9670c5aed
size: 2695878
outs:
- path: node_id_census_tract_key.pkl.gz
md5: f62be295cec949b473379f7e19050e12
size: 8948
create_map@2019:
cmd: python ../../spoke/data_processing/00_create_map.py mnh_below_34th
outs:
- path: target_map.graphml
md5: 9762351917f80b789719cbc2a47c1b26
size: 2695878
normalize_weather_data@2019:
cmd: python ../../spoke/data_processing/01_normalizing_weather_data.py 2019-01-01
2019-12-31
deps:
- path: ../raw_data/weather/
md5: f67235592662850f4b0856767095f685.dir
size: 24203989
nfiles: 4
outs:
- path: weather_data_normalized.pkl.gz
md5: 82396bd412a3487d4639e0d3d9bcc75e
size: 10916
normalize_crash_data@2019:
cmd: python ../../spoke/data_processing/02_normalizing_crash_data.py 2019-01-01
2019-12-31
deps:
- path: ../raw_data/Motor_Vehicle_Collisions_-_Crashes.csv
md5: 84bd8d0baf1943f1f3bfe319f420a176
size: 395108006
outs:
- path: crash_data_normalized.pkl.gz
md5: 273860906793003946e524528a9be903
size: 591322
join_crash_data_to_nodes@2019:
cmd: python ../../spoke/data_processing/03_joining_crash_data_to_nodes.py 100
deps:
- path: crash_data_normalized.pkl.gz
md5: 273860906793003946e524528a9be903
size: 591322
- path: target_map.graphml
md5: 9762351917f80b789719cbc2a47c1b26
size: 2695878
outs:
- path: crash_data_normalized_with_node_graph.pkl.gz
md5: bcc6a45a24584827eefbeaf0968689b9
size: 88378
normalize_citibike_data@2019:
cmd: python ../../spoke/data_processing/04_normalizing_citibike_data.py 2019-01-01
2019-12-31 100 100000
deps:
- path: ../raw_data/citibike/
md5: b6ae53a779ce6556f3a4587ebcab0f06.dir
size: 2464917000
nfiles: 33
- path: target_map.graphml
md5: 9762351917f80b789719cbc2a47c1b26
size: 2695878
outs:
- path: trip_data_normalized.pkl.gz
md5: f81e0b0817bfbdceaad53f7d096ac082
size: 16286329
join_all_data@2019:
cmd: python ../../spoke/data_processing/05_joining_all_data.py
deps:
- path: crash_data_normalized_with_node_graph.pkl.gz
md5: bcc6a45a24584827eefbeaf0968689b9
size: 88378
- path: target_map.graphml
md5: 9762351917f80b789719cbc2a47c1b26
size: 2695878
- path: trip_data_normalized.pkl.gz
md5: f81e0b0817bfbdceaad53f7d096ac082
size: 16286329
- path: weather_data_normalized.pkl.gz
md5: 82396bd412a3487d4639e0d3d9bcc75e
size: 10916
outs:
- path: unified_dataset.parquet
md5: 877ae64aaff10665ba95426fc831b60e
size: 19743322
associate_ctas_to_nodes@2019:
cmd: python ../../spoke/data_processing/06_associate_ctas_to_nodes.py
deps:
- path: ../raw_data/2010_Census_Tracts/geo_export_85c202c5-6ec9-493e-b0ec-a13efa26758d.shp
md5: 0f6d2a4aaedc2e12f3db25f99d3f759e
size: 2802360
- path: target_map.graphml
md5: 9762351917f80b789719cbc2a47c1b26
size: 2695878
outs:
- path: node_id_census_tract_key.pkl.gz
md5: efad478955e73ad8d4fb648f7cd9345c
size: 8948
compute_danger@2019:
cmd: python ../../spoke/data_processing/07_compute_danger.py
deps:
- path: node_id_census_tract_key.pkl.gz
md5: efad478955e73ad8d4fb648f7cd9345c
size: 8948
- path: target_map.graphml
md5: 9762351917f80b789719cbc2a47c1b26
size: 2695878
- path: unified_dataset.parquet
md5: 877ae64aaff10665ba95426fc831b60e
size: 19743322
outs:
- path: danger_by_node_id.pkl.gz
md5: 5f12580b1b351afe7522b948ec19f77e
size: 24172
- path: target_map_consolidated.graphml
md5: 4117c8c5ba0951a1710b3bb43eab9e4a
size: 2410335
create_map@2020:
cmd: python ../../spoke/data_processing/00_create_map.py mnh_below_34th
outs:
- path: target_map.graphml
md5: 4e7182ebc0a30045e7fac7dd73ab3a95
size: 2695878
normalize_crash_data@2020:
cmd: python ../../spoke/data_processing/02_normalizing_crash_data.py 2020-01-01
2020-12-31
deps:
- path: ../raw_data/Motor_Vehicle_Collisions_-_Crashes.csv
md5: 84bd8d0baf1943f1f3bfe319f420a176
size: 395108006
outs:
- path: crash_data_normalized.pkl.gz
md5: 19c68cff989e1fb682bd0c27f6ca394e
size: 476706
join_crash_data_to_nodes@2020:
cmd: python ../../spoke/data_processing/03_joining_crash_data_to_nodes.py 100
deps:
- path: crash_data_normalized.pkl.gz
md5: 19c68cff989e1fb682bd0c27f6ca394e
size: 476706
- path: target_map.graphml
md5: 4e7182ebc0a30045e7fac7dd73ab3a95
size: 2695878
outs:
- path: crash_data_normalized_with_node_graph.pkl.gz
md5: d4e85584e0a26361ca206dfeff783d49
size: 57894
normalize_citibike_data@2020:
cmd: python ../../spoke/data_processing/04_normalizing_citibike_data.py 2020-01-01
2020-12-31 100 100000
deps:
- path: ../raw_data/citibike/
md5: b6ae53a779ce6556f3a4587ebcab0f06.dir
size: 2464917000
nfiles: 33
- path: target_map.graphml
md5: 4e7182ebc0a30045e7fac7dd73ab3a95
size: 2695878
outs:
- path: trip_data_normalized.pkl.gz
md5: 9c72235ed9e7f6370c521e344c9a5410
size: 16416628
join_all_data@2020:
cmd: python ../../spoke/data_processing/05_joining_all_data.py
deps:
- path: crash_data_normalized_with_node_graph.pkl.gz
md5: d4e85584e0a26361ca206dfeff783d49
size: 57894
- path: target_map.graphml
md5: 4e7182ebc0a30045e7fac7dd73ab3a95
size: 2695878
- path: trip_data_normalized.pkl.gz
md5: 9c72235ed9e7f6370c521e344c9a5410
size: 16416628
- path: weather_data_normalized.pkl.gz
md5: dad271c85f8d28dd2f944e4512e44b9e
size: 10423
outs:
- path: unified_dataset.parquet
md5: 3cfe6d31737217be4042f51f7b8d1c8e
size: 20163714
associate_ctas_to_nodes@2020:
cmd: python ../../spoke/data_processing/06_associate_ctas_to_nodes.py
deps:
- path: ../raw_data/2010_Census_Tracts/geo_export_85c202c5-6ec9-493e-b0ec-a13efa26758d.shp
md5: 0f6d2a4aaedc2e12f3db25f99d3f759e
size: 2802360
- path: target_map.graphml
md5: 4e7182ebc0a30045e7fac7dd73ab3a95
size: 2695878
outs:
- path: node_id_census_tract_key.pkl.gz
md5: f9dd2db44c26f1952a31b7667d790d62
size: 8948
compute_danger@2020:
cmd: python ../../spoke/data_processing/07_compute_danger.py
deps:
- path: node_id_census_tract_key.pkl.gz
md5: f9dd2db44c26f1952a31b7667d790d62
size: 8948
- path: target_map.graphml
md5: 4e7182ebc0a30045e7fac7dd73ab3a95
size: 2695878
- path: unified_dataset.parquet
md5: 3cfe6d31737217be4042f51f7b8d1c8e
size: 20163714
outs:
- path: danger_by_node_id.pkl.gz
md5: cc8aab1c600c919ca62e05758909ee30
size: 22051
- path: target_map_consolidated.graphml
md5: 7bb62d85194115534ada7e6128d28c68
size: 2410335
generate_heatmaps@2019:
cmd: python ../../spoke/data_processing/08_generate_heatmaps.py
deps:
- path: danger_by_node_id.pkl.gz
md5: 5f12580b1b351afe7522b948ec19f77e
size: 24172
- path: target_map_consolidated.graphml
md5: 4117c8c5ba0951a1710b3bb43eab9e4a
size: 2410335
generate_heatmaps@2020:
cmd: python ../../spoke/data_processing/08_generate_heatmaps.py
deps:
- path: danger_by_node_id.pkl.gz
md5: cc8aab1c600c919ca62e05758909ee30
size: 22051
- path: target_map_consolidated.graphml
md5: 7bb62d85194115534ada7e6128d28c68
size: 2410335
compute_danger@2021:
cmd: python ../../spoke/data_processing/07_compute_danger.py
deps:
- path: node_id_census_tract_key.pkl.gz
md5: f62be295cec949b473379f7e19050e12
size: 8948
- path: target_map.graphml
md5: cdd5e1c1aad89aece16a33f9670c5aed
size: 2695878
- path: unified_dataset.parquet
md5: f77502d56ca25bb481aaccc644c214d1
size: 2373881
outs:
- path: danger_by_node_id.pkl.gz
md5: caafcb12706eed17ba2f4a55ae285be5
size: 20116
- path: target_map_consolidated.graphml
md5: 2d00d833b5bed41bb417d2f17c543671
size: 2410335
generate_heatmaps@2021:
cmd: python ../../spoke/data_processing/08_generate_heatmaps.py
deps:
- path: danger_by_node_id.pkl.gz
md5: caafcb12706eed17ba2f4a55ae285be5
size: 20116
- path: target_map_consolidated.graphml
md5: 2d00d833b5bed41bb417d2f17c543671
size: 2410335
normalize_weather_data@2019_thru_2021:
cmd: python ../../spoke/data_processing/01_normalizing_weather_data.py 2019-01-01
2021-11-01
deps:
- path: ../raw_data/weather/
md5: f67235592662850f4b0856767095f685.dir
size: 24203989
nfiles: 4
outs:
- path: weather_data_normalized.pkl.gz
md5: 8ceeb65cf041812900b8a1cda2d4b470
size: 26398
create_map@2019_thru_2021:
cmd: python ../../spoke/data_processing/00_create_map.py mnh_below_34th
outs:
- path: target_map.graphml
md5: a05cabc9846eae351f436e8fc42c13e8
size: 2695878
normalize_citibike_data@2019_thru_2021:
cmd: python ../../spoke/data_processing/04_normalizing_citibike_data.py 2019-01-01
2021-11-01 100 1000000
deps:
- path: ../raw_data/citibike/
md5: b6ae53a779ce6556f3a4587ebcab0f06.dir
size: 2464917000
nfiles: 33
- path: target_map.graphml
md5: a05cabc9846eae351f436e8fc42c13e8
size: 2695878
outs:
- path: trip_data_normalized.pkl.gz
md5: 386dab4e417f19897c04887b40df20de
size: 155878202
normalize_crash_data@2019_thru_2021:
cmd: python ../../spoke/data_processing/02_normalizing_crash_data.py 2019-01-01
2021-11-01
deps:
- path: ../raw_data/Motor_Vehicle_Collisions_-_Crashes.csv
md5: 84bd8d0baf1943f1f3bfe319f420a176
size: 395108006
outs:
- path: crash_data_normalized.pkl.gz
md5: bb08a4c4fbdc11f18e494e50e90afdc1
size: 1434809
join_crash_data_to_nodes@2019_thru_2021:
cmd: python ../../spoke/data_processing/03_joining_crash_data_to_nodes.py 100
deps:
- path: crash_data_normalized.pkl.gz
md5: bb08a4c4fbdc11f18e494e50e90afdc1
size: 1434809
- path: target_map.graphml
md5: a05cabc9846eae351f436e8fc42c13e8
size: 2695878
outs:
- path: crash_data_normalized_with_node_graph.pkl.gz
md5: 2242a4a982ee39920a14aaa671b7a776
size: 174515
join_all_data@2019_thru_2021:
cmd: python ../../spoke/data_processing/05_joining_all_data.py
deps:
- path: crash_data_normalized_with_node_graph.pkl.gz
md5: 2242a4a982ee39920a14aaa671b7a776
size: 174515
- path: target_map.graphml
md5: a05cabc9846eae351f436e8fc42c13e8
size: 2695878
- path: trip_data_normalized.pkl.gz
md5: 386dab4e417f19897c04887b40df20de
size: 155878202
- path: weather_data_normalized.pkl.gz
md5: 8ceeb65cf041812900b8a1cda2d4b470
size: 26398
outs:
- path: unified_dataset.parquet
md5: 7f084a095fee59bbba0ed14e27f1ad8b
size: 203083300
associate_ctas_to_nodes@2019_thru_2021:
cmd: python ../../spoke/data_processing/06_associate_ctas_to_nodes.py
deps:
- path: ../raw_data/2010_Census_Tracts/geo_export_85c202c5-6ec9-493e-b0ec-a13efa26758d.shp
md5: 0f6d2a4aaedc2e12f3db25f99d3f759e
size: 2802360
- path: target_map.graphml
md5: a05cabc9846eae351f436e8fc42c13e8
size: 2695878
outs:
- path: node_id_census_tract_key.pkl.gz
md5: 8b2d50753b7c6b4ff41bab09d3cef99a
size: 8948
compute_danger@2019_thru_2021:
cmd: python ../../spoke/data_processing/07_compute_danger.py
deps:
- path: node_id_census_tract_key.pkl.gz
md5: 8b2d50753b7c6b4ff41bab09d3cef99a
size: 8948
- path: target_map.graphml
md5: a05cabc9846eae351f436e8fc42c13e8
size: 2695878
- path: unified_dataset.parquet
md5: 7f084a095fee59bbba0ed14e27f1ad8b
size: 203083300
outs:
- path: danger_by_node_id.pkl.gz
md5: 1f0a927211d41d93bac93e6f813f7a0e
size: 28058
- path: target_map_consolidated.graphml
md5: eed0d41e9e739d5d4b49f99175678a0f
size: 2410335
generate_heatmaps@2019_thru_2021:
cmd: python ../../spoke/data_processing/08_generate_heatmaps.py
deps:
- path: danger_by_node_id.pkl.gz
md5: 1f0a927211d41d93bac93e6f813f7a0e
size: 28058
- path: target_map_consolidated.graphml
md5: eed0d41e9e739d5d4b49f99175678a0f
size: 2410335
normalize_weather_data@2019_thru_2021_all_mnh:
cmd: python ../../spoke/data_processing/01_normalizing_weather_data.py 2019-01-01
2021-11-01
deps:
- path: ../raw_data/weather/
md5: f67235592662850f4b0856767095f685.dir
size: 24203989
nfiles: 4
outs:
- path: weather_data_normalized.pkl.gz
md5: d1ebd94fbf2db8fde771db6d6d8913bc
size: 26398
create_map@2019_thru_2021_all_mnh:
cmd: python ../../spoke/data_processing/00_create_map.py mnh
outs:
- path: target_map.graphml
md5: 0f25511d74f80421e86f8734c98ee281
size: 7433810
normalize_crash_data@2019_thru_2021_all_mnh:
cmd: python ../../spoke/data_processing/02_normalizing_crash_data.py 2019-01-01
2021-11-01
deps:
- path: ../raw_data/Motor_Vehicle_Collisions_-_Crashes.csv
md5: 84bd8d0baf1943f1f3bfe319f420a176
size: 395108006
outs:
- path: crash_data_normalized.pkl.gz
md5: a34a1a7381d59bbf80567cb8120a63eb
size: 1434809
join_crash_data_to_nodes@2019_thru_2021_all_mnh:
cmd: python ../../spoke/data_processing/03_joining_crash_data_to_nodes.py 100
deps:
- path: crash_data_normalized.pkl.gz
md5: a34a1a7381d59bbf80567cb8120a63eb
size: 1434809
- path: target_map.graphml
md5: 0f25511d74f80421e86f8734c98ee281
size: 7433810
outs:
- path: crash_data_normalized_with_node_graph.pkl.gz
md5: 6345cc092c367d378fc0d8b90e217ef9
size: 468874
normalize_citibike_data@2019_thru_2021_all_mnh:
cmd: python ../../spoke/data_processing/04_normalizing_citibike_data.py 2019-01-01
2021-11-01 100 1000000
deps:
- path: ../raw_data/citibike/
md5: b6ae53a779ce6556f3a4587ebcab0f06.dir
size: 2464917000
nfiles: 33
- path: target_map.graphml
md5: 0f25511d74f80421e86f8734c98ee281
size: 7433810
outs:
- path: trip_data_normalized.pkl.gz
md5: e375d486c6da5c2490b8f39d18c51d4b
size: 253761206
join_all_data@2019_thru_2021_all_mnh:
cmd: python ../../spoke/data_processing/05_joining_all_data.py
deps:
- path: crash_data_normalized_with_node_graph.pkl.gz
md5: 6345cc092c367d378fc0d8b90e217ef9
size: 468874
- path: target_map.graphml
md5: 0f25511d74f80421e86f8734c98ee281
size: 7433810
- path: trip_data_normalized.pkl.gz
md5: e375d486c6da5c2490b8f39d18c51d4b
size: 253761206
- path: weather_data_normalized.pkl.gz
md5: d1ebd94fbf2db8fde771db6d6d8913bc
size: 26398
outs:
- path: unified_dataset.parquet
md5: 60f2b3f385ea1723be0fa60d2fa697e9
size: 321591083
associate_ctas_to_nodes@2019_thru_2021_all_mnh:
cmd: python ../../spoke/data_processing/06_associate_ctas_to_nodes.py
deps:
- path: ../raw_data/2010_Census_Tracts/geo_export_85c202c5-6ec9-493e-b0ec-a13efa26758d.shp
md5: 0f6d2a4aaedc2e12f3db25f99d3f759e
size: 2802360
- path: target_map.graphml
md5: 0f25511d74f80421e86f8734c98ee281
size: 7433810
outs:
- path: node_id_census_tract_key.pkl.gz
md5: 3c3214d6cd2b33dabefac25bbd58b4c8
size: 24225
compute_danger@2019_thru_2021_all_mnh:
cmd: python ../../spoke/data_processing/07_compute_danger.py
deps:
- path: node_id_census_tract_key.pkl.gz
md5: 3c3214d6cd2b33dabefac25bbd58b4c8
size: 24225
- path: target_map.graphml
md5: 0f25511d74f80421e86f8734c98ee281
size: 7433810
- path: unified_dataset.parquet
md5: 60f2b3f385ea1723be0fa60d2fa697e9
size: 321591083
outs:
- path: danger_by_node_id.pkl.gz
md5: df01b9ac26c6db23a40b9c42f9774f3e
size: 71783
- path: target_map_consolidated.graphml
md5: c0e76cad4767ea3eaaed5a7477e7aea8
size: 6839942
generate_heatmaps@2019_thru_2021_all_mnh:
cmd: python ../../spoke/data_processing/08_generate_heatmaps.py
deps:
- path: danger_by_node_id.pkl.gz
md5: df01b9ac26c6db23a40b9c42f9774f3e
size: 71783
- path: target_map_consolidated.graphml
md5: c0e76cad4767ea3eaaed5a7477e7aea8
size: 6839942
prepare_training_data@2019:
cmd: python ../../spoke/data_processing/09_prepare_training_data.py 42 0.6 0.2
0.2
deps:
- path: unified_dataset.parquet
md5: 877ae64aaff10665ba95426fc831b60e
size: 19743322
outs:
- path: eval.parquet
md5: b6c8dc1f351fe72960d120fd14ef60ff
size: 9136586
- path: test.parquet
md5: 20556f2e36b949be9f14733bc05c2496
size: 9136078
- path: train.parquet
md5: 88336adc38b2a9a14fabddae14e6a62a
size: 25620354
prepare_training_data@2019_thru_2021:
cmd: python ../../spoke/data_processing/09_prepare_training_data.py 42 0.6 0.2
0.2
deps:
- path: unified_dataset.parquet
md5: 7f084a095fee59bbba0ed14e27f1ad8b
size: 203083300
outs:
- path: eval.parquet
md5: 15121d9dea35f1d5222608be2599a1e0
size: 74137542
- path: test.parquet
md5: 0a71a1679792b558fc94f634f6c73343
size: 74119796
- path: train.parquet
md5: 52fa90ea614dbb6452fe45f60a2cf4f6
size: 221774504
prepare_training_data@2021:
cmd: python ../../spoke/data_processing/09_prepare_training_data.py 42 0.6 0.2
0.2
deps:
- path: unified_dataset.parquet
md5: f77502d56ca25bb481aaccc644c214d1
size: 2373881
outs:
- path: eval.parquet
md5: 695da1e36d6f88dad4129e3b33f7cfbf
size: 911160
- path: test.parquet
md5: e80312c4bc2032341a4aedeab98fb558
size: 911177
- path: train.parquet
md5: a1b18e60e405c322434a7c00a967e86c
size: 2608316
prepare_training_data@2020:
cmd: python ../../spoke/data_processing/09_prepare_training_data.py 42 0.6 0.2
0.2
deps:
- path: unified_dataset.parquet
md5: 3cfe6d31737217be4042f51f7b8d1c8e
size: 20163714
outs:
- path: eval.parquet
md5: be5acab76d3093038900128a389c23fd
size: 9192010
- path: test.parquet
md5: aab496a7275f1eb71939d5f30870332b
size: 9204716
- path: train.parquet
md5: e9b35ac4c577078cdf2ea54165dfa7e1
size: 25841142
prepare_training_data@2019_thru_2021_all_mnh:
cmd: python ../../spoke/data_processing/09_prepare_training_data.py 42 0.6 0.2
0.2
deps:
- path: unified_dataset.parquet
md5: 60f2b3f385ea1723be0fa60d2fa697e9
size: 321591083
outs:
- path: eval.parquet
md5: 1aa0726686f21b7affbfe30440161bea
size: 134215616
- path: test.parquet
md5: fa5a185bfc135d6cd0b3e873b984a9ce
size: 134294401
- path: train.parquet
md5: fb7f60cc15e61a44cd69ada8171d66e1
size: 401771390