-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
380 lines (277 loc) · 13 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
################################################################################
#
# Process data related to disappearances in Mexico.
#
# Run make help to see all commands.
#
# make all will build the site.
#
# This is an auto-documenting Makefile:
#
# Add sections to help with a line like:
#
# ##@ Section Name
#
# Add commands by following a target line with a double-# comment
# like this:
#
# clean: ## Delete all files".
#
################################################################################
# Include variables such as list of files from INEGI
include Makefile.vars
# Include .env configuration
include .env
export
# Activate Python environment
PIPENV = pipenv run
# Functions can be found in sql/functions
FUNCTIONS = $(basename $(notdir $(wildcard sql/functions/*.sql)))
# Views can be found in sql/views
VIEWS = $(basename $(notdir $(wildcard sql/views/*.sql)))
# Schemas are used to compartmentalize various types of data tables or views
SCHEMAS = views processed raw
# Data and map tables need to be statically defined; they depend on remote files
DATAFILES = $(basename $(notdir $(wildcard sql/raw/*.sql)))
SHAPEFILES = areas_geoestadisticas_estatales areas_geoestadisticas_municipales
# Different directories we can clean
DATA_DIRECTORIES = shapefiles processed stats geojson mbtiles
DOWNLOAD_DIRECTORIES = downloads
##@ Basic usage
.DEFAULT_GOAL := all
.PHONY: all
all: views ## Build all
.PHONY: views
views: load $(patsubst %, db/views/%, $(VIEWS)) ## Make all views
.PHONY: load
load: csvs shapefiles $(patsubst %, db/processed/%, $(DATAFILES)) $(patsubst %, db/processed/%, $(SHAPEFILES)) ## Make all tables from data files
.PHONY: csvs
csvs: $(patsubst %, db/csv/%, $(DATAFILES))
.PHONY: shapefiles
shapefiles: $(patsubst %, db/shapefiles/%, $(SHAPEFILES))
.PHONY: clean
clean: dropdb clean/data ## Clean data files and databases (but not downloads)
.PHONY: help
help: ## Display this help
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z\%\\.\/_-]+:.*?##/ { printf "\033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
##@ Database views
# Many of these are explicitly defined because the dependency graph must be manually specified.
define create_view
@(psql -c "\d views.$(subst db/views/,,$@)" > /dev/null 2>&1 && \
echo "view $(subst db/views/,,$@) exists") || \
psql -v ON_ERROR_STOP=1 -qX1ef $<
endef
.PHONY: db/views/%
db/views/%: sql/views/%.sql load ## Create view % specified in sql/views/%.sql (will load all data)
$(call create_view)
.PHONY: db/views/date_distributions
db/views/date_distribution: sql/views/date_distribution.sql db/processed/cenapi db/functions/calculate_percentiles
$(call create_view)
.PHONY: db/views/cenapi_estado_by_month
db/views/cenapi_estado_by_month: sql/views/cenapi_estado_by_month.sql db/views/estatales db/views/municipales
$(call create_view)
.PHONY: db/views/cenapi_estado_by_year
db/views/cenapi_estado_by_year: sql/views/cenapi_estado_by_year.sql db/views/estatales db/views/municipales
$(call create_view)
.PHONY: db/views/cenapi_by_month
db/views/cenapi_by_month: sql/views/cenapi_by_month.sql db/views/estatales db/views/municipales
$(call create_view)
.PHONY: db/views/cenapi_by_year
db/views/cenapi_by_year: sql/views/cenapi_by_year.sql db/views/estatales db/views/municipales
$(call create_view)
.PHONY: db/views/census_estatales
db/views/census_estatales: sql/views/census_estatales.sql db/views/estatales db/views/municipales
$(call create_view)
.PHONY: db/views/census_municipios
db/views/census_municipios: sql/views/census_municipios.sql db/processed/base_municipios_datos db/views/estatales db/views/municipales
$(call create_view)
.PHONY: db/views/cenapi_audit
db/views/cenapi_audit: sql/views/cenapi_audit.sql db/extensions/hstore db/processed/cenapi ## Audit CENAPI data
$(call create_view)
.PHONY: db/views/cenapi_estado_evento_by_month
db/views/cenapi_estado_evento_by_month: sql/views/cenapi_estado_evento_by_month.sql db/views/estatales ## Summaries
$(call create_view)
.PHONY: db/views/cenapi_evento_by_month
db/views/cenapi_evento_by_month: sql/views/cenapi_evento_by_month.sql db/views/municipales ## Summaries
$(call create_view)
##@ Database structure
define create_extension
@(psql -c "\dx $(subst db/extensions/,,$@)" | grep $(subst db/extensions/,,$@) > /dev/null 2>&1 && \
echo "extension $(subst db/extensions/,,$@) exists") || \
psql -v ON_ERROR_STOP=1 -qX1ec "CREATE EXTENSION $(subst db/extensions/,,$@)"
endef
define create_raw_table
@(psql -c "\d raw.$(subst db/raw/,,$@)" > /dev/null 2>&1 && \
echo "table raw.$(subst db/raw/,,$@) exists") || \
psql -v ON_ERROR_STOP=1 -qX1ef $<
endef
define create_processed_table
@(psql -c "\d processed.$(subst db/processed/,,$@)" > /dev/null 2>&1 && \
echo "table processed.$(subst db/processed/,,$@) exists") || \
psql -v ON_ERROR_STOP=1 -qX1ef $<
endef
define create_schema
@(psql -c "\dn $(subst db/schemas/,,$@)" | grep $(subst db/schemas/,,$@) > /dev/null 2>&1 && \
echo "schema $(subst db/schemas/,,$@) exists") || \
psql -v ON_ERROR_STOP=1 -qaX1ec "CREATE SCHEMA $(subst db/schemas/,,$@)"
endef
define load_raw_shapefile
@(psql -c "\d raw.$(subst db/shapefiles/,,$@)" > /dev/null 2>&1 && \
echo "table raw.$(subst db/shapefiles/,,$@) exists") || \
shp2pgsql $< raw.$(subst db/shapefiles/,,$@) | psql -v ON_ERROR_STOP=1 -q
endef
define load_raw_csv
@(psql -Atc "select count(*) from raw.$(subst db/csv/,,$@)" | grep -v -w "0" > /dev/null 2>&1 && \
echo "raw.$(subst db/csv/,,$@) is not empty") || \
psql -v ON_ERROR_STOP=1 -qX1ec "\copy raw.$(subst db/csv/,,$@) from '$(CURDIR)/$<' with delimiter ',' csv header;"
endef
define create_function
@(psql -c "\df $(subst db/functions/,,$@)" | grep $(subst db/functions/,,$@) > /dev/null 2>&1 && \
echo "function $(subst db/functions/,,$@) exists") || \
psql -v ON_ERROR_STOP=1 -qX1ef sql/functions/$(subst db/functions/,,$@).sql
endef
.PHONY: db
db: tunnel ## Create database
@(psql -c "SELECT 1" > /dev/null 2>&1 && \
echo "database $(PGDATABASE) exists") || \
createdb -e $(PGDATABASE) -E UTF8 -T template0 --locale=en_US.UTF-8
.PHONY: db/extensions/%
db/extensions/%: db ## Create extension % (where % is 'hstore', 'postgis', etc)
$(call create_extension)
.PHONY: db/vacuum
db/vacuum: # Vacuum db
psql -v ON_ERROR_STOP=1 -qec "VACUUM ANALYZE;"
.PHONY: db/schemas
db/schemas: $(patsubst %, db/schemas/%, $(SCHEMAS)) ## Make all schemas
.PHONY: db/schemas/%
db/schemas/%: db # Create schema % (where % is 'raw', etc)
$(call create_schema)
.PHONY: db/functions
db/functions: $(patsubst %, db/functions/%, $(FUNCTIONS)) ## Make all functions
.PHONY: db/functions/%
db/functions/%: db
$(call create_function)
.PHONY: db/searchpath
db/searchpath: db/schemas ## Set up (hardcoded) schema search path
psql -v ON_ERROR_STOP=1 -qX1c "ALTER DATABASE $(PGDATABASE) SET search_path TO public,views,processed,raw;"
.PHONY: db/raw/%
db/raw/%: sql/raw/%.sql db/searchpath ## Create table % from sql/raw/%.sql
$(call create_raw_table)
.PHONY: db/shapefiles/%
db/shapefiles/%: data/shapefiles/%.shp db/searchpath db/extensions/postgis ## Load table % from data/shapefiles/%.shp
$(call load_raw_shapefile)
.PHONY: db/csv/%
db/csv/%: data/processed/%.csv db/raw/% ## Load table % from data/downloads/%.csv
$(call load_raw_csv)
.PHONY: db/processed/%
db/processed/%: sql/processed/%.sql db/functions db/schemas # Make table cleaned and processed tables
$(call create_processed_table)
.PHONY: dropschema/%
dropschema/%: # @TODO wrap in detection
psql -v ON_ERROR_STOP=1 -qX1c "DROP SCHEMA IF EXISTS $* CASCADE;"
.PHONY: dropdb
dropdb: ## Drop database
dropdb --if-exists -e $(PGDATABASE)
##@ Source data
.PHONY: download/shapefiles
download/shapefiles: data/downloads/marcos_geoestadicos_2017.zip ## Download shapefiles
.PHONY: download/gdrive
download/gdrive: $(patsubst %, data/downloads/%.csv, $(DATAFILES)) ## Download all Drive files
data/downloads/base_municipios_final_datos_01.rar:
curl -o $@ http://www.conapo.gob.mx/work/models/CONAPO/Datos_Abiertos/Proyecciones2018/base_municipios_final_datos_01.rar
data/downloads/base_municipios_final_datos_01.csv: data/downloads/base_municipios_final_datos_01.rar
unrar e $< $(dir $@) && touch $@
data/downloads/base_municipios_final_datos_02.rar:
curl -o $@ http://www.conapo.gob.mx/work/models/CONAPO/Datos_Abiertos/Proyecciones2018/base_municipios_final_datos_02.rar
data/downloads/base_municipios_final_datos_02.csv: data/downloads/base_municipios_final_datos_02.rar
unrar e $< $(dir $@) && touch $@
data/downloads/base_municipios_datos.csv: data/downloads/base_municipios_final_datos_01.csv data/downloads/base_municipios_final_datos_02.csv
xsv cat rows $^ > $@
data/downloads/enfrentamientos_violentas_sedena.csv:
curl -o $@ http://www.politicadedrogas.org/PPD/archivos/129_eventosviolentosSedena.csv
data/downloads/enfrentamientos_violentas_policia.csv:
curl -o $@ http://www.politicadedrogas.org/PPD/archivos/128_Informes-PF.csv
data/downloads/marcos_geoestadicos_2017.zip: # Download INEGI shapefiles (geostatistical shapes)
curl -o $@ http://internet.contenidos.inegi.org.mx/contenidos/Productos/prod_serv/contenidos/espanol/bvinegi/productos/geografia/marcogeo/889463142683_s.zip
$(INEGI_FILES): data/downloads/marcos_geoestadicos_2017.zip # Unzip INEGI shapefiles (see Makefile.vars for definition)
unzip -j -o $< -d data/shapefiles && touch $(INEGI_FILES)
data/downloads/%.xlsx: secrets/rclone.conf # Download %.xlsx from Google Drive
rclone --config $< copy mapadespariciones:$(@F) $(@D) && touch $@
data/downloads/%.csv: secrets/rclone.conf # Download %.csv from Google Drive
rclone --config $< copy mapadespariciones:$(@F) $(@D) && touch $@
##@ Process data
.PRECIOUS: data/exports/%.csv
data/exports/%.csv: db/views/%
psql -v ON_ERROR_STOP=1 -qX1c "\copy (select * from $*) to '$(CURDIR)/$@' with (delimiter ',', format csv, header);"
.PRECIOUS: data/processed/%.csv
data/processed/%.csv: data/downloads/%.csv # Convert encoding
iconv -f iso-8859-1 -t utf-8 $< > $@
.PRECIOUS: data/processed/colectivos_guanajuato.csv
data/processed/colectivos_guanajuato.csv: data/downloads/colectivos_guanajuato.xlsx
xlsx --sheet colectivos $< | sed '/^,*$$/d' > $@
.PRECIOUS: data/processed/rnpndo.csv
data/processed/rnpndo.csv: data/downloads/rnpndo.xlsx
xlsx --sheet Hoja1 $< > $@
.PRECIOUS: data/stats/%.csv
data/stats/%.csv: data/processed/%.csv # Get column stats and metadata with xsv
xsv stats $< > $@
# .PRECIOUS: sql/raw/%.sql
sql/raw/%.sql: data/stats/%.csv # Parse column stats into SQL schema for import
$(PIPENV) python processors/schema.py $< $@
##@ Exports
MAPVIEWS = municipales municipales_summary municipales_summary_ctr cenapi_distributed estatales
.PRECIOUS: data/geojson/%.json
data/geojson/%.json: # db/views/% ## Build geojson file from a view
ogr2ogr -f GeoJSON $@ PG:$(GDALSTRING) -sql "select * from $*"
.PRECIOUS: data/mbtiles/%.mbtiles
data/mbtiles/%.mbtiles: data/geojson/%.json
tippecanoe --generate-ids -Z0 -z13 -S 5 --hilbert --detect-shared-borders --drop-smallest-as-needed -o $@ -f $<
.PRECIOUS: data/mbtiles/municipales_summary.mbtiles
data/mbtiles/municipales_summary.mbtiles: data/geojson/municipales_summary.json
tippecanoe --generate-ids -Z0 -z13 -S 5 --hilbert --detect-shared-borders -o $@ -f $<
.PRECIOUS: data/mbtiles/cenapi_distributed.mbtiles
data/mbtiles/cenapi_distributed.mbtiles: data/geojson/cenapi_distributed.json
tippecanoe --generate-ids -Z0 -z13 -r 1.4 -o $@ -f $<
.PRECIOUS: data/mbtiles/desapariciones.mbtiles
data/mbtiles/desapariciones.mbtiles: $(patsubst %, data/mbtiles/%.mbtiles, $(MAPVIEWS))
tile-join -o $@ $^
.PHONY: mapbox
mapbox: data/mbtiles/desapariciones.mbtiles
mapbox upload $(MAPBOX_USER).$(MAPBOX_TILESET_ID) $<
.PHONY: mbview
mbview: data/mbtiles/desapariciones.mbtiles
MAPBOX_ACCESS_TOKEN=$(MAPBOX_PUBLIC_ACCESS_TOKEN) mbview $^
##@ Frontend data
site/src/map-styles/style.json: ## Get map style from mapbox
curl "https://api.mapbox.com/styles/v1/$(MAPBOX_USER)/$(MAPBOX_STYLE_ID)?access_token=$(MAPBOX_ACCESS_TOKEN)" | jq > $@
##@ Utilities
tunnel: $(BASTION_KEY)
ssh -i $(BASTION_KEY) -M -S $@ -fnNT -L localhost:5001:$(BASTION_DB):5432 $(BASTION_HOST)
.PHONY: tunnel/drop
tunnel/drop: tunnel
ssh -i $(BASTION_KEY) -S $< -O exit $(BASTION_HOST)
.PHONY: develop
develop: load ## Run development server
grunt --base site develop
.PHONY: dbshell
dbshell: db ## Log in to database configured in .env.
psql
.PHONY: install
install: install/npm install/pipenv install/hasura-cli ## Install project Node and Python dependencies
.PHONY: install/npm
install/npm: # Install from NPM
npm install
.PHONY: install/pipenv
install/pipenv: # Install pipenv
pipenv install
.PHONY: install/hasura-cli
install/hasura-cli: # Install hasura cli
curl -L https://github.com/hasura/graphql-engine/raw/master/cli/get.sh | bash
.PHONY: clean/data
clean/data: $(patsubst %, rm/%, $(DATA_DIRECTORIES)) ## Remove all data files
#rm -rf sql/raw/*
.PHONY: clean/downloads
clean/downloads: $(patsubst %, rm/%, $(DOWNLOAD_DIRECTORIES)) ## Remove all downloads
.PHONY: rm/%
rm/%: # Remove data/% where % is a directory name
rm -rf data/$*/*