-
-
Notifications
You must be signed in to change notification settings - Fork 33
/
Copy pathfiliacao_parse.py
70 lines (59 loc) · 2.18 KB
/
filiacao_parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import csv
from io import BytesIO, TextIOWrapper
from pathlib import Path
from zipfile import ZipFile
import rows
import scrapy
import utils
import settings
# Output field name -> column header looked up in each source CSV row.
# convert_row() walks this mapping in order, so the order of the entries
# below is also the key order of every converted row.
field_map = {
    "codigo_municipio": "CODIGO DO MUNICIPIO",
    "data_cancelamento": "DATA DO CANCELAMENTO",
    "data_desfiliacao": "DATA DA DESFILIACAO",
    "data_filiacao": "DATA DA FILIACAO",
    "data_processamento": "DATA DO PROCESSAMENTO",
    "data_regularizacao": "DATA DA REGULARIZACAO",
    "motivo_cancelamento": "MOTIVO DO CANCELAMENTO",
    "municipio": "NOME DO MUNICIPIO",
    "nome": "NOME DO FILIADO",
    "partido": "NOME DO PARTIDO",
    "secao_eleitoral": "SECAO ELEITORAL",
    "sigla_partido": "SIGLA DO PARTIDO",
    "situacao": "SITUACAO DO REGISTRO",
    "tipo": "TIPO DO REGISTRO",
    "titulo_eleitoral": "NUMERO DA INSCRICAO",
    "uf": "UF",
    "zona_eleitoral": "ZONA ELEITORAL",
}
def convert_row(row):
    """Return a new dict with the fields of ``row`` renamed per ``field_map``.

    Every value is passed through ``utils.unaccent`` and upper-cased. Fields
    whose new name starts with ``data_`` are additionally run through
    ``utils.PtBrDateField.deserialize`` and converted to ``str``; a falsy
    result becomes the empty string.

    Raises ``KeyError`` when ``row`` lacks one of the source columns.
    """

    def _normalize(target, source):
        # Clean the raw text first; date fields are parsed from the cleaned text.
        text = utils.unaccent(row[source]).upper()
        if not target.startswith("data_"):
            return text
        return str(utils.PtBrDateField.deserialize(text) or "")

    return {
        target: _normalize(target, source) for target, source in field_map.items()
    }
class FiliadosFileParserSpider(scrapy.Spider):
    """Spider that reads local ZIP archives and yields their converted CSV rows.

    The archives are listed in ``filiacao-links.csv`` under
    ``settings.OUTPUT_PATH``. Each one is requested through a ``file://`` URL
    and the ``filiados_*.csv`` member found inside it is converted row by row
    with ``convert_row``.
    """

    name = "filiados-file-parse"

    def start_requests(self):
        """Yield one local-file request per row of ``filiacao-links.csv``.

        All columns of the row are forwarded as the request ``meta``.
        """
        links = rows.import_from_csv(settings.OUTPUT_PATH / "filiacao-links.csv")
        for row in links:
            # as_uri() builds a well-formed, percent-encoded file URL, so paths
            # containing characters such as "#", "?" or "%" still point at the
            # right file (plain "file://" + path concatenation does not).
            yield scrapy.Request(
                url=Path(row.filename).absolute().as_uri(), meta=row._asdict()
            )

    def parse(self, response):
        """Yield one converted dict per row of the archive's membership CSV.

        ``response.body`` is opened as a ZIP archive. Among the members whose
        base name starts with ``filiados_`` and ends with ``.csv``, the one
        whose full name sorts last is used. An archive without such a member
        yields nothing.
        """
        # The context managers release the archive and the member once the
        # generator is exhausted or closed, instead of leaving them open.
        with ZipFile(BytesIO(response.body)) as zf:
            member_name = None
            candidates = sorted(
                zf.filelist, key=lambda info: info.filename, reverse=True
            )
            for file_info in candidates:
                base_name = Path(file_info.filename).name
                if base_name.startswith("filiados_") and base_name.endswith(".csv"):
                    member_name = file_info.filename
                    break

            if member_name is None:
                return

            with zf.open(member_name) as csv_fobj:
                reader = csv.DictReader(
                    TextIOWrapper(csv_fobj, encoding="iso-8859-15"),
                    dialect=utils.TSEDialect,
                )
                for row in reader:
                    yield convert_row(row)