-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsearch_missing_twitter.py
executable file
·98 lines (82 loc) · 2.95 KB
/
search_missing_twitter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os, sys, csv, json
from twitter import Twitter, OAuth
from twitterconfig import KEY, SECRET, OAUTH_TOKEN, OAUTH_SECRET
# Screen names that are never proposed: they are merged (lowercased) into the
# set of already-known accounts before any search result is examined.
BLACKLIST = ["ATColloque", "GNadeauDubois"]

# Twitter API client authenticated with the credentials from twitterconfig.
t = Twitter(auth=OAuth(OAUTH_TOKEN, OAUTH_SECRET, KEY, SECRET))

# Extra keyword arguments forwarded to every users.search call.
args = dict(include_entities="false", count=20)
# Command line: <deputes|senateurs> <path to the Twitter list CSV>.
if len(sys.argv) < 3:
    sys.stderr.write("Please input deputes/senateurs and the path for the Twitter list data\n")
    # sys.exit() instead of the bare exit(): the latter is only injected by
    # the `site` module and is missing under `python -S` or frozen builds.
    sys.exit(1)
TYPEPARLS, TWEETS_FILE = sys.argv[1:3]
# Singular form of the type (trailing "s" characters removed); used as a key
# in the cached JSON entries and as a search term.
typeparl = TYPEPARLS.rstrip("s")
# Keywords looked for (as substrings) in the lowercased description of each
# Twitter account returned by the search, keyed by type of parlementaire.
# Accented and unaccented spellings are both listed because descriptions are
# matched as typed by the account owner.
searchwords = {
    "depute": [
        u"depute",
        u"deputé",
        u"députe",
        u"député",
        u"deputee",
        u"deputée",
        u"députee",
        u"députée",
        u"assemblee nationale",
        u"assemblée nationale",
        u"@assembleenat"
    ],
    "senateur": [
        u"senateur",
        u"sénateur",
        u"senatrice",
        u"sénatrice",
        u"senat",
        u"sénat",
        u"@senat"
    ]
}.get(typeparl)
# An unsupported type used to surface as a bare KeyError traceback; report it
# as a usage error instead.
if searchwords is None:
    sys.stderr.write("Unknown type of parlementaires '%s': please input deputes or senateurs\n" % typeparl)
    sys.exit(1)
# Load the cached list of parlementaires and index it by slug.
# The JSON is expected to hold, under the TYPEPARLS key, a list of objects
# each wrapping one parlementaire under the `typeparl` key.
cache_path = os.path.join(".cache", "%s.json" % TYPEPARLS)
try:
    with open(cache_path) as f:
        parls = dict(
            (entry[typeparl]["slug"], entry[typeparl])
            for entry in json.load(f)[TYPEPARLS]
        )
except (IOError, ValueError):
    # IOError: the cache file is missing or unreadable.
    # ValueError: the file does not contain valid JSON.
    sys.stderr.write("Could not open Nos%s.fr parlementaires list\n" % TYPEPARLS)
    sys.exit(1)
# Lowercased screen names that must not be proposed again: the blacklist plus
# every account already recorded in the Twitter list files.
known_accounts = set(b.lower() for b in BLACKLIST)
with open(TWEETS_FILE) as f:
    for parl in csv.DictReader(f):
        # This parlementaire already has an account, so remove them from the
        # ones left to search. pop() with a default tolerates rows whose slug
        # is absent from the cached list or appears twice, where a plain
        # `del` raised KeyError and aborted the whole run.
        parls.pop(parl["slug"], None)
        known_accounts.add(parl["twitter"].lower())
# Period-suffixed variants of the list file (e.g. "<name>_2017-2022.csv") also
# contribute known accounts, but do not remove anyone from `parls`.
for rab in ["2012-2017", "2017-2022", "2022-2027"]:
    try:
        with open(TWEETS_FILE.replace(".csv", "_%s.csv" % rab)) as f:
            for parl in csv.DictReader(f):
                known_accounts.add(parl["twitter"].lower())
    except IOError:
        # Best effort: these extra files are optional and may not exist.
        pass
# Parlementaires left in `parls` have no recorded account yet; process them in
# family-name order.
missing = sorted(parls.values(), key=lambda parl: parl["nom_de_famille"])

# Single-argument print(...) produces the same output whether it is parsed as
# a Python 2 print statement or as a function call.
summary = "There are still %s %s for which we haven't found a Twitter account."
print(summary % (len(missing), TYPEPARLS))
print("")
# For every parlementaire still lacking an account, query Twitter's user
# search and print the accounts whose description mentions one of the
# searchwords, most frequently returned first.
for parl in missing:
    # Three complementary queries: full name, "<type> <family name>" and
    # "<family name> <constituency name>".
    queries = [
        parl["nom"],
        "%s %s" % (typeparl, parl["nom_de_famille"]),
        "%s %s" % (parl["nom_de_famille"], parl["nom_circo"]),
    ]
    goodmatches = {}
    for query in queries:
        for res in t.users.search(q=query, **args):
            screen_name = res["screen_name"]
            if screen_name.lower() in known_accounts:
                continue
            # The description may be null for accounts without a bio.
            desc = (res.get("description") or "").lower()
            if not any(word in desc for word in searchwords):
                continue
            # Keep the first occurrence so the counter accumulates across the
            # queries; re-assigning the fresh result each time reset "count"
            # to 1 and made the ranking below meaningless.
            candidate = goodmatches.setdefault(screen_name, res)
            candidate["count"] = candidate.get("count", 0) + 1
    # Accounts returned by several queries are the most likely matches.
    matches = sorted(goodmatches.values(), key=lambda x: x["count"], reverse=True)
    if matches:
        # Each line is built as a single unicode string so that print(...)
        # behaves the same as a Python 2 statement and as a function call.
        print(u" -  %s proposals found for %s %s :" % (
            len(matches), parl["nom"], parl["url_nos%s" % TYPEPARLS]))
        for g in matches:
            print(u" ⋅ @%s: %s %s ‑ https://twitter.com/%s" % (
                g["screen_name"], g["description"], g.get("url", "") or "", g["screen_name"]))
        print("")