-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmklist-vodo
executable file
·70 lines (63 loc) · 2.13 KB
/
mklist-vodo
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python
"""
Extract list of free movies available from vodo.net.
"""
import argparse
import json
import lxml.html
import movielib
import urllib2
import urlparse
def parsepage(args, l, url):
    """
    Fetch one vodo.net film page and record the film in the dictionary l.

    The entry is stored as l[key] = info.  The key is, in order of
    preference:

      1. the last link on the page pointing at "imdb.com/title/",
         made absolute and terminated with '/',
      2. the result of movielib.imdb_find_one(title, year), tried only
         when the page has no such link and args.imdblookup is set,
      3. the page URL itself.

    args -- parsed command line options; only args.imdblookup is read.
    l    -- dictionary updated in place with the new entry.
    url  -- absolute URL of the film page to parse.

    Always returns None.  Pages that cannot be fetched
    (urllib2.HTTPError) or that lack an og:title meta element are
    reported on stdout and skipped instead of aborting the whole run.
    """
    try:
        html = movielib.http_get_read(url)
        root = lxml.html.fromstring(html)

        titlenodes = root.cssselect('meta[property="og:title"]')
        if not titlenodes:
            # Without a title the entry is useless; skip the page rather
            # than die with IndexError halfway through the scrape.
            print("warning: missing title for %s, skipping" % url)
            return None
        title = titlenodes[0].get('content')

        # The year is shown as "(YYYY)".  A missing, empty or
        # non-numeric value is treated as unknown.
        year = None
        yearnodes = root.cssselect('div.title-holder span.alt')
        if yearnodes:
            try:
                year = int(yearnodes[0].text_content().strip('()'))
            except ValueError:
                year = None

        info = {
            'status': 'free',
            'freenessurl': url,
            'title': title,
        }

        imdburl = url
        for a in root.cssselect("a"):
            # Anchors without href (e.g. <a name="...">) must not raise
            # KeyError, so fall back to an empty string.
            href = a.get('href', '')
            if "imdb.com/title/" in href:
                imdburl = urlparse.urljoin(url, href)
                if '/' != imdburl[-1]:
                    imdburl = imdburl + '/'

        if imdburl == url:
            print("warning: missing imdb link for %s" % url)
            # Only search IMDB when the page itself carries no IMDB link.
            if args.imdblookup:
                try:
                    imdb = movielib.imdb_find_one(title, year)
                    if imdb:
                        imdburl = imdb
                        # '%d' cannot format None, so leave the year out
                        # of the lookup note when it is unknown.
                        if year is None:
                            info['imdblookup'] = title
                        else:
                            info['imdblookup'] = '%s %d' % (title, year)
                except KeyError:
                    # Hit this with mojibake and UTF-8 in 'Haxan'.
                    pass

        if year:
            info['year'] = year
        l[imdburl] = info
    except urllib2.HTTPError as e:
        # Best effort: one unreachable page should not stop the scrape,
        # but say so instead of dropping the film silently.
        print("warning: unable to fetch %s: %s" % (url, e))
    return None
def main():
    """
    Scrape the vodo.net film index and save the films found.

    Reads the optional --imdblookup flag from the command line, fetches
    http://vodo.net/films/, runs parsepage() on the target of every
    link matched by "div.sticker a", and finally hands the collected
    entries to movielib.savelist() under the name
    'free-movies-vodo.json'.
    """
    optparser = argparse.ArgumentParser()
    optparser.add_argument(
        '--imdblookup',
        default=False,
        action='store_true',
        help='also find title IDs by searching for title/year in IMDB',
    )
    args = optparser.parse_args()

    indexurl = "http://vodo.net/films/"
    index = lxml.html.fromstring(movielib.http_get_read(indexurl))

    films = {}
    for link in index.cssselect("div.sticker a"):
        # Links in the index may be relative; resolve against the index URL.
        parsepage(args, films, urlparse.urljoin(indexurl, link.attrib['href']))

    movielib.savelist(films, 'free-movies-vodo.json')
# Run the scraper only when executed as a script, not when imported.
if '__main__' == __name__:
    main()