forked from Aresius423/hahud
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhahu_processor.py
65 lines (49 loc) · 2.01 KB
/
hahu_processor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import requests
from lxml import html
from lxml import etree
from lxml.etree import tostring
from cache import *
from datamodels import *
# Request headers carrying a desktop Firefox user-agent string; passed as
# ``headers=`` to ``requests.get``.
header = {
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:32.0) Gecko/20100101 Firefox/32.0",
}
def page(num, base):
    """Return the URL of result page *num* below the search URL *base*.

    The page number is converted with ``str`` and appended after a literal
    ``/page`` segment, e.g. ``page(3, "http://host/q")`` gives
    ``"http://host/q/page3"``.
    """
    segments = [base, "/page", str(num)]
    return "".join(segments)
def fetch_results_from_query(query):
    """Scrape every result page of a search and return the listed cars.

    The search URL is requested once to read the ``<link rel="last">``
    element, whose ``href`` carries the number of the last result page; when
    that element is missing a single page is assumed. Every page is then
    downloaded and each listing row is turned into a ``car`` record.

    Args:
        query: Object exposing ``url`` (base URL of the search) and ``name``
            (label shown in the progress output).

    Returns:
        A list of ``car`` objects, one per listing, in page order.

    Raises:
        AttributeError: If a listing lacks one of the mandatory elements
            (image row, data row, info box, link, price or id carrier).
    """
    first_response = requests.get(query.url, headers=header)
    first_tree = html.fromstring(first_response.content)
    try:
        last_href = first_tree.xpath('//link[@rel="last"]/@href')[0]
        num_of_pages = int(last_href.split("page")[1])
    except IndexError:
        # No rel="last" link (or no "page" part in it): single result page.
        num_of_pages = 1

    results = []
    for pagenum in range(1, num_of_pages + 1):
        # "\r" + end='' keeps the progress report on a single console line.
        print("\rProcessing page " + str(pagenum) + " out of " + str(num_of_pages) + " for query " + query.name, end='')
        # Send the same headers as the initial request; without them
        # requests falls back to its own default user-agent.
        response = requests.get(page(pagenum, query.url), headers=header)
        tree = html.fromstring(response.content)

        for listing in tree.xpath('.//div[contains(@class, "row talalati-sor")]'):
            kepsor = listing.find('.//div[@class="talalatisor-kep"]')  # image row
            adatsor = listing.find('.//div[@class="talalatisor-adatok"]')  # data row
            info = adatsor.find('.//div[@class="talalatisor-info adatok"]')

            # The first anchor of the image row carries both title and URL.
            link = kepsor.find('.//a')
            title = link.get("title")
            url = link.get("href")

            try:
                img = kepsor.find('.//img[@class="img-responsive lazy"]').get('data-lazyurl')
            except AttributeError:
                # Listing without a lazy-loaded picture.
                img = "NotFound"
            # NOTE(review): loadToCache comes from the cache module; presumably
            # it returns the reference to use for the image - confirm there.
            img = loadToCache(img)

            price = adatsor.find('.//div[@class="vetelar"]').text
            listing_id = listing.find('.//*[@data-hirkod]').get('data-hirkod')

            details = [databox.text for databox in info.findall('.//span')]
            if None in details:
                # A span without text: the odometer reading may be wrapped in
                # an <abbr> instead. Fall back to a placeholder when that is
                # missing or empty so the join below never sees None.
                km = info.find('.//abbr[@title="Kilométeróra állása"]')
                filler = km.text if km is not None and km.text is not None else "? km"
                details = [filler if text is None else text for text in details]

            results.append(car(listing_id, title, url, price, img, " ".join(details)))

    return results