forked from Aresius423/hahud
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
cbaa4c0
commit affea91
Showing
7 changed files
with
295 additions
and
265 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,7 @@ | ||
cache/ | ||
data_*/ | ||
menu.html | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import urllib | ||
import os | ||
import hashlib | ||
|
||
# The HTML generation uses a non-absolute path to the cache, so changing only
# this value would be unwise.
cachedir = "{}/cache/".format(os.getcwd())

# Make sure the cache directory is there before anything is stored in it.
if not os.path.exists(cachedir):
    os.makedirs(cachedir)
|
||
def loadToCache(imgurl):
    """Download *imgurl* into the cache directory and return its relative path.

    The cache file is named after the MD5 digest of the URL followed by the
    file extension found in the URL's path, so a given URL always maps to the
    same file and is only downloaded when that file does not exist yet.

    Args:
        imgurl: URL of the image, or the sentinel string ``"NotFound"``.

    Returns:
        ``"../cache/<digest><extension>"`` for a cached image, or
        ``"../resources/notfound.png"`` when *imgurl* is the sentinel or the
        download fails.
    """
    # A bare ``import urllib`` does not load the submodules, so import the
    # ones used here explicitly.
    import urllib.parse
    import urllib.request

    placeholder = "../resources/notfound.png"
    if imgurl == "NotFound":
        return placeholder

    # Take the extension from the URL path only; splitting the whole URL on
    # "." would drag query strings or path segments into the file name.
    extension = os.path.splitext(urllib.parse.urlparse(imgurl).path)[1]
    filename = hashlib.md5(imgurl.encode('utf-8')).hexdigest() + extension
    cacheFile = cachedir + filename

    if not os.path.isfile(cacheFile):
        try:
            urllib.request.urlretrieve(imgurl, cacheFile)
        except (OSError, ValueError):
            # URLError/HTTPError are OSError subclasses; ValueError is raised
            # for malformed URLs. Drop any partial file so that a later run
            # does not mistake it for a valid cache entry.
            if os.path.isfile(cacheFile):
                os.remove(cacheFile)
            return placeholder

    return "../cache/" + filename
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import sqlite3 | ||
from datamodels import * | ||
|
||
import os | ||
|
||
def setupNewDB(dirpath):
    """Create an empty ``newdata.db`` in *dirpath* and return the connection.

    The directory is created when missing, and an existing ``newdata.db`` is
    deleted first so the returned database only contains an empty ``cars``
    table.

    Args:
        dirpath: Directory that holds the database files.

    Returns:
        An open ``sqlite3.Connection`` to the new database.

    Raises:
        SystemExit: If the ``cars`` table cannot be created.
    """
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)

    dbpath = dirpath + "/newdata.db"

    # A leftover file would already contain the table and make CREATE fail.
    if os.path.isfile(dbpath):
        os.remove(dbpath)

    newdb = sqlite3.connect(dbpath)

    try:
        newdb.execute("CREATE TABLE cars(id TEXT, title TEXT, url TEXT, price TEXT, img TEXT, cdata TEXT)")
    except sqlite3.OperationalError:
        print("Error setting up the database")
        newdb.close()
        # quit() is a helper installed by the site module for interactive
        # sessions; raise SystemExit directly and report a failure status.
        raise SystemExit(1)

    return newdb
|
||
def insertResults(db, results):
    """Store every item of *results* as a row of the ``cars`` table and commit.

    Each item must expose ``id``, ``title``, ``url``, ``price``, ``img`` and
    ``data`` attributes; they are written in that column order.
    """
    rows = [
        (item.id, item.title, item.url, item.price, item.img, item.data)
        for item in results
    ]
    db.executemany("INSERT INTO cars VALUES (?,?,?,?,?,?)", rows)
    db.commit()
|
||
def findChanges(dirpath, results):
    """Compare *results* with the archived ``data.db`` in *dirpath*.

    Args:
        dirpath: Directory that may contain a previous ``data.db``.
        results: Current cars; each needs an ``id`` and must be comparable
            with the stored cars.

    Returns:
        A list of ``change`` objects: first one entry per current car that is
        ``"new"`` or ``"changed"`` (in the order of *results*), then one
        ``"deleted"`` entry per stored car whose id is no longer present.
        Without a previous database every result is reported as ``"new"``.
    """
    dbpath = dirpath + "/data.db"
    if not os.path.isfile(dbpath):
        return [change(item, "new", "") for item in results]

    # Read the archive once instead of issuing one SELECT per result, and
    # close the connection even if building the car objects fails.
    olddb = sqlite3.connect(dbpath)
    try:
        oldCars = [car(*row) for row in olddb.execute("SELECT * from cars").fetchall()]
    finally:
        olddb.close()

    # Keep the first stored row for a duplicated id, as fetchone() would.
    oldById = {}
    for oldCar in oldCars:
        oldById.setdefault(oldCar.id, oldCar)

    changes = []
    for currentCar in results:
        oldcar = oldById.get(currentCar.id)
        if oldcar is None:
            changes.append(change(currentCar, "new", ""))
        elif oldcar != currentCar:
            changes.append(change(currentCar, "changed", currentCar.diffFromOld(oldcar)))

    # A set makes the membership test constant-time per stored car.
    newIDs = {item.id for item in results}
    for oldCar in oldCars:
        if oldCar.id not in newIDs:
            changes.append(change(oldCar, "deleted", "deleted"))

    return changes
|
||
def archiveDatabase(dirpath):
    """Promote ``newdata.db`` to ``data.db`` inside *dirpath*.

    Any existing ``data.db`` is overwritten. ``os.replace`` does this in a
    single step, so the old archive is left untouched when ``newdata.db`` is
    missing instead of being deleted before the rename fails.

    Raises:
        OSError: If ``newdata.db`` does not exist or cannot be moved.
    """
    os.replace(dirpath+"/newdata.db", dirpath+"/data.db")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
class query:
    """A search identified by a display name and the URL it is fetched from."""

    def __init__(self, name, url):
        self.name, self.url = name, url
|
||
class change:
    """A car paired with a short summary and a detailed reason for a change."""

    def __init__(self, car, summary, reason):
        self.car = car
        self.summary = summary
        self.reason = reason

    def __str__(self):
        # Reason on the first line, the car's own text below it.
        return "\n".join((self.reason, str(self.car)))

    def toListItem(self, template):
        """Return *template* with every ``%...%`` placeholder filled in.

        The placeholders are substituted one after another in the order
        listed below.
        """
        substitutions = (
            ("%LISTING_REASON%", self.summary),
            ("%LISTING_ID%", self.car.id),
            ("%LISTING_PRICE%", self.car.price),
            ("%LISTING_LINK%", self.car.url),
            ("%LISTING_TITLE%", self.car.title),
            ("%LISTING_IMAGE%", self.car.img),
            ("%DETAILED_REASON%", self.reason),
            ("%LISTING_DATA%", self.car.data),
        )
        rendered = template
        for placeholder, value in substitutions:
            rendered = rendered.replace(placeholder, value)
        return rendered
|
||
class car:
    """One listing: id, title, url, price, image path and a data summary."""

    def __init__(self, id, title, url, price, img, data):
        self.id = id
        self.title = title
        self.url = url
        # Replace non-breaking spaces in the price with ordinary spaces.
        self.price = str(price.replace('\xa0', ' '))
        self.img = img
        self.data = data

    def __str__(self):
        return self.id + "\n" + self.title + "\n" + self.price + "\n___________"

    def __eq__(self, other):
        """Two cars are equal when all six fields match."""
        if not isinstance(other, car):
            # Let Python try the reflected comparison; it ends up False.
            return NotImplemented
        return (
            self.id == other.id
            and self.title == other.title
            and self.url == other.url
            and self.price == other.price
            and self.img == other.img
            and self.data == other.data
        )

    def diffFromOld(self, other):
        """Describe, as HTML lines, how this car differs from the older *other*.

        Returns an empty string when title, url, price, image and data are
        all unchanged.
        """
        difference = []
        if self.title != other.title:
            difference.append("title changed<br>\n")
        # url takes part in __eq__, so report it too; otherwise a url-only
        # change would be flagged as changed with an empty explanation.
        if self.url != other.url:
            difference.append("url changed<br>\n")
        if self.price != other.price:
            difference.append("price changed from " + other.price + "<br>\n")
        if self.img != other.img:
            difference.append("image changed<br>\n")
        if self.data != other.data:
            difference.append("data changed from: " + other.data + "<br>\n")
        return "".join(difference)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
import requests | ||
from lxml import html | ||
from lxml import etree | ||
from lxml.etree import tostring | ||
|
||
from cache import * | ||
from datamodels import * | ||
|
||
# Request headers carrying a desktop Firefox user-agent string.
header = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:32.0) Gecko/20100101 Firefox/32.0',
}
|
||
def page(num, base):
    """Return the address of page *num* below the *base* URL."""
    return "/page".join((base, str(num)))
|
||
def fetch_results_from_query(query):
    """Fetch every result page of *query* and return the listings as cars.

    The first request determines the number of pages from the
    ``<link rel="last">`` element; each page is then downloaded and every
    listing row on it is converted into a ``car``.

    Args:
        query: Object with ``url`` (base address of the search) and ``name``
            (used in the progress output).

    Returns:
        A list of ``car`` objects in page order.
    """
    initReq = requests.get(query.url, headers=header)
    initTree = html.fromstring(initReq.content)

    try:
        num_of_pages = int(initTree.xpath('//link[@rel="last"]/@href')[0].split("page")[1])
    except IndexError:
        # No usable rel="last" link: treat the result as a single page.
        num_of_pages = 1

    results = []

    for pagenum in range(1, num_of_pages + 1):
        print("\rProcessing page " + str(pagenum) + " out of " + str(num_of_pages) + " for query " + query.name, end='')
        # Send the same headers as the initial request so every page is
        # requested with the browser user-agent.
        request = requests.get(page(pagenum, query.url), headers=header)
        tree = html.fromstring(request.content)
        listings = tree.xpath('.//div[contains(@class, "row talalati-sor")]')
        results.extend(_listing_to_car(listing) for listing in listings)

    return results


def _listing_to_car(listing):
    """Convert one listing row element into a ``car``."""
    kepsor = listing.find('.//div[@class="talalatisor-kep"]')
    adatsor = listing.find('.//div[@class="talalatisor-adatok"]')
    info = adatsor.find('.//div[@class="talalatisor-info adatok"]')

    # Title and link come from the same anchor; look it up once.
    link = kepsor.find('.//a')
    title = link.get("title")
    url = link.get("href")

    # A missing <img> element or a missing data-lazyurl attribute both fall
    # back to the "NotFound" sentinel understood by loadToCache.
    imgElement = kepsor.find('.//img[@class="img-responsive lazy"]')
    imgurl = imgElement.get('data-lazyurl') if imgElement is not None else None
    img = loadToCache(imgurl or "NotFound")

    price = adatsor.find('.//div[@class="vetelar"]').text
    listingId = listing.find('.//*[@data-hirkod]').get('data-hirkod')
    maybeData = [databox.text for databox in info.findall('.//span')]

    if None in maybeData:
        # A span without text: the value may be inside the tooltip <abbr>
        # instead; use its text, or a placeholder when that is missing too.
        km = info.find('.//abbr[@title="Kilométeróra állása"]')
        filler = "? km"
        if km is not None and km.text is not None:
            filler = km.text
        finalData = [x if x is not None else filler for x in maybeData]
    else:
        finalData = maybeData

    return car(listingId, title, url, price, img, " ".join(finalData))
Oops, something went wrong.