-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAmazonWebScraper.py
83 lines (69 loc) · 2.37 KB
/
AmazonWebScraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import argparse
import glob
import simplejson
import os
from datetime import date
import json
import requests
from selectorlib import Extractor
def _read_saved_records(path):
    """Return every record stored in *path* as one flat list.

    Earlier versions of this script appended ``,{...}`` after the closing
    ``]`` of the file, which is not a valid JSON document. The text is
    therefore decoded value by value so those files can still be read.

    Raises:
        OSError: the file cannot be opened.
        ValueError: the file contains text that is not JSON at all.
    """
    with open(path, encoding='utf-8') as infile:
        text = infile.read()
    decoder = json.JSONDecoder()
    records = []
    pos = 0
    while pos < len(text):
        # Skip whitespace and the stray commas between top-level values.
        if text[pos].isspace() or text[pos] == ',':
            pos += 1
            continue
        value, pos = decoder.raw_decode(text, pos)
        if isinstance(value, list):
            records.extend(value)
        else:
            records.append(value)
    return records


def urlInput():
    """Scrape one product page and add the result to its JSON history file.

    Prompts for a URL, downloads the page, extracts the fields described in
    ``search.yml`` and stores the extracted record in
    ``./products/ <name>.json``. The file always holds a single valid JSON
    list; scraping the same product again adds one more entry to that list.

    Raises:
        requests.RequestException: the download failed, timed out or the
            server answered with an error status.
        ValueError: the page yielded no usable ``name`` field, or an
            existing history file is not JSON.
    """
    URL = input('insert a url: ')
    extractor = Extractor.from_yaml_file('search.yml')
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'
    headers = {'User-Agent': user_agent}

    # Without a timeout a stalled connection would block the script forever.
    r = requests.get(URL, headers=headers, timeout=30)
    r.raise_for_status()
    array = extractor.extract(r.text)

    name = array.get('name') if isinstance(array, dict) else None
    if not isinstance(name, str) or not name:
        raise ValueError('no product name could be extracted from ' + URL)

    # A path separator inside the product name would point into a
    # sub-directory that does not exist.
    safe_name = name.replace('/', '_').replace(os.sep, '_')
    # NOTE(review): the blank after 'products/' is kept so files written by
    # earlier runs are still found; confirm whether it is intentional.
    nameFile = './products/ ' + safe_name + '.json'

    os.makedirs('./products', exist_ok=True)
    # os.path.exists is used rather than glob because glob would interpret
    # '[', '*' and '?' in a product name as wildcards.
    if os.path.exists(nameFile):
        records = _read_saved_records(nameFile)
    else:
        records = []
    records.append(array)

    # Rewrite the whole list so the file stays a valid JSON document.
    with open(nameFile, 'w', encoding='utf-8') as outfile:
        simplejson.dump(records, outfile, indent=4)
def _read_product_records(path):
    """Return every record stored in *path* as one flat list.

    A product file is expected to hold a JSON list, but files may also
    carry ``,{...}`` fragments after the closing ``]``. Decoding value by
    value accepts both layouts.

    Raises:
        OSError: the file cannot be opened.
        ValueError: the file contains text that is not JSON at all.
    """
    with open(path, encoding='utf-8') as infile:
        text = infile.read()
    decoder = json.JSONDecoder()
    records = []
    pos = 0
    while pos < len(text):
        # Skip whitespace and the stray commas between top-level values.
        if text[pos].isspace() or text[pos] == ',':
            pos += 1
            continue
        value, pos = decoder.raw_decode(text, pos)
        if isinstance(value, list):
            records.extend(value)
        else:
            records.append(value)
    return records


def _price_to_float(raw):
    """Return *raw* as a float, or None when it holds no parsable number."""
    if isinstance(raw, bool):
        return None
    if isinstance(raw, (int, float)):
        return float(raw)
    if not isinstance(raw, str):
        return None
    # Drops currency symbols and thousands separators, e.g. '$1,299.00'.
    # Assumes '.' is the decimal separator - TODO confirm for other locales.
    digits = ''.join(ch for ch in raw if ch.isdigit() or ch == '.')
    try:
        return float(digits)
    except ValueError:
        return None


def searchProduct():
    """Print one field of every saved record that belongs to a product.

    Prompts for a product name and a field name, then scans every
    ``./products/*.json`` file. Each file path is printed, followed by the
    requested field of every record whose ``name`` equals the product name.
    When the field is ``sale_price`` the average of all prices that could
    be parsed is printed last; nothing is printed if no price was found.
    Unreadable files and records lacking the field are reported and skipped.
    """
    name = input('Name of Product: ')
    value = input('What do you want to search?')

    # The totals span all files, so they are initialised once, before the loop.
    price_total = 0.0
    price_count = 0

    for path in sorted(glob.glob('./products/*.json')):
        print(path)
        try:
            records = _read_product_records(path)
        except (OSError, ValueError) as err:
            print('skipping {}: {}'.format(path, err))
            continue

        for record in records:
            if not isinstance(record, dict) or record.get('name') != name:
                continue
            if value not in record:
                print('{}: no field named {!r}'.format(path, value))
                continue
            parameter_found = record[value]
            print(parameter_found)
            if value == 'sale_price':
                price = _price_to_float(parameter_found)
                if price is not None:
                    price_total += price
                    price_count += 1

    # Guard against dividing by zero when no price was collected.
    if price_count:
        print(price_total / price_count)
def main():
    """Parse the command-line flags and run the requested actions.

    ``-url`` runs ``urlInput`` and ``-search`` runs ``searchProduct``; when
    both are given the scrape happens first. ``-refresh`` is accepted by
    the parser but this function does not act on it.
    """
    parser = argparse.ArgumentParser(description='Amazon URL Tracker/Scraper')

    # Every option is a plain on/off switch stored under its own name.
    switches = (
        ('url', 'Insert a url for scraping'),
        ('search', 'Insert a Name and a keyword to search'),
        ('refresh', 'Insert a time in ms for the refresh of price'),
    )
    for switch, description in switches:
        parser.add_argument(
            '-' + switch, dest=switch, action='store_true', help=description)

    options = parser.parse_args()
    if options.url:
        urlInput()
    if options.search:
        searchProduct()
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
'''
Unused snippet, kept for reference: load a saved product file back into memory.

with open(nameFile) as fileInput:
    data = json.load(fileInput)
'''