forked from HeroCC/RedditSteamGameInfo
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathSteamRemovedGame.py
295 lines (268 loc) · 11.5 KB
/
SteamRemovedGame.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
import re
import json
import calendar
import time
import requests
import dateutil.parser
from dateutil.parser import ParserError
from bs4 import BeautifulSoup
from SteamGame import SteamGame
class SteamRemovedGame:
    """Store-page data for a Steam app, scraped from Wayback Machine snapshots.

    The constructor asks the archive.org CDX API for archived copies of
    ``store.steampowered.com/app/<appid>``, downloads the newest English,
    non-agecheck snapshot and fills in the instance attributes.

    Several fields are computed by ``SteamGame`` functions that are called
    with this object as ``self``; presumably they read attributes such as
    ``gamePage`` set here (``SteamGame`` is defined elsewhere - confirm
    before renaming any attribute).

    Most scraper methods are replaced on the instance by their own result
    (for example ``self.title = self.title()``), so after a successful
    construction ``obj.title`` is a string, not a callable.

    Construction stops early, keeping only the attributes assigned so far,
    when the CDX API does not answer with JSON or when no usable snapshot is
    left after filtering. A caller can detect this by the absence of the
    ``url`` attribute.
    """

    # Pieces of the CDX query; the app id and a path suffix ("/*" or "/")
    # are inserted between the prefix and the query string.
    _CDX_PREFIX = "https://web.archive.org/cdx/search/cdx?url=store.steampowered.com/app/"
    _CDX_QUERY = "&fl=original,timestamp&filter=statuscode:200&output=json"
    _WAYBACK_PREFIX = "https://web.archive.org/web/"
    # Cookies sent with every page request (birth time / mature content).
    _PAGE_COOKIES = {
        "birthtime": "640584001",
        "lastagecheckage": "20-April-1990",
        "mature_content": "1",
    }
    _TIMEOUT = 15      # seconds allowed per HTTP request
    _RETRY_DELAY = 15  # seconds slept between failed attempts

    def __init__(self, appid):
        """Look up and scrape the newest archived store page for *appid*.

        *appid* may be a string or an int; it is converted with ``str()``
        when the query URL is built.
        """
        self.appID = appid
        snapshots = self._query_snapshots(appid, "/*")
        if snapshots is None:
            return
        self.json = snapshots
        if not self.json:
            # invalid, try for old Steam layout
            snapshots = self._query_snapshots(appid, "/")
            if snapshots is None:
                return
            self.json = snapshots
            if not self.json:
                return
        self.url, self.date = self.urldate()
        self.gamePage = self._fetch_page(self.url)
        # From here on each method name is rebound to its result; the order
        # matters because later steps read the earlier attributes.
        self.title = self.title()
        self.gettype = self.gettype()
        self.price = self.getprice()
        self.asf = self.getasf()
        self.achievements = SteamGame.getachev(self)
        self.unreleased = SteamGame.isunreleased(self)
        self.isearlyaccess = SteamGame.isearlyaccess(self)
        self.unreleasedtext = SteamGame.getunreleasedtext(self)
        self.blurb = self.getDescriptionSnippet()
        self.reviewsummary = SteamGame.reviewsummary(self)
        self.reviewdetails, self.lowreviews = SteamGame.reviewdetails(self)
        self.genres = self.genres()
        self.usertags = SteamGame.usertags(self)
        if self.gettype != "game":
            self.basegame = self.basegame()
        self.releasedate = self.releasedate()
        self.nsfw = SteamGame.nsfw(self)
        self.plusone = False
        self.developers, self.developers_num = self.developers()
        if self.gettype == "game":
            self.cards = SteamGame.getcards(self)
        # NOTE(review): the reviewed copy of this file had lost its
        # indentation; this assignment is assumed to be unconditional so the
        # attribute always exists - confirm against the upstream file.
        self.pcgamingwiki = SteamGame.pcgamingwiki(self, self.appID)

    @classmethod
    def _fetch(cls, url, cookies=None):
        """GET *url*, retrying on any ``requests`` error.

        There is no attempt limit: the call blocks until a response arrives.
        """
        while True:
            try:
                return requests.get(url, cookies=cookies, timeout=cls._TIMEOUT)
            except requests.exceptions.RequestException:
                print("Archive.org request timeout: sleep for 15 seconds and try again")
                time.sleep(cls._RETRY_DELAY)

    @classmethod
    def _fetch_page(cls, url):
        """Download *url* with the page cookies and parse it as HTML."""
        response = cls._fetch(url, cookies=cls._PAGE_COOKIES)
        return BeautifulSoup(response.text, "html.parser")

    @classmethod
    def _query_snapshots(cls, appid, path_suffix):
        """Return the filtered CDX rows for *appid*.

        Returns ``None`` when the response is not JSON (including a missing
        Content-Type header), otherwise the list from ``filterjson``.
        """
        response = cls._fetch(cls._CDX_PREFIX + str(appid) + path_suffix + cls._CDX_QUERY)
        if 'json' not in (response.headers.get('Content-Type') or ''):
            return None
        return cls.filterjson(response)

    @staticmethod
    def _newest(rows):
        """Return ``(original_url, timestamp)`` of the newest row in *rows*.

        Timestamps are compared as strings; on ties the first row wins.
        Raises ``ValueError`` when *rows* is empty.
        """
        newest = max(rows, key=lambda row: row[1])
        return newest[0], newest[1]

    @staticmethod
    def _tag_text(tag):
        """Return the stripped text of *tag*.

        Uses ``tag.string`` and falls back to ``get_text()`` when the tag
        has no single string (``.string`` is ``None``).
        """
        text = tag.string
        if text is None:
            text = tag.get_text()
        return text.strip()

    @staticmethod
    def _page_title(page):
        """Return the page ``<title>`` without Steam's decorations."""
        title = page.title.string.replace(" on Steam", "")
        return re.sub(r"Save\s[0-9]+%\son\s", "", title)

    @staticmethod
    def _format_date(raw):
        """Return *raw* as ``"Month D, YYYY"``; return *raw* if unparseable."""
        try:
            parsed = dateutil.parser.parse(raw)
        except (ParserError, OverflowError):
            return raw
        # Build the day from the integer instead of the platform-specific,
        # space-padded "%e" directive.
        return "{} {}, {}".format(parsed.strftime("%B"), parsed.day, parsed.year)

    @classmethod
    def filterjson(cls, archive_json):
        """Decode a CDX response and keep only usable snapshot rows.

        *archive_json* is an object whose ``text`` attribute holds the JSON
        body. The first row (the column header) is dropped, as are rows
        whose URL contains ``agecheck`` or a ``?l=`` language parameter
        other than English. Returns a list of ``[original, timestamp]``
        rows, or ``[]`` when the response is empty.
        """
        rows = json.loads(archive_json.text)
        if not rows:
            return []
        return [
            row for row in rows[1:]
            if not re.search("(agecheck)", row[0])
            and not re.search(r"(\?l=)(?!english)", row[0])
        ]

    def urldate(self):
        """Return ``(archive_url, readable_date)`` for the newest snapshot.

        Reads ``self.json``; the date is rendered like ``"March 5, 2015"``
        from the leading YYYYMMDD digits of the snapshot timestamp.
        """
        newest_url, newest_date = self._newest(self.json)
        archive_url = self._WAYBACK_PREFIX + newest_date + "/" + newest_url
        year = newest_date[0:4]
        month = calendar.month_name[int(newest_date[4:6])]
        day = int(newest_date[6:8])
        return archive_url, "{} {}, {}".format(month, day, year)

    def title(self):
        """Return the app name taken from the page title."""
        return self._page_title(self.gamePage)

    def gettype(self):
        """Classify the page as ``"game"``, ``"dlc"`` or ``"music"``."""
        description = self.gamePage.find("div", {"class": "glance_details"})
        if description is not None:
            if "requires the base game" in description.text:
                return "dlc"
            if "additional content for" in description.text:
                return "music"
        return "game"

    def getprice(self):
        """Return ``(price, "")``; price is ``"No price found"`` if absent.

        ``"Free to Play"`` is normalised to ``"Free"``.
        """
        price_div = self.gamePage.find("div", {"class": "game_purchase_price"})
        if price_div is None:
            return "No price found", ""
        price = self._tag_text(price_div)
        if price in ("Free", "Free to Play"):
            price = "Free"
        return price, ""

    def isfree(self):
        """Always report the app as not free."""
        return False

    def getasf(self):
        """Return the ``("a/<appid>", "app")`` pair for this app."""
        return "a/" + str(self.appID), "app"

    def getDescriptionSnippet(self):
        """Return a short description of the app, or ``""`` if none exists.

        Prefers the ``game_description_snippet`` div and falls back to the
        third text node of ``game_area_description``.
        """
        snippet = self.gamePage.find("div", class_="game_description_snippet")
        if snippet is not None:
            return self._tag_text(snippet)
        description = self.gamePage.find("div", class_="game_area_description")
        if description is None:
            return ""
        strings = list(description.strings)
        # Index 2 presumably skips leading whitespace and the section
        # heading - verify against an archived page.
        if len(strings) > 2:
            return strings[2].strip()
        return ""

    def genres(self):
        """Return up to three genres as ``"A, B, C"``, or ``False``.

        The genres come from the first ``details_block`` div that contains
        links whose ``href`` includes ``/genre/``.
        """
        for block in self.gamePage.find_all("div", class_="details_block"):
            genres = [
                link.text.strip()
                for link in block.find_all("a")
                # Anchors without an href would otherwise raise TypeError.
                if "/genre/" in (link.get('href') or "")
            ]
            if genres:
                return ", ".join(genres[:3])
        return False

    def basegame(self):
        """Return details of the base game this DLC/soundtrack belongs to.

        Returns ``(appid, name, price, "", free, discount, url)`` where
        ``discount`` is always ``False`` and ``url`` is the Wayback Machine
        address of the newest base-game snapshot. Returns ``None`` when this
        app is itself a game, when no base-game link can be read from the
        page, or when no usable snapshot of the base game exists.
        """
        if self.gettype == "game":
            return None
        description = self.gamePage.find("div", {"class": "glance_details"})
        link = description.find("a") if description is not None else None
        href = link.get('href') if link is not None else None
        if not href:
            return None
        parts = href.split("/")
        try:
            # The app id is the path segment right after "app".
            appid = parts[parts.index("app") + 1]
        except (ValueError, IndexError):
            return None
        # Covers both a non-JSON answer (None) and an empty result ([]).
        snapshots = self._query_snapshots(appid, "/*")
        if not snapshots:
            return None
        newest_url, newest_date = self._newest(snapshots)
        url = self._WAYBACK_PREFIX + newest_date + "/" + newest_url
        page = self._fetch_page(url)

        price_div = page.find("div", {"class": "game_purchase_price"})
        free = price_div is not None and "Free" in self._tag_text(price_div)
        if price_div is None:
            price_div = page.find("div", {"class": "discount_original_price"})
        price = self._tag_text(price_div) if price_div is not None else "Free"
        name = self._page_title(page)
        discount = False
        return appid, name, price, "", free, discount, url

    def releasedate(self):
        """Return the release date as ``"Month D, YYYY"``.

        The raw page text is returned unchanged when it cannot be parsed as
        a date, and ``False`` when the page shows no release date.
        """
        for div in self.gamePage.find_all("div", class_="release_date"):
            date_tag = div.find("div", {"class": "date"})
            if date_tag is None:
                date_tag = div.find("span", {"class": "date"})
            if date_tag is not None and date_tag.string is not None:
                return self._format_date(date_tag.string)
        # Fallback: the text node following a "Release Date" label in the
        # first details block.
        details = self.gamePage.find("div", class_="details_block")
        if details is not None:
            label = details.find(text=re.compile('Release Date'))
            value = label.next_element if label is not None else None
            if isinstance(value, str):
                return self._format_date(value.strip())
        return False

    def developers(self):
        """Return ``(developer_text, link_count)`` from ``developers_list``.

        Returns ``(False, 0)`` when the page has no such element, so the
        result can always be unpacked into two values.
        """
        developers_div = self.gamePage.find("div", id="developers_list")
        if developers_div is None:
            return False, 0
        count = len(developers_div.find_all("a"))
        return developers_div.text.strip(), count