-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
44 lines (32 loc) · 1.13 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# Author: Ninad Sachania
import html
from urllib.parse import urljoin

import bs4 as bs
import requests
# Scrape the paginated post index of the blog at URL and append one HTML
# line per post (a link to the post followed by its date) to FILENAME.

# Sent with every request through the session below.
# NOTE(review): presumably chosen so the site treats the scraper like a
# regular browser -- confirm it is still required.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'}
URL = 'https://blog.codinghorror.com'
FILENAME = 'urls.html'

# Index pages walked: URL/page/FIRST_PAGE/ through URL/page/LAST_PAGE/.
FIRST_PAGE = 1
LAST_PAGE = 285
# Seconds to wait on the server before a request is abandoned; without a
# timeout a stalled connection would hang the script indefinitely.
REQUEST_TIMEOUT = 30

# The session reuses one connection for all pages and carries the headers;
# both context managers guarantee the socket and the output file are closed
# even when the script exits early. The file is opened in append mode, so
# re-running the script adds to any existing content rather than replacing it.
with requests.Session() as session, open(FILENAME, 'a', encoding='utf-8') as f:
    session.headers.update(headers)

    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        response = session.get(URL + '/page/' + str(page) + '/',
                               timeout=REQUEST_TIMEOUT)
        # Fail loudly on HTTP errors: an error page contains no posts and no
        # dates, so it would otherwise pass the length check below and be
        # skipped without any indication.
        response.raise_for_status()
        soup = bs.BeautifulSoup(response.text, 'lxml')

        dates = [time_tag.text for time_tag in soup.find_all('time')]

        links = []
        for h2 in soup.find_all('h2', class_='post-title'):
            # urljoin resolves site-relative hrefs against URL and leaves
            # already-absolute hrefs untouched.
            href = urljoin(URL, h2.a.get('href'))
            # Titles are plain text taken from the page; escape them (and the
            # href) so characters such as & < > " cannot break the markup.
            links.append('<a href="{}">{}</a>'.format(
                html.escape(href), html.escape(h2.a.text)))

        # Dates and titles are paired purely by position, so a count mismatch
        # means the pairing cannot be trusted for this page.
        if len(dates) != len(links):
            print("Something's wrong @ " + str(page))
            raise SystemExit(-1)

        for link, date in zip(links, dates):
            f.write('{} <span> {}</span>\n<br>\n'.format(link, html.escape(date)))
            print(link, date)

print('Done')