-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdcard_爬留言.py
40 lines (32 loc) · 1.14 KB
/
dcard_爬留言.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Poll the comments URL of one Dcard thread and dump every response that
# appears to contain a comment into "<floor>.txt" (1.txt, 2.txt, ...).
#
# The loop never terminates on its own: once the existing floors are saved it
# keeps re-requesting the next floor every 2 seconds so that new comments are
# picked up as they arrive. Stop it with Ctrl-C.
import sys
import time

import requests
from bs4 import BeautifulSoup  # not used below; kept in case other code relies on it

topic_url = "https://www.dcard.tw/f/joke/p/232224230"
count = 0  # number of floors saved so far; the next floor is count + 1
argu = "&limit=1"  # ask for a single comment per request
# Prefix shared by every request; the current count and argu are appended.
comments_base = topic_url + "/comments?after="
url = comments_base + str(count) + argu

# Cookies sent with every request. None sends no cookies; replace with a dict
# (e.g. dict(__cfduid="...")) if the site turns out to require one.
cookie = None

UserAgent = "Chrome/77.0.3865.90"
headers = {'User-Agent': UserAgent}

# requests has no default timeout, so a stalled connection would otherwise
# block the poller forever.
REQUEST_TIMEOUT = 10  # seconds

# NOTE: despite its name this is a requests Session (reused for every call),
# not a response object.
response = requests.session()
source = response.get(
    url, headers=headers, cookies=cookie, timeout=REQUEST_TIMEOUT
).text
print(source[:8000])

while True:
    try:
        # Crude presence test: a body containing "id" is treated as holding a
        # comment. Presumably an empty result has no such key -- TODO confirm
        # against the actual response format.
        if "id" in source:
            floor = count + 1
            print("第" + str(floor) + "樓有回應")
            filename = str(floor) + ".txt"
            with open(filename, "w", encoding="utf-8") as text_file:
                print(source, file=text_file)
            # Advance only after the file was written, so a failed write is
            # retried for the same floor instead of being skipped.
            count = floor
            url = comments_base + str(count) + argu
            delay = 1
        else:
            # Nothing new yet: wait a little longer before asking again.
            delay = 2

        # Drop the body that was just handled. If the request below fails, the
        # next iteration must not save the same text again as a new floor.
        source = ""
        time.sleep(delay)
        source = response.get(
            url, headers=headers, cookies=cookie, timeout=REQUEST_TIMEOUT
        ).text
    except (requests.RequestException, OSError) as e:
        # Best-effort poller: report the problem and retry rather than exit.
        print("retrying after error: " + str(e), file=sys.stderr)
        time.sleep(1)