# bqb.py -- multi-threaded downloader for sticker images listed on fabiaoqing.com.
import os
import re
import threading
import time
from queue import Empty, LifoQueue

import requests
from bs4 import BeautifulSoup
def download_bqb(url, qsize, timeout=30):
    """Download every sticker image found on one listing page into ``images/``.

    The page is fetched and parsed, and each ``<img class="ui image lazy">``
    element is saved as ``images/<title><extension>``, where the extension is
    taken from the image URL in the ``data-original`` attribute.

    Args:
        url: Address of the listing page to scrape.
        qsize: Number of pages still queued. Accepted for compatibility with
            existing callers; it is not used here.
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled connection cannot block the calling thread forever.

    Raises:
        requests.exceptions.RequestException: If the listing page itself
            cannot be fetched. Failures on individual images are reported
            on stdout and skipped instead.
    """
    # The target directory may not exist on a fresh checkout; exist_ok keeps
    # this safe when several threads get here at the same time.
    os.makedirs('images', exist_ok=True)

    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    img_list = soup.find_all('img', class_='ui image lazy')
    for img in img_list:
        image = img.get('data-original')
        if not image:
            # No source URL on this tag, so there is nothing to download.
            continue

        # Strip characters that are not allowed in Windows file names;
        # creating the file would fail otherwise. A missing title attribute
        # is treated as an empty title.
        title = re.sub(r'[\\/:?<>|*"]', '', img.get('title') or '')
        if not title:
            # Fall back to the file name from the URL so the image is still
            # saved under a usable name.
            title = os.path.splitext(os.path.basename(image))[0]
        path = 'images/' + title + os.path.splitext(image)[-1]

        try:
            # Download first and open the file afterwards, so a failed
            # request does not leave an empty file behind.
            img_response = requests.get(image, timeout=timeout)
            # Do not store an HTTP error page as if it were an image.
            img_response.raise_for_status()
            with open(path, 'wb') as f:
                f.write(img_response.content)
        except OSError:
            # requests' exceptions derive from OSError, so this covers both
            # network and file-system failures. Report the image and move on
            # rather than abandoning the remaining images on the page.
            print(title + '下载异常,源地址:' + image)
            continue
def fetchUrl(urlQueue):
    """Worker loop: take page URLs from the queue and download each one.

    Runs until the queue is empty. A page whose download raises is reported
    on stdout and skipped, so one bad page does not stop the worker while
    other pages are still queued.

    Args:
        urlQueue: A ``queue.Queue``-style object (the script uses
            ``LifoQueue``) holding listing-page URLs. It must provide
            ``get_nowait()`` and ``qsize()``.

    Returns:
        None.
    """
    worker_name = threading.current_thread().name
    while True:
        try:
            url = urlQueue.get_nowait()
        except Empty:
            # Nothing left to do: this is the only normal exit from the loop.
            break

        qsize = urlQueue.qsize()
        print(' %s 开始下载, Url: %s ,剩余页数: %s' % (worker_name, url, qsize))
        try:
            download_bqb(url, qsize)
        except Exception as e:
            # Top-level boundary of the worker thread: report the failed page
            # ("download failed, reason: ...") and carry on with the next one
            # instead of silently ending the thread.
            print(' %s 下载失败, Url: %s ,原因: %s' % (worker_name, url, e))
            continue
        print(' %s 下载完成, Url: %s ' % (worker_name, url))
if __name__ == '__main__':
    # Listing-page URL template of the sticker site; {} takes the page number.
    page_template = 'https://fabiaoqing.com/biaoqing/lists/page/{}.html'
    # Number of worker threads.
    worker_count = 10

    # Queue pages 1..200. The queue is LIFO, so workers take the
    # highest-numbered pages first.
    pending_pages = LifoQueue()
    for page_url in map(page_template.format, range(1, 201)):
        pending_pages.put(page_url)

    started_at = time.time()

    # Create all workers first, then start them, then wait for every one.
    workers = [
        threading.Thread(target=fetchUrl, args=(pending_pages,))
        for _ in range(worker_count)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # Report the total elapsed wall-clock time in seconds.
    print('done,cost', (time.time() - started_at))