-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfanbox.py
139 lines (117 loc) · 5.97 KB
/
fanbox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from utils import *
from pathlib import Path
import json
class Fanbox:
    """Download the posts of one creator by walking the prev/next post chain.

    Relies on helpers that are not defined in this file and are presumably
    provided by ``from utils import *``: ``download``, ``analysis``,
    ``make_path`` and ``DownloadPicture``.

    The in-memory log has the shape
    ``{creator: {post_id: {'status': 'locked' | 'unlocked', ...}}}`` and is
    persisted to ``log_file`` as JSON.
    """

    def __init__(self, config, log_file, save_path, max_threads=5):
        """Store the settings and load (or create) the JSON log.

        Args:
            config: mapping with at least the keys ``'cookies'``,
                ``'latest_post'`` and ``'creator'``.
            log_file: ``pathlib.Path`` of the JSON log; created containing
                ``{}`` when it does not exist yet.
            save_path: directory under which per-post folders are created
                (joined with ``/``, so it is expected to be a ``Path``).
            max_threads: value forwarded to ``DownloadPicture``.
        """
        self.config = config
        self.log_file = log_file
        self.cookies = config['cookies']
        self.post_id = config['latest_post']
        self.creator = config['creator']
        self.max_threads = max_threads
        self.save_path = save_path
        # Load the log file, creating an empty one on first use.
        if not log_file.exists():
            self.log = {}
            with open(log_file, 'w', encoding='utf-8') as logfile:
                json.dump(self.log, logfile)
        else:
            with open(log_file, 'r', encoding='utf-8') as logfile:
                self.log = json.load(logfile)
        # If the log has no entry for this creator yet, start an empty one.
        if not self.log.get(self.creator):
            self.log[self.creator] = {}

    def _fetch(self, post_id):
        """Download one post and return ``(post_data, prev_id, next_id, images)``."""
        post_data = download(self.cookies, post_id)
        prev_id, next_id, images = analysis(post_data)
        return post_data, prev_id, next_id, images

    def _save_post_json(self, post_id, post_data):
        """Write the raw post data to ``<save_path>/<post_id>/<post_id>.json``."""
        # make_path presumably creates the directory and returns it as a Path.
        post_dir = make_path(self.save_path / post_id)
        with open(post_dir / f'{post_id}.json', 'w', encoding='utf-8') as done:
            json.dump(post_data, done, indent=4, ensure_ascii=False)

    def _record_post(self, post_id, images, mode):
        """Download a post's images (if any) and update its log entry.

        Returns True when the post had images (was unlocked), else False.
        """
        entry = self.log[self.creator].get(post_id, {})
        if images:
            entry['status'] = 'unlocked'
            downloader = DownloadPicture(self.save_path, post_id, self.cookies,
                                         images, entry, self.max_threads)
            if mode == 'multiple':
                downloader.multi_download()
            else:
                downloader.single_download()
            # The downloader's own log becomes the entry for this post.
            entry = downloader.log
        else:
            # No image links were found for this post: record it as locked.
            entry['status'] = 'locked'
        self.log[self.creator].update({post_id: entry})
        return bool(images)

    def _save_log(self):
        """Persist the in-memory log to ``self.log_file``."""
        with open(self.log_file, 'w', encoding='utf-8') as _log:
            json.dump(self.log, _log, ensure_ascii=False, indent=4)

    # Search backwards through earlier posts.
    def prev_search(self, mode='multiple'):
        """Process ``self.post_id`` and every earlier post in the chain.

        Args:
            mode: ``'multiple'`` calls ``multi_download``; any other value
                calls ``single_download``.

        ``self.post_id`` is not modified. A failure on the very first
        download propagates; later errors are printed and stop the walk.
        The log is always written once the walk has started.
        """
        # Work on a local copy so walking backwards leaves self.post_id alone.
        post_id = self.post_id
        post_data, prev_id, _next_id, images = self._fetch(post_id)
        try:
            while post_id:
                self._save_post_json(post_id, post_data)
                if self._record_post(post_id, images, mode):
                    print(f"{post_id}已下载完毕")
                post_id = prev_id
                # Only fetch when there is an earlier post; requesting an
                # empty id at the end of the chain is pointless.
                if post_id:
                    post_data, prev_id, _next_id, images = self._fetch(post_id)
        except Exception as e:
            # Best effort: report the problem and keep what was logged so far.
            print(e)
        finally:
            self._save_log()

    def next_search(self, mode='multiple', update=False):
        """Process ``self.post_id`` and every newer post in the chain.

        Args:
            mode: ``'multiple'`` calls ``multi_download``; any other value
                calls ``single_download``.
            update: when True, process the current latest post even if no
                newer post exists.

        After each post is processed, ``config['latest_post']`` and
        ``self.post_id`` are advanced to it, so the instance stays usable
        afterwards. The log and ``config.json`` (in the current working
        directory) are written at the end, also after an error.
        """
        post_id = self.post_id
        post_data, _prev_id, next_id, images = self._fetch(post_id)
        if not next_id and not update:  # no newer post
            print('没有新增动态')
            return
        try:
            while post_id:
                self._save_post_json(post_id, post_data)
                self._record_post(post_id, images, mode)
                # Remember the newest post that was fully processed.
                self.config['latest_post'] = post_id
                self.post_id = post_id
                post_id = next_id
                # Stop cleanly at the end of the chain instead of
                # requesting an empty id.
                if post_id:
                    post_data, _prev_id, next_id, images = self._fetch(post_id)
        except Exception as e:
            # Best effort: report the problem and keep what was logged so far.
            print(e)
        finally:
            self._save_log()
            with open('config.json', 'w', encoding='utf-8') as config_:
                json.dump(self.config, config_, ensure_ascii=False, indent=4)

    def re_download(self, post_id, mode='multiple'):
        """Download the images of one specific post again and update the log.

        Args:
            post_id: id of the post to process.
            mode: ``'multiple'`` calls ``multi_download``; any other value
                calls ``single_download``.

        A post that has images is recorded as ``'unlocked'``, in the same
        way the search methods record it, so an entry previously logged as
        ``'locked'`` is corrected.
        """
        _post_data, _prev_id, _next_id, images = self._fetch(post_id)
        self._record_post(post_id, images, mode)
        self._save_log()
if __name__ == '__main__':
    # Read the crawler settings from config.json in the working directory.
    settings_path = Path('config.json')
    settings = json.loads(settings_path.read_text(encoding='utf-8'))
    creator_name = settings['creator']
    # Root directory for saved content; each creator gets its own sub-folder.
    # (make_path comes from utils; presumably it creates the directory and
    # returns it as a Path, since the result is joined with "/".)
    creator_dir = make_path(make_path('creator') / creator_name)
    # The per-creator log is stored next to that creator's downloads.
    crawler = Fanbox(settings, creator_dir / 'log.json', creator_dir)
    # Fetch every post newer than the configured latest one.
    crawler.next_search()