# -*- encoding: utf-8 -*-
'''
@File : spider.py
@Time : 2023-06-18 17:44:21 Sunday
@Author : erma0
@Version : V3
@Link : https://erma0.cn
@Desc : Douyin spider core code
'''
import os
import re
import subprocess
from threading import Lock
from typing import List
from urllib.parse import quote, unquote, urlparse
import ujson as json
from loguru import logger
from playwright.sync_api import Error, Route, TimeoutError
from browser import Browser, BrowserContext
version = 'V3.230622'
banner = rf'''
____ _ ____ _ _
| _ \ ___ _ _ _ _(_)_ __ / ___| _ __ (_) __| | ___ _ __
| | | |/ _ \| | | | | | | | '_ \ \___ \| '_ \| |/ _` |/ _ \ '__|
| |_| | (_) | |_| | |_| | | | | | ___) | |_) | | (_| | __/ |
|____/ \___/ \__,_|\__, |_|_| |_| |____/| .__/|_|\__,_|\___|_|
|___/ |_|
{version}
Github: https://github.com/erma0/douyin
'''
print(banner)
class Douyin(object):
def __init__(self,
context: BrowserContext,
url: str = '',
num: int = -1,
type: str = 'post',
down_path: str = '下载',
path_type: str = 'id',
msToken: bool = False):
"""
        Initialize.
type=['post', 'like', 'music', 'search', 'follow', 'fans', 'collection', 'video', 'favorite', 'id']
        Files/folders are named by id by default; with path_type='title' the nickname/title is used instead, but that may affect incremental collection of a user's posts,
        because the works-list request can arrive before the nickname is known, which would cause duplicate collection.
"""
self.context = context
self.num = num
self.type = type
self.down_path = down_path
self.path_type = path_type
self.msToken = msToken
self.url = url.strip() if url else ''
self.has_more = True
        os.makedirs(self.down_path, exist_ok=True)
        self.pageDown = 0
        self.pageDownMax = 5  # max retries
        self.results = []  # collected results
        self.results_old = []  # results from the previous run
        self.lock = Lock()
        self.init_()  # initialize URL-related parameters
@staticmethod
    def str2path(s: str):
        """
        Convert a string into a legal Windows file name.
        """
        # Illegal characters
        lst = ['\r', '\n', '\\', '/', ':', '*', '?', '"', '<', '>', '|']
        # lst = ['\r', '\n', '\\', '/', ':', '*', '?', '"', '<', '>', '|', ' ', '^']
        # Option 1: replace each illegal character
        for key in lst:
            s = s.replace(key, '_')
        # Option 2: strip them instead
        # s = s.translate(None, ''.join(lst))
        # File name + path length is limited to 255 characters (CJK characters count double), so truncate to 80
        if len(s) > 80:
            s = s[:80]
        return s.strip()
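    # Illustrative example (hypothetical input): str2path('a/b:c?*.mp4') -> 'a_b_c__.mp4'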
@staticmethod
    def quit(msg):
        """
        Log the error and exit the program immediately.
        """
        logger.error(msg)
        exit()
def url2redirect(self, url):
"""
        Resolve the 302 redirect target URL (using the browser).
"""
r = self.context.new_page()
r.goto(url, wait_until='domcontentloaded')
url = r.url
r.close()
return url
@staticmethod
def url2redirect_requests(url):
"""
        Resolve the 302 redirect target URL using requests.
"""
import requests
r = requests.get(url, allow_redirects=False)
u = r.headers.get('Location', url)
return u
@staticmethod
def filter_emoji(desstr, restr=''):
        # Filter emoji; can be called when building file names if emoji should be stripped
try:
res = re.compile(u'[\U00010000-\U0010ffff]')
except re.error:
res = re.compile(u'[\uD800-\uDBFF][\uDC00-\uDFFF]')
return res.sub(restr, desstr)
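    # Illustrative example (hypothetical input): filter_emoji('标题😀') -> '标题'
    # (code points above U+FFFF, which covers most emoji, are removed)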
def _append_user(self, user_list: List[dict]):
if not user_list:
logger.error("本次请求结果为空")
return
        with self.lock:  # lock to avoid unexpected conflicts
            if self.has_more:
                for item in user_list:
                    if item['is_red_uniqueid']:  # exact match
                        info: dict = item['user_info']
                        for key in list(info.keys()):
                            if not info[key]:
                                info.pop(key)
                        self.results.append(info)  # store the result
                        logger.info(f'采集中,已采集到{len(self.results)}条结果')
                        break
                self.has_more = False  # only the first page is needed
def _append_users(self, user_list: List[dict]):
if not user_list:
logger.error("本次请求结果为空")
return
        with self.lock:  # lock to avoid unexpected conflicts
if self.num < 0 or len(self.results) < self.num:
for item in user_list:
if self.num > 0 and len(self.results) >= self.num:
self.has_more = False
logger.info(f'已达到限制采集数量:{len(self.results)}')
return
info = {}
info['nickname'] = self.str2path(item['nickname'])
info['signature'] = self.str2path(item['signature'])
info['avatar'] = item['avatar_larger']['url_list'][0]
for i in [
'sec_uid', 'uid', 'short_id', 'unique_id', 'unique_id_modify_time', 'aweme_count', 'favoriting_count',
'follower_count', 'following_count', 'constellation', 'create_time', 'enterprise_verify_reason',
'is_gov_media_vip', 'live_status', 'total_favorited', 'share_qrcode_uri'
]:
if item.get(i):
info[i] = item[i]
room_id = item.get('room_id')
                    if room_id:  # live room
info['live_room_id'] = room_id
info['live_room_url'] = [
f'http://pull-flv-f26.douyincdn.com/media/stream-{room_id}.flv',
f'http://pull-hls-f26.douyincdn.com/media/stream-{room_id}.m3u8'
]
music_count = item['original_musician']['music_count']
                    if music_count:  # original musician
info['original_musician'] = item['original_musician']
                    self.results.append(info)  # store the result
logger.info(f'采集中,已采集到{len(self.results)}条结果')
else:
self.has_more = False
logger.info(f'已达到限制采集数量:{len(self.results)}')
def _append_awemes(self, aweme_list: List[dict]):
"""
        Store the collected data.
"""
if not aweme_list:
logger.error("本次请求结果为空")
return
        with self.lock:  # lock to avoid unexpected conflicts
if self.num < 0 or len(self.results) < self.num:
for item in aweme_list:
                    # ===== limit the count =====
if self.num > 0 and len(self.results) >= self.num:
self.has_more = False
logger.info(f'已达到限制采集数量:{len(self.results)}')
return
                    # ===== incremental collection =====
_time = item.get('create_time', item.get('createTime'))
_is_top = item.get('is_top', item.get('tag', {}).get('isTop'))
if self.results_old:
old = self.results_old[0]['time']
                        if _time <= old:  # not newer than the latest work from the previous run: skip pinned works, otherwise stop
if _is_top:
continue
if self.has_more:
self.has_more = False
logger.success(f'增量采集完成,上次运行结果:{old}')
return
                    # ===== save the result =====
_type = item.get('aweme_type', item.get('awemeType'))
info = item.get('statistics', item.get('stats', {}))
for i in [
'playCount', 'downloadCount', 'forwardCount', 'collectCount', "digest", "exposure_count",
"live_watch_count", "play_count", "download_count", "forward_count", "lose_count",
"lose_comment_count"
]:
if not info.get(i):
info.pop(i, '')
info.pop('aweme_id', '')
                    if _type <= 66 or _type in [69, 107]:  # video (type 77 is Xigua Video)
play_addr = item['video'].get('play_addr')
if play_addr:
download_addr = item['video']['play_addr']['url_list'][-1]
else:
download_addr = f"https:{ item['video']['playApi']}"
info['download_addr'] = download_addr
                    elif _type == 68:  # image/text post
                        info['download_addr'] = [images.get('url_list', images.get('urlList'))[-1] for images in item['images']]
                    elif _type == 101:  # live stream
                        continue
                    else:  # other work types
                        info['download_addr'] = '其他类型作品'
                        logger.info(f'type: {_type}')
                        with open(f'{_type}.json', 'w', encoding='utf-8') as f:  # dump unrecognized types for later inspection
                            json.dump(item, f, ensure_ascii=False)  # keep non-ASCII characters unescaped
continue
info['id'] = item.get('aweme_id', item.get('awemeId'))
info['time'] = _time
desc = self.str2path(item.get('desc'))
info['desc'] = desc
music = item.get('music')
if music:
info['music_title'] = self.str2path(music['title'])
info['music_url'] = music.get('play_url', music.get('playUrl'))['uri']
cover = item['video'].get('origin_cover')
if cover:
info['cover'] = item['video']['origin_cover']['url_list'][-1]
else:
info['cover'] = f"https:{item['video']['originCover']}"
tags = item.get('text_extra', item.get('textExtra'))
if tags:
info['tags'] = [{
'tag_id': hashtag.get('hashtag_id', hashtag.get('hashtagId')),
'tag_name': hashtag.get('hashtag_name', hashtag.get('hashtagName'))
} for hashtag in tags]
                    self.results.append(info)  # store the result
logger.info(f'采集中,已采集到{len(self.results)}条结果')
else:
self.has_more = False
logger.info(f'已达到限制采集数量:{len(self.results)}')
def download(self):
"""
        After collection finishes, download all collected results in one batch.
"""
if os.path.exists(self.aria2_conf):
logger.info('开始下载')
# command = f'aria2c -c --console-log-level warn -d {self.down_path} -i {self.aria2_conf}'
command = ['aria2c', '-c', '--console-log-level', 'warn', '-d', self.down_path, '-i', self.aria2_conf]
            subprocess.run(command)  # pass a list: with shell=True the string would need extra escaping
else:
logger.error('没有发现可下载的配置文件')
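    # Shape of the aria2 input file that save() writes and download() consumes
    # (the values below are illustrative only; real entries come from the collected results):
    #   https://example.com/xxx.mp4
    #       dir=下载/post_xxx
    #       out=1234567890123456789_标题.mp4
    # Each URL line is followed by tab-indented per-download options (dir=, out=, user-agent=, header=...).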
def save(self):
if self.results:
logger.success(f'采集完成,本次共采集到{len(self.results)}条结果')
            if self.type in ['post', 'like', 'music', 'search', 'collection', 'video', 'favorite']:  # save the works list as an aria2 input file
self.msToken = [_['value'] for _ in self.context.cookies() if _['name'] == 'msToken'] if self.msToken else None
_ = []
with open(self.aria2_conf, 'w', encoding='utf-8') as f:
                    for line in self.results:  # only write download entries for this run's results
filename = f'{line["id"]}_{line["desc"]}'
if isinstance(line["download_addr"], list):
down_path = self.down_path.replace(line["id"], filename) if self.type == 'video' else os.path.join(
self.down_path, filename)
[
_.append(f'{addr}\n\tdir={down_path}\n\tout={line["id"]}_{index + 1}.jpeg\n')
for index, addr in enumerate(line["download_addr"])
]
elif isinstance(line["download_addr"], str):
                            if self.msToken:  # attach msToken for downloads that otherwise come back as 0 KB
_.append(
f'{line["download_addr"]}\n\tdir={self.down_path}\n\tout={filename}.mp4\n\tuser-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36\n\theader=Cookie:msToken={self.msToken[0]}\n'
)
else:
                                _.append(f'{line["download_addr"]}\n\tdir={self.down_path}\n\tout={filename}.mp4\n')  # downloads that work without extra headers
else:
logger.error("下载地址错误")
f.writelines(_)
            elif self.type in ['follow', 'fans', 'id']:  # save user lists as profile page links
with open(self.aria2_conf, 'w', encoding='utf-8') as f:
f.writelines([
f"https://www.douyin.com/user/{line.get('sec_uid', 'None')}" for line in self.results
if line.get('sec_uid', None)
])
            with open(f'{self.down_path}.json', 'w', encoding='utf-8') as f:  # save all data, including old data, to file
                if self.type == 'post':  # only profile posts need to be sorted by time
self.results.sort(key=lambda item: item['id'], reverse=True)
self.results.extend(self.results_old)
json.dump(self.results, f, ensure_ascii=False)
else:
logger.info("本次采集结果为空")
def handle(self, route: Route):
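        # Registered with page.route() in page_init(): fetch the real response, feed its JSON to the
        # matching _append_* method, then fulfill the route so the page itself still receives the data.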
try:
if self.has_more:
if self.pageDown > 0:
self.pageDown = 0
response = route.fetch()
if int(response.headers.get('content-length', 1)) > 0:
resj = response.json()
if self.has_more:
self.has_more = resj.get('has_more', True)
if self.type == 'follow':
info = resj.get('followings')
self._append_users(info)
elif self.type == 'fans':
info = resj.get('followers')
self._append_users(info)
elif self.type == 'id':
info = resj.get('user_list')
self._append_user(info)
elif self.type == 'search':
info = []
for item in resj.get('data'):
                            if item['type'] == 1:  # 1 work, 16 mix/collection, 76 encyclopedia, 77 Toutiao article, 996 hot list, 997 micro headline
_info = item['aweme_info']
info.append(_info)
elif item['type'] == 16:
_info = item['aweme_mix_info']['mix_items']
info.extend(_info)
elif item['type'] == 996:
_info = item['sub_card_list'][0]['hotspot_info']['hotspot_items']
info.extend(_info)
else:
pass
self._append_awemes(info)
else:
info = resj.get('aweme_list')
self._append_awemes(info)
route.fulfill(response=response)
else:
route.abort()
except KeyError as err:
logger.error(f'Error: {err}')
            with open('error.json', 'w', encoding='utf-8') as f:  # save the response that could not be parsed
json.dump(response.text(), f, ensure_ascii=False)
except Error as err:
msg = err.message.split("\n")[0]
logger.info(f'浏览器已关闭: {msg}')
# logger.info(f'Playwright Error: {msg}')
except Exception as err:
logger.error(f'Error: {err}')
def init_(self):
        if not self.url:  # no URL given: default to collecting from the logged-in account
if self.type == 'favorite':
self.url = 'https://www.douyin.com/user/self?showTab=favorite_collection'
elif self.type == 'like':
self.url = 'https://www.douyin.com/user/self?showTab=like'
            elif self.type in ['post', 'follow', 'fans']:  # on the command line, post requires a URL
self.url = 'https://www.douyin.com/user/self'
else:
self.quit('请输入URL')
hostname = urlparse(self.url).hostname
        if self.type == 'id':  # search for a user ID
            self.url = f'https://www.douyin.com/search/{self.url}?type=user'
        elif self.url.isdigit():  # numeric ID: a single work
            self.url = f'https://www.douyin.com/video/{self.url}'
        elif hostname and hostname.endswith('douyin.com'):  # a douyin.com link
            if hostname == 'v.douyin.com':
                # self.url = self.url2redirect(self.url)
                self.url = self.url2redirect_requests(self.url)
        else:  # keyword: search
            self.url = f'https://www.douyin.com/search/{quote(self.url)}'
*_, _type, self.id = unquote(urlparse(self.url).path.strip('/')).split('/')
hookURL = '/aweme/v[123]/web/'
        if _type in ['video', 'note']:  # auto-detect: single work (video)
            self.type = 'video'
            hookURL = '单个作品无需hookURL'  # single works need no hook URL (this pattern never matches)
        if _type == 'search':
            if self.type == 'id':  # search: user ID
                hookURL += 'discover/search'
            else:
                self.type = 'search'  # search: general or video results
                hookURL += '(general/search|search/item)'
        elif _type == 'music':  # auto-detect: music
            self.type = 'music'
            hookURL += 'music'
        elif _type == 'collection':  # auto-detect: collection (mix)
            self.type = 'collection'
            hookURL += 'mix/aweme'
        elif _type == 'user':  # profile link
if self.type == 'post' or self.url.endswith('?showTab=post'):
self.type = 'post'
hookURL += 'aweme/post'
elif self.type == 'like' or self.url.endswith('?showTab=like'):
self.type = 'like'
hookURL += 'aweme/favorit'
if not self.url.endswith('showTab=like'):
self.url = f'https://www.douyin.com/user/{self.id}?showTab=like'
elif self.type == 'favorite' or self.url.endswith('?showTab=favorite_collection'):
self.type = 'favorite'
hookURL += 'aweme/listcollection'
                self.url = 'https://www.douyin.com/user/self?showTab=favorite_collection'  # ignore the input URL when collecting favorites
elif self.type == 'follow':
hookURL += 'user/following'
elif self.type == 'fans':
hookURL += 'user/follower'
        else:  # fallback
pass
self.hookURL = re.compile(hookURL, re.S)
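        # Illustrative match (assumed request path, not taken from this file): for type='post' the pattern
        # '/aweme/v[123]/web/aweme/post' would match an XHR URL like
        # https://www.douyin.com/aweme/v1/web/aweme/post/?sec_user_id=...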
if self.path_type == 'id':
self.down_path = os.path.join(self.down_path, self.str2path(f'{self.type}_{self.id}'))
self.aria2_conf = f'{self.down_path}.txt'
            if self.type == 'post':  # profile posts support incremental collection, so load the previous results first
                if os.path.exists(f'{self.down_path}.json') and not self.results_old:
with open(f'{self.down_path}.json', 'r', encoding='utf-8') as f:
self.results_old = json.load(f)
def page_init(self):
self.page = self.context.new_page()
self.page.set_default_timeout(0)
if self.has_more:
self.page.route(self.hookURL, self.handle)
self.page.goto(self.url)
render_data: dict = json.loads(unquote(self.page.locator('id=RENDER_DATA').inner_text()))
_app = render_data.pop('app', None)
self.client_data = _app if _app else render_data.pop('1', None)
self._location = render_data.pop('_location', None)
self.render_data = render_data.popitem()[1] if render_data else None
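        # RENDER_DATA is the URL-encoded JSON blob the page embeds for server-side rendering; after popping
        # 'app'/'1' and '_location', the remaining entry (if any) is taken as the page-specific data.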
if self.type in ['post', 'like', 'follow', 'fans', 'favorite']:
if self.render_data:
                self.info = self.render_data['user']  # kept as a fallback
                self.title = self.info['user']['nickname']
            if self.type == 'follow':  # click to open the following list
                self.page.locator('[data-e2e="user-info-follow"]').click()
                self.page.locator('[data-e2e="user-fans-container"]').click()
            elif self.type == 'fans':  # click to open the followers list
self.page.locator('[data-e2e="user-info-fans"]').click()
self.page.locator('[data-e2e="user-fans-container"]').click()
elif self.type == 'id':
self.title = self.id
elif self.type == 'search':
self.title = self.id
if self.render_data:
self.info = self.render_data['defaultSearchParams']
# self.title = self.info['keyword']
elif self.type == 'collection':
if self.render_data:
self.info = self.render_data['aweme']['detail']['mixInfo']
self.title = self.info['mixName']
        elif self.type == 'music':  # focus the scrolling list
if self.render_data:
self.info = self.render_data['musicDetail']
self.title = self.info['title']
self.page.locator('[data-e2e="scroll-list"]').last.click()
elif self.type == 'video':
if self.render_data:
self.info = self.render_data['aweme']['detail']
self.title = self.id
        else:  # fallback
pass
if self.path_type == 'title':
self.down_path = os.path.join(self.down_path, self.str2path(f'{self.type}_{self.title}'))
self.aria2_conf = f'{self.down_path}.txt'
            if self.type == 'post':  # profile posts support incremental collection, so load the previous results first
if os.path.exists(f'{self.down_path}.json') and not self.results_old:
with open(f'{self.down_path}.json', 'r', encoding='utf-8') as f:
self.results_old = json.load(f)
        # has_more controls whether RENDER_DATA from the initial page is extracted, but opening the profile
        # immediately hooks one request, which may set has_more to 0; that must not block extracting RENDER_DATA.
        if self.has_more is not False:
            if self.type == 'post' and self.render_data.get('post', None):  # the post page needs extraction
                # sort newest first and ignore pinned works (this is typically used to grab the latest works)
if self.has_more:
self.has_more = self.render_data['post']['hasMore']
render_data_ls = self.render_data['post']['data']
render_data_ls.sort(key=lambda item: item.get('aweme_id', item.get('awemeId')), reverse=True)
self._append_awemes(render_data_ls)
            elif self.type == 'video' and self.render_data.get('aweme', None):  # the video page needs extraction
                render_data_ls = [self.render_data['aweme']['detail']]
                self._append_awemes(render_data_ls)
                self.has_more = False
            else:  # fallback
                pass
    def page_next(self):  # load the next batch of data
if self.type == 'collection':
self.page.get_by_role("button", name="点击加载更多").click()
else:
self.page.keyboard.press('End')
# logger.info("加载中")
def run(self):
"""
        Start collecting.
"""
self.page_init()
while self.has_more and self.pageDown <= self.pageDownMax:
try:
with self.page.expect_request_finished(lambda request: self.hookURL.search(request.url), timeout=3000):
                    self.page_next()  # load the next batch of data
                    # print('下一页')
            except TimeoutError:  # retry
                self.pageDown += 1
                logger.error("重试 + 1")
        self.save()  # save the results
self.page.close()
def test():
edge = Browser(headless=True)
    # a = Douyin(
    #     context=edge.context,
    #     url='https://v.douyin.com/U3eAtXx/'
    #     # url='https://www.douyin.com/user/MS4wLjABAAAA1UojDGpM_JuQ91nbVjo6jLfJSpQ5hswNRBaAndW_5spMTAUJ4xjhOKtOW0f5IDa8'
    #     # url='https://www.douyin.com/user/MS4wLjABAAAAtSPIL_StfoqgclIO3YGO_wnQeGsRQuFP7hA3j6tUv2sXA2oGfVm9fwCLq8bmurs3?showTab=post'
    # )  # works
    # a = Douyin(
    #     context=edge.context,
    #     url='https://www.douyin.com/user/MS4wLjABAAAAtSPIL_StfoqgclIO3YGO_wnQeGsRQuFP7hA3j6tUv2sXA2oGfVm9fwCLq8bmurs3?showTab=like'
    # )  # likes
    # a = Douyin(context=edge.context, url='https://www.douyin.com/user/MS4wLjABAAAA8U_l6rBzmy7bcy6xOJel4v0RzoR_wfAubGPeJimN__4', num=11)  # works
    # a = Douyin(context=edge.context, url='https://www.douyin.com/user/MS4wLjABAAAA8U_l6rBzmy7bcy6xOJel4v0RzoR_wfAubGPeJimN__4')  # works
    # a = Douyin(context=edge.context, url='https://v.douyin.com/UhYnoMS/')  # single work
    # a = Douyin(context=edge.context, url='7233251303269453089')  # single work: image post by numeric ID
    # a = Douyin(context=edge.context, url='https://v.douyin.com/BK2VMkG/')  # image-post profile
    # a = Douyin(context=edge.context, url='https://v.douyin.com/BGPBena/', type='music')  # music
    # a = Douyin(context=edge.context, url='https://v.douyin.com/BGPBena/', num=11)  # music
    # a = Douyin(context=edge.context, url='https://www.douyin.com/search/%E4%B8%8D%E8%89%AF%E4%BA%BA', num=30)  # search
    # a = Douyin(context=edge.context, url='https://www.douyin.com/search/%E4%B8%8D%E8%89%AF%E4%BA%BA', type='search')  # search
    # a = Douyin(context=edge.context, url='不良人', num=11)  # keyword search
    # a = Douyin(context=edge.context, url='不良人', type='search', num=11)  # keyword search
    # a = Douyin(context=edge.context, url='https://www.douyin.com/user/MS4wLjABAAAA8U_l6rBzmy7bcy6xOJel4v0RzoR_wfAubGPeJimN__4?showTab=like')  # long link + likes
    # a = Douyin(context=edge.context, url='https://www.douyin.com/user/MS4wLjABAAAA8U_l6rBzmy7bcy6xOJel4v0RzoR_wfAubGPeJimN__4', type='like')  # long link + likes
    # a = Douyin(context=edge.context, url='https://v.douyin.com/BGf3Wp6/', type='like')  # short link + likes (your own private account requires login)
    # a = Douyin(context=edge.context, url='https://www.douyin.com/user/MS4wLjABAAAA8U_l6rBzmy7bcy6xOJel4v0RzoR_wfAubGPeJimN__4', type='fans')  # followers
    # a = Douyin(context=edge.context, url='https://www.douyin.com/user/MS4wLjABAAAA8U_l6rBzmy7bcy6xOJel4v0RzoR_wfAubGPeJimN__4', type='follow')  # following
    # a = Douyin(context=edge.context, url='https://www.douyin.com/collection/7018087406876231711')  # collection
    # a = Douyin(context=edge.context, url='https://www.douyin.com/collection/7018087406876231711', type='collection')  # collection
    # a = Douyin(context=edge.context, type='like')  # likes of the logged-in account
    # a = Douyin(context=edge.context, type='favorite')  # favorites of the logged-in account
    a = Douyin(context=edge.context, url='xinhuashe', type='id')  # search for a user ID
a.run()
# a.download()
# python ./douyin.py -u https://v.douyin.com/BGf3Wp6/ -t like
edge.stop()
if __name__ == "__main__":
test()