-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathWechat_MP.yaml
117 lines (82 loc) · 6.56 KB
/
Wechat_MP.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# Metadata about this configuration file.
meta:
  # Author of the configuration.
  author: "adamhuan"
  # Short description; the text reads "WeChat official account - crawler".
  desc: "微信公众号 - 爬虫"
# WeChat official-account (MP) settings.
wechat_mp:
  # Name of the official account.
  mp_name: "Nephilim"
  # List of article collections (albums). Each entry pairs a `name` with the
  # album `url`.
  collections:
    # Albums whose URL carries __biz=MzI0NjI2Nzg1Ng== (presumably the account
    # named in `mp_name` — confirm).
    - name: "Python"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MzI0NjI2Nzg1Ng==&action=getalbum&album_id=2604643599875604481#wechat_redirect"
    - name: "MySQL"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MzI0NjI2Nzg1Ng==&action=getalbum&album_id=2604620702314430464#wechat_redirect"
    - name: "Cloudera"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MzI0NjI2Nzg1Ng==&action=getalbum&album_id=2604619754703716353#wechat_redirect"
    - name: "Vue.JS"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MzI0NjI2Nzg1Ng==&action=getalbum&album_id=2641696104832598018#wechat_redirect"
    - name: "Webpack"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MzI0NjI2Nzg1Ng==&action=getalbum&album_id=2693638439640006662#wechat_redirect"
    - name: "烹饪【思考与理念】"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MzI0NjI2Nzg1Ng==&action=getalbum&album_id=2611934822357303298#wechat_redirect"
    - name: "烹饪【调味品】"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MzI0NjI2Nzg1Ng==&action=getalbum&album_id=2611386114783870977#wechat_redirect"
    - name: "烹饪【食谱】"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MzI0NjI2Nzg1Ng==&action=getalbum&album_id=2605040820764835842#wechat_redirect"
    - name: "烹饪【刀工】"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MzI0NjI2Nzg1Ng==&action=getalbum&album_id=2604756439605215234#wechat_redirect"
    - name: "烹饪【雕刻】"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MzI0NjI2Nzg1Ng==&action=getalbum&album_id=2719456958025596930#wechat_redirect"
    - name: "歌曲"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MzI0NjI2Nzg1Ng==&action=getalbum&album_id=2612226984605155330#wechat_redirect"
    - name: "英语"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MzI0NjI2Nzg1Ng==&action=getalbum&album_id=2611386115102638084#wechat_redirect"
    - name: "读书"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MzI0NjI2Nzg1Ng==&action=getalbum&album_id=2616568787093454851#wechat_redirect"

    # Albums with other __biz values; most names follow the pattern
    # "<account> - #<album tag>".
    - name: "脱单实验室RIGHT - #脱单"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?action=getalbum&__biz=MzIxOTM2NDk5MQ==&scene=1&album_id=2083447322302054402&count=3#wechat_redirect"
    - name: "泡泡恋爱学 - #脱单"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?action=getalbum&__biz=Mzg5NDczMTMxNg==&scene=1&album_id=2389116273214586881&count=3#wechat_redirect"
    - name: "遇见长安遇见你 - #漂亮小姐姐云集"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?action=getalbum&__biz=Mzg2NzIwNDU1Mw==&scene=1&album_id=1977362513277091842&count=3#wechat_redirect"
    - name: "非凡精英 - #VSINGLE优秀单身女会员推荐"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?action=getalbum&__biz=MzI0MDA0NzA4MA==&scene=1&album_id=1371534895293808643&count=3#wechat_redirect"
    - name: "国漫二次元 - #长腿美女"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?action=getalbum&__biz=MzIzNDE5NDA3Nw==&scene=1&album_id=2616307804093874177&count=3#wechat_redirect"

    # Disabled entries, kept verbatim. These point at mp/homepage pages rather
    # than appmsgalbum albums like the active entries.
    # NOTE(review): the reason they were disabled is not recorded — confirm
    # before re-enabling.
    # - name: "国漫二次元 - 宅男女神"
    #   url: "https://mp.weixin.qq.com/mp/homepage?__biz=MzIzNDE5NDA3Nw==&hid=11&sn=172e2729d2e809d388470369e977b882&scene=1&devicetype=android-33&version=28001f59&lang=zh_CN&nettype=WIFI&ascene=59&session_us=gh_dfc54b389b0a&wx_header=3"
    # - name: "国漫二次元 - 二次元"
    #   url: "https://mp.weixin.qq.com/mp/homepage?__biz=MzIzNDE5NDA3Nw==&hid=10&sn=033aa7a16615db945ab22ce5ac663b78&scene=1&devicetype=android-33&version=28001f59&lang=zh_CN&nettype=WIFI&ascene=59&session_us=gh_dfc54b389b0a&wx_header=3"
    # - name: "国漫二次元 - 动漫美女"
    #   url: "https://mp.weixin.qq.com/mp/homepage?__biz=MzIzNDE5NDA3Nw==&hid=9&sn=6b4948f66db73a05fe100bbf945cf712&scene=1&devicetype=android-33&version=28001f59&lang=zh_CN&nettype=WIFI&ascene=59&session_us=gh_dfc54b389b0a&wx_header=3"

    - name: "国漫二次元 - 国漫美女"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?action=getalbum&__biz=MzIzNDE5NDA3Nw==&scene=1&album_id=2641049543576715266&count=3#wechat_redirect"
    - name: "国漫二次元 - 黑丝"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?action=getalbum&__biz=MzIzNDE5NDA3Nw==&scene=1&album_id=2651112935570079747&count=3#wechat_redirect"

    # Disabled duplicate: this URL (album_id=2616307804093874177) is identical
    # to the active "国漫二次元 - #长腿美女" entry above, so keeping both would
    # list the same album twice under two names.
    # NOTE(review): if this name is the preferred one, re-enable it and
    # disable the earlier entry instead.
    # - name: "国漫二次元 - 长腿美女"
    #   url: "https://mp.weixin.qq.com/mp/appmsgalbum?action=getalbum&__biz=MzIzNDE5NDA3Nw==&scene=1&album_id=2616307804093874177&count=3#wechat_redirect"

    - name: "静思有我 - #静思杂谈图文"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?action=getalbum&__biz=MzkwMDM0NTk3Ng==&scene=1&album_id=2357033074712297473&count=3#wechat_redirect"
    - name: "连岳 - #下周很重要"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?action=getalbum&__biz=MjM5NDU0Mjk2MQ==&scene=1&album_id=1365851405604749313&count=3#wechat_redirect"
    - name: "CVer - #高校"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?action=getalbum&__biz=MzUxNjcxMjQxNg==&scene=1&album_id=2328557090577186818&count=3#wechat_redirect"
    - name: "云头条 - #云头条"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?action=getalbum&__biz=MzI3OTU0MDMxOA==&scene=1&album_id=2412687289320767490&count=3#wechat_redirect"
    - name: "Linux公社 - #Linux"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MjM5NDEwNzc0MQ==&action=getalbum&album_id=1790011738583515139&scene=173&from_msgid=2650963129&from_itemidx=1&count=3&nolastread=1#wechat_redirect"
    - name: "一读 - #周末考"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?__biz=OTE4MzAyODYx&action=getalbum&album_id=1816784172577849348&scene=173&from_msgid=2652325582&from_itemidx=1&count=3&nolastread=1#wechat_redirect"
    - name: "杜蕾斯 - #原创"
      url: "https://mp.weixin.qq.com/mp/appmsgalbum?action=getalbum&__biz=MjM5MDAxMTQ0MA==&scene=1&album_id=2599225411608510465&count=3#wechat_redirect"
# Crawl settings.
converge:
  path:
    # Base directory where crawled content is stored.
    # NOTE(review): absolute, user-specific path — confirm it is valid on the
    # machine that runs the crawler.
    converge_to: "/Users/adamhuan/adamhuan_data/wechat_mp_data"
    # Sub-directory suffix used for a page's resources.
    # NOTE(review): "assset" is spelled with three s's. It looks like a typo,
    # but it is left unchanged because existing directories may already use
    # this suffix — confirm before renaming.
    page_resource_suffix: "___assset"
  # NOTE(review): `file` is assumed to be a sibling of `path` under
  # `converge`; confirm against the code that loads this file.
  file:
    # File names; judging by the keys they presumably record which tags and
    # articles have already been saved — verify against the crawler.
    total_tag_already_save_as: "total_tag_already_save_as.conf"
    total_article_already_save_as: "total_article_already_save_as.conf"