-
Notifications
You must be signed in to change notification settings - Fork 1
/
wyyComments.py
185 lines (161 loc) · 8.47 KB
/
wyyComments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import os
from bs4 import BeautifulSoup
import urllib3
from Crypto.Cipher import AES
from urllib.request import urlopen
import random
import base64
import codecs
import math
import requests
import json
import time
import re
class WyyReptile(object):
url = ''
filePath = ''
headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive',
'Cookie': '[email protected]:-1:1; mail_psc_fingerprint=5ad9e89f6d01b06d4000931415d2947e; _iuqxldmzr_=32; _ntes_nnid=fc752d0b5f3d887efbd891792ff554f4,1548905023326; _ntes_nuid=fc752d0b5f3d887efbd891792ff554f4; WM_TID=qBPZxl7vSjhBVAFBFBMpkZ0D6e7jt2KV; usertrack=CrHubFxVh6tJuSgdA14UAg==; __remember_me=true; [email protected]|1554033066|0|mail163|00&99|CN&1553910313&cloudmusic#CN&null#10#0#0|178405&0|cloudmusic|[email protected]; WM_NI=vYDcbUctPVLO9AfJI7h%2BZ4w%2FyDSbuBRswJxiwh3mKaNNOvrY5VczdXAp0FD5aseeQF7cb5Wq2ZRw2oC0T%2B%2BdvF4Il%2BIvNDwvv8IQqY3JgZyTVUgoZb0xof%2FYy5bvQJuaWmU%3D; WM_NIKE=9ca17ae2e6ffcda170e2e6ee89f553bb8aa193b58089968ba3c14e938a9f85ee64b3b39fa4c66badf0fc83c72af0fea7c3b92ab6878c84e15fa1aca0b7cf45fb9faaabcc3dacf596d5f625bb9c9d83f84a92adb9abfb63b5b8fd8eca6b8e928197f153f78aad9be75483b000a7c73fa3adab86d44ef5bdaed6d67bf2869e98b16581f09cccaa70b69afcaaef218baa82a5f67eb6e887b2f453f196a9bbfc60968ba990d96af4a8a187ae53bbebae97d442b7bf998ed837e2a3; JSESSIONID-WYYY=ccnGrqOHOfJxiQoxX6o%2Fqjh0VzfdjWetgHB73XbHV%5CIw79fs2cUV5C2t1OaRS1wUTJ2O%5Cb6HtMbVbykKjWTP01XmYdm%2BEmICwB98yvAPtj79vl%5CdxyRR%2FE7z4TJptwGjzUSdxEwE7XTSBSHiEzeHXgwxRegFR0tE87aBfrN%5C0tA9CmGO%3A1554435339497; MUSIC_U=a0c23480081ce6755d0872c35b14300d5c88a78378f3cf7832d89337a4d414dcc78d05d80ad8e128b865c7f5d98da64131b299d667364ed3; __csrf=9b9e37aa268af132c217ed10eb2b46bd',
'Host': 'music.163.com',
'Referer': 'https://music.163.com/',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'}
def __init__(self, url, filePath):
self.url = url
self.filePath = filePath
# 生成16个随机字符
def generate_random_strs(self, length):
string = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
# 控制次数参数i
i = 0
# 初始化随机字符串
random_strs = ""
while i < length:
e = random.random() * len(string)
# 向下取整
e = math.floor(e)
random_strs = random_strs + list(string)[e]
i = i + 1
return random_strs
# AES加密
def AESencrypt(self, msg, key):
# 如果不是16的倍数则进行填充(paddiing)
padding = 16 - len(msg) % 16
# 这里使用padding对应的单字符进行填充
msg = msg + padding * chr(padding)
# 用来加密或者解密的初始向量(必须是16位)
iv = '0102030405060708'
cipher = AES.new(key, AES.MODE_CBC, iv)
# 加密后得到的是bytes类型的数据
encryptedbytes = cipher.encrypt(msg)
# 使用Base64进行编码,返回byte字符串
encodestrs = base64.b64encode(encryptedbytes)
# 对byte字符串按utf-8进行解码
enctext = encodestrs.decode('utf-8')
return enctext
# RSA加密
def RSAencrypt(self, randomstrs, key, f):
# 随机字符串逆序排列
string = randomstrs[::-1]
# 将随机字符串转换成byte类型数据
text = bytes(string, 'utf-8')
seckey = int(codecs.encode(text, encoding='hex'),
16)**int(key, 16) % int(f, 16)
return format(seckey, 'x').zfill(256)
# 获取参数
def get_params(self, page):
# msg也可以写成msg = {"offset":"页面偏移量=(页数-1) * 20", "limit":"20"},offset和limit这两个参数必须有(js)
# limit最大值为100,当设为100时,获取第二页时,默认前一页是20个评论,也就是说第二页最新评论有80个,有20个是第一页显示的
# msg = '{"rid":"R_SO_4_1302938992","offset":"0","total":"True","limit":"100","csrf_token":""}'
# 偏移量
offset = (page-1) * 20
# offset和limit是必选参数,其他参数是可选的,其他参数不影响data数据的生成
msg = '{"offset":' + str(offset) + \
',"total":"True","limit":"20","csrf_token":""}'
key = '0CoJUm6Qyw8W8jud'
f = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
e = '010001'
enctext = self.AESencrypt(msg, key)
# 生成长度为16的随机字符串
i = self.generate_random_strs(16)
# 两次AES加密之后得到params的值
encText = self.AESencrypt(enctext, i)
# RSA加密之后得到encSecKey的值
encSecKey = self.RSAencrypt(i, e, f)
return encText, encSecKey
# Http链接
def _getHttpConnection(self, url, data):
try:
resp = requests.post(url, headers=self.headers, data=data)
respContent = resp._content
if resp.status_code == 200:
return respContent.decode('UTF-8')
except Exception as e:
print('Error: '+e)
# 如果文件存在,便清除重写,如果不存在,创建,写入
def _writeFile(self, songName, content):
localtime = time.strftime('%Y-%m-%d', time.localtime())
folderPath = str(self.filePath+localtime)
folder = os.path.exists(folderPath)
if not folder:
os.makedirs(folderPath)
file = str(folderPath+'/'+songName+'.txt')
if os.path.exists(file):
with open(file, "a+") as f:
f.writelines(str(content))
else:
with open(file, "w+") as f:
f.writelines(str(content))
else:
file = str(folderPath+'/'+songName+'.txt')
if os.path.exists(file):
with open(file, "a+") as f:
f.writelines(str(content))
else:
with open(file, "w+") as f:
f.writelines(str(content))
# 获取歌曲热评
def _getHotComments(self, content):
commentsList = []
paseValue = json.loads(content)
hotCommentList = list(paseValue['hotComments'])
for resStr in hotCommentList:
commentsList.append(resStr['content'])
return commentsList
# 获取每日推荐的歌曲列表
def _getRecommendSongsId(self, pasePageJson):
songIdDict = {}
songsJson = json.loads(pasePageJson)
songsArray = list(songsJson['recommend'])
for songsInfo in songsArray:
songIdDict[str(songsInfo['name'])] = songsInfo['id']
return songIdDict
def _run(self):
try:
page = 1
params, encSecKey = self.get_params(page)
data = {'params': params, 'encSecKey': encSecKey}
songsIdPage = self._getHttpConnection(self.url, data)
songsIdDict = self._getRecommendSongsId(songsIdPage)
for k in songsIdDict:
songUrl = 'https://music.163.com/weapi/v1/resource/comments/R_SO_4_' + \
str(songsIdDict[k])+'?csrf_token='
print(k+' 正在写入……')
songsComments = self._getHttpConnection(songUrl, data)
comments = self._getHotComments(songsComments)
for res in comments:
try:
self._writeFile(k, str(
res+'\n'+'---------------------------------------'+'\n'))
except:
self._writeFile('ErrorName', str(
res+'\n'+'---------------------------------------'+'\n'))
print(k+' 已完成')
except Exception as e:
print('Error: '+e)
if __name__ == "__main__":
WyyReptile('https://music.163.com/weapi/v2/discovery/recommend/songs?csrf_token=',
'/media/data/VsCodeProject/songsComments/')._run()