Skip to content

Commit

Permalink
新增 livePhoto 文件下载功能
Browse files Browse the repository at this point in the history
  • Loading branch information
JoeanAmier committed Jun 8, 2024
1 parent bd0780c commit 54b7cf8
Show file tree
Hide file tree
Showing 16 changed files with 123 additions and 85 deletions.
17 changes: 8 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
<li>✅ 采集小红书作品信息</li>
<li>✅ 提取小红书作品下载地址</li>
<li>✅ 下载小红书无水印作品文件</li>
<li>✅ 下载小红书 livePhoto 文件(非无水印)</li>
<li>✅ 自动跳过已下载的作品文件</li>
<li>✅ 作品文件完整性处理机制</li>
<li>✅ 自定义图文作品文件下载格式</li>
Expand Down Expand Up @@ -148,8 +149,7 @@ async def example():
# 实例对象
work_path = "D:\\" # 作品数据/文件保存根路径,默认值:项目根路径
folder_name = "Download" # 作品文件储存文件夹名称(自动创建),默认值:Download
user_agent = "" # 请求头 User-Agent,可选参数
cookie = "" # 小红书网页版 Cookie,无需登录,必需参数
cookie = "" # 小红书网页版 Cookie,无需登录,必需参数,登录状态对数据采集有影响
proxy = None # 网络代理
timeout = 5 # 请求数据超时限制,单位:秒,默认值:10
chunk = 1024 * 1024 * 10 # 下载文件时,每次从服务器获取的数据块大小,单位:字节
Expand All @@ -161,7 +161,6 @@ async def example():
pass # 使用默认参数
async with XHS(work_path=work_path,
folder_name=folder_name,
user_agent=user_agent,
cookie=cookie,
proxy=proxy,
timeout=timeout,
Expand Down Expand Up @@ -211,12 +210,6 @@ async def example():
<td align="center"><code>发布时间 作者昵称 作品标题</code></td>
</tr>
<tr>
<td align="center">user_agent</td>
<td align="center">str</td>
<td align="center">请求头 User-Agent</td>
<td align="center">默认 UA</td>
</tr>
<tr>
<td align="center">cookie</td>
<td align="center">str</td>
<td align="center">小红书网页版 Cookie,<b>无需登录</b></td>
Expand Down Expand Up @@ -271,6 +264,12 @@ async def example():
<td align="center">true</td>
</tr>
<tr>
<td align="center">live_download</td>
<td align="center">bool</td>
<td align="center">图文动图文件下载开关</td>
<td align="center">false</td>
</tr>
<tr>
<td align="center">folder_mode</td>
<td align="center">bool</td>
<td align="center">是否将每个作品的文件储存至单独的文件夹;文件夹名称与文件名称保持一致</td>
Expand Down
Binary file modified locale/en_GB/LC_MESSAGES/xhs.mo
Binary file not shown.
6 changes: 3 additions & 3 deletions locale/en_GB/LC_MESSAGES/xhs.po
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,6 @@ msgstr "Xiaohongshu web version cookie, no login required, parameters have been
msgid "小红书网页版 Cookie,无需登录,参数未设置"
msgstr "Xiaohongshu web version cookie, no login required, parameters not set"

msgid "默认 User-Agent"
msgstr "Default User-Agent"

msgid "不使用代理"
msgstr "No proxy"

Expand Down Expand Up @@ -247,6 +244,9 @@ msgstr "Video works download switch"
msgid "图文作品下载开关"
msgstr "Image works download switch"

msgid "动图文件下载开关"
msgstr "Live photo download switch"

msgid "配置文件 settings.json 缺少必要的参数,请删除该文件,然后重新运行程序,自动生成默认配置文件!"
msgstr "The configuration file settings.json is missing necessary parameters. Please delete the file and run the program again to automatically generate the default configuration file!"

Expand Down
6 changes: 3 additions & 3 deletions locale/zh_CN/LC_MESSAGES/xhs.po
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,6 @@ msgstr ""
msgid "小红书网页版 Cookie,无需登录,参数未设置"
msgstr ""

msgid "默认 User-Agent"
msgstr ""

msgid "不使用代理"
msgstr ""

Expand Down Expand Up @@ -247,6 +244,9 @@ msgstr ""
msgid "图文作品下载开关"
msgstr ""

msgid "动图文件下载开关"
msgstr ""

msgid "配置文件 settings.json 缺少必要的参数,请删除该文件,然后重新运行程序,自动生成默认配置文件!"
msgstr ""

Expand Down
4 changes: 1 addition & 3 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ async def example():
# 实例对象
work_path = "D:\\" # 作品数据/文件保存根路径,默认值:项目根路径
folder_name = "Download" # 作品文件储存文件夹名称(自动创建),默认值:Download
user_agent = "" # 请求头 User-Agent,可选参数
cookie = "" # 小红书网页版 Cookie,无需登录,必需参数
cookie = "" # 小红书网页版 Cookie,无需登录,必需参数,登录状态对数据采集有影响
proxy = None # 网络代理
timeout = 5 # 请求数据超时限制,单位:秒,默认值:10
chunk = 1024 * 1024 * 10 # 下载文件时,每次从服务器获取的数据块大小,单位:字节
Expand All @@ -29,7 +28,6 @@ async def example():
pass # 使用默认参数
async with XHS(work_path=work_path,
folder_name=folder_name,
user_agent=user_agent,
cookie=cookie,
proxy=proxy,
timeout=timeout,
Expand Down
4 changes: 2 additions & 2 deletions source/CLI/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,14 @@ def help_(ctx: Context, param, value) -> None:
("--work_path", "-wp", "str", _("作品数据 / 文件保存根路径")),
("--folder_name", "-fn", "str", _("作品文件储存文件夹名称")),
("--name_format", "-nf", "str", _("作品文件名称格式")),
("--user_agent", "-ua", "str", _("User-Agent")),
("--cookie", "-ck", "str", _("小红书网页版 Cookie,无需登录")),
("--proxy", "-p", "str", _("网络代理")),
("--timeout", "-t", "int", _("请求数据超时限制,单位:秒")),
("--chunk", "-c", "int", _("下载文件时,每次从服务器获取的数据块大小,单位:字节")),
("--max_retry", "-mr", "int", _("请求数据失败时,重试的最大次数")),
("--record_data", "-rd", "bool", _("是否记录作品数据至文件")),
("--image_format", "-if", "choice", _("图文作品文件下载格式,支持:PNG、WEBP")),
("--live_download", "-ld", "bool", _("图文动图文件下载开关")),
("--folder_mode", "-fm", "bool", _("是否将每个作品的文件储存至单独的文件夹")),
("--language", "-l", "choice", _("设置程序语言,目前支持:zh_CN、en_GB")),
("--settings", "-s", "str", _("读取指定配置文件")),
Expand Down Expand Up @@ -163,14 +163,14 @@ def help_(ctx: Context, param, value) -> None:
)
@option("--folder_name", "-fn", )
@option("--name_format", "-nf", )
@option("--user_agent", "-ua", )
@option("--cookie", "-ck", )
@option("--proxy", "-p", )
@option("--timeout", "-t", type=int, )
@option("--chunk", "-c", type=int, )
@option("--max_retry", "-mr", type=int, )
@option("--record_data", "-rd", type=bool, )
@option("--image_format", "-if", type=Choice(["png", "PNG", "webp", "WEBP"]), )
@option("--live_download", "-ld", type=bool, )
@option("--folder_mode", "-fm", type=bool, )
@option("--language", "-l",
type=Choice(["zh_CN", "en_GB"]), )
Expand Down
10 changes: 6 additions & 4 deletions source/TUI/setting.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@ def compose(self) -> ComposeResult:
Label(self.message("作品文件名称格式"), classes="params", ),
Input(self.data["name_format"], placeholder=self.message("发布时间 作者昵称 作品标题"), valid_empty=True,
id="name_format", ),
Label(self.message("User-Agent"), classes="params", ),
Input(self.data["user_agent"], placeholder=self.message("默认 User-Agent"), valid_empty=True,
id="user_agent", ),
Label(self.message("小红书网页版 Cookie"), classes="params", ),
Input(placeholder=self.__check_cookie(), valid_empty=True, id="cookie", ),
Label(self.message("网络代理"), classes="params", ),
Expand All @@ -58,11 +55,16 @@ def compose(self) -> ComposeResult:
Checkbox(self.message("视频作品下载开关"), id="video_download", value=self.data["video_download"], ),
Checkbox(self.message("图文作品下载开关"), id="image_download", value=self.data["image_download"], ),
classes="horizontal-layout"),
Label(),
Container(
Checkbox(self.message("动图文件下载开关"), id="live_download", value=self.data["live_download"], ),
classes="horizontal-layout"),
Container(
Label(self.message("图片下载格式"), classes="params", ),
Label(self.message("程序语言"), classes="params", ),
classes="horizontal-layout",
),
Label(),
Container(
Select.from_values(
("PNG", "WEBP"),
Expand Down Expand Up @@ -95,7 +97,6 @@ def save_settings(self):
"work_path": self.query_one("#work_path").value,
"folder_name": self.query_one("#folder_name").value,
"name_format": self.query_one("#name_format").value,
"user_agent": self.query_one("#user_agent").value,
"cookie": self.query_one("#cookie").value or self.data["cookie"],
"proxy": self.query_one("#proxy").value or None,
"timeout": int(self.query_one("#timeout").value),
Expand All @@ -107,6 +108,7 @@ def save_settings(self):
"language": self.query_one("#language").value,
"image_download": self.query_one("#image_download").value,
"video_download": self.query_one("#video_download").value,
"live_download": self.query_one("#live_download").value,
# "server": False,
})

Expand Down
53 changes: 36 additions & 17 deletions source/TUI/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,26 +40,45 @@ def compose(self) -> ComposeResult:
async def check_update(self) -> None:
try:
url = await self.xhs.html.request_url(RELEASES, False, None, timeout=ClientTimeout(connect=5))
latest_major, latest_minor = map(
int, url.split("/")[-1].split(".", 1))
if latest_major > VERSION_MAJOR or latest_minor > VERSION_MINOR:
tip = Text(f"{self.message("检测到新版本:{0}.{1}").format(
VERSION_MAJOR, VERSION_MINOR)}\n{RELEASES}", style=WARNING)
elif latest_minor == VERSION_MINOR and VERSION_BETA:
tip = Text(
f"{self.message("当前版本为开发版, 可更新至正式版")}\n{RELEASES}",
style=WARNING)
elif VERSION_BETA:
tip = Text(
self.message("当前已是最新开发版"),
style=WARNING)
else:
tip = Text(
self.message("当前已是最新正式版"),
style=INFO)
version = url.split("/")[-1]
match self.compare_versions(f"{VERSION_MAJOR}.{VERSION_MINOR}", version, VERSION_BETA):
case 4:
tip = Text(f"{self.message("检测到新版本:{0}.{1}").format(
VERSION_MAJOR, VERSION_MINOR)}\n{RELEASES}", style=WARNING)
case 3:
tip = Text(
f"{self.message("当前版本为开发版, 可更新至正式版")}\n{RELEASES}",
style=WARNING)
case 2:
tip = Text(
self.message("当前已是最新开发版"),
style=WARNING)
case 1:
tip = Text(
self.message("当前已是最新正式版"),
style=INFO)
case _:
raise ValueError
except ValueError:
tip = Text(self.message("检测新版本失败"), style=ERROR)
self.dismiss(tip)

# Kick off the update check by calling self.check_update().
# Presumably the UI framework's mount hook (inferred from the name) - TODO confirm.
# NOTE(review): check_update is declared `async def` in this file and is called
# here without `await`; this relies on something outside this view (e.g. a
# decorator on check_update) to schedule the coroutine - confirm.
def on_mount(self) -> None:
self.check_update()

@staticmethod
def compare_versions(
        current_version: str,
        target_version: str,
        is_development: bool) -> int:
    """Rank the target version against the current version.

    Both version strings must have the form "<major>.<minor>" with integer
    components; any other shape raises ValueError (from the unpacking or
    from int()).

    Args:
        current_version: Version to compare from, e.g. "2.1".
        target_version: Version to compare against, e.g. "2.2".
        is_development: Selects the result code when both versions are equal.

    Returns:
        4 if target_version is newer than current_version,
        3 if the versions are equal and is_development is true,
        1 if the versions are equal and is_development is false,
        2 if target_version is older than current_version.
    """
    installed_major, installed_minor = (
        int(piece) for piece in current_version.split("."))
    released_major, released_minor = (
        int(piece) for piece in target_version.split("."))
    installed = (installed_major, installed_minor)
    released = (released_major, released_minor)

    # Tuples compare element by element, so the major number dominates and
    # the minor number only breaks ties.
    if released > installed:
        return 4
    if released < installed:
        return 2
    return 3 if is_development else 1
10 changes: 7 additions & 3 deletions source/application/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def __init__(
image_format="PNG",
image_download=True,
video_download=True,
live_download=False,
folder_mode=False,
language="zh_CN",
# server=False,
Expand All @@ -77,7 +78,7 @@ def __init__(
work_path,
folder_name,
name_format,
user_agent,
# user_agent,
chunk,
self.read_browser_cookie(read_cookie) or cookie,
proxy,
Expand All @@ -87,6 +88,7 @@ def __init__(
image_format,
image_download,
video_download,
live_download,
folder_mode,
# server,
self.message,
Expand All @@ -106,7 +108,7 @@ def __init__(
self.site = None

def __extract_image(self, container: dict, data: Namespace):
container["下载地址"] = self.image.get_image_link(
container["下载地址"], container["动图地址"] = self.image.get_image_link(
data, self.manager.image_format)

def __extract_video(self, container: dict, data: Namespace):
Expand All @@ -119,7 +121,8 @@ async def __download_files(self, container: dict, download: bool, index, log, ba
logging(
log, self.message("作品 {0} 存在下载记录,跳过下载").format(i))
else:
path, result = await self.download.run(u, index, name, container["作品类型"], log, bar)
path, result = await self.download.run(u, container["动图地址"], index, name, container["作品类型"],
log, bar)
await self.__add_record(i, result)
elif not u:
logging(log, self.message("提取作品文件下载地址失败"), ERROR)
Expand All @@ -128,6 +131,7 @@ async def __download_files(self, container: dict, download: bool, index, log, ba
async def save_data(self, data: dict, ):
data["采集时间"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
data["下载地址"] = " ".join(data["下载地址"])
data["动图地址"] = " ".join(i or "NaN" for i in data["动图地址"])
await self.data_recorder.add(**data)

async def __add_record(self, id_: str, result: tuple) -> None:
Expand Down
36 changes: 24 additions & 12 deletions source/application/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,21 @@ def __init__(self, manager: Manager, ):
self.message = manager.message
self.folder_mode = manager.folder_mode
self.video_format = "mp4"
self.live_format = "mp4"
self.image_format = manager.image_format
self.image_download = manager.image_download
self.video_download = manager.video_download
self.live_download = manager.live_download

async def run(self, urls: list, index: list | tuple | None, name: str, type_: str, log, bar) -> tuple[Path, tuple]:
async def run(self, urls: list, lives: list, index: list | tuple | None, name: str, type_: str, log, bar) -> tuple[
Path, tuple]:
path = self.__generate_path(name)
match type_:
case "视频":
tasks = self.__ready_download_video(urls, path, name, log)
case "图文":
tasks = self.__ready_download_image(
urls, index, path, name, log)
urls, lives, index, path, name, log)
case _:
raise ValueError
tasks = [
Expand Down Expand Up @@ -73,14 +76,14 @@ def __ready_download_video(
if not self.video_download:
logging(log, self.message("视频作品下载功能已关闭,跳过下载"))
return []
if any(path.glob(f"{name}.*")):
logging(log, self.message("{0} 文件已存在,跳过下载").format(name))
if self.__check_exists(path, f"{name}.{self.video_format}", log):
return []
return [(urls[0], name, self.video_format)]

def __ready_download_image(
self,
urls: list[str],
lives: list[str],
index: list | tuple | None,
path: Path,
name: str,
Expand All @@ -89,21 +92,30 @@ def __ready_download_image(
if not self.image_download:
logging(log, self.message("图文作品下载功能已关闭,跳过下载"))
return tasks
for i, j in enumerate(urls, start=1):
for i, j in enumerate(zip(urls, lives), start=1):
if index and i not in index:
continue
file = f"{name}_{i}"
if any(path.glob(f"{file}.*")):
logging(
log, self.message(
"{0} 文件已存在,跳过下载").format(name))
if not self.__check_exists(
path, f"{file}.{self.image_format}", log):
tasks.append([j[0], file, self.image_format])
if not self.live_download or not j[1] or self.__check_exists(
path, f"{file}.{self.live_format}", log):
continue
tasks.append([j, file, self.image_format])
tasks.append([j[1], file, self.live_format])
return tasks

def __check_exists(self, path: Path, name: str, log, ) -> bool:
    """Report whether the file ``name`` is already present in ``path``.

    Args:
        path: Directory that is searched for the file.
        name: Exact file name, extension included.
        log: Log target passed through to ``logging``.

    Returns:
        True when the file exists (a skip notice is logged first),
        False otherwise.
    """
    # ``name`` is a literal file name, not a pattern. Path.glob() would
    # interpret characters such as "[", "]", "*" and "?" in it as wildcards
    # and could miss an existing file or match a different one, so the exact
    # path is tested instead.
    if not path.joinpath(name).exists():
        return False
    logging(
        log, self.message(
            "{0} 文件已存在,跳过下载").format(name))
    return True

@re_download
async def __download(self, url: str, path: Path, name: str, format_: str, log, bar):
temp = self.temp.joinpath(name)
temp = self.temp.joinpath(f"{name}.{format_}")
try:
async with self.session.get(url, proxy=self.proxy) as response:
if response.status != 200:
Expand All @@ -124,7 +136,7 @@ async def __download(self, url: str, path: Path, name: str, format_: str, log, b
# self.__update_progress(bar, len(chunk))
self.manager.move(temp, real)
# self.__create_progress(bar, None)
logging(log, self.message("文件 {0} 下载成功").format(name))
logging(log, self.message("文件 {0} 下载成功").format(real.name))
return True
except ClientError as error:
self.manager.delete(temp)
Expand Down
Loading

0 comments on commit 54b7cf8

Please sign in to comment.