From db507e30c7431d4ed7e23c153a044ce1751c2847 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Wed, 6 Mar 2024 02:26:52 +0100
Subject: [PATCH] [pixiv] fix novel text extraction (#5285)

change to '/webview/v2/novel' since '/v1/novel/text' does not work anymore
---
 gallery_dl/extractor/pixiv.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index b9821f2309..862a7db206 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -650,7 +650,7 @@ def transform_tags(work):
             yield Message.Directory, novel
 
             try:
-                content = self.api.novel_text(novel["id"])["novel_text"]
+                content = self.api.novel_webview(novel["id"])["text"]
             except Exception:
                 self.log.warning("Unable to download novel %s", novel["id"])
                 continue
@@ -663,7 +663,7 @@ def transform_tags(work):
                 illusts = {}
 
                 for marker in text.extract_iter(content, "[", "]"):
-                    if marker.startswith("[jumpuri:If you would like to "):
+                    if marker.startswith("uploadedimage:"):
                         desktop = True
                     elif marker.startswith("pixivimage:"):
                         illusts[marker[11:].partition("-")[0]] = None
@@ -918,6 +918,15 @@ def novel_text(self, novel_id):
         params = {"novel_id": novel_id}
         return self._call("/v1/novel/text", params)
 
+    def novel_webview(self, novel_id):
+        params = {"id": novel_id, "viewer_version": "20221031_ai"}
+        return self._call(
+            "/webview/v2/novel", params, self._novel_webview_parse)
+
+    def _novel_webview_parse(self, response):
+        return util.json_loads(text.extr(
+            response.text, "novel: ", ",\n"))
+
     def search_illust(self, word, sort=None, target=None, duration=None,
                       date_start=None, date_end=None):
         params = {"word": word, "search_target": target,
@@ -962,13 +971,17 @@ def ugoira_metadata(self, illust_id):
         params = {"illust_id": illust_id}
         return self._call("/v1/ugoira/metadata", params)["ugoira_metadata"]
 
-    def _call(self, endpoint, params=None):
+    def _call(self, endpoint, params=None, parse=None):
         url = "https://app-api.pixiv.net" + endpoint
 
         while True:
             self.login()
             response = self.extractor.request(url, params=params, fatal=False)
-            data = response.json()
+
+            if parse:
+                data = parse(response)
+            else:
+                data = response.json()
 
             if "error" not in data:
                 return data