[pixiv] fix novel text extraction (#5285)

Switch to the '/webview/v2/novel' endpoint, since '/v1/novel/text' no longer works.
mikf · Mar 6, 2024 · db507e3 · db507e3
1 parent 9fd851c
commit db507e3
Showing 1 changed file with 17 additions and 4 deletions.
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
@@ -650,7 +650,7 @@ def transform_tags(work):
             yield Message.Directory, novel
 
             try:
-                content = self.api.novel_text(novel["id"])["novel_text"]
+                content = self.api.novel_webview(novel["id"])["text"]
             except Exception:
                 self.log.warning("Unable to download novel %s", novel["id"])
                 continue
@@ -663,7 +663,7 @@ def transform_tags(work):
                 illusts = {}
 
                 for marker in text.extract_iter(content, "[", "]"):
-                    if marker.startswith("[jumpuri:If you would like to "):
+                    if marker.startswith("uploadedimage:"):
                         desktop = True
                     elif marker.startswith("pixivimage:"):
                         illusts[marker[11:].partition("-")[0]] = None
@@ -918,6 +918,15 @@ def novel_text(self, novel_id):
         params = {"novel_id": novel_id}
         return self._call("/v1/novel/text", params)
 
+    def novel_webview(self, novel_id):
+        params = {"id": novel_id, "viewer_version": "20221031_ai"}
+        return self._call(
+            "/webview/v2/novel", params, self._novel_webview_parse)
+
+    def _novel_webview_parse(self, response):
+        return util.json_loads(text.extr(
+            response.text, "novel: ", ",\n"))
+
     def search_illust(self, word, sort=None, target=None, duration=None,
                       date_start=None, date_end=None):
         params = {"word": word, "search_target": target,
@@ -962,13 +971,17 @@ def ugoira_metadata(self, illust_id):
         params = {"illust_id": illust_id}
         return self._call("/v1/ugoira/metadata", params)["ugoira_metadata"]
 
-    def _call(self, endpoint, params=None):
+    def _call(self, endpoint, params=None, parse=None):
         url = "https://app-api.pixiv.net" + endpoint
 
         while True:
             self.login()
             response = self.extractor.request(url, params=params, fatal=False)
-            data = response.json()
+
+            if parse:
+                data = parse(response)
+            else:
+                data = response.json()
 
             if "error" not in data:
                 return data