From db507e30c7431d4ed7e23c153a044ce1751c2847 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Wed, 6 Mar 2024 02:26:52 +0100
Subject: [PATCH] [pixiv] fix novel text extraction (#5285)

change to '/webview/v2/novel'
since '/v1/novel/text' does not work anymore
---
 gallery_dl/extractor/pixiv.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index b9821f2309..862a7db206 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -650,7 +650,7 @@ def transform_tags(work):
             yield Message.Directory, novel
 
             try:
-                content = self.api.novel_text(novel["id"])["novel_text"]
+                content = self.api.novel_webview(novel["id"])["text"]
             except Exception:
                 self.log.warning("Unable to download novel %s", novel["id"])
                 continue
@@ -663,7 +663,7 @@ def transform_tags(work):
                 illusts = {}
 
                 for marker in text.extract_iter(content, "[", "]"):
-                    if marker.startswith("[jumpuri:If you would like to "):
+                    if marker.startswith("uploadedimage:"):
                         desktop = True
                     elif marker.startswith("pixivimage:"):
                         illusts[marker[11:].partition("-")[0]] = None
@@ -918,6 +918,15 @@ def novel_text(self, novel_id):
         params = {"novel_id": novel_id}
         return self._call("/v1/novel/text", params)
 
+    def novel_webview(self, novel_id):
+        """Query '/webview/v2/novel', parsed by '_novel_webview_parse'"""
+        args = {"id": novel_id, "viewer_version": "20221031_ai"}
+        return self._call("/webview/v2/novel", args, self._novel_webview_parse)
+
+    def _novel_webview_parse(self, response):
+        """Return the JSON-decoded 'novel: ' value from the response text"""
+        return util.json_loads(text.extr(response.text, "novel: ", ",\n"))
+
     def search_illust(self, word, sort=None, target=None, duration=None,
                       date_start=None, date_end=None):
         params = {"word": word, "search_target": target,
@@ -962,13 +971,17 @@ def ugoira_metadata(self, illust_id):
         params = {"illust_id": illust_id}
         return self._call("/v1/ugoira/metadata", params)["ugoira_metadata"]
 
-    def _call(self, endpoint, params=None):
+    def _call(self, endpoint, params=None, parse=None):
         url = "https://app-api.pixiv.net" + endpoint
 
         while True:
             self.login()
             response = self.extractor.request(url, params=params, fatal=False)
-            data = response.json()
+
+            if parse:
+                data = parse(response)
+            else:
+                data = response.json()
 
             if "error" not in data:
                 return data