Skip to content

Commit

Permalink
[pixiv] implement workaround for 'limit_sanity_level' works
Browse files Browse the repository at this point in the history
(#4327, #4747, #5054, #5435, #5651, #5655)

Metadata should be ~95% identical (there might be some 'date' differences)
and there could be issues with R-18 works, as these require some URL
manipulation to transform /c/250x250_80_a2/ thumbnail URLs into
/img-original/ ones.
  • Loading branch information
mikf committed Oct 4, 2024
1 parent d1432d0 commit c5be50f
Show file tree
Hide file tree
Showing 3 changed files with 180 additions and 7 deletions.
10 changes: 10 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3525,6 +3525,16 @@ Description
A value of ``0`` means no limit.


extractor.pixiv.sanity
----------------------
Type
``bool``
Default
``true``
Description
Try to fetch ``limit_sanity_level`` works via web API.


extractor.plurk.comments
------------------------
Type
Expand Down
98 changes: 92 additions & 6 deletions gallery_dl/extractor/pixiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ class PixivExtractor(Extractor):
filename_fmt = "{id}_p{num}.{extension}"
archive_fmt = "{id}{suffix}.{extension}"
cookies_domain = None
url_sanity = ("https://s.pximg.net/common/images"
sanity_url = ("https://s.pximg.net/common/images"
"/limit_sanity_level_360.png")

def _init(self):
    """Create the API client and read this extractor's config options."""
    # PixivAppAPI instance constructed with this extractor
    self.api = PixivAppAPI(self)
    # "ugoira" option; defaults to True
    self.load_ugoira = self.config("ugoira", True)
    # "max-posts" option; defaults to 0
    # NOTE(review): presumably 0 means "no limit" -- confirm against docs
    self.max_posts = self.config("max-posts", 0)
    # "sanity" option; defaults to True. When enabled, works that only
    # yield the 'limit_sanity_level' placeholder are fetched via web API.
    self.sanity_workaround = self.config("sanity", True)

def items(self):
tags = self.config("tags", "japanese")
Expand Down Expand Up @@ -102,10 +103,14 @@ def _extract_files(self, work):

elif work["page_count"] == 1:
url = meta_single_page["original_image_url"]
if url == self.url_sanity:
self.log.warning(
"Unable to download work %s ('sanity_level' warning)",
work["id"])
if url == self.sanity_url:
if self.sanity_workaround:
self.log.warning("%s: 'sanity_level' warning", work["id"])
self._extract_ajax(work, files)
else:
self.log.warning(
"%s: Unable to download work ('sanity_level' warning)",
work["id"])
else:
files.append({"url": url})

Expand Down Expand Up @@ -147,13 +152,93 @@ def _extract_ugoira(self, work, files):
"num": num,
"suffix": "_p{:02}".format(num),
"_ugoira_frame_index": num,

}))
else:
files.append({
"url": url.replace("_ugoira600x600", "_ugoira1920x1080", 1),
})

def _extract_ajax(self, work, files):
url = "{}/ajax/illust/{}".format(self.root, work["id"])
data = self.request(url, headers=self.headers_web).json()
body = data["body"]

for key_app, key_ajax in (
("title" , "illustTitle"),
("image_urls" , "urls"),
("caption" , "illustComment"),
("create_date" , "createDate"),
("width" , "width"),
("height" , "height"),
("sanity_level" , "sl"),
("total_view" , "viewCount"),
("total_comments" , "commentCount"),
("total_bookmarks" , "bookmarkCount"),
("restrict" , "restrict"),
("x_restrict" , "xRestrict"),
("illust_ai_type" , "aiType"),
("illust_book_style", "bookStyle"),
):
work[key_app] = body[key_ajax]

work["user"] = {
"account" : body["userAccount"],
"id" : int(body["userId"]),
"is_followed": False,
"name" : body["userName"],
"profile_image_urls": {},
}

work["tags"] = tags = []
for tag in body["tags"]["tags"]:
name = tag["tag"]
try:
translated_name = tag["translation"]["en"]
except Exception:
translated_name = None
tags.append({"name": name, "translated_name": translated_name})

url = self._extract_ajax_url(body)
if not url:
return

work["page_count"] = count = body["pageCount"]
if count == 1:
files.append({"url": url})
else:
base, _, ext = url.rpartition("_p0.")
for num in range(count):
url = "{}_p{}.{}".format(base, num, ext)
files.append({
"url" : url,
"suffix": "_p{:02}".format(num),
})

def _extract_ajax_url(self, body):
try:
original = body["urls"]["original"]
if original:
return original
except KeyError:
pass

try:
square1200 = body["userIllusts"][body["id"]]["url"]
except KeyError:
return
parts = square1200.rpartition("_p0")[0].split("/")
del parts[3:5]
parts[3] = "img-original"
base = "/".join(parts)

for ext in ("jpg", "png", "gif"):
try:
url = "{}_p0.{}".format(base, ext)
self.request(url, method="HEAD")
return url
except exception.HttpError:
pass

@staticmethod
def _date_from_url(url, offset=timedelta(hours=9)):
try:
Expand Down Expand Up @@ -860,6 +945,7 @@ def __init__(self, extractor):
self.username = extractor._get_auth_info()[0]
self.user = None

extractor.headers_web = extractor.session.headers.copy()
extractor.session.headers.update({
"App-OS" : "ios",
"App-OS-Version": "16.7.2",
Expand Down
79 changes: 78 additions & 1 deletion test/results/pixiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,11 +184,88 @@
{
"#url" : "https://www.pixiv.net/artworks/85960783",
"#comment" : "limit_sanity_level_360.png (#4327, #5180)",
"#category": ("", "pixiv", "work"),
"#class" : pixiv.PixivWorkExtractor,
"#options" : {"sanity": False},
"#count" : 0,
},

{
"#url" : "https://www.pixiv.net/en/artworks/102932581",
"#comment" : "limit_sanity_level_360.png (#4327, #5180)",
"#class" : pixiv.PixivWorkExtractor,
"#options" : {"sanity": True},
"#urls" : "https://i.pximg.net/img-original/img/2022/11/20/00/00/49/102932581_p0.jpg",

"caption" : "Meet a deer .",
"comment_access_control": 0,
"create_date" : "2022-11-19T15:00:00+00:00",
"date" : "dt:2022-11-19 15:00:00",
"date_url" : "dt:2022-11-19 15:00:49",
"extension" : "jpg",
"filename" : "102932581_p0",
"height" : 3840,
"id" : 102932581,
"illust_ai_type": 1,
"illust_book_style": 0,
"is_bookmarked" : False,
"is_muted" : False,
"num" : 0,
"page_count" : 1,
"rating" : "General",
"restrict" : 0,
"sanity_level" : 2,
"series" : None,
"suffix" : "",
"title" : "《 Bridge and Deer 》",
"tools" : [],
"total_bookmarks": range(1900, 3000),
"total_comments": range(3, 10),
"total_view" : range(11000, 20000),
"type" : "illust",
"url" : "https://i.pximg.net/img-original/img/2022/11/20/00/00/49/102932581_p0.jpg",
"visible" : False,
"width" : 2160,
"x_restrict" : 0,
"image_urls" : {
"mini" : "https://i.pximg.net/c/48x48/custom-thumb/img/2022/11/20/00/00/49/102932581_p0_custom1200.jpg",
"original": "https://i.pximg.net/img-original/img/2022/11/20/00/00/49/102932581_p0.jpg",
"regular" : "https://i.pximg.net/img-master/img/2022/11/20/00/00/49/102932581_p0_master1200.jpg",
"small" : "https://i.pximg.net/c/540x540_70/img-master/img/2022/11/20/00/00/49/102932581_p0_master1200.jpg",
"thumb" : "https://i.pximg.net/c/250x250_80_a2/custom-thumb/img/2022/11/20/00/00/49/102932581_p0_custom1200.jpg",
},
"tags" : [
"オリジナル",
"風景",
"イラスト",
"illustration",
"美しい",
"女の子",
"少女",
"deer",
"flower",
"spring",
],
"user" : {
"account" : "805482263",
"id" : 7386235,
"is_followed": False,
"name" : "岛的鲸",
"profile_image_urls": {},
},
},

{
"#url" : "https://www.pixiv.net/en/artworks/109487939",
"#comment" : "R-18 limit_sanity_level_360.png (#4327, #5180)",
"#class" : pixiv.PixivWorkExtractor,
"#urls" : [
"https://i.pximg.net/img-original/img/2023/07/01/00/06/28/109487939_p0.png",
"https://i.pximg.net/img-original/img/2023/07/01/00/06/28/109487939_p1.png",
"https://i.pximg.net/img-original/img/2023/07/01/00/06/28/109487939_p2.png",
"https://i.pximg.net/img-original/img/2023/07/01/00/06/28/109487939_p3.png",
],
},

{
"#url" : "https://www.pixiv.net/en/artworks/966412",
"#category": ("", "pixiv", "work"),
Expand Down

0 comments on commit c5be50f

Please sign in to comment.