From 1d75c8308c02297de034b8122124d8f3f7749855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 10 Jan 2025 22:08:01 +0100 Subject: [PATCH] [weebcentral] add support (#6778) --- docs/configuration.rst | 1 + docs/gallery-dl.conf | 7 +- docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/weebcentral.py | 136 ++++++++++++++++++++++++++++ scripts/supportedsites.py | 1 + test/results/weebcentral.py | 78 ++++++++++++++++ 7 files changed, 229 insertions(+), 1 deletion(-) create mode 100644 gallery_dl/extractor/weebcentral.py create mode 100644 test/results/weebcentral.py diff --git a/docs/configuration.rst b/docs/configuration.rst index 11d381672e..6338315350 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -400,6 +400,7 @@ Default ``soundgasm``, ``urlgalleries``, ``vk``, + ``weebcentral``, ``zerochan`` * ``"1.0-2.0"`` ``flickr``, diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 512cd0325e..bef2043fd7 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -435,7 +435,8 @@ { "cookies": null, - "files" : ["images", "image_large", "attachments", "postfile", "content"] + "files" : ["images", "image_large", "attachments", "postfile", "content"], + "format-images": "download_url" }, "pillowfort": { @@ -701,6 +702,10 @@ "api-key" : null, "metadata": false }, + "weebcentral": + { + "sleep-request": "0.5-1.5" + }, "weibo": { "sleep-request": "1.0-2.0", diff --git a/docs/supportedsites.md b/docs/supportedsites.md index ea18f1f06c..3cf97d9a88 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1051,6 +1051,12 @@ Consider all listed sites to potentially be NSFW. 
Comics, Episodes + + Weeb Central + https://weebcentral.com/ + Chapters, Manga + + Weibo https://www.weibo.com/
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index d003a61a2a..89a991e5af 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -190,6 +190,7 @@
     "weasyl",
     "webmshare",
     "webtoons",
+    "weebcentral",
     "weibo",
     "wikiart",
     "wikifeet",
diff --git a/gallery_dl/extractor/weebcentral.py b/gallery_dl/extractor/weebcentral.py
new file mode 100644
index 0000000000..39f998a827
--- /dev/null
+++ b/gallery_dl/extractor/weebcentral.py
@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://weebcentral.com/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+from ..cache import memcache
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?weebcentral\.com"
+
+
+class WeebcentralBase():
+    """Settings and series-metadata parsing shared by all extractors"""
+    category = "weebcentral"
+    root = "https://weebcentral.com"
+    # same range as the "sleep-request" value in docs/gallery-dl.conf
+    request_interval = (0.5, 1.5)
+
+    @memcache(keyarg=1)
+    def _extract_manga_data(self, manga_id):
+        """Fetch the series page of 'manga_id' and return its metadata
+
+        Wrapped in memcache(keyarg=1): the returned dict is presumably
+        shared by all calls with the same ID, so add keys to a copy only.
+        """
+        url = "{}/series/{}".format(self.root, manga_id)
+        page = self.request(url).text
+        extr = text.extract_from(page)
+
+        # NOTE: extr() appears to continue from where its previous call
+        # stopped, so the fields below follow their order on the page;
+        # confirm before reordering them
+        return {
+            "manga_id": manga_id,
+            "lang"    : "en",
+            "language": "English",
+            # an empty start marker would match immediately and put all
+            # markup preceding the page title into "manga"
+            "manga"   : text.unescape(extr("<title>", " | Weeb Central")),
+            # [1::2] presumably skips label and separator fragments
+            "author"  : text.split_html(extr("<strong>Author", "</li>"))[1::2],
+            "tags"    : text.split_html(extr("<strong>Tag", "</li>"))[1::2],
+            "type"    : text.remove_html(extr("<strong>Type: ", "</li>")),
+            "status"  : text.remove_html(extr("<strong>Status: ", "</li>")),
+            "release" : text.remove_html(extr("<strong>Released: ", "</li>")),
+            "official": ">Yes" in extr("<strong>Official Translatio", "</li>"),
+            "description": text.unescape(text.remove_html(extr(
+                "<strong>Description", "</li>"))),
+        }
+
+
+class WeebcentralChapterExtractor(WeebcentralBase, ChapterExtractor):
+    """Extractor for manga chapters from weebcentral.com"""
+    pattern = BASE_PATTERN + r"(/chapters/(\w+))"
+    example = "https://weebcentral.com/chapters/01JHABCDEFGHIJKLMNOPQRSTUV"
+
+    def metadata(self, page):
+        """Return series metadata combined with this chapter's fields"""
+        extr = text.extract_from(page)
+        manga_id = extr("'series_id': '", "'")
+
+        # work on a copy: _extract_manga_data() is wrapped by memcache,
+        # and chapter fields written into its result would presumably
+        # reappear in every later lookup of the same series
+        data = self._extract_manga_data(manga_id).copy()
+        data["chapter_id"] = self.groups[1]
+        data["chapter_type"] = extr("'chapter_type': '", "'")
+
+        # "67.5" -> chapter 67, chapter_minor ".5"
+        chapter, sep, minor = extr("'number': '", "'").partition(".")
+        data["chapter"] = text.parse_int(chapter)
+        data["chapter_minor"] = sep + minor
+
+        return data
+
+    def images(self, page):
+        """Return a list of (image URL, {"width", "height"}) tuples"""
+        referer = self.gallery_url
+        url = referer + "/images"
+        params = {
+            "is_prev"      : "False",
+            "current_page" : "1",
+            "reading_style": "long_strip",
+        }
+        # HX-* are the request headers htmx adds to its own requests
+        headers = {
+            "Accept"        : "*/*",
+            "Referer"       : referer,
+            "HX-Request"    : "true",
+            "HX-Current-URL": referer,
+        }
+        page = self.request(url, params=params, headers=headers).text
+        extr = text.extract_from(page)
+
+        results = []
+        while True:
+            src = extr(' src="', '"')
+            if not src:
+                break
+            results.append((src, {
+                "width" : text.parse_int(extr(' width="', '"')),
+                "height": text.parse_int(extr(' height="', '"')),
+            }))
+        return results
+
+
+class WeebcentralMangaExtractor(WeebcentralBase, MangaExtractor):
+    """Extractor for manga from weebcentral.com"""
+    chapterclass = WeebcentralChapterExtractor
+    pattern = BASE_PATTERN + r"/series/(\w+)"
+    example = "https://weebcentral.com/series/01J7ABCDEFGHIJKLMNOPQRSTUV/TITLE"
+
+    def __init__(self, match):
+        # NOTE(review): 'False' presumably stops the base class from
+        # fetching the series URL itself - confirm
+        MangaExtractor.__init__(self, match, False)
+
+    def chapters(self, _):
+        """Return a list of (chapter URL, metadata) tuples for a series"""
+        manga_id = self.groups[0]
+        referer = "{}/series/{}".format(self.root, manga_id)
+        url = referer + "/full-chapter-list"
+        headers = {
+            "Accept"        : "*/*",
+            "Referer"       : referer,
+            "HX-Request"    : "true",
+            "HX-Target"     : "chapter-list",
+            "HX-Current-URL": referer,
+        }
+        page = self.request(url, headers=headers).text
+        extr = text.extract_from(page)
+        data = self._extract_manga_data(manga_id)
+        base = self.root + "/chapters/"
+
+        results = []
+        while True:
+            chapter_id = extr("/chapters/", '"')
+            if not chapter_id:
+                break
+            # e.g. "Navigation 67.5" -> type "Navigation", number "67.5"
+            chapter_type, _, number = extr(
+                '<span class="">', "<").partition(" ")
+            number, sep, minor = number.partition(".")
+
+            chapter = {
+                "chapter_id"   : chapter_id,
+                "chapter"      : text.parse_int(number),
+                "chapter_minor": sep + minor,
+                "chapter_type" : chapter_type,
+                # drops the last 5 characters, presumably ".000Z"
+                "date"         : text.parse_datetime(
+                    extr(' datetime="', '"')[:-5], "%Y-%m-%dT%H:%M:%S"),
+            }
+            chapter.update(data)
+            results.append((base + chapter_id, chapter))
+        return results
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 704f3a91a5..3ef7e9e0e0 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -155,6 +155,7 @@
     "wallpapercave"   : "Wallpaper Cave",
     "webmshare"       : "webmshare",
     "webtoons"        : "Webtoon",
+    "weebcentral"     : "Weeb Central",
     "wikiart"         : "WikiArt.org",
     "wikigg"          : "wiki.gg",
     "wikimediacommons": "Wikimedia Commons",
diff --git a/test/results/weebcentral.py b/test/results/weebcentral.py
new file mode 100644
index 0000000000..aef9903e0a
--- /dev/null
+++ b/test/results/weebcentral.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import weebcentral
+
+
+__tests__ = (  # test cases, one per weebcentral extractor class
+{
+    "#url"     : "https://weebcentral.com/chapters/01J76XZ4PC3VW91BYFBQJA44C3",
+    "#class"   : weebcentral.WeebcentralChapterExtractor,
+    "#pattern" : r"https://official\.lowee\.us/manga/Aria/0067\.5-0\d\d\.png",
+    "#count"   : 17,
+
+    "author"       : ["AMANO Kozue"],
+    "chapter"      : 67,
+    "chapter_id"   : "01J76XZ4PC3VW91BYFBQJA44C3",  # ID from "#url"
+    "chapter_minor": ".5",  # i.e. chapter 67.5
+    "chapter_type" : "Navigation",
+    "count"        : 17,
+    "description"  : "On the planet Aqua, a world once known as Mars, Mizunashi Akari has just made her home in the town of Neo-VENEZIA, a futuristic imitation of the ancient city of Venice. The technology of \"Man Home\" (formerly Earth) has not entirely reached this planet, and Akari is alone, having no contact with family or friends. Nonetheless, the town, with its charming labyrinths of rivers and canals, becomes Akari's new infatuation, along with the dream of becoming a full-fledged gondolier. Reverting to a more \"primitive\" lifestyle and pursuing a new trade, the character of Akari becomes both adventurous and heartwarming all at once.",
+    "extension"    : "png",
+    "filename"     : r"re:0067\.5-0\d\d",  # presumably matched as a regex
+    "width"        : {1129, 2133},  # presumably: any one of these values
+    "height"       : {1511, 1600},
+    "lang"         : "en",
+    "language"     : "English",
+    "manga"        : "Aria",
+    "manga_id"     : "01J76XY8G1GK8EJ9VQG92C3DKM",
+    "official"     : True,
+    "page"         : range(1, 17),
+    "release"      : "2002",
+    "status"       : "Complete",
+    "type"         : "Manga",
+    "tags"         : [
+        "Adventure",
+        "Comedy",
+        "Drama",
+        "Sci-fi",
+        "Shounen",
+        "Slice of Life",
+    ],
+},
+
+{
+    "#url"     : "https://weebcentral.com/series/01J76XY8G1GK8EJ9VQG92C3DKM/Aria",
+    "#class"   : weebcentral.WeebcentralMangaExtractor,
+    "#pattern" : weebcentral.WeebcentralChapterExtractor.pattern,  # results are chapter URLs
+    "#count"   : 75,
+
+    "author"       : ["AMANO Kozue"],
+    "chapter"      : range(1, 70),
+    "chapter_id"   : r"re:01J\w{23}",  # presumably matched as a regex
+    "chapter_minor": {"", ".5"},
+    "chapter_type" : "Navigation",
+    "date"         : "type:datetime",  # presumably a type check, not a literal
+    "description"  : "On the planet Aqua, a world once known as Mars, Mizunashi Akari has just made her home in the town of Neo-VENEZIA, a futuristic imitation of the ancient city of Venice. The technology of \"Man Home\" (formerly Earth) has not entirely reached this planet, and Akari is alone, having no contact with family or friends. Nonetheless, the town, with its charming labyrinths of rivers and canals, becomes Akari's new infatuation, along with the dream of becoming a full-fledged gondolier. Reverting to a more \"primitive\" lifestyle and pursuing a new trade, the character of Akari becomes both adventurous and heartwarming all at once.",
+    "lang"         : "en",
+    "language"     : "English",
+    "manga"        : "Aria",
+    "manga_id"     : "01J76XY8G1GK8EJ9VQG92C3DKM",  # ID from "#url"
+    "official"     : True,
+    "release"      : "2002",
+    "status"       : "Complete",
+    "type"         : "Manga",
+    "tags"         : [
+        "Adventure",
+        "Comedy",
+        "Drama",
+        "Sci-fi",
+        "Shounen",
+        "Slice of Life",
+    ],
+},
+
+)