diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e810f42215..301cdfbcf5 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -973,6 +973,12 @@ Consider all listed sites to potentially be NSFW. Comics, Episodes + + WebtoonXYZ + https://www.webtoon.xyz/ + Chapters, Manga + + Weibo https://www.weibo.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index d624736211..17dbc41718 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -174,6 +174,7 @@ "weasyl", "webmshare", "webtoons", + "webtoonxyz", "weibo", "wikiart", "wikifeet", diff --git a/gallery_dl/extractor/webtoonxyz.py b/gallery_dl/extractor/webtoonxyz.py new file mode 100644 index 0000000000..29825eb8f1 --- /dev/null +++ b/gallery_dl/extractor/webtoonxyz.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://www.webtoon.xyz/""" + +from .common import ChapterExtractor, MangaExtractor +from .. 
class WebtoonxyzBase():
    """Base class for Webtoon.xyz extractors"""
    category = "webtoonxyz"
    root = "https://www.webtoon.xyz"

    @staticmethod
    def parse_chapter_string(chapter_string, data):
        """Parse a chapter heading and store its parts in 'data'

        Headings are expected to look like
        "[MANGA - ]Chapter N[.M][ - TITLE]" (HTML entities allowed).
        The 'manga', 'chapter', 'chapter_minor', 'title', 'lang', and
        'language' keys of 'data' are set in place; nothing is returned.
        A 'manga' value already present in 'data' takes precedence over
        the one found in the heading.

        A heading without a "Chapter N" part yields chapter 0 and keeps
        the whole heading as its title.
        """
        heading = text.unescape(chapter_string).strip()
        match = re.match(
            r"(?:(.+)\s*-\s*)?[Cc]hapter\s*(\d+)(\.\d+)?(?:\s*-\s*(.+))?",
            heading)

        if match:
            manga, chapter, minor, title = match.groups()
        else:
            # re.match() returns None for headings in an unexpected
            # format; fall back to neutral values instead of crashing
            manga = chapter = minor = None
            title = heading

        manga = manga.strip() if manga else ""
        # pop + re-assign keeps a caller-supplied 'manga' value
        data["manga"] = data.pop("manga", manga)
        data["chapter"] = text.parse_int(chapter) if chapter else 0
        data["chapter_minor"] = minor or ""
        data["title"] = title or ""
        data["lang"] = "en"
        data["language"] = "English"

', "

") + if not info: + raise exception.NotFoundError("chapter") + self.parse_chapter_string(info, data) + return data + + def images(self, page): + page = text.extr( + page, '
', '
"): + url , pos = text.extract(chapter, '", "", pos) + self.parse_chapter_string(info, data) + result.append((url, data.copy())) + return result + + def metadata(self, page): + extr = text.extract_from(text.extr( + page, 'class="summary_content">', 'class="manga-action"')) + return { + "manga" : text.extr(page, "

", "

").strip(), + "description": text.unescape(text.remove_html(text.extract( + page, ">", "
", page.index("summary__content"))[0])), + "rating" : text.parse_float( + extr('total_votes">', "").strip()), + "manga_alt" : text.remove_html( + extr("Alternative \n
", "")).split("; "), + "author" : list(text.extract_iter( + extr('class="author-content">', ""), '"tag">', "")), + "artist" : list(text.extract_iter( + extr('class="artist-content">', ""), '"tag">', "")), + "genres" : list(text.extract_iter( + extr('class="genres-content">', ""), '"tag">', "")), + "type" : text.remove_html( + extr("Type \n", "")), + "release" : text.parse_int(text.remove_html( + extr("Release \n", ""))), + "status" : text.remove_html( + extr("Status \n", "")), + } diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 50b6e5d8ce..9e9cf9f67d 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -140,6 +140,7 @@ "wallpapercave" : "Wallpaper Cave", "webmshare" : "webmshare", "webtoons" : "Webtoon", + "webtoonxyz" : "Webtoon.xyz", "wikiart" : "WikiArt.org", "wikimediacommons": "Wikimedia Commons", "xbunkr" : "xBunkr", diff --git a/test/results/webtoonxyz.py b/test/results/webtoonxyz.py new file mode 100644 index 0000000000..9f0d40eea5 --- /dev/null +++ b/test/results/webtoonxyz.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. 

from gallery_dl.extractor import webtoonxyz
from gallery_dl import exception


# Expected results for the Webtoon.xyz extractors.
# '#'-prefixed keys describe the test itself (URL, extractor class,
# URL pattern, result count, expected exception); all other keys are
# metadata values the extractor is expected to produce.
__tests__ = (
{
    "#url"     : "https://www.webtoon.xyz/read/the-world-after-the-end/chapter-105/",
    "#category": ("", "webtoonxyz", "chapter"),
    "#class"   : webtoonxyz.WebtoonxyzChapterExtractor,
    "#pattern" : r"https://www\.webtoon\.xyz/wp-content/uploads/WP-manga/data/manga_[^/]+/[^/]+/[^.]+\.\w+",
    "#count"   : 11,

    "manga"    : "The World After The End",
    "title"    : "",
    "chapter"  : 105,
    "lang"     : "en",
    "language" : "English",
},

{
    "#url"     : "https://www.webtoon.xyz/read/the-world-after-the-end/chapter-1000000/",
    "#category": ("", "webtoonxyz", "chapter"),
    "#class"   : webtoonxyz.WebtoonxyzChapterExtractor,
    "#exception": exception.NotFoundError,
},

{
    "#url"     : "https://www.webtoon.xyz/read/the-world-after-the-end/",
    "#category": ("", "webtoonxyz", "manga"),
    "#class"   : webtoonxyz.WebtoonxyzMangaExtractor,
    # chapter URLs must belong to the manga requested in '#url'
    "#pattern" : r"https://www\.webtoon\.xyz/read/the-world-after-the-end/chapter-\d+([_-].+)?/",
    "#count"   : ">= 13",

    "manga"    : "The World After The End",
    "author"   : ["S-Cynaan", "Sing Shong"],
    "artist"   : ["Undead Potato"],
    "genres"   : [
        "Action",
        "Adventure",
        "Fantasy",
    ],
    "rating"   : float,
    "status"   : "OnGoing",
    "lang"     : "en",
    "language" : "English",
    "manga_alt": list,
},

{
    "#url"     : "https://www.webtoon.xyz/read/doesnotexist",
    "#category": ("", "webtoonxyz", "manga"),
    "#class"   : webtoonxyz.WebtoonxyzMangaExtractor,
    "#exception": exception.HttpError,
},

)