diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1a092263e5..a15566df98 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -31,6 +31,12 @@ Consider all sites to be NSFW unless otherwise known.
", ""))) + return data + + +class _4archiveBoardExtractor(Extractor): + """Extractor for 4archive boards""" + category = "4archive" + subcategory = "board" + root = "https://4archive.org" + pattern = r"(?:https?://)?4archive\.org/board/([^/?#]+)(?:/(\d+))?/?$" + example = "https://4archive.org/board/a/" + + def __init__(self, match): + Extractor.__init__(self, match) + self.board = match.group(1) + self.num = text.parse_int(match.group(2), 1) + + def items(self): + data = {"_extractor": _4archiveThreadExtractor} + while True: + url = "{}/board/{}/{}".format(self.root, self.board, self.num) + page = self.request(url).text + if 'class="thread"' not in page: + return + for thread in text.extract_iter(page, 'class="thread" id="t', '"'): + url = "{}/board/{}/thread/{}".format( + self.root, self.board, thread) + yield Message.Queue, url, data + self.num += 1 diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 1c1473a0cf..22e4fe3412 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -15,6 +15,7 @@ "35photo", "3dbooru", "4chan", + "4archive", "4chanarchives", "500px", "8chan", diff --git a/test/results/4archive.py b/test/results/4archive.py new file mode 100644 index 0000000000..ec90b92919 --- /dev/null +++ b/test/results/4archive.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. 
import datetime

# The module name starts with a digit, so it cannot be imported with a
# regular 'import' statement; __import__() returns the top-level package
# and the submodule is then fetched by name.
gallery_dl = __import__("gallery_dl.extractor.4archive")
_4archive = getattr(gallery_dl.extractor, "4archive")


__tests__ = (
    # thread URL, including expectations for the per-file metadata
    {
        "#url"     : "https://4archive.org/board/u/thread/2397221",
        "#category": ("", "4archive", "thread"),
        "#class"   : _4archive._4archiveThreadExtractor,
        "#pattern" : r"https://i\.imgur\.com/\w{7}\.\w+$",
        "#count"   : 16,

        "board" : "u",
        "com"   : str,
        "date"  : datetime.datetime,
        "name"  : "Anonymous",
        "no"    : range(2397221, 2418158),
        "thread": 2397221,
        "time"  : int,
        "title" : "best anime",
        "url"   : str,
        "width" : int,
        "height": int,
        "size"  : int,
    },

    # second thread URL, checked by URL pattern and result count only
    {
        "#url"     : "https://4archive.org/board/jp/thread/17611798",
        "#category": ("", "4archive", "thread"),
        "#class"   : _4archive._4archiveThreadExtractor,
        "#pattern" : r"https://i\.imgur\.com/\w{7}\.\w+$",
        "#count"   : 85,
    },

    # board URL without an explicit page number
    {
        "#url"     : "https://4archive.org/board/u",
        "#category": ("", "4archive", "board"),
        "#class"   : _4archive._4archiveBoardExtractor,
        "#pattern" : _4archive._4archiveThreadExtractor.pattern,
        "#range"   : "1-20",
        "#count"   : 20,
    },

    # board URL with a trailing number after the board name
    {
        "#url"     : "https://4archive.org/board/jp/10",
        "#category": ("", "4archive", "board"),
        "#class"   : _4archive._4archiveBoardExtractor,
        "#pattern" : _4archive._4archiveThreadExtractor.pattern,
        "#range"   : "1-50",
        "#count"   : 50,
    },
)