diff --git a/docs/configuration.rst b/docs/configuration.rst
index 304ddb4f93..257abfbc1a 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -388,6 +388,7 @@ Description
     * ``aibooru`` (*)
     * ``aryion``
     * ``atfbooru`` (*)
+    * ``bluesky``
     * ``danbooru`` (*)
     * ``e621`` (*)
     * ``e926`` (*)
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index e810f42215..869c003d6d 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -121,6 +121,12 @@ Consider all listed sites to potentially be NSFW.
     <td>Collections, Galleries, User Profiles</td>
     <td></td>
 </tr>
+<tr>
+    <td>Bluesky</td>
+    <td>https://bsky.app/</td>
+    <td>Likes, Media Files, Posts, Replies, User Profiles</td>
+    <td>Supported</td>
+</tr>
 <tr>
     <td>Bunkr</td>
     <td>https://bunkr.sk/</td>
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index d624736211..a665249744 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -29,6 +29,7 @@
     "bbc",
     "behance",
     "blogger",
+    "bluesky",
     "bunkr",
     "catbox",
     "chevereto",
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
new file mode 100644
index 0000000000..b271630066
--- /dev/null
+++ b/gallery_dl/extractor/bluesky.py
@@ -0,0 +1,286 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://bsky.app/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+from ..cache import cache, memcache
+
+BASE_PATTERN = r"(?:https?://)?bsky\.app"
+
+
+class BlueskyExtractor(Extractor):
+    """Base class for bluesky extractors"""
+    category = "bluesky"
+    directory_fmt = ("{category}", "{author[handle]}")
+    filename_fmt = "{indexedAt[:19]}_{post_id}_{num}.{extension}"
+    archive_fmt = "{filename}"
+    root = "https://bsky.app"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.handle = match.group(1)
+
+    def _init(self):
+        self.api = BlueskyAPI(self)
+
+    def items(self):
+        """Yield a Directory message per post and a Url message per image"""
+        for post in self.posts():
+            post = post["post"]
+
+            # posts without an image embed produce no files
+            try:
+                images = post["embed"]["images"]
+            except KeyError:
+                images = ()
+
+            post["post_id"] = post["uri"].rpartition("/")[2]
+            post["count"] = len(images)
+            post["date"] = text.parse_datetime(
+                post["indexedAt"][:19], "%Y-%m-%dT%H:%M:%S")
+
+            yield Message.Directory, post
+
+            post["num"] = 0
+            for file in images:
+                post["num"] += 1
+                post["description"] = file["alt"]
+
+                try:
+                    aspect = file["aspectRatio"]
+                    post["width"] = aspect["width"]
+                    post["height"] = aspect["height"]
+                except KeyError:
+                    post["width"] = post["height"] = 0
+
+                # the last URL path segment has the form NAME@EXTENSION
+                url = file["fullsize"]
+                name = url.rpartition("/")[2]
+                post["filename"], _, post["extension"] = name.rpartition("@")
+
+                yield Message.Url, url, post
+
+    def posts(self):
+        """Return an iterable of feed items; overridden by subclasses"""
+        return ()
+
+
+class BlueskyUserExtractor(BlueskyExtractor):
+    """Extractor for a user profile; dispatches to the feed extractors"""
+    subcategory = "user"
+    pattern = BASE_PATTERN + r"/profile/([^/?#]+)$"
+    example = "https://bsky.app/profile/HANDLE"
+
+    def initialize(self):
+        pass
+
+    def items(self):
+        base = "{}/profile/{}/".format(self.root, self.handle)
+        return self._dispatch_extractors((
+            (BlueskyPostsExtractor  , base + "posts"),
+            (BlueskyRepliesExtractor, base + "replies"),
+            (BlueskyMediaExtractor  , base + "media"),
+            (BlueskyLikesExtractor  , base + "likes"),
+        ), ("media",))
+
+    def posts(self):
+        did = self.api.resolve_handle(self.handle)
+        return self.api.get_author_feed(did)
+
+
+class BlueskyPostsExtractor(BlueskyExtractor):
+    """Extractor for the 'posts' feed of a profile"""
+    subcategory = "posts"
+    pattern = BASE_PATTERN + r"/profile/([^/?#]+)/posts"
+    example = "https://bsky.app/profile/HANDLE/posts"
+
+    def posts(self):
+        did = self.api.resolve_handle(self.handle)
+        return self.api.get_author_feed(did, "posts_and_author_threads")
+
+
+class BlueskyRepliesExtractor(BlueskyExtractor):
+    """Extractor for the 'replies' feed of a profile"""
+    subcategory = "replies"
+    pattern = BASE_PATTERN + r"/profile/([^/?#]+)/replies"
+    example = "https://bsky.app/profile/HANDLE/replies"
+
+    def posts(self):
+        did = self.api.resolve_handle(self.handle)
+        return self.api.get_author_feed(did, "posts_with_replies")
+
+
+class BlueskyMediaExtractor(BlueskyExtractor):
+    """Extractor for the 'media' feed of a profile"""
+    subcategory = "media"
+    pattern = BASE_PATTERN + r"/profile/([^/?#]+)/media"
+    example = "https://bsky.app/profile/HANDLE/media"
+
+    def posts(self):
+        did = self.api.resolve_handle(self.handle)
+        return self.api.get_author_feed(did, "posts_with_media")
+
+
+class BlueskyLikesExtractor(BlueskyExtractor):
+    """Extractor for the posts liked by a profile"""
+    subcategory = "likes"
+    pattern = BASE_PATTERN + r"/profile/([^/?#]+)/likes"
+    example = "https://bsky.app/profile/HANDLE/likes"
+
+    def posts(self):
+        did = self.api.resolve_handle(self.handle)
+        return self.api.get_actor_likes(did)
+
+
+class BlueskyPostExtractor(BlueskyExtractor):
+    """Extractor for a single post"""
+    subcategory = "post"
+    pattern = BASE_PATTERN + r"/profile/([^/?#]+)/post/([^/?#]+)"
+    example = "https://bsky.app/profile/HANDLE/post/ID"
+
+    def __init__(self, match):
+        BlueskyExtractor.__init__(self, match)
+        self.post_id = match.group(2)
+
+    def posts(self):
+        did = self.api.resolve_handle(self.handle)
+        return self.api.get_post_thread(did, self.post_id)
+
+
+class BlueskyAPI():
+    """Interface for the Bluesky API
+
+    https://www.docs.bsky.app/docs/category/http-reference
+    """
+
+    def __init__(self, extractor):
+        self.headers = {}
+        self.extractor = extractor
+        self.log = extractor.log
+
+        self.username, self.password = extractor._get_auth_info()
+        if self.username:
+            self.root = "https://bsky.social"
+        else:
+            # no credentials: use api.bsky.app and make authenticate() a no-op
+            self.root = "https://api.bsky.app"
+            self.authenticate = util.noop
+
+    def get_actor_likes(self, actor):
+        endpoint = "app.bsky.feed.getActorLikes"
+        params = {
+            "actor": actor,
+            "limit": "100",
+        }
+        return self._pagination(endpoint, params)
+
+    def get_author_feed(self, actor, filter="posts_and_author_threads"):
+        endpoint = "app.bsky.feed.getAuthorFeed"
+        params = {
+            "actor" : actor,
+            "filter": filter,
+            "limit" : "100",
+        }
+        return self._pagination(endpoint, params)
+
+    def get_post_thread(self, actor, post_id):
+        endpoint = "app.bsky.feed.getPostThread"
+        params = {
+            "uri": "at://{}/app.bsky.feed.post/{}".format(actor, post_id),
+        }
+        return (self._call(endpoint, params)["thread"],)
+
+    def get_profile(self, actor):
+        endpoint = "app.bsky.actor.getProfile"
+        params = {"actor": actor}
+        return self._call(endpoint, params)
+
+    @memcache(keyarg=1)
+    def resolve_handle(self, handle):
+        endpoint = "com.atproto.identity.resolveHandle"
+        params = {"handle": handle}
+        return self._call(endpoint, params)["did"]
+
+    def authenticate(self):
+        self.headers["Authorization"] = self._authenticate_impl(self.username)
+
+    @cache(maxage=3600, keyarg=1)
+    def _authenticate_impl(self, username):
+        """Return an 'Authorization' header value for 'username'
+
+        Refreshes the session if a cached refresh token exists,
+        and logs in with username and password otherwise.
+        """
+        refresh_token = _refresh_token_cache(username)
+
+        if refresh_token:
+            self.log.info("Refreshing access token for %s", username)
+            endpoint = "com.atproto.server.refreshSession"
+            headers = {"Authorization": "Bearer " + refresh_token}
+            data = None
+        else:
+            self.log.info("Logging in as %s", username)
+            endpoint = "com.atproto.server.createSession"
+            headers = None
+            data = {
+                "identifier": username,
+                "password"  : self.password,
+            }
+
+        url = "{}/xrpc/{}".format(self.root, endpoint)
+        response = self.extractor.request(
+            url, method="POST", headers=headers, json=data, fatal=None)
+        data = response.json()
+
+        if response.status_code != 200:
+            self.log.debug("Server response: %s", data)
+            raise exception.AuthenticationError('"{}: {}"'.format(
+                data.get("error"), data.get("message")))
+
+        # refreshSession returns a new refresh token as well,
+        # so store it after refreshing and not only after logging in
+        _refresh_token_cache.update(username, data["refreshJwt"])
+        return "Bearer " + data["accessJwt"]
+
+    def _call(self, endpoint, params):
+        """Send a request to 'endpoint'; wait and retry on HTTP 429"""
+        url = "{}/xrpc/{}".format(self.root, endpoint)
+
+        while True:
+            self.authenticate()
+            response = self.extractor.request(
+                url, params=params, headers=self.headers, fatal=None)
+
+            if response.status_code < 400:
+                return response.json()
+            if response.status_code == 429:
+                self.extractor.wait(seconds=60)
+                continue
+
+            self.extractor.log.debug("Server response: %s", response.text)
+            raise exception.StopExtraction(
+                "API request failed (%s %s)",
+                response.status_code, response.reason)
+
+    def _pagination(self, endpoint, params):
+        """Yield 'feed' items from all pages, following 'cursor' values"""
+        while True:
+            data = self._call(endpoint, params)
+            yield from data["feed"]
+
+            cursor = data.get("cursor")
+            if not cursor:
+                return
+            params["cursor"] = cursor
+
+
+@cache(maxage=84*86400, keyarg=0)
+def _refresh_token_cache(username):
+    return None
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 50b6e5d8ce..68db90e91a 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -174,6 +174,9 @@
     "artstation": {
         "artwork": "Artwork Listings",
     },
+    "bluesky": {
+        "posts": "",
+    },
     "coomerparty": {
         "discord"       : "",
         "discord-server": "",
@@ -339,6 +342,7 @@
     "aryion"         : "Supported",
     "atfbooru"       : "Supported",
     "baraag"         : _OAUTH,
+    "bluesky"        : "Supported",
     "coomerparty"    : "Supported",
     "danbooru"       : "Supported",
     "derpibooru"     : _APIKEY_DB,
diff --git a/test/results/bluesky.py b/test/results/bluesky.py
new file mode 100644
index 0000000000..fe53149096
--- /dev/null
+++ b/test/results/bluesky.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import bluesky
+
+
+__tests__ = (
+{
+    "#url"     : "https://bsky.app/profile/bsky.app/post/3kh5rarr3gn2n",
+    "#category": ("", "bluesky", "post"),
+    "#class"   : bluesky.BlueskyPostExtractor,
+    "#urls"    : "https://cdn.bsky.app/img/feed_fullsize/plain/did:plc:z72i7hdynmk6r22z27h6tvur/bafkreidypzoaybmfj5h7pnpiyct6ng5yae6ydp4czrm72ocg7ev6vbirri@jpeg",
+    "#sha1_content": "c36a27d135277dc08b7bfd289e4078af7b32c720",
+
+    "author": {
+        "avatar"     : "https://cdn.bsky.app/img/avatar/plain/did:plc:z72i7hdynmk6r22z27h6tvur/bafkreihagr2cmvl2jt4mgx3sppwe2it3fwolkrbtjrhcnwjk4jdijhsoze@jpeg",
+        "did"        : "did:plc:z72i7hdynmk6r22z27h6tvur",
+        "displayName": "Bluesky",
+        "handle"     : "bsky.app",
+        "labels"     : [],
+    },
+    "cid"        : "bafyreihh7m6bfrwlcjfklwturmja7qfse5gte7lskpmgw76flivimbnoqm",
+    "count"      : 1,
+    "date"       : "dt:2023-12-22 18:58:32",
+    "description": "The bluesky logo with the blue butterfly",
+    "extension"  : "jpeg",
+    "filename"   : "bafkreidypzoaybmfj5h7pnpiyct6ng5yae6ydp4czrm72ocg7ev6vbirri",
+    "height"     : 630,
+    "indexedAt"  : "2023-12-22T18:58:32.715Z",
+    "labels"     : [],
+    "likeCount"  : int,
+    "num"        : 1,
+    "post_id"    : "3kh5rarr3gn2n",
+    "replyCount" : int,
+    "repostCount": int,
+    "uri"        : "at://did:plc:z72i7hdynmk6r22z27h6tvur/app.bsky.feed.post/3kh5rarr3gn2n",
+    "width"      : 1200,
+},
+
+)