Skip to content

Commit

Permalink
[weverse] add extractors
Browse files Browse the repository at this point in the history
  • Loading branch information
bradenhilton committed Nov 5, 2023
1 parent 807ddde commit 3dfc3ce
Show file tree
Hide file tree
Showing 4 changed files with 413 additions and 0 deletions.
20 changes: 20 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3599,6 +3599,26 @@ Description
Download video files.


extractor.weverse.access-token
------------------------------
Type
``string``
Default
``null``
Description
Your Weverse account access token.

The token can be found in the ``we2_access_token`` cookie in the
``.weverse.io`` cookie domain after logging in to your account.

An invalid or expired token
will result in ``401 Unauthorized`` errors.

If this option is unset and no ``we2_access_token`` cookie is
available, an extra HTTP request will be sent with your ``username``
and ``password`` to attempt to fetch a new token.


extractor.ytdl.enabled
----------------------
Type
Expand Down
1 change: 1 addition & 0 deletions gallery_dl/extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@
"webmshare",
"webtoons",
"weibo",
"weverse",
"wikiart",
"wikifeet",
"xhamster",
Expand Down
352 changes: 352 additions & 0 deletions gallery_dl/extractor/weverse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,352 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://weverse.io/"""

from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
import binascii
import hashlib
import hmac
import time
import urllib.parse
import uuid

# Site root: optional scheme, optional "m." subdomain, then "weverse.io"
BASE_PATTERN = r"(?:https?://)?(?:m\.)?weverse\.io"
# Site root followed by one captured path segment of word characters
COMMUNITY_PATTERN = BASE_PATTERN + r"/(\w+)"

# Captures one path segment made of lowercase hexadecimal characters
MEMBER_ID_PATTERN = r"/([a-f0-9]+)"
# Captures one path segment of the form "<digit>-<digits>"
POST_ID_PATTERN = r"/(\d-\d+)"


class WeverseExtractor(Extractor):
    """Base class for weverse extractors"""
    category = "weverse"
    cookies_domain = ".weverse.io"
    cookies_names = ("we2_access_token",)
    root = "https://weverse.io"
    filename_fmt = "{filename}.{extension}"
    request_interval = 1.0

    def _init(self):
        """Obtain an access token and create the API wrapper if one exists"""
        self.login()
        if self.access_token:
            self.api = WeverseAPI(self, self.access_token)

    def login(self):
        """Set 'self.access_token'

        The 'access-token' option is used when set. Otherwise the token is
        read from the access token cookie; if that cookie is missing and a
        username is configured, a credential login is attempted first and
        its result is stored as that cookie.
        """
        token = self.config("access-token")
        if token:
            self.access_token = token
            return

        if not self.cookies_check(self.cookies_names):
            username, password = self._get_auth_info()
            if username:
                self.cookies_update(
                    self._login_impl(username, password), self.cookies_domain)

        self.access_token = self.cookies.get(self.cookies_names[0])

    # NOTE(review): keyarg=1 presumably keys the cache on 'username'
    @cache(maxage=365*24*3600, keyarg=1)
    def _login_impl(self, username, password):
        """Request an access token with account credentials

        Returns a dict mapping the access token cookie name to the token.
        Raises AuthenticationError if the response has no 'accessToken'.
        """
        endpoint = ("https://accountapi.weverse.io"
                    "/web/api/v2/auth/token/by-credentials")
        data = {"email": username, "password": password}
        headers = {
            "x-acc-app-secret": "5419526f1c624b38b10787e5c10b2a7a",
            "x-acc-app-version": "2.2.20-alpha.0",
            "x-acc-language": "en",
            "x-acc-service-id": "weverse",
            # random per-request ID; the 'uuid' module has no 'uuid64'
            "x-acc-trace-id": str(uuid.uuid4()),
        }
        res = self.request(
            endpoint, method="POST", data=data, headers=headers).json()
        if "accessToken" not in res:
            raise exception.AuthenticationError()
        return {self.cookies_names[0]: res["accessToken"]}

    def metadata(self, data):
        """Add 'date' and 'author_name' to 'data' in place if missing"""
        if "date" not in data and "publishedAt" in data:
            # 'publishedAt' is divided by 1000 before being parsed
            data["date"] = text.parse_timestamp(data["publishedAt"] / 1000)

        if "author_name" not in data and "author" in data:
            author = data["author"]
            # prefer the official artist name, fall back to the profile name
            data["author_name"] = author.get("artistOfficialProfile", {}).get(
                "officialName") or author["profileName"]

    def has_media(self, data):
        """Return True if 'data' appears to reference any media

        That is the case when one of the media-related keys is present
        with a value other than an empty dict, or when 'summary' reports
        a positive photo or video count.
        """
        for key in ("extension", "attachment", "photo", "video"):
            if key in data and data[key] != {}:
                return True
        if "summary" in data:
            summary = data["summary"]
            for key in ("photoCount", "videoCount"):
                if key in summary and text.parse_int(summary[key]) > 0:
                    return True
        return False


class WeversePostExtractor(WeverseExtractor):
    """Extractor for weverse posts"""
    subcategory = "post"
    directory_fmt = ("{category}", "{community[communityName]}",
                     "{author_name}", "{postId}")
    archive_fmt = "{postId}"
    pattern = COMMUNITY_PATTERN + r"/(?:artist|fanpost)" + POST_ID_PATTERN
    example = "https://weverse.io/abcdef/artist/1-123456789"

    def __init__(self, match):
        WeverseExtractor.__init__(self, match)
        # group 1 is the community keyword, group 2 the post ID
        self.post_id = match.group(2)

    def items(self):
        """Yield a directory message and one URL message per attachment"""
        data = self.api.post(self.post_id)

        # the API wrapper returns None when the request failed
        if not data:
            self.log.debug("Skipping %s (no post data)", self.url)
            return

        # skip posts with no media
        if not self.has_media(data):
            self.log.debug("Skipping %s (no media)", self.url)
            return

        self.metadata(data)

        # 'has_media' can succeed without an 'attachment' entry,
        # so do not assume the key exists
        attachments = data.pop("attachment", None) or {}

        yield Message.Directory, data
        for attachment_type, attachment_data in attachments.items():
            for attachment in attachment_data.values():
                if attachment_type == "photo":
                    url = attachment["url"]
                    file_id = attachment["photoId"]
                elif attachment_type == "video":
                    file_id = attachment["videoId"]
                    best_video = self.api.video(file_id)
                    if not best_video:
                        continue
                    url = best_video["url"]
                else:
                    # never emit an empty URL for unknown attachment types
                    self.log.debug(
                        "Skipping unsupported attachment type '%s'",
                        attachment_type)
                    continue

                data["filename"] = self.category + "_" + file_id
                data["extension"] = text.ext_from_url(url)
                yield Message.Url, url, data


class WeverseProfileExtractor(WeverseExtractor):
    """Extractor for weverse community profiles"""
    subcategory = "profile"
    pattern = COMMUNITY_PATTERN + "/profile" + MEMBER_ID_PATTERN
    example = ("https://weverse.io/abcdef"
               "/profile/a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5")

    def __init__(self, match):
        WeverseExtractor.__init__(self, match)
        # second capture group: the member ID
        self.member_id = match.group(2)

    def items(self):
        """Queue every post of this member that has media"""
        queue_info = {"_extractor": WeversePostExtractor}
        for entry in self.api.profile(self.member_id):
            if self.has_media(entry):
                yield Message.Queue, entry["shareUrl"], queue_info


class WeverseArtistTabExtractor(WeverseExtractor):
    """Extractor for all artists in a weverse community"""
    subcategory = "artist-tab"
    pattern = COMMUNITY_PATTERN + "/artist$"
    example = "https://weverse.io/abcdef/artist"

    def __init__(self, match):
        WeverseExtractor.__init__(self, match)
        # first capture group: the community keyword from the URL path
        self.community_keyword = match.group(1)

    def items(self):
        """Queue every artist tab post of this community that has media"""
        queue_info = {"_extractor": WeversePostExtractor}
        for entry in self.api.artist_tab(self.community_keyword):
            if self.has_media(entry):
                yield Message.Queue, entry["shareUrl"], queue_info


class WeverseMomentExtractor(WeverseExtractor):
    """Extractor for moments from a weverse community artist"""
    subcategory = "moment"
    directory_fmt = ("{category}", "{community[communityName]}",
                     "{author_name}", "{postId}")
    archive_fmt = "{postId}"
    pattern = (COMMUNITY_PATTERN +
               "/moment" + MEMBER_ID_PATTERN +
               "/post" + POST_ID_PATTERN)
    example = ("https://weverse.io/abcdef"
               "/moment/a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5"
               "/post/1-123456789")

    def __init__(self, match):
        WeverseExtractor.__init__(self, match)
        # groups: 1 = community keyword, 2 = member ID, 3 = post ID
        self.post_id = match.group(3)

    def items(self):
        """Yield a directory message and the moment's photo or video URL"""
        data = self.api.post(self.post_id)

        # the API wrapper returns None when the request failed
        if not data:
            self.log.debug("Skipping %s (no post data)", self.url)
            return

        extension = data.get("extension") or {}
        moment = {}
        if "moment" in extension:
            moment = extension["moment"]
        elif "momentW1" in extension:
            moment = extension["momentW1"]

        # skip moments with no media
        if not self.has_media(moment):
            self.log.debug("Skipping %s (no media)", self.url)
            return

        self.metadata(data)

        # 'extension' is reused below for the file extension;
        # neither key is guaranteed to be present
        data.pop("extension", None)
        data.pop("authorMomentPosts", None)

        yield Message.Directory, data
        url = ""
        file_id = ""

        if "photo" in moment:
            url = moment["photo"]["url"]
            file_id = moment["photo"]["photoId"]
        if "video" in moment:
            file_id = moment["video"]["videoId"]
            best_video = self.api.video(file_id)
            if best_video:
                url = best_video["url"]

        # never emit an empty URL
        if not url:
            self.log.debug("Skipping %s (no media URL)", self.url)
            return

        data["filename"] = self.category + "_" + file_id
        data["extension"] = text.ext_from_url(url)
        yield Message.Url, url, data


class WeverseMomentsExtractor(WeverseExtractor):
    """Extractor for all moments from a weverse community artist"""
    subcategory = "moments"
    pattern = COMMUNITY_PATTERN + "/moment" + MEMBER_ID_PATTERN + "$"
    example = ("https://weverse.io/abcdef"
               "/moment/a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5")

    def __init__(self, match):
        WeverseExtractor.__init__(self, match)
        # second capture group: the member ID
        self.member_id = match.group(2)

    def items(self):
        """Queue every moment of this member that has media"""
        queue_info = {"_extractor": WeverseMomentExtractor}
        for entry in self.api.moments(self.member_id):
            if self.has_media(entry):
                yield Message.Queue, entry["shareUrl"], queue_info


class WeverseAPI():
    """Interface for the Weverse API"""
    BASE_API_URL = "https://global.apis.naver.com"

    def __init__(self, extractor, access_token):
        self.extractor = extractor
        self.headers = {"Authorization": "Bearer " + access_token}

    def _endpoint_with_params(self, endpoint, params):
        """Return 'endpoint' with 'params' appended as a query string"""
        delimiter = "&" if "?" in endpoint else "?"
        return endpoint + delimiter + urllib.parse.urlencode(params)

    def _message_digest(self, endpoint, params, timestamp):
        """Return the base64-encoded HMAC-SHA1 signature for a request

        The signed message is the first 255 characters of the endpoint
        including its query string, followed by 'timestamp'.
        """
        key = "1b9cb6378d959b45714bec49971ade22e6e24e42".encode()
        url = self._endpoint_with_params(endpoint, params)
        message = "{}{}".format(url[:255], timestamp).encode()
        digest = hmac.new(key, message, hashlib.sha1).digest()
        return binascii.b2a_base64(digest).rstrip().decode()

    def community_id(self, community_keyword):
        """Return the community ID for a keyword, or None if unavailable"""
        endpoint = "/community/v1.0/communityIdUrlPathByUrlPathArtistCode"
        params = {"keyword": community_keyword}
        res = self._call(endpoint, params)
        return res.get("communityId") if res else None

    def post(self, post_id):
        """Return the data of a post, or None if the request failed"""
        endpoint = "/post/v1.0/post-{}".format(post_id)
        params = {"fieldSet": "postV1"}
        return self._call(endpoint, params)

    def video(self, video_id):
        """Return the download entry with the highest resolution

        Returns None if the request failed or no entries were returned.
        """
        endpoint = "/cvideo/v1.0/cvideo-{}/downloadInfo".format(video_id)
        res = self._call(endpoint)
        videos = res.get("downloadInfo") if res else None
        if not videos:
            return None
        # 'resolution' has a trailing "P" stripped before being parsed
        return max(videos, key=lambda video:
                   text.parse_int(video["resolution"].rstrip("P")))

    def profile(self, member_id):
        """Yield all posts of a member"""
        endpoint = "/post/v1.0/member-{}/posts".format(member_id)
        params = {
            "fieldSet": "postsV1",
            "filterType": "DEFAULT",
            "limit": 20,
            "sortType": "LATEST",
        }
        yield from self._pagination(endpoint, params)

    def artist_tab(self, community_keyword):
        """Yield all artist tab posts of a community"""
        community_id = self.community_id(community_keyword)
        if community_id is None:
            # the lookup failed; there is no endpoint to query
            return
        endpoint = "/post/v1.0/community-{}/artistTabPosts".format(
            community_id)
        params = {
            "fieldSet": "postsV1",
            "limit": 20,
            "pagingType": "CURSOR",
        }
        yield from self._pagination(endpoint, params)

    def moments(self, member_id):
        """Yield all moments of a member"""
        endpoint = "/post/v1.0/member-{}/posts".format(member_id)
        params = {
            "fieldSet": "postsV1",
            "filterType": "MOMENT",
            "limit": 1,
        }
        yield from self._pagination(endpoint, params)

    def _call(self, endpoint, params=None):
        """Send a signed GET request and return the decoded JSON response

        Returns None if the request raised an HttpError; the error is
        logged as a warning.
        """
        if params is None:
            params = {}
        params = util.combine_dict({
            "appId": "be4d79eb8fc7bd008ee82c8ec4ff6fd4",
            "language": "en",
            "platform": "WEB",
            "wpf": "pc"
        }, params)
        timestamp = int(time.time() * 1000)
        # the signature is computed before 'wmsgpad' and 'wmd' are added
        message_digest = self._message_digest(endpoint, params, timestamp)
        params = util.combine_dict(params, {
            "wmsgpad": timestamp,
            "wmd": message_digest
        })
        try:
            return self.extractor.request(
                self.BASE_API_URL + "/weverse/wevweb" + endpoint,
                params=params, headers=self.headers,
            ).json()
        except exception.HttpError as exc:
            self.extractor.log.warning(exc)
            return None

    def _pagination(self, endpoint, params=None):
        """Yield the 'data' entries of every page of a cursor-paged endpoint

        Stops when a request fails or the response has no next cursor.
        """
        # work on a copy so the caller's dict is not modified
        params = dict(params) if params else {}
        while True:
            res = self._call(endpoint, params)
            if not res:
                return
            yield from res.get("data") or ()
            next_params = (res.get("paging") or {}).get("nextParams")
            if not next_params or "after" not in next_params:
                return
            params["after"] = next_params["after"]
Loading

0 comments on commit 3dfc3ce

Please sign in to comment.