Skip to content

Commit

Permalink
Fetch comments support (#9)
Browse files Browse the repository at this point in the history
* Fetch comments support

* Bump 24ur-api, support more 24ur sub-sites
  • Loading branch information
amadejkastelic authored Aug 30, 2024
1 parent c9508e0 commit 3d82ce5
Show file tree
Hide file tree
Showing 17 changed files with 449 additions and 130 deletions.
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pymemcache = "==4.0.0"
"discord-oauth2.py" = "==1.2.1"
twitch-dl = "==2.3.1"
pydantic = "==2.8.2"
"24ur-api[download]" = "==0.1.4"
"24ur-api[video]" = "==0.1.5"

[dev-packages]
black = "==24.8.0"
Expand Down
215 changes: 108 additions & 107 deletions Pipfile.lock

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions bot/common/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import datetime
import io
import mimetypes
import os
Expand Down Expand Up @@ -110,3 +111,19 @@ def temp_open(path: str, mode: str = 'rb'):
finally:
f.close()
os.remove(path)


def number_to_human_format(number: int) -> str:
    """Abbreviate a number to three significant digits with a K/M/B/T suffix.

    Examples: ``999`` -> ``'999'``, ``1234`` -> ``'1.23K'``,
    ``1500000`` -> ``'1.5M'``.

    Values of a quadrillion or more have no dedicated suffix; they are
    expressed in trillions (``10**15`` -> ``'1000T'``) instead of indexing
    past the end of the suffix table.

    :param number: The value to abbreviate.
    :return: The abbreviated, human readable representation.
    """
    suffixes = ('', 'K', 'M', 'B', 'T')

    # Round to three significant digits first so 999999 becomes 1M, not 1000K.
    num = float('{:.3g}'.format(number))
    magnitude = 0
    # Stop at the largest known suffix rather than raising IndexError.
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000.0

    # Fixed-point formatting avoids scientific notation; trailing zeros and a
    # dangling decimal point are stripped afterwards.
    mantissa = '{:f}'.format(num).rstrip('0').rstrip('.')
    return '{}{}'.format(mantissa, suffixes[magnitude])


def date_to_human_format(date: datetime.datetime) -> str:
    """Format a datetime as e.g. ``'14:05 · Aug 3, 2024'``.

    When both the hour and the minute are zero the time part is omitted and
    only the date (``'Aug 3, 2024'``) is returned.

    :param date: The datetime to format.
    :return: The human readable representation.
    """
    # The day is taken from ``date.day`` instead of the ``%-d`` directive:
    # ``%-d`` is a platform-specific strftime extension and is not available
    # everywhere (e.g. on Windows).
    day_part = '{} {}, {}'.format(date.strftime('%b'), date.day, date.strftime('%Y'))

    if date.hour == 0 and date.minute == 0:
        return day_part

    return '{} · {}'.format(date.strftime('%H:%M'), day_part)
48 changes: 34 additions & 14 deletions bot/domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from dataclasses import dataclass

from bot import constants
from bot.common import utils


DEFAULT_POST_FORMAT = """🔗 URL: {url}
Expand All @@ -14,6 +15,12 @@
📕 Description: {description}\n
"""

DEFAULT_COMMENT_FORMAT = """🧑🏻‍🎨 Author: {author}
📅 Created: {created}
👍🏻 Likes: {likes}
📕 Comment: {comment}\n
"""

SERVER_INFO_FORMAT = """```yml
Tier: {tier}
Prefix: {prefix}
Expand Down Expand Up @@ -100,10 +107,10 @@ def __str__(self) -> str:
return self._format.format(
url=self.url,
author=self.author or '❌',
created=self._date_human_format(date=self.created) if self.created else '❌',
created=utils.date_to_human_format(self.created) if self.created else '❌',
description=description if not self.spoiler else f'||{description}||',
views=self._number_human_format(num=self.views) if self.views else '❌',
likes=self._number_human_format(num=self.likes) if self.likes else '❌',
views=utils.number_to_human_format(self.views) if self.views else '❌',
likes=utils.number_to_human_format(self.likes) if self.likes else '❌',
)

def set_format(self, fmt: typing.Optional[str]) -> None:
Expand All @@ -118,16 +125,29 @@ def read_buffer(self) -> typing.Optional[bytes]:
self.buffer.seek(0)
return res

def _number_human_format(self, num: int) -> str:
num = float('{:.3g}'.format(num))
magnitude = 0
while abs(num) >= 1000:
magnitude += 1
num /= 1000.0
return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), ['', 'K', 'M', 'B', 'T'][magnitude])

def _date_human_format(self, date: datetime.datetime) -> str:
if date.hour == 0 and date.minute == 0:
return date.strftime('%b %-d, %Y')
@dataclass
class Comment:
    """A single comment on a post, renderable as text through a format template."""

    author: typing.Optional[str] = None
    created: typing.Optional[datetime.datetime] = None
    likes: typing.Optional[int] = None
    comment: typing.Optional[str] = None
    # When True the comment text is wrapped in ``||`` markers when rendered.
    spoiler: bool = False
    # Template filled in by ``__str__``; replaced through ``set_format``.
    _format: str = DEFAULT_COMMENT_FORMAT

    def __str__(self) -> str:
        """Render the comment through the active template.

        Every falsy field is rendered as the '❌' placeholder.
        """
        placeholder = '❌'

        if self.created:
            created_text = utils.date_to_human_format(self.created)
        else:
            created_text = placeholder

        # NOTE(review): a like count of 0 is falsy and therefore also shown
        # as the placeholder - confirm this is intended.
        if self.likes:
            likes_text = utils.number_to_human_format(self.likes)
        else:
            likes_text = placeholder

        body = self.comment or placeholder
        if self.spoiler:
            body = f'||{body}||'

        return self._format.format(
            author=self.author or placeholder,
            created=created_text,
            likes=likes_text,
            comment=body,
        )

    def set_format(self, fmt: typing.Optional[str]) -> None:
        """Use ``fmt`` as the render template, or the default one when ``fmt`` is falsy."""
        if fmt:
            self._format = fmt
        else:
            self._format = DEFAULT_COMMENT_FORMAT


return date.strftime('%H:%M · %b %-d, %Y')
def comments_to_string(comments: typing.List[Comment]) -> str:
    """Concatenate the string form of every comment, in order, without a separator."""
    return ''.join(map(str, comments))
3 changes: 3 additions & 0 deletions bot/downloader/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ async def get_integration_data(self, url: str) -> typing.Tuple[constants.Integra
async def get_post(self, url: str) -> domain.Post:
    """Fetch the post behind ``url``.

    This base version always raises ``NotImplementedError``.
    """
    raise NotImplementedError

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Fetch up to ``n`` comments of the post behind ``url``.

    This base version always raises ``NotImplementedError``.
    """
    raise NotImplementedError

async def _download(self, url: str, cookies: typing.Optional[typing.Dict[str, str]] = None, **kwargs) -> io.BytesIO:
async with aiohttp.ClientSession(cookies=cookies) as session:
async with session.get(url=url, **kwargs) as resp:
Expand Down
4 changes: 4 additions & 0 deletions bot/downloader/facebook/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from bot import constants
from bot import domain
from bot import exceptions
from bot import logger
from bot.downloader import base
from bot.downloader.facebook import config
Expand Down Expand Up @@ -68,3 +69,6 @@ async def get_post(self, url: str) -> domain.Post:
post.buffer = await self._download(url=fb_post['images'][0])

return post

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Comments cannot be fetched by the Facebook client.

    :raises exceptions.NotSupportedError: Always.
    """
    unsupported_action = 'get_comments'
    raise exceptions.NotSupportedError(unsupported_action)
19 changes: 19 additions & 0 deletions bot/downloader/instagram/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,25 @@ async def get_post(self, url: str) -> domain.Post:

raise NotImplementedError(f'Not yet implemented for {url}')

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Return at most ``n`` comments of the Instagram post behind ``url``.

    :param url: Link to the post; its shortcode is extracted via ``_parse_url``.
    :param n: Maximum number of comments to return. Values <= 0 yield an
        empty list.
    :return: The comments in the order the underlying iterator yields them.
    """
    # Function-scope import so the block does not rely on the module header.
    import itertools

    uid, _, _ = self._parse_url(url)
    p = instaloader.Post.from_shortcode(context=self.client.context, shortcode=uid)

    # islice stops pulling from the iterator after ``n`` items. The previous
    # ``i + 1 == n`` check never fired for n <= 0, so every comment of the
    # post was walked in that case.
    limit = max(n, 0)
    return [
        domain.Comment(
            author=comment.owner.username,
            created=comment.created_at_utc,
            likes=comment.likes_count,
            comment=comment.text,
        )
        for comment in itertools.islice(p.get_comments(), limit)
    ]

@staticmethod
def _parse_url(url: str) -> typing.Tuple[str, int, constants.LinkType]:
parsed_url = urlparse(url)
Expand Down
24 changes: 23 additions & 1 deletion bot/downloader/reddit/client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import datetime
import io
import glob
import io
import os
import re
import shutil
Expand Down Expand Up @@ -80,6 +80,28 @@ async def get_post(self, url: str) -> domain.Post:

return post

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Return the first ``n`` entries of the submission's comments.

    :raises exceptions.ConfigurationError: When no Reddit client is configured.
    """
    if not self.client:
        raise exceptions.ConfigurationError('Reddit credentials not configured')

    try:
        submission = await self.client.submission(url=url)
    except praw_exceptions.InvalidURL:
        # Hack for new reddit urls generated in the mobile app:
        # an extra request follows the redirect to the correct url,
        # whose query string is then dropped.
        redirected = requests.get(url, timeout=base.DEFAULT_TIMEOUT)
        url = redirected.url.split('?')[0]
        submission = await self.client.submission(url=url)

    result = []
    for entry in submission.comments[:n]:
        # NOTE(review): assumes every entry exposes author/created_utc/score/body
        # - confirm the slice cannot contain other placeholder objects.
        posted_at = datetime.datetime.fromtimestamp(entry.created_utc).astimezone()
        result.append(
            domain.Comment(
                author=entry.author,
                created=posted_at,
                likes=entry.score,
                comment=entry.body,
            )
        )
    return result

async def _hydrate_post(self, post: domain.Post) -> bool:
if not self.client:
return self._hydrate_post_no_login(post)
Expand Down
3 changes: 3 additions & 0 deletions bot/downloader/threads/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ async def get_post(self, url: str) -> domain.Post:

return post

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Comments cannot be fetched by the Threads client.

    :raises exceptions.NotSupportedError: Always.
    """
    unsupported_action = 'get_comments'
    raise exceptions.NotSupportedError(unsupported_action)

def _get_thread_id(self, url_id: str) -> str:
alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'

Expand Down
17 changes: 17 additions & 0 deletions bot/downloader/tiktok/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,23 @@ async def get_post(self, url: str) -> domain.Post:
created=video.create_time.astimezone(),
)

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Return comments of the TikTok video behind ``url``, limited via ``.limit(n)``.

    The ``created`` field of the returned comments is left unset.
    """
    clean_url = self._clean_url(url)

    # NOTE(review): the original nesting was lost in extraction; the comments
    # are assumed to be read while the API context is still open - confirm.
    async with AsyncTikTokAPI() as api:
        video = await api.video(clean_url)

        logger.debug('Trying to fetch tiktok comments', url=url)

        collected = []
        async for entry in video.comments.limit(n):
            collected.append(
                domain.Comment(
                    # Use the unique id when the user is a LightUser, otherwise
                    # pass the user value through unchanged.
                    author=entry.user.unique_id if isinstance(entry.user, user.LightUser) else entry.user,
                    likes=entry.digg_count,
                    comment=entry.text,
                )
            )
        return collected

async def _download_slideshow(self, video: tiktok_video.Video, cookies: typing.Dict[str, str]) -> io.BytesIO:
vf = (
'"scale=iw*min(1080/iw\\,1920/ih):ih*min(1080/iw\\,1920/ih),'
Expand Down
22 changes: 21 additions & 1 deletion bot/downloader/twenty4ur/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,16 @@


class Twenty4UrClientSingleton(base.BaseClientSingleton):
DOMAINS = ['24ur.com']
DOMAINS = [
'24ur.com',
'zadovoljna.si',
'bibaleze.si',
'vizita.si',
'cekin.si',
'moskisvet.com',
'dominvrt.si',
'okusno.je',
]
_CONFIG_SCHEMA = config.Twenty4UrConfig

@classmethod
Expand Down Expand Up @@ -54,3 +63,14 @@ async def get_post(self, url: str) -> domain.Post:
views=article.num_views,
buffer=buffer,
)

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Return the comments of the article behind ``url``.

    The article is requested with ``num_comments=n`` and each of its comments
    is converted to a ``domain.Comment``.
    """
    article = await self.client.get_article_by_url(url=url, num_comments=n)

    converted = []
    for entry in article.comments:
        converted.append(
            domain.Comment(
                author=entry.author,
                created=entry.posted_at,
                likes=entry.score,
                comment=entry.content,
            )
        )
    return converted
3 changes: 3 additions & 0 deletions bot/downloader/twitch/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ async def get_post(self, url: str) -> domain.Post:
buffer=io.BytesIO(resp.content),
)

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Comments cannot be fetched by the Twitch client.

    :raises exceptions.NotSupportedError: Always.
    """
    unsupported_action = 'get_comments'
    raise exceptions.NotSupportedError(unsupported_action)

@staticmethod
def _find_quality(qualities: typing.List[twitch.VideoQuality], max_quality: int = 720) -> int:
"""
Expand Down
47 changes: 41 additions & 6 deletions bot/downloader/twitter/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,7 @@ async def relogin(self) -> None:
if self.client:
await self.client.pool.relogin(usernames=[self.username])

async def get_post(self, url: str) -> domain.Post:
uid, index = self._parse_url(url)

if self.client is None:
return await self._get_post_no_login(url=url, uid=uid, index=index or 0)

async def login(self) -> None:
if not self.logged_in:
await self.client.pool.add_account(
username=self.username,
Expand All @@ -99,8 +94,48 @@ async def get_post(self, url: str) -> domain.Post:
)
await self.client.pool.login_all()

async def get_post(self, url: str) -> domain.Post:
    """Fetch the tweet behind ``url``.

    Without a configured client the no-login path is used (the media index
    defaults to 0); otherwise the account is logged in first and the
    authenticated path is used.
    """
    uid, index = self._parse_url(url)

    if self.client is not None:
        await self.login()
        return await self._get_post_login(url=url, uid=uid, index=index)

    return await self._get_post_no_login(url=url, uid=uid, index=index or 0)

async def get_comments(
    self,
    url: str,
    n: int = 5,
    retry_count: int = 0,
) -> typing.List[domain.Comment]:
    """Return at most ``n`` replies to the tweet behind ``url``.

    On the first failure the account is re-logged in and the fetch is retried
    once; a second failure is raised as ``IntegrationClientError``.

    :param url: Link to the tweet.
    :param n: Maximum number of replies to return.
    :param retry_count: Number of retries already performed (internal).
    :raises exceptions.NotAllowedError: When no Twitter client is configured.
    :raises exceptions.IntegrationClientError: When fetching fails after the retry.
    """
    if not self.client:
        raise exceptions.NotAllowedError('Twitter credentials not configured')

    await self.login()

    uid, _ = self._parse_url(url)
    try:
        # The replies are consumed with ``async for``, so the iteration has to
        # happen inside the ``try``: previously only the call itself was
        # guarded and errors raised while iterating skipped the retry logic.
        replies = [reply async for reply in self.client.tweet_replies(twid=int(uid), limit=n)]
    except Exception as e:
        logger.error('Failed fetching from twitter, retrying', error=str(e))
        if retry_count == 0:
            await self.relogin()
            return await self.get_comments(url=url, n=n, retry_count=retry_count + 1)

        raise exceptions.IntegrationClientError('Failed fetching from twitter') from e

    # The slice keeps the result at ``n`` items even if more were yielded.
    return [
        domain.Comment(
            author=f'{reply.user.displayname} ({reply.user.username})',
            created=reply.date.astimezone(),
            likes=reply.likeCount,
            comment=reply.rawContent,
        )
        for reply in replies[:n]
    ]

async def _get_post_login(
self,
url: str,
Expand Down
5 changes: 5 additions & 0 deletions bot/downloader/youtube/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from bot import constants
from bot import domain
from bot import exceptions
from bot import logger
from bot.downloader import base
from bot.downloader.youtube import config
Expand Down Expand Up @@ -43,10 +44,14 @@ async def get_post(self, url: str) -> domain.Post:
views=vid.views,
created=vid.publish_date,
buffer=io.BytesIO(),
spoiler=vid.age_restricted is True,
)

vid.streams.filter(progressive=True, file_extension='mp4').order_by(
'resolution'
).desc().first().stream_to_buffer(post.buffer)

return post

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Comments cannot be fetched by the YouTube client.

    :raises exceptions.NotSupportedError: Always.
    """
    unsupported_action = 'get_comments'
    raise exceptions.NotSupportedError(unsupported_action)
5 changes: 5 additions & 0 deletions bot/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ def __init__(self, action: str) -> None:
super().__init__(f'Action not allowed: {action}')


class NotSupportedError(BaseError):
    """Error whose message reads ``Action not supported: <action>``."""

    def __init__(self, action: str) -> None:
        message = f'Action not supported: {action}'
        super().__init__(message)


class ConfigurationError(BaseError):
    """Error type for configuration problems; adds nothing to ``BaseError``."""

Expand Down
Loading

0 comments on commit 3d82ce5

Please sign in to comment.