Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fetch comments support #9

Merged
merged 2 commits into from
Aug 30, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
@@ -27,7 +27,7 @@ pymemcache = "==4.0.0"
"discord-oauth2.py" = "==1.2.1"
twitch-dl = "==2.3.1"
pydantic = "==2.8.2"
"24ur-api[download]" = "==0.1.4"
"24ur-api[video]" = "==0.1.5"

[dev-packages]
black = "==24.8.0"
215 changes: 108 additions & 107 deletions Pipfile.lock

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions bot/common/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import datetime
import io
import mimetypes
import os
@@ -110,3 +111,19 @@ def temp_open(path: str, mode: str = 'rb'):
finally:
f.close()
os.remove(path)


def number_to_human_format(number: int) -> str:
    """Render ``number`` compactly with a K/M/B/T suffix (e.g. ``1500`` -> ``'1.5K'``).

    The value is first rounded to three significant digits, then divided by
    1000 until it is below 1000 or the largest suffix has been reached.
    Trailing zeros and a dangling decimal point are stripped from the result.

    Args:
        number: The value to format; negative values keep their sign.

    Returns:
        The shortened representation. Values of a quadrillion or more keep
        the ``T`` suffix with a mantissa of 1000 or above (e.g. ``'1000T'``).
    """
    suffixes = ('', 'K', 'M', 'B', 'T')
    num = float('{:.3g}'.format(number))
    magnitude = 0
    # Clamp at the last suffix: without the bound, anything >= 1e15 indexed
    # past the end of the suffix table and raised IndexError.
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000.0
    return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), suffixes[magnitude])


def date_to_human_format(date: datetime.datetime) -> str:
    """Format ``date`` for display, e.g. ``'Aug 5, 2024'`` or ``'14:03 · Aug 5, 2024'``.

    The time-of-day prefix is omitted when both hour and minute are zero.

    Args:
        date: The moment to render; it is formatted as-is, with no timezone
            conversion.

    Returns:
        The abbreviated month name, unpadded day and year, optionally
        prefixed with ``HH:MM · ``.
    """
    # The day and year are formatted in Python rather than with '%-d', which
    # is a glibc-only strftime extension and fails on other platforms.
    month = date.strftime('%b')
    day_part = f'{month} {date.day}, {date.year}'

    if date.hour == 0 and date.minute == 0:
        return day_part

    clock = date.strftime('%H:%M')
    return f'{clock} · {day_part}'
48 changes: 34 additions & 14 deletions bot/domain.py
Original file line number Diff line number Diff line change
@@ -4,6 +4,7 @@
from dataclasses import dataclass

from bot import constants
from bot.common import utils


DEFAULT_POST_FORMAT = """🔗 URL: {url}
@@ -14,6 +15,12 @@
📕 Description: {description}\n
"""

DEFAULT_COMMENT_FORMAT = """🧑🏻‍🎨 Author: {author}
📅 Created: {created}
👍🏻 Likes: {likes}
📕 Comment: {comment}\n
"""

SERVER_INFO_FORMAT = """```yml
Tier: {tier}
Prefix: {prefix}
@@ -100,10 +107,10 @@ def __str__(self) -> str:
return self._format.format(
url=self.url,
author=self.author or '❌',
created=self._date_human_format(date=self.created) if self.created else '❌',
created=utils.date_to_human_format(self.created) if self.created else '❌',
description=description if not self.spoiler else f'||{description}||',
views=self._number_human_format(num=self.views) if self.views else '❌',
likes=self._number_human_format(num=self.likes) if self.likes else '❌',
views=utils.number_to_human_format(self.views) if self.views else '❌',
likes=utils.number_to_human_format(self.likes) if self.likes else '❌',
)

def set_format(self, fmt: typing.Optional[str]) -> None:
@@ -118,16 +125,29 @@ def read_buffer(self) -> typing.Optional[bytes]:
self.buffer.seek(0)
return res

def _number_human_format(self, num: int) -> str:
num = float('{:.3g}'.format(num))
magnitude = 0
while abs(num) >= 1000:
magnitude += 1
num /= 1000.0
return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), ['', 'K', 'M', 'B', 'T'][magnitude])

def _date_human_format(self, date: datetime.datetime) -> str:
if date.hour == 0 and date.minute == 0:
return date.strftime('%b %-d, %Y')
@dataclass
class Comment:
    """A single comment on a post, renderable as a chat message via ``str()``."""

    author: typing.Optional[str] = None
    created: typing.Optional[datetime.datetime] = None
    likes: typing.Optional[int] = None
    comment: typing.Optional[str] = None
    spoiler: bool = False
    # Template used by __str__; expects {author}, {created}, {likes}, {comment}.
    _format: str = DEFAULT_COMMENT_FORMAT

    def __str__(self) -> str:
        """Fill the active template; any falsy field is rendered as '❌'."""
        author = self.author or '❌'

        created = '❌'
        if self.created:
            created = utils.date_to_human_format(self.created)

        likes = '❌'
        if self.likes:
            likes = utils.number_to_human_format(self.likes)

        # The spoiler markers wrap the placeholder too when there is no text.
        body = self.comment or '❌'
        if self.spoiler:
            body = f'||{body}||'

        return self._format.format(author=author, created=created, likes=likes, comment=body)

    def set_format(self, fmt: typing.Optional[str]) -> None:
        """Use ``fmt`` as the template, falling back to the default when it is empty."""
        self._format = fmt if fmt else DEFAULT_COMMENT_FORMAT


return date.strftime('%H:%M · %b %-d, %Y')
def comments_to_string(comments: typing.List[Comment]) -> str:
    """Concatenate the ``str()`` form of every comment, in order, with no separator."""
    return ''.join(map(str, comments))
3 changes: 3 additions & 0 deletions bot/downloader/base.py
Original file line number Diff line number Diff line change
@@ -20,6 +20,9 @@ async def get_integration_data(self, url: str) -> typing.Tuple[constants.Integra
async def get_post(self, url: str) -> domain.Post:
    """Fetch the post behind ``url``; this base version only raises and must be overridden."""
    raise NotImplementedError

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Fetch comments for ``url``; this base version only raises and must be overridden."""
    raise NotImplementedError

async def _download(self, url: str, cookies: typing.Optional[typing.Dict[str, str]] = None, **kwargs) -> io.BytesIO:
async with aiohttp.ClientSession(cookies=cookies) as session:
async with session.get(url=url, **kwargs) as resp:
4 changes: 4 additions & 0 deletions bot/downloader/facebook/client.py
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@

from bot import constants
from bot import domain
from bot import exceptions
from bot import logger
from bot.downloader import base
from bot.downloader.facebook import config
@@ -68,3 +69,6 @@ async def get_post(self, url: str) -> domain.Post:
post.buffer = await self._download(url=fb_post['images'][0])

return post

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Comment fetching is not available for this client; always raises ``NotSupportedError``."""
    unsupported_action = 'get_comments'
    raise exceptions.NotSupportedError(unsupported_action)
19 changes: 19 additions & 0 deletions bot/downloader/instagram/client.py
Original file line number Diff line number Diff line change
@@ -70,6 +70,25 @@ async def get_post(self, url: str) -> domain.Post:

raise NotImplementedError(f'Not yet implemented for {url}')

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Return at most ``n`` comments of the Instagram post behind ``url``.

    Args:
        url: Link to the post; its shortcode is extracted with ``_parse_url``.
        n: Maximum number of comments to return. Zero or a negative value
            yields an empty list.

    Returns:
        The comments in the order the instaloader iterator produces them.
    """
    # Function-scope import keeps this block self-contained.
    from itertools import islice

    uid, _, _ = self._parse_url(url)
    p = instaloader.Post.from_shortcode(context=self.client.context, shortcode=uid)

    # islice stops pulling from the iterator after n items. The previous
    # `i + 1 == n` check never fired for n <= 0, so every comment of the
    # post was fetched in that case.
    return [
        domain.Comment(
            author=comment.owner.username,
            created=comment.created_at_utc,
            likes=comment.likes_count,
            comment=comment.text,
        )
        for comment in islice(p.get_comments(), max(n, 0))
    ]

@staticmethod
def _parse_url(url: str) -> typing.Tuple[str, int, constants.LinkType]:
parsed_url = urlparse(url)
24 changes: 23 additions & 1 deletion bot/downloader/reddit/client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import datetime
import io
import glob
import io
import os
import re
import shutil
@@ -80,6 +80,28 @@ async def get_post(self, url: str) -> domain.Post:

return post

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Return comments from the first ``n`` top-level entries of a Reddit submission.

    Args:
        url: Link to the submission. Links the client rejects as invalid are
            resolved by following their redirect once.
        n: Number of leading top-level entries to consider.

    Returns:
        One ``domain.Comment`` per considered entry that carries a body; this
        can be fewer than ``n``.

    Raises:
        exceptions.ConfigurationError: If no Reddit client is configured.
    """
    # Function-scope import keeps this block self-contained.
    import asyncio

    if not self.client:
        raise exceptions.ConfigurationError('Reddit credentials not configured')

    try:
        submission = await self.client.submission(url=url)
    except praw_exceptions.InvalidURL:
        # Hack for new reddit urls generated in mobile app
        # Does another request, which redirects to the correct url
        # The blocking HTTP call runs in a worker thread so the event loop
        # keeps serving other tasks while the redirect is resolved.
        response = await asyncio.to_thread(requests.get, url, timeout=base.DEFAULT_TIMEOUT)
        url = response.url.split('?')[0]
        submission = await self.client.submission(url=url)

    return [
        domain.Comment(
            author=comment.author,
            created=datetime.datetime.fromtimestamp(comment.created_utc).astimezone(),
            likes=comment.score,
            comment=comment.body,
        )
        for comment in submission.comments[:n]
        # Skip entries without text (presumably "load more" placeholders);
        # reading `.body` on them would raise AttributeError.
        if hasattr(comment, 'body')
    ]

async def _hydrate_post(self, post: domain.Post) -> bool:
if not self.client:
return self._hydrate_post_no_login(post)
3 changes: 3 additions & 0 deletions bot/downloader/threads/client.py
Original file line number Diff line number Diff line change
@@ -97,6 +97,9 @@ async def get_post(self, url: str) -> domain.Post:

return post

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Comment fetching is not available for this client; always raises ``NotSupportedError``."""
    unsupported_action = 'get_comments'
    raise exceptions.NotSupportedError(unsupported_action)

def _get_thread_id(self, url_id: str) -> str:
alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'

17 changes: 17 additions & 0 deletions bot/downloader/tiktok/client.py
Original file line number Diff line number Diff line change
@@ -76,6 +76,23 @@ async def get_post(self, url: str) -> domain.Post:
created=video.create_time.astimezone(),
)

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Collect comments of the TikTok video behind ``url``, limited to ``n``.

    No creation date is set on the returned comments.
    """
    clean_url = self._clean_url(url)

    # NOTE(review): the comment iteration is kept inside the API context on
    # the assumption that the session must stay open while paging — confirm.
    async with AsyncTikTokAPI() as api:
        video = await api.video(clean_url)

        logger.debug('Trying to fetch tiktok comments', url=url)

        collected = []
        async for entry in video.comments.limit(n):
            # A LightUser is reduced to its unique_id; any other value is
            # passed through unchanged.
            if isinstance(entry.user, user.LightUser):
                commenter = entry.user.unique_id
            else:
                commenter = entry.user
            collected.append(
                domain.Comment(
                    author=commenter,
                    likes=entry.digg_count,
                    comment=entry.text,
                )
            )
        return collected

async def _download_slideshow(self, video: tiktok_video.Video, cookies: typing.Dict[str, str]) -> io.BytesIO:
vf = (
'"scale=iw*min(1080/iw\\,1920/ih):ih*min(1080/iw\\,1920/ih),'
22 changes: 21 additions & 1 deletion bot/downloader/twenty4ur/client.py
Original file line number Diff line number Diff line change
@@ -12,7 +12,16 @@


class Twenty4UrClientSingleton(base.BaseClientSingleton):
DOMAINS = ['24ur.com']
DOMAINS = [
'24ur.com',
'zadovoljna.si',
'bibaleze.si',
'vizita.si',
'cekin.si',
'moskisvet.com',
'dominvrt.si',
'okusno.je',
]
_CONFIG_SCHEMA = config.Twenty4UrConfig

@classmethod
@@ -54,3 +63,14 @@ async def get_post(self, url: str) -> domain.Post:
views=article.num_views,
buffer=buffer,
)

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Load the article behind ``url`` with ``n`` comments requested and convert them."""
    article = await self.client.get_article_by_url(url=url, num_comments=n)

    converted = []
    for item in article.comments:
        converted.append(
            domain.Comment(
                author=item.author,
                created=item.posted_at,
                likes=item.score,
                comment=item.content,
            )
        )
    return converted
3 changes: 3 additions & 0 deletions bot/downloader/twitch/client.py
Original file line number Diff line number Diff line change
@@ -58,6 +58,9 @@ async def get_post(self, url: str) -> domain.Post:
buffer=io.BytesIO(resp.content),
)

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Comment fetching is not available for this client; always raises ``NotSupportedError``."""
    unsupported_action = 'get_comments'
    raise exceptions.NotSupportedError(unsupported_action)

@staticmethod
def _find_quality(qualities: typing.List[twitch.VideoQuality], max_quality: int = 720) -> int:
"""
47 changes: 41 additions & 6 deletions bot/downloader/twitter/client.py
Original file line number Diff line number Diff line change
@@ -84,12 +84,7 @@ async def relogin(self) -> None:
if self.client:
await self.client.pool.relogin(usernames=[self.username])

async def get_post(self, url: str) -> domain.Post:
uid, index = self._parse_url(url)

if self.client is None:
return await self._get_post_no_login(url=url, uid=uid, index=index or 0)

async def login(self) -> None:
if not self.logged_in:
await self.client.pool.add_account(
username=self.username,
@@ -99,8 +94,48 @@ async def get_post(self, url: str) -> domain.Post:
)
await self.client.pool.login_all()

async def get_post(self, url: str) -> domain.Post:
    """Fetch a tweet, via the logged-in client when one exists, else without login."""
    uid, index = self._parse_url(url)

    if self.client is not None:
        await self.login()
        return await self._get_post_login(url=url, uid=uid, index=index)

    # Without a client, a missing media index defaults to 0.
    return await self._get_post_no_login(url=url, uid=uid, index=index or 0)

async def get_comments(
    self,
    url: str,
    n: int = 5,
    retry_count: int = 0,
) -> typing.List[domain.Comment]:
    """Return at most ``n`` replies to the tweet behind ``url``.

    Args:
        url: Link to the tweet.
        n: Maximum number of replies to return.
        retry_count: Internal retry counter; after the first failure the
            account is re-logged-in and the fetch is attempted once more.

    Returns:
        The replies converted to ``domain.Comment`` objects.

    Raises:
        exceptions.NotAllowedError: If no Twitter client is configured.
        exceptions.IntegrationClientError: If fetching still fails after the
            retry.
    """
    if not self.client:
        raise exceptions.NotAllowedError('Twitter credentials not configured')

    await self.login()

    uid, _ = self._parse_url(url)
    try:
        # `tweet_replies` is consumed with `async for`, so the fetching
        # happens during iteration. Draining it here puts those failures
        # inside the try; previously only the call was guarded, so the
        # relogin/retry below never ran for fetch errors.
        replies = [reply async for reply in self.client.tweet_replies(twid=int(uid), limit=n)]
    except Exception as e:
        logger.error('Failed fetching from twitter, retrying', error=str(e))
        if retry_count == 0:
            await self.relogin()
            return await self.get_comments(url=url, n=n, retry_count=retry_count + 1)

        raise exceptions.IntegrationClientError('Failed fetching from twitter') from e

    # The result is still capped at n in case more replies were produced.
    return [
        domain.Comment(
            author=f'{reply.user.displayname} ({reply.user.username})',
            created=reply.date.astimezone(),
            likes=reply.likeCount,
            comment=reply.rawContent,
        )
        for reply in replies[:n]
    ]

async def _get_post_login(
self,
url: str,
5 changes: 5 additions & 0 deletions bot/downloader/youtube/client.py
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@

from bot import constants
from bot import domain
from bot import exceptions
from bot import logger
from bot.downloader import base
from bot.downloader.youtube import config
@@ -43,10 +44,14 @@ async def get_post(self, url: str) -> domain.Post:
views=vid.views,
created=vid.publish_date,
buffer=io.BytesIO(),
spoiler=vid.age_restricted is True,
)

vid.streams.filter(progressive=True, file_extension='mp4').order_by(
'resolution'
).desc().first().stream_to_buffer(post.buffer)

return post

async def get_comments(self, url: str, n: int = 5) -> typing.List[domain.Comment]:
    """Comment fetching is not available for this client; always raises ``NotSupportedError``."""
    unsupported_action = 'get_comments'
    raise exceptions.NotSupportedError(unsupported_action)
5 changes: 5 additions & 0 deletions bot/exceptions.py
Original file line number Diff line number Diff line change
@@ -17,6 +17,11 @@ def __init__(self, action: str) -> None:
super().__init__(f'Action not allowed: {action}')


class NotSupportedError(BaseError):
    """Error for an action that is not supported; the action name is embedded in the message."""

    def __init__(self, action: str) -> None:
        message = f'Action not supported: {action}'
        super().__init__(message)


class ConfigurationError(BaseError):
    """Error for missing or invalid configuration; adds nothing to ``BaseError``."""

72 changes: 72 additions & 0 deletions bot/integrations/discord/client.py
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@
import datetime
import typing
from functools import partial
from itertools import batched

import discord
from discord import app_commands
@@ -51,6 +52,11 @@ def __init__(self, *, intents: discord.Intents, **options: typing.Any) -> None:
description='Embeds media directly into discord',
callback=self.embed_cmd,
),
app_commands.Command(
name='comments',
description='Fetches comments for a post',
callback=self.get_comments_cmd,
),
app_commands.Command(
name='help',
description='Prints configuration for this server',
@@ -181,6 +187,47 @@ async def embed_cmd(self, interaction: discord.Interaction, url: str, spoiler: b
author=interaction.user,
)

async def get_comments_cmd(
    self,
    interaction: discord.Interaction,
    url: str,
    n: int = 5,
    spoiler: bool = False,
) -> None:
    """Slash-command callback that fetches comments for a url and posts them in batches of five."""
    # The interaction is deferred first because fetching can take a while.
    await interaction.response.defer()

    # NOTE(review): returning here leaves the deferred interaction without a
    # follow-up message — confirm this is intended.
    if service.should_handle_url(url) is False:
        return

    try:
        comments = await service.get_comments(
            url=url,
            n=n,
            server_vendor=constants.ServerVendor.DISCORD,
            server_uid=str(interaction.guild_id),
            author_uid=str(interaction.user.id),
        )
    except Exception as e:
        logger.error('Failed downloading', url=url, error=str(e))
        await interaction.followup.send(
            content=f'Failed fetching {url} ({interaction.user.mention}).\nError: {str(e)}',
            view=CustomView(),
        )
        raise e

    # The service returns None when no client handles the url, and a client
    # may return no comments at all. Iterating None raised TypeError, and an
    # empty result left the deferred interaction unanswered.
    if not comments:
        await interaction.followup.send(
            content=f'No comments found for {url} ({interaction.user.mention}).',
            view=CustomView(),
        )
        return

    # Override spoiler
    for comment in comments:
        if not comment.spoiler:
            comment.spoiler = spoiler

    # One message per batch of five comments.
    for batch in batched(comments, 5):
        await self._send_comments(
            url=url,
            comments=batch,
            send_func=partial(interaction.followup.send, view=CustomView()),
            author=interaction.user,
        )

async def help_cmd(self, interaction: discord.Interaction) -> None:
await interaction.response.defer()

@@ -308,3 +355,28 @@ async def _send_post(
return await self._send_post(post=post, send_func=send_func, author=author)

raise exceptions.BotError('Failed to send message') from e

async def _send_comments(
    self,
    url: str,
    comments: typing.List[domain.Comment],
    send_func: typing.Callable,
    author: typing.Union[discord.User, discord.Member],
) -> discord.Message:
    """Send one message containing ``comments``, truncated to 2000 characters.

    Raises ``exceptions.BotError`` when the send fails with a Discord HTTP error.
    """
    text = f'Here you go {author.mention} {utils.random_emoji()}.\n{url}\n{domain.comments_to_string(comments)}'

    if len(text) > 2000:
        # When any comment is a spoiler, '||' is appended before the ellipsis.
        has_spoiler = any(comment.spoiler is True for comment in comments)
        text = (text[:1995] + '||...') if has_spoiler else (text[:1997] + '...')

    try:
        return await send_func(suppress_embeds=True, content=text)
    except discord.HTTPException as e:
        raise exceptions.BotError('Failed to send message') from e
73 changes: 73 additions & 0 deletions bot/service.py
Original file line number Diff line number Diff line change
@@ -141,3 +141,76 @@ async def get_post( # noqa: C901
)

return post


async def get_comments(  # noqa: C901
    url: str,
    n: int,
    server_vendor: constants.ServerVendor,
    server_uid: str,
    author_uid: str,
) -> typing.Optional[typing.List[domain.Comment]]:
    """Fetch comments for ``url`` after checking server limits and member bans.

    Args:
        url: Link whose comments should be fetched.
        n: Number of comments requested; more than 15 is rejected.
        server_vendor: Vendor of the server the request came from.
        server_uid: Vendor-side identifier of that server.
        author_uid: Vendor-side identifier of the requesting member.

    Returns:
        The comments returned by the client registered for ``url``, or
        ``None`` when no client is registered or available for it.

    Raises:
        exceptions.NotAllowedError: If ``n`` exceeds 15, the server may not
            post, or the member is banned from the server.
        exceptions.BotError: If the server record has no internal id.
        Exception: Whatever the client's ``get_comments`` raises; it is
            logged and re-raised unchanged.
    """
    # TODO: Refactor
    if n > 15:
        raise exceptions.NotAllowedError('Can\'t fetch more than 15 comments')

    # The registry raises ValueError when it has no strategy for the url.
    try:
        client = registry.get_instance(url)
    except ValueError as e:
        logger.warning('No strategy for url', url=url, error=str(e))
        return None

    if not client:
        logger.warning('Integration for url not enabled or client init failure', url=url)
        return None

    # Check if server is throttled and allowed to post
    server = repository.get_server(
        vendor=server_vendor,
        vendor_uid=server_uid,
    )
    if not server:
        logger.info(
            'Server not configured, creating a default config',
            server_vendor_uid=server_uid,
            server_vendor=server_vendor.value,
        )
        server = repository.create_server(vendor=server_vendor, vendor_uid=server_uid)

    if not server._internal_id:
        logger.error('Internal id for server not set')
        raise exceptions.BotError('Internal server error')

    # The limit check is fed the number of posts counted over the last 24 hours.
    num_posts_in_server = repository.get_number_of_posts_in_server_from_datetime(
        server_id=server._internal_id,
        from_datetime=datetime.datetime.now() - datetime.timedelta(days=1),
    )

    if not server.can_post(num_posts_in_one_day=num_posts_in_server, integration=client.INTEGRATION):
        logger.warning(
            'Server is not allowed to post',
            server_vendor=server_vendor.value,
            server_vendor_uid=server_uid,
            server_tier=server.tier.name,
        )
        raise exceptions.NotAllowedError('Upgrade your tier')

    # Check if user is banned
    if repository.is_member_banned_from_server(
        server_vendor=server_vendor,
        server_uid=server_uid,
        member_uid=author_uid,
    ):
        logger.warning(
            'User banned from server',
            user=author_uid,
            server_vendor=server_vendor.value,
            server_vendor_uid=server_uid,
        )
        raise exceptions.NotAllowedError('User banned')

    # Client failures are logged here and then propagated to the caller.
    try:
        return await client.get_comments(url=url, n=n)
    except Exception as e:
        logger.error('Failed downloading', url=url, num_comments=n, error=str(e))
        raise e