Skip to content

Commit

Permalink
Threads
Browse files (browse the repository at this point in the history)
  • Loading branch information
amadejkastelic committed Aug 22, 2024
1 parent eefd99b commit 2689f9e
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 196 deletions.
61 changes: 47 additions & 14 deletions bot/downloader/threads/client.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import io
import json
import re
import typing
Expand All @@ -10,6 +11,7 @@
from bot import domain
from bot import exceptions
from bot import logger
from bot.common import utils
from bot.downloader import base
from bot.downloader.threads import config
from bot.downloader.threads import types
Expand Down Expand Up @@ -54,23 +56,48 @@ async def get_integration_data(self, url: str) -> typing.Tuple[constants.Integra

async def get_post(self, url: str) -> domain.Post:
_, url_id, _ = await self.get_integration_data(url)
api_token = self._get_threads_api_token()

thread = self._get_thread(url_id)
thread = self._get_thread(url_id=url_id, api_token=api_token)

if len(thread.data.data.edges) == 0 or len(thread.data.data.edges[0].node.thread_items) == 0:
raise exceptions.IntegrationClientError('No threads found')

thread = thread.data.data.edges[0].node.thread_items[0].post

return domain.Post(
post = domain.Post(
url=url,
author=thread.user.username,
description=thread.caption.text,
likes=thread.like_count,
created=datetime.datetime.fromtimestamp(thread.taken_at),
)

def _get_thread_id(self, url_id: str):
headers = HEADERS | {'X-FB-LSD': api_token}

media_url = None
match thread.media_type:
case types.MediaType.IMAGE:
media_url = self._find_suitable_image_url(thread.image_versions2.candidates)
case types.MediaType.VIDEO:
media_url = thread.video_versions[0].url
case types.MediaType.CAROUSEL:
post.buffer = utils.combine_images(
[
await self._download(img.image_versions2.candidates[0].url, headers=headers)
for img in thread.carousel_media
]
)

logger.debug('Fetched thread', media_type=thread.media_type, url=url, media_url=media_url)

if media_url:
with requests.get(url=media_url, timeout=base.DEFAULT_TIMEOUT) as resp:
post.buffer = io.BytesIO(resp.content)

return post

def _get_thread_id(self, url_id: str) -> str:
alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'

thread_id = 0
Expand All @@ -80,29 +107,24 @@ def _get_thread_id(self, url_id: str):

return thread_id

def _get_thread_raw(self, url_id: str, api_token: str) -> dict:
    """
    Queries the Threads GraphQL endpoint and returns the decoded JSON payload.

    :param url_id: Short code taken from the thread URL; converted to the numeric
        post id with ``_get_thread_id``.
    :param api_token: LSD token, sent both as the ``X-FB-LSD`` header and the ``lsd`` form field.
    :return: Parsed JSON body of the response.
    """
    return requests.post(
        url='https://www.threads.net/api/graphql',
        timeout=base.DEFAULT_TIMEOUT,
        headers=HEADERS | {'X-FB-LSD': api_token},
        data={
            'lsd': api_token,
            'variables': json.dumps(
                {
                    'postID': self._get_thread_id(url_id),
                },
            ),
            # NOTE(review): presumably the id of the persisted GraphQL query - confirm.
            'doc_id': '25460088156920903',
        },
    ).json()

def _get_thread(self, url_id: str, api_token: str) -> types.Thread:
    """
    Fetches a thread and validates the raw payload into a ``types.Thread`` model.

    :param url_id: Short code taken from the thread URL.
    :param api_token: LSD token forwarded to ``_get_thread_raw``.
    :return: The validated ``types.Thread`` instance.
    """
    raw_thread = self._get_thread_raw(url_id=url_id, api_token=api_token)
    return types.Thread.model_validate(raw_thread)

def _get_threads_api_token(self) -> str:
response = requests.get(
Expand All @@ -116,3 +138,14 @@ def _get_threads_api_token(self) -> str:
token = token_key_value.split('"')[0]

return token

@staticmethod
def _find_suitable_image_url(candidates: typing.List[types.Candidate], max_quality: int = 1440) -> str:
    """
    Returns the url of the widest image that does not exceed max quality.

    Falls back to the narrowest candidate when every candidate is wider than
    ``max_quality``, and to the first candidate when no candidate reports a width.

    :param candidates: Image candidates of a post; ``width`` may be ``None``.
    :param max_quality: Maximum accepted image width.
    :return: Url of the selected candidate.
    :raises IndexError: If ``candidates`` is empty.
    """
    # Width is optional on the model, so unsized candidates cannot be ranked.
    sized = [candidate for candidate in candidates if candidate.width is not None]
    within_limit = [candidate for candidate in sized if candidate.width <= max_quality]

    if within_limit:
        return max(within_limit, key=lambda candidate: candidate.width).url
    if sized:
        # Everything is above the limit: the narrowest one is closest to it.
        return min(sized, key=lambda candidate: candidate.width).url

    return candidates[0].url
27 changes: 23 additions & 4 deletions bot/downloader/threads/types.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
import enum
import typing

import pydantic


@enum.unique
class MediaType(enum.IntEnum):
    """
    Numeric ``media_type`` codes of a Threads post.
    """

    IMAGE = 1
    VIDEO = 2
    # A post holding several media items (see ``carousel_media``).
    CAROUSEL = 8
    # NOTE(review): presumably a post without attached media - confirm against the API.
    COMMENT = 19


class BaseModel(pydantic.BaseModel):
model_config = pydantic.ConfigDict(extra='ignore')

Expand All @@ -17,9 +25,9 @@ class User(BaseModel):


class Candidate(BaseModel):
    """
    A single rendition of an image, identified by its url and dimensions.
    """

    # Dimensions default to None so payloads that omit them still validate.
    height: typing.Optional[int] = None
    url: typing.Optional[str]
    width: typing.Optional[int] = None


class ImageVersions2(BaseModel):
Expand All @@ -35,6 +43,17 @@ class Caption(BaseModel):
text: typing.Optional[str]


class CarouselMedia(BaseModel):
    """
    One media item of a carousel post.

    Every field defaults to ``None`` so that an item missing some keys still validates.
    """

    image_versions2: typing.Optional[ImageVersions2] = None
    video_versions: typing.Optional[typing.List[VideoVersion]] = None
    accessibility_caption: typing.Optional[str] = None
    has_audio: typing.Optional[bool] = None
    original_height: typing.Optional[int] = None
    original_width: typing.Optional[int] = None
    pk: typing.Optional[str] = None
    id: typing.Optional[str] = None


class Post(BaseModel):
user: typing.Optional[User]
accessibility_caption: typing.Optional[str]
Expand All @@ -43,10 +62,10 @@ class Post(BaseModel):
original_height: typing.Optional[int]
code: typing.Optional[str]
video_versions: typing.Optional[typing.List[VideoVersion]]
carousel_media: typing.Optional[str]
carousel_media: typing.Optional[typing.List[CarouselMedia]]
pk: typing.Optional[str]
id: typing.Optional[str]
media_type: typing.Optional[int]
media_type: typing.Optional[typing.Union[MediaType, int]]
has_audio: typing.Optional[bool]
audio: typing.Optional[str]
taken_at: typing.Optional[int]
Expand Down
Loading

0 comments on commit 2689f9e

Please sign in to comment.