From d5f95494ed9857242736c81043eed8d1cd54012d Mon Sep 17 00:00:00 2001 From: Aaron Tan <70739609+aaron-tan@users.noreply.github.com> Date: Thu, 17 Aug 2023 13:49:07 +1000 Subject: [PATCH 1/4] [caffeine.tv] Add new extractor Add CaffeineIE info extractor to support site caffeine.tv --- youtube_dl/extractor/caffeine.py | 43 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 44 insertions(+) create mode 100644 youtube_dl/extractor/caffeine.py diff --git a/youtube_dl/extractor/caffeine.py b/youtube_dl/extractor/caffeine.py new file mode 100644 index 00000000000..cd7b62c9e3a --- /dev/null +++ b/youtube_dl/extractor/caffeine.py @@ -0,0 +1,43 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, +) + +import re + + +class CaffeineIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/.+/video/(?P<video_id>[0-9a-f-]+)' + _TEST = { + 'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e', + 'info_dict': { + 'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e', + 'ext': 'mp4', + 'title': 'GOOOOD MORNINNNNN #highlights', + 'uploader': 'TsuSurf', + 'duration': 3145, + } + } + + def _real_extract(self, url): + video_id = re.match(self._VALID_URL, url).group('video_id') + json_data = self._download_json('https://api.caffeine.tv/social/public/activity/' + video_id, video_id) + broadcast_info = json_data['broadcast_info'] + title = broadcast_info['broadcast_title'] + video_url = broadcast_info['video_url'] + + formats = self._extract_m3u8_formats( + video_url, video_id, 'mp4') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'uploader': json_data['username'], + 'duration': int_or_none(broadcast_info['content_duration']), + 'like_count': int_or_none(json_data['like_count']), + 'formats': formats, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 
cb39876c279..71993596c6e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -159,6 +159,7 @@ from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE +from .caffeine import CaffeineIE from .callin import CallinIE from .camdemy import ( CamdemyIE, From 2638cab322d2baf984c9dca0dc29fcf9368cc51e Mon Sep 17 00:00:00 2001 From: Aaron Tan <70739609+aaron-tan@users.noreply.github.com> Date: Thu, 17 Aug 2023 14:04:58 +1000 Subject: [PATCH 2/4] [caffeine.tv] Refactor info extractor Refactor code to better reflect coding conventions --- youtube_dl/extractor/caffeine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/caffeine.py b/youtube_dl/extractor/caffeine.py index cd7b62c9e3a..ea773baf3d0 100644 --- a/youtube_dl/extractor/caffeine.py +++ b/youtube_dl/extractor/caffeine.py @@ -25,7 +25,7 @@ class CaffeineIE(InfoExtractor): def _real_extract(self, url): video_id = re.match(self._VALID_URL, url).group('video_id') json_data = self._download_json('https://api.caffeine.tv/social/public/activity/' + video_id, video_id) - broadcast_info = json_data['broadcast_info'] + broadcast_info = json_data.get('broadcast_info') title = broadcast_info['broadcast_title'] video_url = broadcast_info['video_url'] From 8d091a0dea5fa7ac30afb3f7bbcd7a571eb9ec27 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 8 Feb 2024 23:55:03 +0000 Subject: [PATCH 3/4] Update youtube_dl/extractor/caffeine.py --- youtube_dl/extractor/caffeine.py | 70 ++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/caffeine.py b/youtube_dl/extractor/caffeine.py index ea773baf3d0..bffedb9a736 100644 --- a/youtube_dl/extractor/caffeine.py +++ b/youtube_dl/extractor/caffeine.py @@ -3,41 +3,77 @@ from .common import InfoExtractor from ..utils import ( + determine_ext, int_or_none, + merge_dicts, + parse_iso8601, + T, + traverse_obj, + txt_or_none, + urljoin, ) 
-import re - -class CaffeineIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/.+/video/(?P<video_id>[0-9a-f-]+)' - _TEST = { +class CaffeineTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/]+/video/(?P<id>[0-9a-f-]+)' + _TESTS = [{ 'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e', 'info_dict': { 'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e', 'ext': 'mp4', 'title': 'GOOOOD MORNINNNNN #highlights', + 'timestamp': 1654702180, + 'upload_date': '20220608', 'uploader': 'TsuSurf', 'duration': 3145, - } - } + 'age_limit': 17, + }, + 'params': { + 'format': 'bestvideo', + }, + }] def _real_extract(self, url): - video_id = re.match(self._VALID_URL, url).group('video_id') - json_data = self._download_json('https://api.caffeine.tv/social/public/activity/' + video_id, video_id) - broadcast_info = json_data.get('broadcast_info') + video_id = self._match_id(url) + json_data = self._download_json( + 'https://api.caffeine.tv/social/public/activity/' + video_id, + video_id) + broadcast_info = traverse_obj(json_data, ('broadcast_info', T(dict))) or {} title = broadcast_info['broadcast_title'] video_url = broadcast_info['video_url'] - formats = self._extract_m3u8_formats( - video_url, video_id, 'mp4') + ext = determine_ext(video_url) + if ext == 'm3u8': + formats = self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8', + fatal=False) + else: + formats = [{'url': video_url}] self._sort_formats(formats) - return { + return merge_dicts({ 'id': video_id, 'title': title, - 'uploader': json_data['username'], - 'duration': int_or_none(broadcast_info['content_duration']), - 'like_count': int_or_none(json_data['like_count']), 'formats': formats, - } + }, traverse_obj(json_data, { + 'uploader': ((None, 'user'), 'username'), + }, get_all=False), traverse_obj(json_data, { + 'like_count': ('like_count', T(int_or_none)), + 'view_count': ('view_count', T(int_or_none)), + 'comment_count': ('comment_count', 
T(int_or_none)), + 'tags': ('tags', Ellipsis, T(txt_or_none)), + 'is_live': 'is_live', + 'uploader': ('user', 'name'), + }), traverse_obj(broadcast_info, { + 'duration': ('content_duration', T(int_or_none)), + 'timestamp': ('broadcast_start_time', T(parse_iso8601)), + 'thumbnail': ('preview_image_path', T(lambda u: urljoin(url, u))), + 'age_limit': ('content_rating', T(lambda r: r and { + # assume Apple Store ratings [1] + # 1. https://en.wikipedia.org/wiki/Mobile_software_content_rating_system + 'FOUR_PLUS': 0, + 'NINE_PLUS': 9, + 'TWELVE_PLUS': 12, + 'SEVENTEEN_PLUS': 17, + }.get(r, 17))), + })) From a6754081fd639d1fee65b557a6e5e9d02938779a Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 8 Feb 2024 23:55:57 +0000 Subject: [PATCH 4/4] Update youtube_dl/extractor/extractors.py --- youtube_dl/extractor/extractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 71993596c6e..c2eb0a57c5b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -159,7 +159,7 @@ from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE -from .caffeine import CaffeineIE +from .caffeine import CaffeineTVIE from .callin import CallinIE from .camdemy import ( CamdemyIE,