diff --git a/devscripts/install_deps.py b/devscripts/install_deps.py index d292505458b7..9e914def46c1 100755 --- a/devscripts/install_deps.py +++ b/devscripts/install_deps.py @@ -48,7 +48,8 @@ def main(): def yield_deps(group): for dep in group: - if mobj := recursive_pattern.fullmatch(dep): + mobj = recursive_pattern.fullmatch(dep) + if mobj: yield from optional_groups.get(mobj.group('group_name'), []) else: yield dep diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py index 8e199e7d0e8b..52261e375347 100644 --- a/devscripts/make_changelog.py +++ b/devscripts/make_changelog.py @@ -35,7 +35,7 @@ class CommitGroup(enum.Enum): MISC = 'Misc.' @classmethod - @lru_cache + @lru_cache() def subgroup_lookup(cls): return { name: group @@ -56,7 +56,7 @@ def subgroup_lookup(cls): } @classmethod - @lru_cache + @lru_cache() def group_lookup(cls): result = { 'fd': cls.DOWNLOADER, diff --git a/devscripts/tomlparse.py b/devscripts/tomlparse.py index ac9ea3170738..5102216f07ef 100755 --- a/devscripts/tomlparse.py +++ b/devscripts/tomlparse.py @@ -64,19 +64,22 @@ def get_target(root: dict, paths: list[str], is_list=False): def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern): index += 1 - if match := ws_re.match(data, index): + match = ws_re.match(data, index) + if match: index = match.end() while data[index] != end: index = yield True, index - if match := ws_re.match(data, index): + match = ws_re.match(data, index) + if match: index = match.end() if data[index] == ',': index += 1 - if match := ws_re.match(data, index): + match = ws_re.match(data, index) + if match: index = match.end() assert data[index] == end @@ -106,7 +109,8 @@ def parse_value(data: str, index: int): return index, result - if match := STRING_RE.match(data, index): + match = STRING_RE.match(data, index) + if match: return match.end(), json.loads(match[0]) if match[0][0] == '"' else match[0][1:-1] match = LEFTOVER_VALUE_RE.match(data, index) diff --git a/pyproject.toml 
b/pyproject.toml index 96cb368b6d91..3ef73a682caa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ maintainers = [ ] description = "A feature-rich command-line audio/video downloader" readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.7" keywords = [ "youtube-dl", "video-downloader", @@ -28,6 +28,7 @@ classifiers = [ "Environment :: Console", "Programming Language :: Python", "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -48,7 +49,7 @@ dependencies = [ "pycryptodomex", "requests>=2.31.0,<3", "urllib3>=1.26.17,<3", - "websockets>=12.0", + "websockets>=11.0.0", ] [project.optional-dependencies] diff --git a/setup.cfg b/setup.cfg index 340cc3b4d999..6e1233e18be7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,12 +16,15 @@ remove-unused-variables = true [tox:tox] skipsdist = true -envlist = py{38,39,310,311,312},pypy{38,39,310} +envlist = py{37,38,39,310,311,312},pypy{37,38,39,310} skip_missing_interpreters = true [testenv] # tox deps = pytest + requests + curl-cffi + websockets commands = pytest {posargs:"-m not download"} passenv = HOME # For test_compat_expanduser setenv = @@ -29,7 +32,7 @@ setenv = [isort] -py_version = 38 +py_version = 37 multi_line_output = VERTICAL_HANGING_INDENT line_length = 80 reverse_relative = true diff --git a/test/test_traversal.py b/test/test_traversal.py index 9b2a27b0807f..e8b9517e2c48 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -402,7 +402,7 @@ def test_traversal_morsel(self): 'secure': 'f', 'httponly': 'g', 'version': 'h', - 'samesite': 'i', + # 'samesite': 'i', } morsel = http.cookies.Morsel() morsel.set('item_key', 'item_value', 'coded_value') diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2c6f695d091f..7c25aabda935 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -718,7 +718,8 
@@ def check_deprecated(param, option, suggestion): for msg in self.params.get('_deprecation_warnings', []): self.deprecated_feature(msg) - if impersonate_target := self.params.get('impersonate'): + impersonate_target = self.params.get('impersonate') + if impersonate_target: if not self._impersonate_target_available(impersonate_target): raise YoutubeDLError( f'Impersonate target "{impersonate_target}" is not available. ' @@ -2680,10 +2681,14 @@ def _fill_common_fields(self, info_dict, final=True): if new_key in info_dict and old_key in info_dict: if '_version' not in info_dict: # HACK: Do not warn when using --load-info-json self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present') - elif old_value := info_dict.get(old_key): - info_dict[new_key] = old_value.split(', ') - elif new_value := info_dict.get(new_key): - info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value) + else: + old_value = info_dict.get(old_key) + if old_value: + info_dict[new_key] = old_value.split(', ') + else: + new_value = info_dict.get(new_key) + if new_value: + info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value) def _raise_pending_errors(self, info): err = info.pop('__pending_error', None) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 3d606bcba254..82a01fa919d4 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -1,8 +1,8 @@ import sys -if sys.version_info < (3, 8): +if sys.version_info < (3, 7): raise ImportError( - f'You are using an unsupported version of Python. Only Python versions 3.8 and above are supported by yt-dlp') # noqa: F541 + f'You are using an unsupported version of Python. 
Only Python versions 3.7 and above are supported by yt-dlp') # noqa: F541 __license__ = 'The Unlicense' diff --git a/yt_dlp/compat/functools.py b/yt_dlp/compat/functools.py index 36c983642df1..ec003ea90e07 100644 --- a/yt_dlp/compat/functools.py +++ b/yt_dlp/compat/functools.py @@ -10,3 +10,17 @@ cache # >= 3.9 except NameError: cache = lru_cache(maxsize=None) + +try: + cached_property # >= 3.8 +except NameError: + class cached_property: + def __init__(self, func): + update_wrapper(self, func) + self.func = func + + def __get__(self, instance, _): + if instance is None: + return self + setattr(instance, self.func.__name__, self.func(instance)) + return getattr(instance, self.func.__name__) diff --git a/yt_dlp/compat/shlex.py b/yt_dlp/compat/shlex.py new file mode 100644 index 000000000000..ddd18094d0e2 --- /dev/null +++ b/yt_dlp/compat/shlex.py @@ -0,0 +1,15 @@ +# flake8: noqa: F405 +from shlex import * # noqa: F403 + +from .compat_utils import passthrough_module + +passthrough_module(__name__, 'shlex') +del passthrough_module + + +try: + join +except NameError: + def join(split_command): + """Return a shell-escaped string from *split_command*.""" + return ' '.join(quote(arg) for arg in split_command) diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 65a0d6f23483..46429ddd6c7a 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -371,7 +371,9 @@ def with_fields(*tups, default=''): if s['status'] != 'downloading': return - if update_delta := self.params.get('progress_delta'): + update_delta = self.params.get('progress_delta') + + if update_delta: with self._progress_delta_lock: if time.monotonic() < self._progress_delta_time: return diff --git a/yt_dlp/extractor/asobistage.py b/yt_dlp/extractor/asobistage.py index 8fa8f3edb634..4ff6ee012b09 100644 --- a/yt_dlp/extractor/asobistage.py +++ b/yt_dlp/extractor/asobistage.py @@ -1,4 +1,4 @@ -import functools +from ..compat import functools from .common import 
InfoExtractor from ..utils import str_or_none, url_or_none diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index f6b58b361f87..d8dc11beed4d 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -1424,14 +1424,16 @@ def extract_all(pattern): 'model', 'blocks', is_type('media'), 'model', 'blocks', is_type('mediaMetadata'), 'model', {dict}, any)) - if model and (entry := parse_model(model)): - if not entry.get('timestamp'): - entry['timestamp'] = traverse_obj(next_data, ( - ..., 'contents', is_type('timestamp'), 'model', - 'timestamp', {functools.partial(int_or_none, scale=1000)}, any)) - entries.append(entry) - return self.playlist_result( - entries, playlist_id, playlist_title, playlist_description) + if model: + entry = parse_model(model) + if entry: + if not entry.get('timestamp'): + entry['timestamp'] = traverse_obj(next_data, ( + ..., 'contents', is_type('timestamp'), 'model', + 'timestamp', {functools.partial(int_or_none, scale=1000)}, any)) + entries.append(entry) + return self.playlist_result( + entries, playlist_id, playlist_title, playlist_description) # Multiple video article (e.g. 
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 1d2c443c0b75..0771cc3a3c10 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1,6 +1,5 @@ import base64 import collections -import functools import getpass import hashlib import http.client @@ -22,6 +21,7 @@ import urllib.request import xml.etree.ElementTree +from ..compat import functools # isort: split from ..compat import ( compat_etree_fromstring, compat_expanduser, diff --git a/yt_dlp/extractor/crtvg.py b/yt_dlp/extractor/crtvg.py index 21325e331dfc..76d380640028 100644 --- a/yt_dlp/extractor/crtvg.py +++ b/yt_dlp/extractor/crtvg.py @@ -39,7 +39,10 @@ def _real_extract(self, url): formats.extend(self._extract_mpd_formats(video_url + '/manifest.mpd', video_id, fatal=False)) old_video_id = None - if mobj := re.fullmatch(r'[^/#?]+-(?P<old_id>\d{7})', video_id): + + mobj = re.fullmatch(r'[^/#?]+-(?P<old_id>\d{7})', video_id) + + if mobj: old_video_id = [make_archive_id(self, mobj.group('old_id'))] return { diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index ea54f019511c..eb40174962fc 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -62,7 +62,8 @@ def _request_token(self, headers, data, note='Requesting token', errnote='Failed except ExtractorError as error: if not isinstance(error.cause, HTTPError) or error.cause.status != 403: raise - if target := error.cause.response.extensions.get('impersonate'): + target = error.cause.response.extensions.get('impersonate') + if target: raise ExtractorError(f'Got HTTP Error 403 when using impersonate target "{target}"') raise ExtractorError( 'Request blocked by Cloudflare. 
' @@ -234,7 +235,8 @@ def _extract_stream(self, identifier, display_id=None): # Invalidate stream token to avoid rate-limit error_msg = 'Unable to invalidate stream token; you may experience rate-limiting' - if stream_token := stream_response.get('token'): + stream_token = stream_response.get('token') + if stream_token: self._request_webpage(Request( f'https://cr-play-service.prd.crunchyrollsvc.com/v1/token/{identifier}/{stream_token}/inactive', headers=headers, method='PATCH'), display_id, 'Invalidating stream token', error_msg, fatal=False) diff --git a/yt_dlp/extractor/dangalplay.py b/yt_dlp/extractor/dangalplay.py index 50e4136b578a..d194bb2e3a23 100644 --- a/yt_dlp/extractor/dangalplay.py +++ b/yt_dlp/extractor/dangalplay.py @@ -132,8 +132,10 @@ def _real_extract(self, url): if error_info.get('code') == '1016': self.raise_login_required( f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None) - elif msg := error_info.get('message'): - raise ExtractorError(msg) + else: + msg = error_info.get('message') + if msg: + raise ExtractorError(msg) raise m3u8_url = traverse_obj(details, ( diff --git a/yt_dlp/extractor/elementorembed.py b/yt_dlp/extractor/elementorembed.py index 638893f6f6ed..0e0b5650af60 100644 --- a/yt_dlp/extractor/elementorembed.py +++ b/yt_dlp/extractor/elementorembed.py @@ -56,13 +56,16 @@ class ElementorEmbedIE(InfoExtractor): def _extract_from_webpage(self, url, webpage): for data_settings in re.findall(self._WIDGET_REGEX, webpage): data = self._parse_json(data_settings, None, fatal=False, transform_source=unescapeHTML) - if youtube_url := traverse_obj(data, ('youtube_url', {url_or_none})): + youtube_url = traverse_obj(data, ('youtube_url', {url_or_none})) + if youtube_url: yield self.url_result(youtube_url, ie=YoutubeIE) for video in traverse_obj(data, ('tabs', lambda _, v: v['_id'], {dict})): - if youtube_url := traverse_obj(video, ('youtube_url', {url_or_none})): + youtube_url = traverse_obj(video, ('youtube_url', 
{url_or_none})) + if youtube_url: yield self.url_result(youtube_url, ie=YoutubeIE) - if vimeo_url := traverse_obj(video, ('vimeo_url', {url_or_none})): + vimeo_url = traverse_obj(video, ('vimeo_url', {url_or_none})) + if vimeo_url: yield self.url_result(vimeo_url, ie=VimeoIE) for direct_url in traverse_obj(video, (('hosted_url', 'external_url'), 'url', {url_or_none})): yield { diff --git a/yt_dlp/extractor/err.py b/yt_dlp/extractor/err.py index abd00f2d5843..29d1e85441e5 100644 --- a/yt_dlp/extractor/err.py +++ b/yt_dlp/extractor/err.py @@ -194,7 +194,9 @@ def _real_extract(self, url): format_url, video_id, mpd_id='dash', fatal=False) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) - if format_url := traverse_obj(media_data, ('src', 'file', {url_or_none})): + + format_url = traverse_obj(media_data, ('src', 'file', {url_or_none})) + if format_url: formats.append({ 'url': format_url, 'format_id': 'http', diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index 7b8f7dd0404f..21d4b1ea823f 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -119,7 +119,8 @@ def _extract_video(self, video_id, hostname=None): video_url = video['url'] format_id = video.get('format') - if token_url := url_or_none(video.get('token')): + token_url = url_or_none(video.get('token')) + if token_url: tokenized_url = traverse_obj(self._download_json( token_url, video_id, f'Downloading signed {format_id} manifest URL', fatal=False, query={ @@ -138,7 +139,8 @@ def _extract_video(self, video_id, hostname=None): fmts, subs = self._extract_m3u8_formats_and_subtitles( video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None): - if mobj := re.match(rf'{format_id}-[Aa]udio-\w+-(?P<bitrate>\d+)', f['format_id']): + mobj = re.match(rf'{format_id}-[Aa]udio-\w+-(?P<bitrate>\d+)', f['format_id']) + if mobj: f.update({ 'tbr': int_or_none(mobj.group('bitrate')), 'acodec': 
'mp4a', diff --git a/yt_dlp/extractor/gbnews.py b/yt_dlp/extractor/gbnews.py index bb1554eea429..4ea7d2e8b73b 100644 --- a/yt_dlp/extractor/gbnews.py +++ b/yt_dlp/extractor/gbnews.py @@ -49,7 +49,7 @@ class GBNewsIE(InfoExtractor): 'params': {'skip_download': 'm3u8'}, }] - @functools.lru_cache + @functools.lru_cache() def _get_ss_endpoint(self, data_id, data_env): if not data_id: data_id = 'GB003' diff --git a/yt_dlp/extractor/godresource.py b/yt_dlp/extractor/godresource.py index 276a6c7fe918..318713c8af71 100644 --- a/yt_dlp/extractor/godresource.py +++ b/yt_dlp/extractor/godresource.py @@ -50,7 +50,8 @@ def _real_extract(self, url): video_url = api_data['streamUrl'] is_live = api_data.get('isLive') or False - if (ext := determine_ext(video_url)) == 'm3u8': + ext = determine_ext(video_url) + if ext == 'm3u8': formats, subtitles = self._extract_m3u8_formats_and_subtitles( video_url, display_id, live=is_live) elif ext == 'mp4': diff --git a/yt_dlp/extractor/jiosaavn.py b/yt_dlp/extractor/jiosaavn.py index 35fb3fd6b121..f94e0c5f8d41 100644 --- a/yt_dlp/extractor/jiosaavn.py +++ b/yt_dlp/extractor/jiosaavn.py @@ -1,7 +1,8 @@ -import functools import math import re +from ..compat import functools + from .common import InfoExtractor from ..utils import ( InAdvancePagedList, @@ -24,7 +25,8 @@ class JioSaavnBaseIE(InfoExtractor): @functools.cached_property def requested_bitrates(self): requested_bitrates = self._configuration_arg('bitrate', ['128', '320'], ie_key='JioSaavn') - if invalid_bitrates := set(requested_bitrates) - self._VALID_BITRATES: + invalid_bitrates = set(requested_bitrates) - self._VALID_BITRATES + if invalid_bitrates: raise ValueError( f'Invalid bitrate(s): {", ".join(invalid_bitrates)}. 
' + f'Valid bitrates are: {", ".join(sorted(self._VALID_BITRATES, key=int))}') @@ -65,7 +67,8 @@ def _extract_song(self, song_data, url=None): 'artists': ('primary_artists', {lambda x: x.split(', ') if x else None}), 'webpage_url': ('perma_url', {url_or_none}), }) - if webpage_url := info.get('webpage_url') or url: + webpage_url = info.get('webpage_url') or url + if webpage_url: info['display_id'] = url_basename(webpage_url) info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])] diff --git a/yt_dlp/extractor/maariv.py b/yt_dlp/extractor/maariv.py index 425a8b3b4a6d..89ddfc61446f 100644 --- a/yt_dlp/extractor/maariv.py +++ b/yt_dlp/extractor/maariv.py @@ -41,7 +41,9 @@ def _real_extract(self, url): f'https://dal.walla.co.il/media/{video_id}?origin=player.maariv.co.il', video_id)['data'] formats = [] - if hls_url := traverse_obj(data, ('video', 'url', {url_or_none})): + + hls_url = traverse_obj(data, ('video', 'url', {url_or_none})) + if hls_url: formats.extend(self._extract_m3u8_formats(hls_url, video_id, m3u8_id='hls', fatal=False)) for http_format in traverse_obj(data, ('video', 'stream_urls', ..., 'stream_url', {url_or_none})): diff --git a/yt_dlp/extractor/newgrounds.py b/yt_dlp/extractor/newgrounds.py index 67e52efd6573..b1789558d65a 100644 --- a/yt_dlp/extractor/newgrounds.py +++ b/yt_dlp/extractor/newgrounds.py @@ -138,7 +138,8 @@ def _perform_login(self, username, password): 'username': username, 'password': password, })) - if errors := traverse_obj(result, ('errors', ..., {str})): + errors = traverse_obj(result, ('errors', ..., {str})) + if errors: raise ExtractorError(', '.join(errors) or 'Unknown Error', expected=True) def _real_extract(self, url): diff --git a/yt_dlp/extractor/nfb.py b/yt_dlp/extractor/nfb.py index 968c9728b087..1c1f825318fd 100644 --- a/yt_dlp/extractor/nfb.py +++ b/yt_dlp/extractor/nfb.py @@ -15,7 +15,8 @@ class NFBBaseIE(InfoExtractor): def _extract_ep_data(self, webpage, video_id, fatal=False): return 
self._search_json( - r'episodesData\s*:', webpage, 'episode data', video_id, fatal=fatal) or {} + r'const\s+episodesData\s*=', webpage, 'episode data', video_id, + contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or [] def _extract_ep_info(self, data, video_id, slug=None): info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], { @@ -229,7 +230,8 @@ def _real_extract(self, url): formats, subtitles = self._extract_m3u8_formats_and_subtitles( player_data['source'], video_id, 'mp4', m3u8_id='hls') - if dv_source := url_or_none(player_data.get('dvSource')): + dv_source = url_or_none(player_data.get('dvSource')) + if dv_source: fmts, subs = self._extract_m3u8_formats_and_subtitles( dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False) for fmt in fmts: diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py index 8bb017a73210..9260edfb1210 100644 --- a/yt_dlp/extractor/nhk.py +++ b/yt_dlp/extractor/nhk.py @@ -417,7 +417,8 @@ def suitable(cls, url): def _extract_meta_from_class_elements(self, class_values, html): for class_value in class_values: - if value := clean_html(get_element_by_class(class_value, html)): + value = clean_html(get_element_by_class(class_value, html)) + if value: return value def _real_extract(self, url): @@ -427,7 +428,8 @@ def _real_extract(self, url): def entries(): for episode in episodes: - if episode_path := episode.get('url'): + episode_path = episode.get('url') + if episode_path: yield self._extract_episode_info(urljoin(url, episode_path), episode) html = self._download_webpage(url, program_id) diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index b04ce96154b0..35ab34eb8345 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -401,7 +401,8 @@ def _yield_dmc_formats(self, api_data, video_id): return for audio_quality, video_quality, protocol in itertools.product(audios, videos, protocols): - if fmt := 
self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol): + fmt = self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol) + if fmt: yield fmt def _yield_dms_formats(self, api_data, video_id): diff --git a/yt_dlp/extractor/nuum.py b/yt_dlp/extractor/nuum.py index 3db663ded0dd..9e3292a5e79b 100644 --- a/yt_dlp/extractor/nuum.py +++ b/yt_dlp/extractor/nuum.py @@ -20,7 +20,8 @@ def _call_api(self, path, video_id, description, query={}): f'https://nuum.ru/api/v2/{path}', video_id, query=query, note=f'Downloading {description} metadata', errnote=f'Unable to download {description} metadata') - if error := response.get('error'): + error = response.get('error') + if error: raise ExtractorError(f'API returned error: {error!r}') return response['result'] diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 6c441ff34cda..676c48b3d63b 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -294,7 +294,8 @@ def _real_extract(self, url): })) elif include_type == 'post_tag': - if post_tag := traverse_obj(include, ('attributes', 'value', {str})): + post_tag = traverse_obj(include, ('attributes', 'value', {str})) + if post_tag: info.setdefault('tags', []).append(post_tag) elif include_type == 'campaign': diff --git a/yt_dlp/extractor/sharepoint.py b/yt_dlp/extractor/sharepoint.py index d4d5af04f086..0c2f57b67b43 100644 --- a/yt_dlp/extractor/sharepoint.py +++ b/yt_dlp/extractor/sharepoint.py @@ -90,7 +90,8 @@ def _real_extract(self, url): base_media_url, video_id, 'mp4', m3u8_id=hls_type, query={'format': hls_type}, fatal=False, quality=-2)) - if video_url := traverse_obj(video_data, ('downloadUrl', {url_or_none})): + video_url = traverse_obj(video_data, ('downloadUrl', {url_or_none})) + if video_url: formats.append({ 'url': video_url, 'ext': determine_ext(video_data.get('extension') or video_data.get('name')), diff --git a/yt_dlp/extractor/soundcloud.py 
b/yt_dlp/extractor/soundcloud.py index 358146171f12..fb41c527df0b 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -1,10 +1,9 @@ -import functools import itertools import json import re from .common import InfoExtractor, SearchInfoExtractor -from ..compat import compat_str +from ..compat import compat_str, functools from ..networking import HEADRequest from ..networking.exceptions import HTTPError from ..utils import ( @@ -133,7 +132,8 @@ def _verify_oauth_token(self, token): def _real_initialize(self): if self._HEADERS: return - if token := try_call(lambda: self._get_cookies(self._BASE_URL)['oauth_token'].value): + token = try_call(lambda: self._get_cookies(self._BASE_URL)['oauth_token'].value) + if token: self._verify_oauth_token(token) def _perform_login(self, username, password): @@ -296,7 +296,8 @@ def add_format(f, protocol, is_preview=False): protocol = 'hls-aes' ext = None - if preset := traverse_obj(t, ('preset', {str_or_none})): + preset = traverse_obj(t, ('preset', {str_or_none})) + if preset: ext = preset.split('_')[0] if ext not in KNOWN_EXTENSIONS: ext = mimetype2ext(traverse_obj(t, ('format', 'mime_type', {str}))) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 7bcfdedbeae7..e5529862f25d 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -52,21 +52,7 @@ class TikTokBaseIE(InfoExtractor): _APP_INFO = None _APP_USER_AGENT = None - @functools.cached_property - def _KNOWN_APP_INFO(self): - # If we have a genuine device ID, we may not need any IID - default = [''] if self._KNOWN_DEVICE_ID else [] - return self._configuration_arg('app_info', default, ie_key=TikTokIE) - - @functools.cached_property - def _KNOWN_DEVICE_ID(self): - return self._configuration_arg('device_id', [None], ie_key=TikTokIE)[0] - - @functools.cached_property - def _DEVICE_ID(self): - return self._KNOWN_DEVICE_ID or str(random.randint(7250000000000000000, 7351147085025500000)) - - 
@functools.cached_property + @property def _API_HOSTNAME(self): return self._configuration_arg( 'api_hostname', ['api16-normal-c-useast1a.tiktokv.com'], ie_key=TikTokIE)[0] @@ -215,24 +201,27 @@ def _extract_aweme_app(self, aweme_id): def _extract_web_data_and_status(self, url, video_id, fatal=True): webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'}, fatal=fatal) or '' video_data, status = {}, None - - if universal_data := self._get_universal_data(webpage, video_id): + universal_data = self._get_universal_data(webpage, video_id) + if universal_data: self.write_debug('Found universal data for rehydration') status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0 video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict})) - elif sigi_data := self._get_sigi_state(webpage, video_id): - self.write_debug('Found sigi state data') - status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0 - video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict})) - - elif next_data := self._search_nextjs_data(webpage, video_id, default={}): - self.write_debug('Found next.js data') - status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0 - video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict})) + else: + sigi_data = self._get_sigi_state(webpage, video_id) + if sigi_data: + self.write_debug('Found sigi state data') + status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0 + video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict})) - elif fatal: - raise ExtractorError('Unable to extract webpage video data') + else: + next_data = self._search_nextjs_data(webpage, video_id, default={}) + if next_data: + self.write_debug('Found next.js data') + status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0 + video_data = 
traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict})) + elif fatal: + raise ExtractorError('Unable to extract webpage video data') return video_data, status @@ -472,7 +461,8 @@ def _extract_web_formats(self, aweme_detail): 'filesize': traverse_obj(bitrate_info, ('PlayAddr', 'DataSize', {int_or_none})), }) - if dimension := (res and int(res[:-1])): + dimension = (res and int(res[:-1])) + if dimension: if dimension == 540: # '540p' is actually 576p dimension = 576 if ratio < 1: # portrait: res/dimension is width @@ -860,7 +850,29 @@ def _real_extract(self, url): self.report_warning(f'{e}; trying with webpage') url = self._create_url(user_id, video_id) - video_data, status = self._extract_web_data_and_status(url, video_id) + webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'}) + + universal_data = self._get_universal_data(webpage, video_id) + if universal_data: + next_data = sigi_data = NotImplemented + self.write_debug('Found universal data for rehydration') + status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0 + video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict})) + + else: + sigi_data = self._get_sigi_state(webpage, video_id) + if sigi_data: + self.write_debug('Found sigi state data') + status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0 + video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict})) + else: + next_data = self._search_nextjs_data(webpage, video_id, default='{}') + if next_data: + self.write_debug('Found next.js data') + status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0 + video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict})) + else: + raise ExtractorError('Unable to extract webpage video data') if video_data and status == 0: return self._parse_aweme_video_web(video_data, url, video_id) 
@@ -980,7 +992,8 @@ def _get_sec_uid(self, user_url, user_name, msg): def _real_extract(self, url): user_name, sec_uid = self._match_id(url), None - if mobj := re.fullmatch(r'MS4wLjABAAAA[\w-]{64}', user_name): + mobj = re.fullmatch(r'MS4wLjABAAAA[\w-]{64}', user_name) + if mobj: user_name, sec_uid = None, mobj.group(0) else: sec_uid = (self._get_sec_uid(self._UPLOADER_URL_FORMAT % user_name, user_name, 'user') diff --git a/yt_dlp/extractor/trtworld.py b/yt_dlp/extractor/trtworld.py index dbb72a4fe777..c0d551e98f77 100644 --- a/yt_dlp/extractor/trtworld.py +++ b/yt_dlp/extractor/trtworld.py @@ -85,7 +85,8 @@ def _real_extract(self, url): 'url': media_url, }) if not formats: - if youtube_id := traverse_obj(nuxtjs_data, ('youtube', 'metadata', 'youtubeId')): + youtube_id = traverse_obj(nuxtjs_data, ('youtube', 'metadata', 'youtubeId')) + if youtube_id: return self.url_result(youtube_id, 'Youtube') raise ExtractorError('No video found', expected=True) diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index 1a11162a0b13..f11667a51078 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -1,4 +1,3 @@ -import functools import json import random import re @@ -9,6 +8,7 @@ compat_parse_qs, compat_urllib_parse_unquote, compat_urllib_parse_urlparse, + functools, ) from ..networking.exceptions import HTTPError from ..utils import ( @@ -104,7 +104,8 @@ def _extract_variant_formats(self, variant, video_id): variant_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None): - if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']): + mobj = re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']) + if mobj: f['tbr'] = int_or_none(mobj.group('bitrate'), 1000) return fmts, subs else: diff --git a/yt_dlp/extractor/vvvvid.py b/yt_dlp/extractor/vvvvid.py index b96112360425..5b2dfca401c1 100644 --- a/yt_dlp/extractor/vvvvid.py +++ 
b/yt_dlp/extractor/vvvvid.py @@ -1,4 +1,4 @@ -import functools +from ..compat import functools import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py index 0e047aa16191..69904a48e33f 100644 --- a/yt_dlp/extractor/youporn.py +++ b/yt_dlp/extractor/youporn.py @@ -247,7 +247,8 @@ def _entries(self, url, pl_id, html=None, page_num=None): if not html: return for element in get_elements_html_by_class('video-title', html): - if video_url := traverse_obj(element, ({extract_attributes}, 'href', {lambda x: urljoin(url, x)})): + video_url = traverse_obj(element, ({extract_attributes}, 'href', {lambda x: urljoin(url, x)})) + if video_url: yield self.url_result(video_url) if page_num is not None: @@ -481,8 +482,8 @@ def _real_extract(self, url):
<div\s[^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?pornstar-info-wrapper(?:\s+[\w$-]+|\s)*\1[^>]*> (?P<info>[\s\S]+?)(?:
</div>\s*){6,} ''' - - if infos := self._search_regex(INFO_ELEMENT_RE, html, 'infos', group='info', default=''): + infos = self._search_regex(INFO_ELEMENT_RE, html, 'infos', group='info', default='') + if infos: infos = re.sub( r'(?:\s*nl=nl)+\s*', ' ', re.sub(r'(?u)\s+', ' ', clean_html(re.sub('\n', 'nl=nl', infos)))).replace('ribe Subsc', '') diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 54da4e3622b6..1b6a301b8dcb 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -11,7 +11,6 @@ import os.path import random import re -import shlex import sys import threading import time @@ -20,7 +19,7 @@ from .common import InfoExtractor, SearchInfoExtractor from .openload import PhantomJSwrapper -from ..compat import functools +from ..compat import functools, shlex from ..jsinterp import JSInterpreter from ..networking.exceptions import HTTPError, network_exceptions from ..utils import ( @@ -3316,7 +3315,8 @@ def _extract_heatmap(self, data): def _extract_comment(self, entities, parent=None): comment_entity_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict})) - if not (comment_id := traverse_obj(comment_entity_payload, ('properties', 'commentId', {str}))): + comment_id = traverse_obj(comment_entity_payload, ('properties', 'commentId', {str})) + if not comment_id: return toolbar_entity_payload = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict})) @@ -3704,7 +3704,8 @@ def _get_requested_clients(self, url, smuggled_data): def _invalid_player_response(self, pr, video_id): # YouTube may return a different video player response than expected. 
# See: https://github.com/TeamNewPipe/NewPipe/issues/8713 - if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id: + pr_id = traverse_obj(pr, ('videoDetails', 'videoId')) + if pr_id != video_id: return pr_id def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data): @@ -3760,7 +3761,8 @@ def append_client(*client_names): self.report_warning(e) continue - if pr_id := self._invalid_player_response(pr, video_id): + pr_id = self._invalid_player_response(pr, video_id) + if pr_id: skipped_clients[client] = pr_id elif pr: # Save client name for introspection later @@ -4457,10 +4459,14 @@ def process_language(container, base_url, lang_code, sub_name, query): release_date = release_date.replace('-', '') if not release_year: release_year = release_date[:4] + artists = mobj.group('clean_artist') + if artists: + artists = [artists] + else: + artists = [a.strip() for a in mobj.group('artist').split('·')] info.update({ 'album': mobj.group('album'.strip()), - 'artists': ([a] if (a := mobj.group('clean_artist')) - else [a.strip() for a in mobj.group('artist').split('·')]), + 'artists': artists, 'track': mobj.group('track').strip(), 'release_date': release_date, 'release_year': int_or_none(release_year), diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py index 6397a2c0ca92..66b279a1d1df 100644 --- a/yt_dlp/networking/_requests.py +++ b/yt_dlp/networking/_requests.py @@ -195,7 +195,8 @@ def get_connection_with_tls_context(self, request, verify, proxies=None, cert=No url = urllib3.util.parse_url(request.url).url manager = self.poolmanager - if proxy := select_proxy(url, proxies): + proxy = select_proxy(url, proxies) + if proxy: manager = self.proxy_manager_for(proxy) return manager.connection_from_url(url) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 997b575cd46a..906926115ccb 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -3,7 +3,7 @@ import optparse import os.path import re -import 
shlex +from .compat import shlex import shutil import string import sys diff --git a/yt_dlp/postprocessor/sponskrub.py b/yt_dlp/postprocessor/sponskrub.py index ff50d5b4fda7..f8a4e075f688 100644 --- a/yt_dlp/postprocessor/sponskrub.py +++ b/yt_dlp/postprocessor/sponskrub.py @@ -1,5 +1,5 @@ import os -import shlex +from ..compat import shlex import subprocess from .common import PostProcessor diff --git a/yt_dlp/update.py b/yt_dlp/update.py index ca70f69a7e9e..44c7bb6613b9 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -71,7 +71,8 @@ def _get_variant_and_executable_path(): machine = '_x86' if platform.architecture()[0][:2] == '32' else '' # sys.executable returns a /tmp/ path for staticx builds (linux_static) # Ref: https://staticx.readthedocs.io/en/latest/usage.html#run-time-information - if static_exe_path := os.getenv('STATICX_PROG_PATH'): + static_exe_path = os.getenv('STATICX_PROG_PATH') + if static_exe_path: path = static_exe_path return f'{remove_end(sys.platform, "32")}{machine}_exe', path @@ -135,7 +136,7 @@ def _get_binary_name(): def _get_system_deprecation(): - MIN_SUPPORTED, MIN_RECOMMENDED = (3, 8), (3, 8) + MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7) if sys.version_info > MIN_RECOMMENDED: return None diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 42803bb6dfec..e1aaa9c79f4e 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -26,7 +26,6 @@ import platform import random import re -import shlex import socket import ssl import struct @@ -45,6 +44,7 @@ from . import traversal from ..compat import functools # isort: split +from ..compat import shlex # isort: split from ..compat import ( compat_etree_fromstring, compat_expanduser,