From 38ea00ec8f0b82d6b83d002af63a1ccdb1b6c112 Mon Sep 17 00:00:00 2001 From: itdominator <1itdominator@gmail.com> Date: Tue, 26 May 2026 20:50:58 -0500 Subject: [PATCH] Upgraded youtube_download plugin --- plugins/youtube_download/download.sh | 27 +- plugins/youtube_download/plugin.py | 2 +- plugins/youtube_download/yt_dlp/YoutubeDL.py | 24 +- plugins/youtube_download/yt_dlp/cookies.py | 32 +- .../yt_dlp/downloader/__init__.py | 2 + .../yt_dlp/downloader/soop.py | 61 +++ .../yt_dlp/extractor/_extractors.py | 88 +-- .../yt_dlp/extractor/aenetworks.py | 79 ++- .../yt_dlp/extractor/afreecatv.py | 194 ++++--- .../yt_dlp/extractor/applepodcasts.py | 30 +- .../youtube_download/yt_dlp/extractor/ard.py | 10 +- .../yt_dlp/extractor/bandcamp.py | 18 +- .../yt_dlp/extractor/bilibili.py | 2 +- .../yt_dlp/extractor/boosty.py | 61 ++- .../youtube_download/yt_dlp/extractor/cbc.py | 216 ++++++-- .../youtube_download/yt_dlp/extractor/ccc.py | 22 +- .../yt_dlp/extractor/common.py | 16 +- .../yt_dlp/extractor/dailymotion.py | 59 +- .../youtube_download/yt_dlp/extractor/err.py | 68 +++ .../yt_dlp/extractor/extractors.py | 20 +- .../yt_dlp/extractor/facebook.py | 82 ++- .../yt_dlp/extractor/firsttv.py | 7 + .../yt_dlp/extractor/floatplane.py | 74 +-- .../yt_dlp/extractor/francetv.py | 26 +- .../yt_dlp/extractor/frontro.py | 63 --- .../yt_dlp/extractor/getcourseru.py | 2 +- .../youtube_download/yt_dlp/extractor/lbry.py | 1 + .../yt_dlp/extractor/learningonscreen.py | 2 +- .../yt_dlp/extractor/locipo.py | 209 +++++++ .../yt_dlp/extractor/matchitv.py | 38 ++ .../yt_dlp/extractor/mixcloud.py | 2 +- .../yt_dlp/extractor/neteasemusic.py | 39 +- .../yt_dlp/extractor/odnoklassniki.py | 6 +- .../yt_dlp/extractor/opencast.py | 13 +- .../yt_dlp/extractor/patreon.py | 181 +++++-- .../youtube_download/yt_dlp/extractor/pbs.py | 18 + .../yt_dlp/extractor/peertube.py | 2 +- .../yt_dlp/extractor/pornhub.py | 5 +- .../youtube_download/yt_dlp/extractor/rtp.py | 202 +++++-- .../yt_dlp/extractor/rumble.py | 2 +- .../yt_dlp/extractor/sauceplus.py | 18 +- .../youtube_download/yt_dlp/extractor/sbs.py | 4 +- .../yt_dlp/extractor/soundcloud.py | 55 +- .../yt_dlp/extractor/spankbang.py | 16 +- .../yt_dlp/extractor/steam.py | 75 +-- .../yt_dlp/extractor/streaks.py | 6 +- .../yt_dlp/extractor/tarangplus.py | 3 +- .../yt_dlp/extractor/teachable.py | 4 +- .../yt_dlp/extractor/tele5.py | 63 ++- .../yt_dlp/extractor/thechosen.py | 118 ++++ .../yt_dlp/extractor/tiktok.py | 122 ++++- .../yt_dlp/extractor/truth.py | 3 +- .../youtube_download/yt_dlp/extractor/tver.py | 113 ++++ .../youtube_download/yt_dlp/extractor/tvo.py | 152 ++++++ .../yt_dlp/extractor/twitch.py | 6 +- .../yt_dlp/extractor/twitter.py | 16 +- .../yt_dlp/extractor/unsupported.py | 26 + .../yt_dlp/extractor/vimeo.py | 17 +- .../yt_dlp/extractor/visir.py | 116 ++++ .../youtube_download/yt_dlp/extractor/vk.py | 38 +- .../yt_dlp/extractor/volejtv.py | 171 +++++- .../youtube_download/yt_dlp/extractor/wat.py | 2 +- .../youtube_download/yt_dlp/extractor/whyp.py | 36 +- .../yt_dlp/extractor/xhamster.py | 102 ++-- .../yt_dlp/extractor/youtube/__init__.py | 3 +- .../yt_dlp/extractor/youtube/_base.py | 86 +-- .../yt_dlp/extractor/youtube/_search.py | 15 - .../yt_dlp/extractor/youtube/_tab.py | 4 +- .../yt_dlp/extractor/youtube/_video.py | 494 ++++++++++------- .../extractor/youtube/jsc/_builtin/bun.py | 17 +- .../extractor/youtube/jsc/_builtin/quickjs.py | 16 +- .../youtube/jsc/_builtin/vendor/_info.py | 8 +- .../jsc/_builtin/vendor/yt.solver.core.js | 508 ++++++------------ .../yt_dlp/extractor/youtube/pot/_director.py | 11 +- .../yt_dlp/extractor/youtube/pot/_provider.py | 2 +- .../yt_dlp/extractor/zapiks.py | 237 +++++--- plugins/youtube_download/yt_dlp/jsinterp.py | 16 +- .../yt_dlp/networking/_curlcffi.py | 55 +- plugins/youtube_download/yt_dlp/options.py | 11 +- plugins/youtube_download/yt_dlp/update.py | 2 +- .../yt_dlp/utils/_jsruntime.py | 18 +- .../youtube_download/yt_dlp/utils/_utils.py | 3 + .../yt_dlp/utils/jslib/devalue.py | 4 +- .../yt_dlp/utils/networking.py | 2 +- plugins/youtube_download/yt_dlp/version.py | 6 +- src/core/widgets/icon_grid_widget.py | 2 +- user_config/bin/solarfm | 2 + 87 files changed, 3385 insertions(+), 1424 deletions(-) create mode 100644 plugins/youtube_download/yt_dlp/downloader/soop.py create mode 100644 plugins/youtube_download/yt_dlp/extractor/locipo.py create mode 100644 plugins/youtube_download/yt_dlp/extractor/matchitv.py create mode 100644 plugins/youtube_download/yt_dlp/extractor/thechosen.py create mode 100644 plugins/youtube_download/yt_dlp/extractor/tvo.py create mode 100644 plugins/youtube_download/yt_dlp/extractor/visir.py diff --git a/plugins/youtube_download/download.sh b/plugins/youtube_download/download.sh index 9ec3b68..a99b34d 100755 --- a/plugins/youtube_download/download.sh +++ b/plugins/youtube_download/download.sh @@ -15,22 +15,27 @@ function main() { cd "${_SPATH}" echo "Working Dir: " $(pwd) - rm "${_SPATH}/../../cookies.txt" + # rm "${_SPATH}/../../cookies.txt" # Note: Export cookies to file - python "${_SPATH}/yt_dlp/__main__.py" \ - --cookies-from-browser firefox --cookies "${_SPATH}/../../cookies.txt" - - # Note: Use cookies from browser directly # python "${_SPATH}/yt_dlp/__main__.py" \ - # --cookies-from-browser firefox --write-sub --embed-sub --sub-langs en \ - # -o "${_STARGET}/%(title)s.%(ext)s" "${LINK}" + # --cookies-from-browser firefox --cookies "${_SPATH}/../../cookies.txt" - # Note: Download video python "${_SPATH}/yt_dlp/__main__.py" \ - -f "bestvideo[height<=1080][ext=mp4][vcodec^=av]+bestaudio[ext=m4a]/best[ext=mp4]/best" \ - --cookies "${_SPATH}/../../cookies.txt" --write-sub --embed-sub --sub-langs en \ - -o "${_STARGET}/%(title)s.%(ext)s" "${LINK}" + --js-runtimes deno \ + --cookies-from-browser firefox \ + --concurrent-fragments 8 \ + --embed-metadata \ + --embed-thumbnail \ + --write-auto-subs \ + --sub-langs "en.*" \ + --embed-subs \ + --merge-output-format mp4 \ + --remux-video mp4 \ + -f "bv*[height<=1080]+ba/b[height<=1080]" \ + -o "${_STARGET}/%(title)s.%(ext)s" "${LINK}" + "${LINK}" + } main "$@"; \ No newline at end of file diff --git a/plugins/youtube_download/plugin.py b/plugins/youtube_download/plugin.py index b5decd4..0fee2cc 100644 --- a/plugins/youtube_download/plugin.py +++ b/plugins/youtube_download/plugin.py @@ -47,4 +47,4 @@ class Plugin(PluginBase): @threaded def _download(self, dir): - subprocess.Popen([f'{self.path}/download.sh', dir], start_new_session=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, close_fds=True) + subprocess.Popen([f'{self.path}/download.sh', dir], start_new_session=True, stdout=subprocess.DEVNULL, close_fds=True) diff --git a/plugins/youtube_download/yt_dlp/YoutubeDL.py b/plugins/youtube_download/yt_dlp/YoutubeDL.py index aceaa59..7db6303 100644 --- a/plugins/youtube_download/yt_dlp/YoutubeDL.py +++ b/plugins/youtube_download/yt_dlp/YoutubeDL.py @@ -1602,8 +1602,10 @@ class YoutubeDL: if ret is NO_DEFAULT: while True: filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME) - reply = input(self._format_screen( - f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip() + self.to_screen( + self._format_screen(f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS), + skip_eol=True) + reply = input().lower().strip() if reply in {'y', ''}: return None elif reply == 'n': @@ -3030,9 +3032,10 @@ class YoutubeDL: # Bypass interactive format selection if no formats & --ignore-no-formats-error formats_to_download = None break - req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS) - + '(Press ENTER for default, or Ctrl+C to quit)' - + self._format_screen(': ', self.Styles.EMPHASIS)) + self.to_screen(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS) + + '(Press ENTER for default, or Ctrl+C to quit)' + + self._format_screen(': ', self.Styles.EMPHASIS), skip_eol=True) + req_format = input() try: format_selector = self.build_format_selector(req_format) if req_format else None except SyntaxError as err: @@ -3478,11 +3481,12 @@ class YoutubeDL: if dl_filename is not None: self.report_file_already_downloaded(dl_filename) elif fd: - for f in info_dict['requested_formats'] if fd != FFmpegFD else []: - f['filepath'] = fname = prepend_extension( - correct_ext(temp_filename, info_dict['ext']), - 'f{}'.format(f['format_id']), info_dict['ext']) - downloaded.append(fname) + if fd != FFmpegFD and temp_filename != '-': + for f in info_dict['requested_formats']: + f['filepath'] = fname = prepend_extension( + correct_ext(temp_filename, info_dict['ext']), + 'f{}'.format(f['format_id']), info_dict['ext']) + downloaded.append(fname) info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats']) success, real_download = self.dl(temp_filename, info_dict) info_dict['__real_download'] = real_download diff --git a/plugins/youtube_download/yt_dlp/cookies.py b/plugins/youtube_download/yt_dlp/cookies.py index 23f90d6..831b3a2 100644 --- a/plugins/youtube_download/yt_dlp/cookies.py +++ b/plugins/youtube_download/yt_dlp/cookies.py @@ -1168,6 +1168,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): # We use Morsel's legal key chars to avoid errors on setting values _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~') _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}') + _LEGAL_KEY_RE = re.compile(rf'[{_LEGAL_KEY_CHARS}]+', re.ASCII) _RESERVED = { 'expires', @@ -1185,17 +1186,17 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): # Added 'bad' group to catch the remaining value _COOKIE_PATTERN = re.compile(r''' - \s* # Optional whitespace at start of cookie + [ ]* # Optional whitespace at start of cookie (?P # Start of group 'key' - [''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter + [^ =;]+ # Match almost anything here for now and validate later ) # End of group 'key' ( # Optional group: there may not be a value. - \s*=\s* # Equal Sign + [ ]*=[ ]* # Equal Sign ( # Start of potential value (?P # Start of group 'val' "(?:[^\\"]|\\.)*" # Any doublequoted string | # or - \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr + \w{3},\ [\w\d -]{9,11}\ [\d:]{8}\ GMT # Special case for "expires" attr | # or [''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string ) # End of group 'val' @@ -1203,10 +1204,14 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): (?P(?:\\;|[^;])*?) # 'bad' group fallback for invalid values ) # End of potential value )? # End of optional value group - \s* # Any number of spaces. - (\s+|;|$) # Ending either at space, semicolon, or EOS. + [ ]* # Any number of spaces. + ([ ]+|;|$) # Ending either at space, semicolon, or EOS. ''', re.ASCII | re.VERBOSE) + # http.cookies.Morsel raises on values w/ control characters in Python 3.14.3+ & 3.13.12+ + # Ref: https://github.com/python/cpython/issues/143919 + _CONTROL_CHARACTER_RE = re.compile(r'[\x00-\x1F\x7F]') + def load(self, data): # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776 if not isinstance(data, str): @@ -1219,6 +1224,9 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): continue key, value = match.group('key', 'val') + if not self._LEGAL_KEY_RE.fullmatch(key): + morsel = None + continue is_attribute = False if key.startswith('$'): @@ -1237,6 +1245,14 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): value = True else: value, _ = self.value_decode(value) + # Guard against control characters in quoted attribute values + if self._CONTROL_CHARACTER_RE.search(value): + # While discarding the entire morsel is not very lenient, + # it's better than http.cookies.Morsel raising a CookieError + # and it's probably better to err on the side of caution + self.pop(morsel.key, None) + morsel = None + continue morsel[key] = value @@ -1246,6 +1262,10 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): elif value is not None: morsel = self.get(key, http.cookies.Morsel()) real_value, coded_value = self.value_decode(value) + # Guard against control characters in quoted cookie values + if self._CONTROL_CHARACTER_RE.search(real_value): + morsel = None + continue morsel.set(key, real_value, coded_value) self[key] = morsel diff --git a/plugins/youtube_download/yt_dlp/downloader/__init__.py b/plugins/youtube_download/yt_dlp/downloader/__init__.py index 17458b9..ff80573 100644 --- a/plugins/youtube_download/yt_dlp/downloader/__init__.py +++ b/plugins/youtube_download/yt_dlp/downloader/__init__.py @@ -36,6 +36,7 @@ from .rtsp import RtspFD from .websocket import WebSocketFragmentFD from .youtube_live_chat import YoutubeLiveChatFD from .bunnycdn import BunnyCdnFD +from .soop import SoopVodFD PROTOCOL_MAP = { 'rtmp': RtmpFD, @@ -56,6 +57,7 @@ PROTOCOL_MAP = { 'youtube_live_chat': YoutubeLiveChatFD, 'youtube_live_chat_replay': YoutubeLiveChatFD, 'bunnycdn': BunnyCdnFD, + 'soopvod': SoopVodFD, } diff --git a/plugins/youtube_download/yt_dlp/downloader/soop.py b/plugins/youtube_download/yt_dlp/downloader/soop.py new file mode 100644 index 0000000..3262026 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/downloader/soop.py @@ -0,0 +1,61 @@ +import threading +import time + +from .common import FileDownloader +from . import HlsFD +from ..extractor.afreecatv import _cloudfront_auth_request +from ..networking.exceptions import network_exceptions + + +class SoopVodFD(FileDownloader): + """ + Downloads Soop subscription VODs with required cookie refresh requests + Note, this is not a part of public API, and will be removed without notice. + DO NOT USE + """ + + def real_download(self, filename, info_dict): + self.to_screen(f'[{self.FD_NAME}] Downloading Soop subscription VOD HLS') + fd = HlsFD(self.ydl, self.params) + refresh_params = info_dict['_cookie_refresh_params'] + referer_url = info_dict['webpage_url'] + + stop_event = threading.Event() + refresh_thread = threading.Thread( + target=self._cookie_refresh_thread, + args=(stop_event, refresh_params, referer_url), + ) + refresh_thread.start() + + try: + return fd.real_download(filename, info_dict) + finally: + stop_event.set() + + def _cookie_refresh_thread(self, stop_event, refresh_params, referer_url): + m3u8_url = refresh_params['m3u8_url'] + strm_id = refresh_params['strm_id'] + video_id = refresh_params['video_id'] + + def _get_cloudfront_cookie_expiration(m3u8_url): + cookies = self.ydl.cookiejar.get_cookies_for_url(m3u8_url) + return min((cookie.expires for cookie in cookies if 'CloudFront' in cookie.name and cookie.expires), default=0) + + while not stop_event.wait(5): + current_time = time.time() + expiration_time = _get_cloudfront_cookie_expiration(m3u8_url) + last_refresh_check = refresh_params.get('_last_refresh', 0) + + # Cookie TTL is 90 seconds, but let's give ourselves a 15-second cushion + should_refresh = ( + (expiration_time and current_time >= expiration_time - 15) + or (not expiration_time and current_time - last_refresh_check >= 75) + ) + + if should_refresh: + try: + self.ydl.urlopen(_cloudfront_auth_request( + m3u8_url, strm_id, video_id, referer_url)).read() + refresh_params['_last_refresh'] = current_time + except network_exceptions as e: + self.to_screen(f'[{self.FD_NAME}] Cookie refresh attempt failed: {e}') diff --git a/plugins/youtube_download/yt_dlp/extractor/_extractors.py b/plugins/youtube_download/yt_dlp/extractor/_extractors.py index ea49a25..1a29a93 100644 --- a/plugins/youtube_download/yt_dlp/extractor/_extractors.py +++ b/plugins/youtube_download/yt_dlp/extractor/_extractors.py @@ -1,32 +1,4 @@ # flake8: noqa: F401 -# isort: off - -from .youtube import ( # Youtube is moved to the top to improve performance - YoutubeIE, - YoutubeClipIE, - YoutubeFavouritesIE, - YoutubeNotificationsIE, - YoutubeHistoryIE, - YoutubeTabIE, - YoutubeLivestreamEmbedIE, - YoutubePlaylistIE, - YoutubeRecommendedIE, - YoutubeSearchDateIE, - YoutubeSearchIE, - YoutubeSearchURLIE, - YoutubeMusicSearchURLIE, - YoutubeSubscriptionsIE, - YoutubeTruncatedIDIE, - YoutubeTruncatedURLIE, - YoutubeYtBeIE, - YoutubeYtUserIE, - YoutubeWatchLaterIE, - YoutubeShortsAudioPivotIE, - YoutubeConsentRedirectIE, -) - -# isort: on - from .abc import ( ABCIE, ABCIViewIE, @@ -339,8 +311,10 @@ from .canalsurmas import CanalsurmasIE from .caracoltv import CaracolTvPlayIE from .cbc import ( CBCIE, + CBCGemContentIE, CBCGemIE, CBCGemLiveIE, + CBCGemOlympicsIE, CBCGemPlaylistIE, CBCListenIE, CBCPlayerIE, @@ -592,7 +566,10 @@ from .eroprofile import ( EroProfileAlbumIE, EroProfileIE, ) -from .err import ERRJupiterIE +from .err import ( + ERRArhiivIE, + ERRJupiterIE, +) from .ertgr import ( ERTFlixCodenameIE, ERTFlixIE, @@ -695,10 +672,6 @@ from .frontendmasters import ( FrontendMastersIE, FrontendMastersLessonIE, ) -from .frontro import ( - TheChosenGroupIE, - TheChosenIE, -) from .fujitv import FujiTVFODPlus7IE from .funk import FunkIE from .funker530 import Funker530IE @@ -1054,6 +1027,10 @@ from .livestream import ( ) from .livestreamfails import LivestreamfailsIE from .lnk import LnkIE +from .locipo import ( + LocipoIE, + LocipoPlaylistIE, +) from .loco import LocoIE from .loom import ( LoomFolderIE, @@ -1096,6 +1073,7 @@ from .markiza import ( ) from .massengeschmacktv import MassengeschmackTVIE from .masters import MastersIE +from .matchitv import MatchiTVIE from .matchtv import MatchTVIE from .mave import ( MaveChannelIE, @@ -1810,7 +1788,10 @@ from .safari import ( from .saitosan import SaitosanIE from .samplefocus import SampleFocusIE from .sapo import SapoIE -from .sauceplus import SaucePlusIE +from .sauceplus import ( + SaucePlusChannelIE, + SaucePlusIE, +) from .sbs import SBSIE from .sbscokr import ( SBSCoKrAllvodProgramIE, @@ -2078,6 +2059,10 @@ from .tenplay import ( from .testurl import TestURLIE from .tf1 import TF1IE from .tfo import TFOIE +from .thechosen import ( + TheChosenGroupIE, + TheChosenIE, +) from .theguardian import ( TheGuardianPodcastIE, TheGuardianPodcastPlaylistIE, @@ -2199,11 +2184,15 @@ from .tvc import ( TVCIE, TVCArticleIE, ) -from .tver import TVerIE +from .tver import ( + TVerIE, + TVerOlympicIE, +) from .tvigle import TvigleIE from .tviplayer import TVIPlayerIE from .tvn24 import TVN24IE from .tvnoe import TVNoeIE +from .tvo import TvoIE from .tvopengr import ( TVOpenGrEmbedIE, TVOpenGrWatchIE, @@ -2368,6 +2357,7 @@ from .vimm import ( ) from .viously import ViouslyIE from .viqeo import ViqeoIE +from .visir import VisirIE from .viu import ( ViuIE, ViuOTTIE, @@ -2388,7 +2378,11 @@ from .voicy import ( VoicyChannelIE, VoicyIE, ) -from .volejtv import VolejTVIE +from .volejtv import ( + VolejTVCategoryPlaylistIE, + VolejTVClubPlaylistIE, + VolejTVIE, +) from .voxmedia import ( VoxMediaIE, VoxMediaVolumeIE, @@ -2551,6 +2545,28 @@ from .youporn import ( YouPornTagIE, YouPornVideosIE, ) +from .youtube import ( + YoutubeClipIE, + YoutubeConsentRedirectIE, + YoutubeFavouritesIE, + YoutubeHistoryIE, + YoutubeIE, + YoutubeLivestreamEmbedIE, + YoutubeMusicSearchURLIE, + YoutubeNotificationsIE, + YoutubePlaylistIE, + YoutubeRecommendedIE, + YoutubeSearchIE, + YoutubeSearchURLIE, + YoutubeShortsAudioPivotIE, + YoutubeSubscriptionsIE, + YoutubeTabIE, + YoutubeTruncatedIDIE, + YoutubeTruncatedURLIE, + YoutubeWatchLaterIE, + YoutubeYtBeIE, + YoutubeYtUserIE, +) from .zaiko import ( ZaikoETicketIE, ZaikoIE, diff --git a/plugins/youtube_download/yt_dlp/extractor/aenetworks.py b/plugins/youtube_download/yt_dlp/extractor/aenetworks.py index a4a5f40..65243c5 100644 --- a/plugins/youtube_download/yt_dlp/extractor/aenetworks.py +++ b/plugins/youtube_download/yt_dlp/extractor/aenetworks.py @@ -5,10 +5,12 @@ from ..utils import ( ExtractorError, GeoRestrictedError, int_or_none, + make_archive_id, remove_start, - traverse_obj, update_url_query, + url_or_none, ) +from ..utils.traversal import traverse_obj class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE @@ -29,6 +31,19 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE 'historyvault.com': (None, 'historyvault', None), 'biography.com': (None, 'biography', None), } + _GRAPHQL_QUERY = ''' + query getUserVideo($videoId: ID!) { + video(id: $videoId) { + title + publicUrl + programId + tvSeasonNumber + tvSeasonEpisodeNumber + series { + title + } + } + }''' def _extract_aen_smil(self, smil_url, video_id, auth=None): query = { @@ -73,19 +88,39 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE def _extract_aetn_info(self, domain, filter_key, filter_value, url): requestor_id, brand, software_statement = self._DOMAIN_MAP[domain] + if filter_key == 'canonical': + webpage = self._download_webpage(url, filter_value) + graphql_video_id = self._search_regex( + r']+\bcontent="[^"]*\btpid/(\d+)"', webpage, 'id', + default=None) or self._html_search_meta('videoId', webpage, 'GraphQL video ID', fatal=True) + else: + graphql_video_id = filter_value + result = self._download_json( - f'https://feeds.video.aetnd.com/api/v2/{brand}/videos', - filter_value, query={f'filter[{filter_key}]': filter_value}) - result = traverse_obj( - result, ('results', - lambda k, v: k == 0 and v[filter_key] == filter_value), - get_all=False) - if not result: + 'https://yoga.appsvcs.aetnd.com/', graphql_video_id, + query={ + 'brand': brand, + 'mode': 'live', + 'platform': 'web', + }, + data=json.dumps({ + 'operationName': 'getUserVideo', + 'variables': { + 'videoId': graphql_video_id, + }, + 'query': self._GRAPHQL_QUERY, + }).encode(), + headers={ + 'Content-Type': 'application/json', + }) + + result = traverse_obj(result, ('data', 'video', {dict})) + media_url = traverse_obj(result, ('publicUrl', {url_or_none})) + if not media_url: raise ExtractorError('Show not found in A&E feed (too new?)', expected=True, video_id=remove_start(filter_value, '/')) title = result['title'] - video_id = result['id'] - media_url = result['publicUrl'] + video_id = result['programId'] theplatform_metadata = self._download_theplatform_metadata(self._search_regex( r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) info = self._parse_theplatform_metadata(theplatform_metadata) @@ -100,9 +135,13 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE info.update(self._extract_aen_smil(media_url, video_id, auth)) info.update({ 'title': title, - 'series': result.get('seriesName'), - 'season_number': int_or_none(result.get('tvSeasonNumber')), - 'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')), + 'display_id': graphql_video_id, + '_old_archive_ids': [make_archive_id(self, graphql_video_id)], + **traverse_obj(result, { + 'series': ('series', 'title', {str}), + 'season_number': ('tvSeasonNumber', {int_or_none}), + 'episode_number': ('tvSeasonEpisodeNumber', {int_or_none}), + }), }) return info @@ -116,7 +155,7 @@ class AENetworksIE(AENetworksBaseIE): (?:shows/[^/?#]+/)?videos/[^/?#]+ )''' _TESTS = [{ - 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', + 'url': 'https://www.history.com/shows/mountain-men/season-1/episode-1', 'info_dict': { 'id': '22253814', 'ext': 'mp4', @@ -139,11 +178,11 @@ class AENetworksIE(AENetworksBaseIE): }, 'params': {'skip_download': 'm3u8'}, 'add_ie': ['ThePlatform'], - 'skip': 'Geo-restricted - This content is not available in your location.', + 'skip': 'This content requires a valid, unexpired auth token', }, { - 'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1', + 'url': 'https://www.aetv.com/shows/duck-dynasty/season-9/episode-1', 'info_dict': { - 'id': '600587331957', + 'id': '147486', 'ext': 'mp4', 'title': 'Inlawful Entry', 'description': 'md5:57c12115a2b384d883fe64ca50529e08', @@ -160,6 +199,8 @@ class AENetworksIE(AENetworksBaseIE): 'season_number': 9, 'series': 'Duck Dynasty', 'age_limit': 0, + 'display_id': '600587331957', + '_old_archive_ids': ['aenetworks 600587331957'], }, 'params': {'skip_download': 'm3u8'}, 'add_ie': ['ThePlatform'], @@ -186,6 +227,7 @@ class AENetworksIE(AENetworksBaseIE): }, 'params': {'skip_download': 'm3u8'}, 'add_ie': ['ThePlatform'], + 'skip': '404 Not Found', }, { 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story', 'info_dict': { @@ -209,6 +251,7 @@ class AENetworksIE(AENetworksBaseIE): }, 'params': {'skip_download': 'm3u8'}, 'add_ie': ['ThePlatform'], + 'skip': 'This content requires a valid, unexpired auth token', }, { 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', 'only_matching': True, @@ -259,7 +302,7 @@ class AENetworksListBaseIE(AENetworksBaseIE): domain, slug = self._match_valid_url(url).groups() _, brand, _ = self._DOMAIN_MAP[domain] playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS) - base_url = f'http://watch.{domain}' + base_url = f'https://watch.{domain}' entries = [] for item in (playlist.get(self._ITEMS_KEY) or []): diff --git a/plugins/youtube_download/yt_dlp/extractor/afreecatv.py b/plugins/youtube_download/yt_dlp/extractor/afreecatv.py index aadb4d6..d8a0259 100644 --- a/plugins/youtube_download/yt_dlp/extractor/afreecatv.py +++ b/plugins/youtube_download/yt_dlp/extractor/afreecatv.py @@ -1,5 +1,6 @@ import datetime as dt import functools +import time from .common import InfoExtractor from ..networking import Request @@ -16,7 +17,23 @@ from ..utils import ( urlencode_postdata, urljoin, ) -from ..utils.traversal import traverse_obj +from ..utils.traversal import require, traverse_obj + + +def _cloudfront_auth_request(m3u8_url, strm_id, video_id, referer_url): + return Request( + 'https://live.sooplive.com/api/private_auth.php', + method='POST', + headers={ + 'Referer': referer_url, + 'Origin': 'https://vod.sooplive.com', + }, + data=urlencode_postdata({ + 'type': 'vod', + 'strm_id': strm_id, + 'title_no': video_id, + 'url': m3u8_url, + })) class AfreecaTVBaseIE(InfoExtractor): @@ -34,17 +51,17 @@ class AfreecaTVBaseIE(InfoExtractor): } response = self._download_json( - 'https://login.sooplive.co.kr/app/LoginAction.php', None, + 'https://login.sooplive.com/app/LoginAction.php', None, 'Logging in', data=urlencode_postdata(login_form)) _ERRORS = { -4: 'Your account has been suspended due to a violation of our terms and policies.', - -5: 'https://member.sooplive.co.kr/app/user_delete_progress.php', - -6: 'https://login.sooplive.co.kr/membership/changeMember.php', + -5: 'https://member.sooplive.com/app/user_delete_progress.php', + -6: 'https://login.sooplive.com/membership/changeMember.php', -8: "Hello! Soop here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.", - -9: 'https://member.sooplive.co.kr/app/pop_login_block.php', - -11: 'https://login.sooplive.co.kr/afreeca/second_login.php', - -12: 'https://member.sooplive.co.kr/app/user_security.php', + -9: 'https://member.sooplive.com/app/pop_login_block.php', + -11: 'https://login.sooplive.com/afreeca/second_login.php', + -12: 'https://member.sooplive.com/app/user_security.php', 0: 'The username does not exist or you have entered the wrong password.', -1: 'The username does not exist or you have entered the wrong password.', -3: 'You have entered your username/password incorrectly.', @@ -62,7 +79,7 @@ class AfreecaTVBaseIE(InfoExtractor): def _call_api(self, endpoint, display_id, data=None, headers=None, query=None): return self._download_json(Request( - f'https://api.m.sooplive.co.kr/{endpoint}', + f'https://api.m.sooplive.com/{endpoint}', data=data, headers=headers, query=query, extensions={'legacy_ssl': True}), display_id, 'Downloading API JSON', 'Unable to download API JSON') @@ -78,36 +95,36 @@ class AfreecaTVBaseIE(InfoExtractor): class AfreecaTVIE(AfreecaTVBaseIE): IE_NAME = 'soop' - IE_DESC = 'sooplive.co.kr' - _VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/(?:PLAYER/STATION|player)/(?P\d+)/?(?:$|[?#&])' + IE_DESC = 'sooplive.com' + _VALID_URL = r'https?://vod\.sooplive\.com/(?:PLAYER/STATION|player)/(?P\d+)/?(?:$|[?#&])' _TESTS = [{ - 'url': 'https://vod.sooplive.co.kr/player/96753363', + 'url': 'https://vod.sooplive.com/player/192805325', 'info_dict': { - 'id': '20230108_9FF5BEE1_244432674_1', + 'id': '20260414_1B44E53B_293230967_1', 'ext': 'mp4', 'uploader_id': 'rlantnghks', 'uploader': '페이즈으', - 'duration': 10840, - 'thumbnail': r're:https?://videoimg\.(?:sooplive\.co\.kr|afreecatv\.com)/.+', - 'upload_date': '20230108', - 'timestamp': 1673186405, - 'title': '젠지 페이즈', + 'duration': 10869, + 'thumbnail': r're:https?://videoimg\.sooplive\.com/.+', + 'upload_date': '20260414', + 'timestamp': 1776174982, + 'title': 'T1 Peyz [CC]', }, 'params': { 'skip_download': True, }, }, { # non standard key - 'url': 'http://vod.sooplive.co.kr/PLAYER/STATION/20515605', + 'url': 'https://vod.sooplive.com/PLAYER/STATION/20515605', 'info_dict': { - 'id': '20170411_BE689A0E_190960999_1_2_h', + 'id': 'BE689A0E_190960999_1_2_A', 'ext': 'mp4', 'title': '혼자사는여자집', - 'thumbnail': r're:https?://(?:video|st)img\.(?:sooplive\.co\.kr|afreecatv\.com)/.+', + 'thumbnail': r're:https?://(?:video|st)img\.(?:sooplive\.co\.kr|sooplive\.com|afreecatv\.com)/.+', 'uploader': '♥이슬이', 'uploader_id': 'dasl8121', 'upload_date': '20170411', - 'timestamp': 1491929865, + 'timestamp': 1491897465, 'duration': 213, }, 'params': { @@ -115,32 +132,32 @@ class AfreecaTVIE(AfreecaTVBaseIE): }, }, { # adult content - 'url': 'https://vod.sooplive.co.kr/player/97267690', + 'url': 'https://vod.sooplive.com/player/191612613', 'info_dict': { - 'id': '20180327_27901457_202289533_1', + 'id': '20260403_A4534670_292914441_1', 'ext': 'mp4', - 'title': '[생]빨개요♥ (part 1)', - 'thumbnail': r're:https?://(?:video|st)img\.(?:sooplive\.co\.kr|afreecatv\.com)/.+', - 'uploader': '[SA]서아', - 'uploader_id': 'bjdyrksu', - 'upload_date': '20180327', - 'duration': 3601, + 'title': '하이', + 'thumbnail': r're:https?://(?:video|st)img\.sooplive\.com/.+', + 'uploader': '세림잉', + 'uploader_id': 'serimm', + 'upload_date': '20260402', + 'duration': 7015, }, 'params': { 'skip_download': True, }, - 'skip': 'The VOD does not exist', + 'skip': 'Needs login to view', }, { # adult content - 'url': 'https://vod.sooplive.co.kr/player/70395877', + 'url': 'https://vod.sooplive.com/player/70395877', 'only_matching': True, }, { # subscribers only - 'url': 'https://vod.sooplive.co.kr/player/104647403', + 'url': 'https://vod.sooplive.com/player/104647403', 'only_matching': True, }, { # private - 'url': 'https://vod.sooplive.co.kr/player/81669846', + 'url': 'https://vod.sooplive.com/player/81669846', 'only_matching': True, }] @@ -153,6 +170,13 @@ class AfreecaTVIE(AfreecaTVBaseIE): 'nApiLevel': 10, }))['data'] + initial_refresh_time = 0 + strm_id = None + # For subscriber-only VODs, we need to call private_auth.php to get CloudFront cookies + needs_private_auth = traverse_obj(data, ('sub_upload_type', {str})) + if needs_private_auth: + strm_id = traverse_obj(data, ('bj_id', {str}, {require('stream ID')})) + error_code = traverse_obj(data, ('code', {int})) if error_code == -6221: raise ExtractorError('The VOD does not exist', expected=True) @@ -172,9 +196,23 @@ class AfreecaTVIE(AfreecaTVBaseIE): traverse_obj(data, ('files', lambda _, v: url_or_none(v['file']))), start=1): file_url = file_element['file'] if determine_ext(file_url) == 'm3u8': + if needs_private_auth: + self._request_webpage( + _cloudfront_auth_request(file_url, strm_id, video_id, url), + video_id, 'Requesting CloudFront cookies', 'Failed to get CloudFront cookies') + initial_refresh_time = time.time() formats = self._extract_m3u8_formats( file_url, video_id, 'mp4', m3u8_id='hls', note=f'Downloading part {file_num} m3u8 information') + if needs_private_auth: + for fmt in formats: + fmt['protocol'] = 'soopvod' + fmt['_cookie_refresh_params'] = { + 'm3u8_url': file_url, + 'strm_id': strm_id, + 'video_id': video_id, + '_last_refresh': initial_refresh_time, + } else: formats = [{ 'url': file_url, @@ -217,10 +255,10 @@ class AfreecaTVIE(AfreecaTVBaseIE): class AfreecaTVCatchStoryIE(AfreecaTVBaseIE): IE_NAME = 'soop:catchstory' - IE_DESC = 'sooplive.co.kr catch story' - _VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/player/(?P\d+)/catchstory' + IE_DESC = 'sooplive.com catch story' + _VALID_URL = r'https?://vod\.sooplive\.com/player/(?P\d+)/catchstory' _TESTS = [{ - 'url': 'https://vod.sooplive.co.kr/player/103247/catchstory', + 'url': 'https://vod.sooplive.com/player/103247/catchstory', 'info_dict': { 'id': '103247', }, @@ -253,10 +291,10 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE): class AfreecaTVLiveIE(AfreecaTVBaseIE): IE_NAME = 'soop:live' - IE_DESC = 'sooplive.co.kr livestreams' - _VALID_URL = r'https?://play\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P[^/?#]+)(?:/(?P\d+))?' + IE_DESC = 'sooplive.com livestreams' + _VALID_URL = r'https?://play\.sooplive\.com/(?P[^/?#]+)(?:/(?P\d+))?' _TESTS = [{ - 'url': 'https://play.sooplive.co.kr/pyh3646/237852185', + 'url': 'https://play.sooplive.com/pyh3646/237852185', 'info_dict': { 'id': '237852185', 'ext': 'mp4', @@ -268,30 +306,33 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE): }, 'skip': 'Livestream has ended', }, { - 'url': 'https://play.sooplive.co.kr/pyh3646/237852185', + 'url': 'https://play.sooplive.com/pyh3646/237852185', 'only_matching': True, }, { - 'url': 'https://play.sooplive.co.kr/pyh3646', + 'url': 'https://play.sooplive.com/pyh3646', 'only_matching': True, }] - _LIVE_API_URL = 'https://live.sooplive.co.kr/afreeca/player_live_api.php' + _LIVE_API_URL = 'https://live.sooplive.com/afreeca/player_live_api.php' _WORKING_CDNS = [ - 'gcp_cdn', # live-global-cdn-v02.sooplive.co.kr - 'gs_cdn_pc_app', # pc-app.stream.sooplive.co.kr - 'gs_cdn_mobile_web', # mobile-web.stream.sooplive.co.kr - 'gs_cdn_pc_web', # pc-web.stream.sooplive.co.kr + 'gcp_cdn', # live-global-cdn-v02.sooplive.com + 'gs_cdn_mobile_web', # mobile-web.stream.sooplive.com + 'gs_cdn_pc_web', # pc-web.stream.sooplive.com + 'lg_cdn_pc_web', # live-pcweb-kr-cdn-z02.sooplive.com + 'lg_cdn_mobile_web', # live-mobileweb-kr-cdn-z02.sooplive.com + 'azure_cdn', # live-global-cdn-v02.sooplive.com + 'aws_cf', # live-global-cdn-v02.sooplive.com ] _BAD_CDNS = [ - 'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve) - 'gs_cdn_chromecast', # chromecast.stream.sooplive.co.kr (HTTP Error 400) - 'azure_cdn', # live-global-cdn-v01.sooplive.co.kr (cannot resolve) - 'aws_cf', # live-global-cdn-v03.sooplive.co.kr (cannot resolve) - 'kt_cdn', # kt.stream.sooplive.co.kr (HTTP Error 400) + 'gs_cdn', # chromecast.stream.sooplive.com (cannot resolve) + 'gs_cdn_chromecast', # chromecast.stream.sooplive.com (HTTP Error 400) + 'lg_cdn_chromecast', # live-chromecast-kr-cdn-z02.sooplive.com (HTTP Error 403) + 'gs_cdn_pc_app', # pc-app.stream.sooplive.co.kr (HTTP Error 400) Might not exist anymore + 'kt_cdn', # kt.stream.sooplive.co.kr (HTTP Error 400) Might not exist anymore ] def _extract_formats(self, channel_info, broadcast_no, aid): - stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.co.kr' + stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.com' # If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs default_cdn_ids = orderedSet([ @@ -311,7 +352,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE): try: return self._extract_m3u8_formats( m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid}, - headers={'Referer': 'https://play.sooplive.co.kr/'}) + headers={'Referer': 'https://play.sooplive.com/'}) except ExtractorError as e: if attempt == len(cdn_ids): raise @@ -362,7 +403,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE): formats = self._extract_formats(channel_info, broadcast_no, aid) station_info = traverse_obj(self._download_json( - 'https://st.sooplive.co.kr/api/get_station_status.php', broadcast_no, + 'https://st.sooplive.com/api/get_station_status.php', broadcast_no, 'Downloading channel metadata', 'Unable to download channel metadata', query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {} @@ -380,50 +421,51 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE): class AfreecaTVUserIE(AfreecaTVBaseIE): IE_NAME = 'soop:user' - _VALID_URL = r'https?://ch\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P[^/?#]+)/vods/?(?P[^/?#]+)?' + _VALID_URL = r'https?://(?:www\.)?sooplive\.com/station/(?P[^/?#]+)/vod/?(?P[^/?#]+)?' _TESTS = [{ - 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/review', + 'url': 'https://www.sooplive.com/station/devil0108/vod/review', 'info_dict': { '_type': 'playlist', - 'id': 'ryuryu24', - 'title': 'ryuryu24 - review', + 'id': 'devil0108', + 'title': 'devil0108 - review', }, - 'playlist_count': 218, + 'playlist_mincount': 300, }, { - 'url': 'https://ch.sooplive.co.kr/parang1995/vods/highlight', + 'url': 'https://www.sooplive.com/station/devil0108/vod', + 'info_dict': { + '_type': 'playlist', + 'id': 'devil0108', + 'title': 'devil0108 - all', + }, + 'playlist_mincount': 300, + }, { + 'url': 'https://www.sooplive.com/station/parang1995/vod/clip', 'info_dict': { '_type': 'playlist', 'id': 'parang1995', - 'title': 'parang1995 - highlight', + 'title': 'parang1995 - clip', }, - 'playlist_count': 997, + 'playlist_mincount': 300, }, { - 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods', + 'url': 'https://www.sooplive.com/station/phonics1/vod/normal', 'info_dict': { '_type': 'playlist', - 'id': 'ryuryu24', - 'title': 'ryuryu24 - all', + 'id': 'phonics1', + 'title': 'phonics1 - normal', }, - 'playlist_count': 221, - }, { - 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/balloonclip', - 'info_dict': { - '_type': 'playlist', - 'id': 'ryuryu24', - 'title': 'ryuryu24 - balloonclip', - }, - 'playlist_count': 0, + 'playlist_mincount': 300, }] _PER_PAGE = 60 def _fetch_page(self, user_id, user_type, page): page += 1 - info = self._download_json(f'https://chapi.sooplive.co.kr/api/{user_id}/vods/{user_type}', user_id, + info = self._download_json(f'https://chapi.sooplive.com/api/{user_id}/vods/{user_type}', user_id, query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'}, note=f'Downloading {user_type} video page {page}') for item in info['data']: yield self.url_result( - f'https://vod.sooplive.co.kr/player/{item["title_no"]}/', AfreecaTVIE, item['title_no']) + f'https://vod.sooplive.com/player/{item["title_no"]}/', + AfreecaTVIE, item['title_no'], item.get('title_name')) def _real_extract(self, url): user_id, user_type = self._match_valid_url(url).group('id', 'slug_type') diff --git a/plugins/youtube_download/yt_dlp/extractor/applepodcasts.py b/plugins/youtube_download/yt_dlp/extractor/applepodcasts.py index 91a7028..eb30fb2 100644 --- a/plugins/youtube_download/yt_dlp/extractor/applepodcasts.py +++ b/plugins/youtube_download/yt_dlp/extractor/applepodcasts.py @@ -11,18 +11,18 @@ from ..utils.traversal import traverse_obj class ApplePodcastsIE(InfoExtractor): _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P\d+)' _TESTS = [{ - 'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654', - 'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172', + 'url': 'https://podcasts.apple.com/us/podcast/urbana-podcast-724-by-david-penn/id1531349107?i=1000748574256', + 'md5': 'f8a6f92735d0cfbd5e6a7294151e28d8', 'info_dict': { - 'id': '1000665010654', - 'ext': 'mp3', - 'title': 'Ferreck Dawn - To The Break of Dawn 117', - 'episode': 'Ferreck Dawn - To The Break of Dawn 117', - 'description': 'md5:8c4f5c2c30af17ed6a98b0b9daf15b76', - 'upload_date': '20240812', - 'timestamp': 1723449600, - 'duration': 3596, - 'series': 'Ferreck Dawn - To The Break of Dawn', + 'id': '1000748574256', + 'ext': 'm4a', + 'title': 'URBANA PODCAST 724 BY DAVID PENN', + 'episode': 'URBANA PODCAST 724 BY DAVID PENN', + 'description': 'md5:fec77bacba32db8c9b3dda5486ed085f', + 'upload_date': '20260206', + 'timestamp': 1770400801, + 'duration': 3602, + 'series': 'Urbana Radio Show', 'thumbnail': 're:.+[.](png|jpe?g|webp)', }, }, { @@ -57,22 +57,22 @@ class ApplePodcastsIE(InfoExtractor): webpage = self._download_webpage(url, episode_id) server_data = self._search_json( r'', webpage), (..., {json.loads})) data = get_first(post_data, ( - 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., - 'entryPointRoot', 'otherProps', 'deeplinkAdCard', 'snapshot', {dict})) + 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., ( + ('__bbox', 'result', 'data', 'ad_library_main', 'deeplink_ad_archive_result', 'deeplink_ad_archive'), + # old path + ('entryPointRoot', 'otherProps', 'deeplinkAdCard'), + ), 'snapshot', {dict})) if not data: raise ExtractorError('Unable to extract ad data') @@ -1084,11 +1152,12 @@ class FacebookAdsIE(InfoExtractor): 'title': title, 'description': markup or None, }, traverse_obj(data, { - 'description': ('link_description', {lambda x: x if not x.startswith('{{product.') else None}), + 'description': ( + (('body', 'text'), 'link_description'), + {lambda x: x if not x.startswith('{{product.') else None}, any), 'uploader': ('page_name', {str}), 'uploader_id': ('page_id', {str_or_none}), 'uploader_url': ('page_profile_uri', {url_or_none}), - 'timestamp': ('creation_time', {int_or_none}), 'like_count': ('page_like_count', {int_or_none}), })) @@ -1099,7 +1168,8 @@ class FacebookAdsIE(InfoExtractor): entries.append({ 'id': f'{video_id}_{idx}', 'title': entry.get('title') or title, - 'description': traverse_obj(entry, 'body', 'link_description') or info_dict.get('description'), + 'description': traverse_obj( + entry, 'body', 'link_description', expected_type=str) or info_dict.get('description'), 'thumbnail': url_or_none(entry.get('video_preview_image_url')), 'formats': self._extract_formats(entry), }) diff --git a/plugins/youtube_download/yt_dlp/extractor/firsttv.py b/plugins/youtube_download/yt_dlp/extractor/firsttv.py index 86ad7d7..de82183 100644 --- a/plugins/youtube_download/yt_dlp/extractor/firsttv.py +++ b/plugins/youtube_download/yt_dlp/extractor/firsttv.py @@ -3,10 +3,12 @@ import urllib.parse from .common import InfoExtractor from ..utils import ( determine_ext, + float_or_none, int_or_none, join_nonempty, mimetype2ext, parse_qs, + unescapeHTML, unified_strdate, url_or_none, ) @@ -107,6 +109,11 @@ class FirstTVIE(InfoExtractor): 'timestamp': ('dvr_begin_at', {int_or_none}), 'upload_date': ('date_air', {unified_strdate}), 'duration': ('duration', {int_or_none}), + 'chapters': ('episodes', lambda _, v: float_or_none(v['from']) is not None, { + 'start_time': ('from', {float_or_none}), + 'title': ('name', {str}, {unescapeHTML}), + 'end_time': ('to', {float_or_none}), + }), }), 'id': video_id, 'formats': formats, diff --git a/plugins/youtube_download/yt_dlp/extractor/floatplane.py b/plugins/youtube_download/yt_dlp/extractor/floatplane.py index 31723c2..3c24c6a 100644 --- a/plugins/youtube_download/yt_dlp/extractor/floatplane.py +++ b/plugins/youtube_download/yt_dlp/extractor/floatplane.py @@ -318,9 +318,48 @@ class FloatplaneIE(FloatplaneBaseIE): self.raise_login_required() -class FloatplaneChannelIE(InfoExtractor): +class FloatplaneChannelBaseIE(InfoExtractor): + """Subclasses must set _RESULT_IE, _BASE_URL and _PAGE_SIZE""" + + def _fetch_page(self, display_id, creator_id, channel_id, page): + query = { + 'id': creator_id, + 'limit': self._PAGE_SIZE, + 'fetchAfter': page * self._PAGE_SIZE, + } + if channel_id: + query['channel'] = channel_id + page_data = self._download_json( + f'{self._BASE_URL}/api/v3/content/creator', display_id, + query=query, note=f'Downloading page {page + 1}') + for post in page_data or []: + yield self.url_result( + f'{self._BASE_URL}/post/{post["id"]}', + self._RESULT_IE, id=post['id'], title=post.get('title'), + release_timestamp=parse_iso8601(post.get('releaseDate'))) + + def _real_extract(self, url): + creator, channel = self._match_valid_url(url).group('id', 'channel') + display_id = join_nonempty(creator, channel, delim='/') + + creator_data = self._download_json( + f'{self._BASE_URL}/api/v3/creator/named', + display_id, query={'creatorURL[0]': creator})[0] + + channel_data = traverse_obj( + creator_data, ('channels', lambda _, v: v['urlname'] == channel), get_all=False) or {} + + return self.playlist_result(OnDemandPagedList(functools.partial( + self._fetch_page, display_id, creator_data['id'], channel_data.get('id')), self._PAGE_SIZE), + display_id, title=channel_data.get('title') or creator_data.get('title'), + description=channel_data.get('about') or creator_data.get('about')) + + +class FloatplaneChannelIE(FloatplaneChannelBaseIE): _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P[\w-]+)/home(?:/(?P[\w-]+))?' + _BASE_URL = 'https://www.floatplane.com' _PAGE_SIZE = 20 + _RESULT_IE = FloatplaneIE _TESTS = [{ 'url': 'https://www.floatplane.com/channel/linustechtips/home/ltxexpo', 'info_dict': { @@ -346,36 +385,3 @@ class FloatplaneChannelIE(InfoExtractor): }, 'playlist_mincount': 200, }] - - def _fetch_page(self, display_id, creator_id, channel_id, page): - query = { - 'id': creator_id, - 'limit': self._PAGE_SIZE, - 'fetchAfter': page * self._PAGE_SIZE, - } - if channel_id: - query['channel'] = channel_id - page_data = self._download_json( - 'https://www.floatplane.com/api/v3/content/creator', display_id, - query=query, note=f'Downloading page {page + 1}') - for post in page_data or []: - yield self.url_result( - f'https://www.floatplane.com/post/{post["id"]}', - FloatplaneIE, id=post['id'], title=post.get('title'), - release_timestamp=parse_iso8601(post.get('releaseDate'))) - - def _real_extract(self, url): - creator, channel = self._match_valid_url(url).group('id', 'channel') - display_id = join_nonempty(creator, channel, delim='/') - - creator_data = self._download_json( - 'https://www.floatplane.com/api/v3/creator/named', - display_id, query={'creatorURL[0]': creator})[0] - - channel_data = traverse_obj( - creator_data, ('channels', lambda _, v: v['urlname'] == channel), get_all=False) or {} - - return self.playlist_result(OnDemandPagedList(functools.partial( - self._fetch_page, display_id, creator_data['id'], channel_data.get('id')), self._PAGE_SIZE), - display_id, title=channel_data.get('title') or creator_data.get('title'), - description=channel_data.get('about') or creator_data.get('about')) diff --git a/plugins/youtube_download/yt_dlp/extractor/francetv.py b/plugins/youtube_download/yt_dlp/extractor/francetv.py index 873b4eb..7148ec0 100644 --- a/plugins/youtube_download/yt_dlp/extractor/francetv.py +++ b/plugins/youtube_download/yt_dlp/extractor/francetv.py @@ -371,15 +371,16 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): class FranceTVInfoIE(FranceTVBaseInfoExtractor): - IE_NAME = 'francetvinfo.fr' - _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P[^/?#&.]+)' + IE_NAME = 'franceinfo' + IE_DESC = 'franceinfo.fr (formerly francetvinfo.fr)' + _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.france(?:tv)?info.fr/(?:[^/?#]+/)*(?P[^/?#&.]+)' _TESTS = [{ 'url': 'https://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-jeudi-22-aout-2019_3561461.html', 'info_dict': { 'id': 'd12458ee-5062-48fe-bfdd-a30d6a01b793', 'ext': 'mp4', - 'title': 'Soir 3', + 'title': 'Soir 3 - Émission du jeudi 22 août 2019', 'upload_date': '20190822', 'timestamp': 1566510730, 'thumbnail': r're:^https?://.*\.jpe?g$', @@ -398,7 +399,7 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): 'info_dict': { 'id': '7d204c9e-a2d3-11eb-9e4c-000d3a23d482', 'ext': 'mp4', - 'title': 'Covid-19 : une situation catastrophique à New Dehli - Édition du mercredi 21 avril 2021', + 'title': 'Journal 20h00 - Covid-19 : une situation catastrophique à New Dehli', 'thumbnail': r're:^https?://.*\.jpe?g$', 'duration': 76, 'timestamp': 1619028518, @@ -438,6 +439,18 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): 'thumbnail': r're:https://[^/?#]+/v/[^/?#]+/x1080', }, 'add_ie': ['Dailymotion'], + 'skip': 'Broken Dailymotion link', + }, { + 'url': 'https://www.franceinfo.fr/monde/usa/presidentielle/donald-trump/etats-unis-un-risque-d-embrasement-apres-la-mort-d-un-manifestant_7764542.html', + 'info_dict': { + 'id': 'f920fcc2-fa20-11f0-ac98-57a09c50f7ce', + 'ext': 'mp4', + 'title': 'Affaires sensibles - Manifestant tué Le risque d\'embrasement', + 'duration': 118, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'timestamp': 1769367756, + 'upload_date': '20260125', + }, }, { 'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin', 'only_matching': True, @@ -445,6 +458,9 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): # "
[0-9]+)' - _TESTS = [{ - 'url': 'https://watch.thechosen.tv/video/184683594325', - 'md5': '3f878b689588c71b38ec9943c54ff5b0', - 'info_dict': { - 'id': '184683594325', - 'ext': 'mp4', - 'title': 'Season 3 Episode 2: Two by Two', - 'description': 'md5:174c373756ecc8df46b403f4fcfbaf8c', - 'comment_count': int, - 'view_count': int, - 'like_count': int, - 'duration': 4212, - 'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683594325/', - 'timestamp': 1698954546, - 'upload_date': '20231102', - 'modified_timestamp': int, - 'modified_date': str, - }, - }, { - 'url': 'https://watch.thechosen.tv/video/184683596189', - 'md5': 'd581562f9d29ce82f5b7770415334151', - 'info_dict': { - 'id': '184683596189', - 'ext': 'mp4', - 'title': 'Season 4 Episode 8: Humble', - 'description': 'md5:20a57bead43da1cf77cd5b0fe29bbc76', - 'comment_count': int, - 'view_count': int, - 'like_count': int, - 'duration': 5092, - 'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683596189/', - 'timestamp': 1715019474, - 'upload_date': '20240506', - 'modified_timestamp': int, - 'modified_date': str, - }, - }] - - -class TheChosenGroupIE(FrontroGroupBaseIE): - _CHANNEL_ID = '12884901895' - _VIDEO_EXTRACTOR = TheChosenIE - _VIDEO_URL_TMPL = 'https://watch.thechosen.tv/video/%s' - - _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/group/(?P[0-9]+)' - _TESTS = [{ - 'url': 'https://watch.thechosen.tv/group/309237658592', - 'info_dict': { - 'id': '309237658592', - 'title': 'Season 3', - 'timestamp': 1746203969, - 'upload_date': '20250502', - 'modified_timestamp': int, - 'modified_date': str, - }, - 'playlist_count': 8, - }] diff --git a/plugins/youtube_download/yt_dlp/extractor/getcourseru.py b/plugins/youtube_download/yt_dlp/extractor/getcourseru.py index 2d923cf..e2b3a7d 100644 --- a/plugins/youtube_download/yt_dlp/extractor/getcourseru.py +++ b/plugins/youtube_download/yt_dlp/extractor/getcourseru.py @@ -59,7 +59,7 @@ class GetCourseRuIE(InfoExtractor): 'marafon.mani-beauty.com', 'on.psbook.ru', ] - _BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})' + _BASE_URL_RE = rf'https?://(?:(?!player02\.)[a-zA-Z0-9-]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})' _VALID_URL = [ rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P[^?#]+)', rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P\d+)', diff --git a/plugins/youtube_download/yt_dlp/extractor/lbry.py b/plugins/youtube_download/yt_dlp/extractor/lbry.py index e6eef13..192d612 100644 --- a/plugins/youtube_download/yt_dlp/extractor/lbry.py +++ b/plugins/youtube_download/yt_dlp/extractor/lbry.py @@ -95,6 +95,7 @@ class LBRYBaseIE(InfoExtractor): '_type': 'url', 'id': item['claim_id'], 'url': self._permanent_url(url, item['name'], item['claim_id']), + 'ie_key': 'LBRY', } def _playlist_entries(self, url, display_id, claim_param, metadata): diff --git a/plugins/youtube_download/yt_dlp/extractor/learningonscreen.py b/plugins/youtube_download/yt_dlp/extractor/learningonscreen.py index f4b51e6..aff59e0 100644 --- a/plugins/youtube_download/yt_dlp/extractor/learningonscreen.py +++ b/plugins/youtube_download/yt_dlp/extractor/learningonscreen.py @@ -29,7 +29,7 @@ class LearningOnScreenIE(InfoExtractor): }] def _real_initialize(self): - if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'): + if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-LOS-LIVE'): self.raise_login_required(method='session_cookies') def _real_extract(self, url): diff --git a/plugins/youtube_download/yt_dlp/extractor/locipo.py b/plugins/youtube_download/yt_dlp/extractor/locipo.py new file mode 100644 index 0000000..e4bad7f --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/locipo.py @@ -0,0 +1,209 @@ +import functools +import math + +from .streaks import StreaksBaseIE +from ..networking import HEADRequest +from ..utils import ( + InAdvancePagedList, + clean_html, + js_to_json, + parse_iso8601, + parse_qs, + str_or_none, +) +from ..utils.traversal import require, traverse_obj + + +class LocipoBaseIE(StreaksBaseIE): + _API_BASE = 'https://web-api.locipo.jp' + _BASE_URL = 'https://locipo.jp' + _UUID_RE = r'[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12}' + + def _call_api(self, path, item_id, note, fatal=True): + return self._download_json( + f'{self._API_BASE}/{path}', item_id, + f'Downloading {note} API JSON', + f'Unable to download {note} API JSON', + fatal=fatal) + + +class LocipoIE(LocipoBaseIE): + _VALID_URL = [ + fr'https?://locipo\.jp/creative/(?P{LocipoBaseIE._UUID_RE})', + fr'https?://locipo\.jp/embed/?\?(?:[^#]+&)?id=(?P{LocipoBaseIE._UUID_RE})', + ] + _TESTS = [{ + 'url': 'https://locipo.jp/creative/fb5ffeaa-398d-45ce-bb49-0e221b5f94f1', + 'info_dict': { + 'id': 'fb5ffeaa-398d-45ce-bb49-0e221b5f94f1', + 'ext': 'mp4', + 'title': 'リアルカレカノ#4 ~伊達さゆりと勉強しよっ?~', + 'description': 'md5:70a40c202f3fb7946b61e55fa015094c', + 'display_id': '5a2947fe596441f5bab88a61b0432d0d', + 'live_status': 'not_live', + 'modified_date': r're:\d{8}', + 'modified_timestamp': int, + 'release_timestamp': 1711789200, + 'release_date': '20240330', + 'series': 'リアルカレカノ', + 'series_id': '1142', + 'tags': 'count:4', + 'thumbnail': r're:https?://.+\.(?:jpg|png)', + 'timestamp': 1756984919, + 'upload_date': '20250904', + 'uploader': '東海テレビ', + 'uploader_id': 'locipo-prod', + }, + }, { + 'url': 'https://locipo.jp/embed/?id=71a334a0-2b25-406f-9d96-88f341f571c2', + 'info_dict': { + 'id': '71a334a0-2b25-406f-9d96-88f341f571c2', + 'ext': 'mp4', + 'title': '#1 オーディション/ゲスト伊藤美来、豊田萌絵', + 'description': 'md5:5bbcf532474700439cf56ceb6a15630e', + 'display_id': '0ab32634b884499a84adb25de844c551', + 'live_status': 'not_live', + 'modified_date': r're:\d{8}', + 'modified_timestamp': int, + 'release_timestamp': 1751623200, + 'release_date': '20250704', + 'series': '声優ラジオのウラカブリ~Locipo出張所~', + 'series_id': '1454', + 'tags': 'count:6', + 'thumbnail': r're:https?://.+\.(?:jpg|png)', + 'timestamp': 1757002966, + 'upload_date': '20250904', + 'uploader': 'テレビ愛知', + 'uploader_id': 'locipo-prod', + }, + }, { + 'url': 'https://locipo.jp/creative/bff9950d-229b-4fe9-911a-7fa71a232f35?list=69a5b15c-901f-4828-a336-30c0de7612d3', + 'info_dict': { + 'id': '69a5b15c-901f-4828-a336-30c0de7612d3', + 'title': '見て・乗って・語りたい。 東海の鉄道沼', + }, + 'playlist_mincount': 3, + }, { + 'url': 'https://locipo.jp/creative/a0751a7f-c7dd-4a10-a7f1-e12720bdf16c?list=006cff3f-ba74-42f0-b4fd-241486ebda2b', + 'info_dict': { + 'id': 'a0751a7f-c7dd-4a10-a7f1-e12720bdf16c', + 'ext': 'mp4', + 'title': '#839 人間真空パック', + 'description': 'md5:9fe190333b6975c5001c8c9cbe20d276', + 'display_id': 'c2b4c9f4a6d648bd8e3c320e384b9d56', + 'live_status': 'not_live', + 'modified_date': r're:\d{8}', + 'modified_timestamp': int, + 'release_timestamp': 1746239400, + 'release_date': '20250503', + 'series': 'でんじろう先生のはぴエネ!', + 'series_id': '202', + 'tags': 'count:3', + 'thumbnail': r're:https?://.+\.(?:jpg|png)', + 'timestamp': 1756975909, + 'upload_date': '20250904', + 'uploader': '中京テレビ', + 'uploader_id': 'locipo-prod', + }, + 'params': {'noplaylist': True}, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + playlist_id = traverse_obj(parse_qs(url), ('list', -1, {str})) + if self._yes_playlist(playlist_id, video_id): + return self.url_result( + f'{self._BASE_URL}/playlist/{playlist_id}', LocipoPlaylistIE) + + creatives = self._call_api(f'creatives/{video_id}', video_id, 'Creatives') + media_id = traverse_obj(creatives, ('media_id', {str}, {require('Streaks media ID')})) + + webpage = self._download_webpage(url, video_id) + config = self._search_json( + r'window\.__NUXT__\.config\s*=', webpage, 'config', video_id, transform_source=js_to_json) + api_key = traverse_obj(config, ('public', 'streaksVodPlaybackApiKey', {str}, {require('api key')})) + + return { + **self._extract_from_streaks_api('locipo-prod', media_id, headers={ + 'Origin': 'https://locipo.jp', + 'X-Streaks-Api-Key': api_key, + }), + **traverse_obj(creatives, { + 'title': ('name', {clean_html}), + 'description': ('description', {clean_html}, filter), + 'release_timestamp': ('publication_started_at', {parse_iso8601}), + 'tags': ('keyword', {clean_html}, {lambda x: x.split(',')}, ..., {str.strip}, filter), + 'uploader': ('company', 'name', {clean_html}, filter), + }), + **traverse_obj(creatives, ('series', { + 'series': ('name', {clean_html}, filter), + 'series_id': ('id', {str_or_none}), + })), + 'id': video_id, + } + + +class LocipoPlaylistIE(LocipoBaseIE): + _VALID_URL = [ + fr'https?://locipo\.jp/(?Pplaylist)/(?P{LocipoBaseIE._UUID_RE})', + r'https?://locipo\.jp/(?Pseries)/(?P\d+)', + ] + _TESTS = [{ + 'url': 'https://locipo.jp/playlist/35d3dd2b-531d-4824-8575-b1c527d29538', + 'info_dict': { + 'id': '35d3dd2b-531d-4824-8575-b1c527d29538', + 'title': 'レシピ集', + }, + 'playlist_mincount': 135, + }, { + # Redirects to https://locipo.jp/series/1363 + 'url': 'https://locipo.jp/playlist/fef7c4fb-741f-4d6a-a3a6-754f354302a2', + 'info_dict': { + 'id': '1363', + 'title': 'CBCアナウンサー公式【みてちょてれび】', + 'description': 'md5:50a1b23e63112d5c06c882835c8c1fb1', + }, + 'playlist_mincount': 38, + }, { + 'url': 'https://locipo.jp/series/503', + 'info_dict': { + 'id': '503', + 'title': 'FishingLover東海', + 'description': '東海地区の釣り場でフィッシングの魅力を余すところなくご紹介!!', + }, + 'playlist_mincount': 223, + }] + _PAGE_SIZE = 100 + + def _fetch_page(self, path, playlist_id, page): + creatives = self._download_json( + f'{self._API_BASE}/{path}/{playlist_id}/creatives', + playlist_id, f'Downloading page {page + 1}', query={ + 'premium': False, + 'live': False, + 'limit': self._PAGE_SIZE, + 'offset': page * self._PAGE_SIZE, + }) + + for video_id in traverse_obj(creatives, ('items', ..., 'id', {str})): + yield self.url_result(f'{self._BASE_URL}/creative/{video_id}', LocipoIE) + + def _real_extract(self, url): + playlist_type, playlist_id = self._match_valid_url(url).group('type', 'id') + if urlh := self._request_webpage(HEADRequest(url), playlist_id, fatal=False): + playlist_type, playlist_id = self._match_valid_url(urlh.url).group('type', 'id') + + path = 'playlists' if playlist_type == 'playlist' else 'series' + creatives = self._call_api( + f'{path}/{playlist_id}/creatives', playlist_id, path.capitalize()) + + entries = InAdvancePagedList( + functools.partial(self._fetch_page, path, playlist_id), + math.ceil(int(creatives['total']) / self._PAGE_SIZE), self._PAGE_SIZE) + + return self.playlist_result( + entries, playlist_id, + **traverse_obj(creatives, ('items', ..., playlist_type, { + 'title': ('name', {clean_html}, filter), + 'description': ('description', {clean_html}, filter), + }, any))) diff --git a/plugins/youtube_download/yt_dlp/extractor/matchitv.py b/plugins/youtube_download/yt_dlp/extractor/matchitv.py new file mode 100644 index 0000000..7e49a42 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/matchitv.py @@ -0,0 +1,38 @@ +from .common import InfoExtractor +from ..utils import join_nonempty, unified_strdate +from ..utils.traversal import traverse_obj + + +class MatchiTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?matchi\.tv/watch/?\?(?:[^#]+&)?s=(?P[0-9a-zA-Z]+)' + _TESTS = [{ + 'url': 'https://matchi.tv/watch?s=0euhjzrxsjm', + 'info_dict': { + 'id': '0euhjzrxsjm', + 'ext': 'mp4', + 'title': 'Court 2 at Stratford Padel Club 2024-07-13T18:32:24', + 'thumbnail': 'https://thumbnails.padelgo.tv/0euhjzrxsjm.jpg', + 'upload_date': '20240713', + }, + }, { + 'url': 'https://matchi.tv/watch?s=FkKDJ9SvAx1', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + loaded_media = traverse_obj( + self._search_nextjs_data(webpage, video_id, fatal=False), + ('props', 'pageProps', 'loadedMedia', {dict})) or {} + start_date_time = traverse_obj(loaded_media, ('startDateTime', {str})) + + return { + 'id': video_id, + 'title': join_nonempty(loaded_media.get('courtDescription'), start_date_time, delim=' '), + 'thumbnail': f'https://thumbnails.padelgo.tv/{video_id}.jpg', + 'upload_date': unified_strdate(start_date_time), + 'formats': self._extract_m3u8_formats( + f'https://streams.padelgo.tv/v2/streams/m3u8/{video_id}/anonymous/playlist.m3u8', + video_id, 'mp4', m3u8_id='hls'), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/mixcloud.py b/plugins/youtube_download/yt_dlp/extractor/mixcloud.py index 852670f..c555b63 100644 --- a/plugins/youtube_download/yt_dlp/extractor/mixcloud.py +++ b/plugins/youtube_download/yt_dlp/extractor/mixcloud.py @@ -25,7 +25,7 @@ class MixcloudBaseIE(InfoExtractor): %s } }''' % (lookup_key, username, f', slug: "{slug}"' if slug else '', object_fields), # noqa: UP031 - })['data'][lookup_key] + }, impersonate=True)['data'][lookup_key] class MixcloudIE(MixcloudBaseIE): diff --git a/plugins/youtube_download/yt_dlp/extractor/neteasemusic.py b/plugins/youtube_download/yt_dlp/extractor/neteasemusic.py index 8f3a7d2..e3b5f47 100644 --- a/plugins/youtube_download/yt_dlp/extractor/neteasemusic.py +++ b/plugins/youtube_download/yt_dlp/extractor/neteasemusic.py @@ -156,18 +156,36 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'id': '17241424', 'ext': 'mp3', 'title': 'Opus 28', - 'upload_date': '20080211', - 'timestamp': 1202745600, + 'upload_date': '20060912', + 'timestamp': 1158076800, 'duration': 263, 'thumbnail': r're:^http.*\.jpg', - 'album': 'Piano Solos Vol. 2', + 'album': 'Piano Solos, Vol. 2', 'album_artist': 'Dustin O\'Halloran', 'average_rating': int, - 'description': '[00:05.00]纯音乐,请欣赏\n', + 'description': 'md5:b566b92c55ca348df65d206c5d689576', 'album_artists': ['Dustin O\'Halloran'], 'creators': ['Dustin O\'Halloran'], 'subtitles': {'lyrics': [{'ext': 'lrc'}]}, }, + }, { + 'url': 'https://music.163.com/#/song?id=2755669231', + 'info_dict': { + 'id': '2755669231', + 'ext': 'mp3', + 'title': '十二月-Departure', + 'upload_date': '20251111', + 'timestamp': 1762876800, + 'duration': 188, + 'thumbnail': r're:^http.*\.jpg', + 'album': '円', + 'album_artist': 'ひとひら', + 'average_rating': int, + 'description': 'md5:deee249c8c9c3e2c54ecdab36e87d174', + 'album_artists': ['ひとひら'], + 'creators': ['ひとひら'], + 'subtitles': {'lyrics': [{'ext': 'lrc', 'data': 'md5:d32b4425a5d6c9fa249ca6e803dd0401'}]}, + }, }, { 'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846', 'md5': 'b896be78d8d34bd7bb665b26710913ff', @@ -241,9 +259,16 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'lyrics': [{'data': original, 'ext': 'lrc'}], } - lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)' - original_ts_texts = re.findall(lyrics_expr, original) - translation_ts_dict = dict(re.findall(lyrics_expr, translated)) + def collect_lyrics(lrc): + lyrics_expr = r'\[([0-9]{2}):([0-9]{2})[:.]([0-9]{2,})\]([^\n]+)' + matches = re.findall(lyrics_expr, lrc) + return ( + (f'[{minute}:{sec}.{msec}]', text) + for minute, sec, msec, text in matches + ) + + original_ts_texts = collect_lyrics(original) + translation_ts_dict = dict(collect_lyrics(translated)) merged = '\n'.join( join_nonempty(f'{timestamp}{text}', translation_ts_dict.get(timestamp, ''), delim=' / ') diff --git a/plugins/youtube_download/yt_dlp/extractor/odnoklassniki.py b/plugins/youtube_download/yt_dlp/extractor/odnoklassniki.py index c9bee90..8d3700f 100644 --- a/plugins/youtube_download/yt_dlp/extractor/odnoklassniki.py +++ b/plugins/youtube_download/yt_dlp/extractor/odnoklassniki.py @@ -9,13 +9,13 @@ from ..utils import ( int_or_none, qualities, smuggle_url, - traverse_obj, unescapeHTML, unified_strdate, unsmuggle_url, url_or_none, urlencode_postdata, ) +from ..utils.traversal import find_element, traverse_obj class OdnoklassnikiIE(InfoExtractor): @@ -264,9 +264,7 @@ class OdnoklassnikiIE(InfoExtractor): note='Downloading desktop webpage', headers={'Referer': smuggled['referrer']} if smuggled.get('referrer') else {}) - error = self._search_regex( - r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<', - webpage, 'error', default=None) + error = traverse_obj(webpage, {find_element(cls='vp_video_stub_txt')}) # Direct link from boosty if (error == 'The author of this video has not been found or is blocked' and not smuggled.get('referrer') and mode == 'videoembed'): diff --git a/plugins/youtube_download/yt_dlp/extractor/opencast.py b/plugins/youtube_download/yt_dlp/extractor/opencast.py index a4b0a19..5b7710a 100644 --- a/plugins/youtube_download/yt_dlp/extractor/opencast.py +++ b/plugins/youtube_download/yt_dlp/extractor/opencast.py @@ -33,7 +33,8 @@ class OpencastBaseIE(InfoExtractor): vid\.igb\.illinois\.edu| cursosabertos\.c3sl\.ufpr\.br| mcmedia\.missioncollege\.org| - clases\.odon\.edu\.uy + clases\.odon\.edu\.uy| + oc-p\.uni-jena\.de )''' _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' @@ -106,7 +107,7 @@ class OpencastBaseIE(InfoExtractor): class OpencastIE(OpencastBaseIE): _VALID_URL = rf'''(?x) - https?://(?P{OpencastBaseIE._INSTANCES_RE})/paella/ui/watch\.html\? + https?://(?P{OpencastBaseIE._INSTANCES_RE})/paella[0-9]*/ui/watch\.html\? (?:[^#]+&)?id=(?P{OpencastBaseIE._UUID_RE})''' _API_BASE = 'https://%s/search/episode.json?id=%s' @@ -131,8 +132,12 @@ class OpencastIE(OpencastBaseIE): def _real_extract(self, url): host, video_id = self._match_valid_url(url).group('host', 'id') - return self._parse_mediapackage( - self._call_api(host, video_id)['search-results']['result']['mediapackage']) + response = self._call_api(host, video_id) + package = traverse_obj(response, ( + ('search-results', 'result'), + ('result', ...), # Path needed for oc-p.uni-jena.de + 'mediapackage', {dict}, any)) or {} + return self._parse_mediapackage(package) class OpencastPlaylistIE(OpencastBaseIE): diff --git a/plugins/youtube_download/yt_dlp/extractor/patreon.py b/plugins/youtube_download/yt_dlp/extractor/patreon.py index b511994..1828b81 100644 --- a/plugins/youtube_download/yt_dlp/extractor/patreon.py +++ b/plugins/youtube_download/yt_dlp/extractor/patreon.py @@ -1,6 +1,5 @@ import functools import itertools -import urllib.parse from .common import InfoExtractor from .sproutvideo import VidsIoIE @@ -11,30 +10,38 @@ from ..utils import ( ExtractorError, clean_html, determine_ext, + extract_attributes, + float_or_none, int_or_none, mimetype2ext, parse_iso8601, smuggle_url, str_or_none, + update_url_query, url_or_none, urljoin, ) -from ..utils.traversal import require, traverse_obj, value +from ..utils.traversal import ( + find_elements, + require, + traverse_obj, + trim_str, + value, +) class PatreonBaseIE(InfoExtractor): @functools.cached_property def patreon_user_agent(self): - # Patreon mobile UA is needed to avoid triggering Cloudflare anti-bot protection. - # Newer UA yields higher res m3u8 formats for locked posts, but gives 401 if not logged-in + # Patreon mobile UA yields higher res m3u8 for locked posts, but gives 401 if not logged-in if self._get_cookies('https://www.patreon.com/').get('session_id'): - return 'Patreon/72.2.28 (Android; Android 14; Scale/2.10)' - return 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)' + return 'Patreon/126.9.0.15 (Android; Android 14; Scale/2.10)' + return None def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None): if headers is None: headers = {} - if 'User-Agent' not in headers: + if 'User-Agent' not in headers and self.patreon_user_agent: headers['User-Agent'] = self.patreon_user_agent if query: query.update({'json-api-version': 1.0}) @@ -43,7 +50,9 @@ class PatreonBaseIE(InfoExtractor): return self._download_json( f'https://www.patreon.com/api/{ep}', item_id, note=note if note else 'Downloading API JSON', - query=query, fatal=fatal, headers=headers) + query=query, fatal=fatal, headers=headers, + # If not using Patreon mobile UA, we need impersonation due to Cloudflare + impersonate=not self.patreon_user_agent) except ExtractorError as e: if not isinstance(e.cause, HTTPError) or mimetype2ext(e.cause.response.headers.get('Content-Type')) != 'json': raise @@ -121,6 +130,7 @@ class PatreonIE(PatreonBaseIE): 'channel_is_verified': True, 'chapters': 'count:4', 'timestamp': 1423689666, + 'media_type': 'video', }, 'params': { 'noplaylist': True, @@ -161,7 +171,7 @@ class PatreonIE(PatreonBaseIE): 'uploader_url': 'https://www.patreon.com/loish', 'description': 'md5:e2693e97ee299c8ece47ffdb67e7d9d2', 'title': 'VIDEO // sketchbook flipthrough', - 'uploader': 'Loish ', + 'uploader': 'Loish', 'tags': ['sketchbook', 'video'], 'channel_id': '1641751', 'channel_url': 'https://www.patreon.com/loish', @@ -274,8 +284,73 @@ class PatreonIE(PatreonBaseIE): 'channel_id': '9346307', }, 'params': {'getcomments': True}, + }, { + # Inlined media in post; uses _extract_from_media_api + 'url': 'https://www.patreon.com/posts/scottfalco-146966245', + 'info_dict': { + 'id': '146966245', + 'ext': 'mp4', + 'title': 'scottfalco 1080', + 'description': 'md5:a3f29bbd0a46b4821ec3400957c98aa2', + 'uploader': 'Insanimate', + 'uploader_id': '2828146', + 'uploader_url': 'https://www.patreon.com/Insanimate', + 'channel_id': '6260877', + 'channel_url': 'https://www.patreon.com/Insanimate', + 'channel_follower_count': int, + 'comment_count': int, + 'like_count': int, + 'duration': 7.833333, + 'timestamp': 1767061800, + 'upload_date': '20251230', + }, }] _RETURN_TYPE = 'video' + _HTTP_HEADERS = { + # Must be all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, and Vimeo. + # patreon.com URLs redirect to www.patreon.com; this matters when requesting mux.com m3u8s + 'referer': 'https://www.patreon.com/', + } + + def _extract_from_media_api(self, media_id): + attributes = traverse_obj( + self._call_api(f'media/{media_id}', media_id, fatal=False), + ('data', 'attributes', {dict})) + if not attributes: + return None + + info_dict = traverse_obj(attributes, { + 'title': ('file_name', {lambda x: x.rpartition('.')[0]}), + 'timestamp': ('created_at', {parse_iso8601}), + 'duration': ('display', 'duration', {float_or_none}), + }) + info_dict['id'] = media_id + + playback_url = traverse_obj( + attributes, ('display', (None, 'viewer_playback_data'), 'url', {url_or_none}, any)) + download_url = traverse_obj(attributes, ('download_url', {url_or_none})) + + if playback_url and mimetype2ext(attributes.get('mimetype')) == 'm3u8': + info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles( + playback_url, media_id, 'mp4', fatal=False, headers=self._HTTP_HEADERS) + for f in info_dict['formats']: + f['http_headers'] = self._HTTP_HEADERS + if transcript_url := traverse_obj(attributes, ('display', 'transcript_url', {url_or_none})): + info_dict['subtitles'].setdefault('en', []).append({ + 'url': transcript_url, + 'ext': 'vtt', + }) + elif playback_url or download_url: + info_dict['formats'] = [{ + # If playback_url is available, download_url is a duplicate lower resolution format + 'url': playback_url or download_url, + 'vcodec': 'none' if attributes.get('media_type') != 'video' else None, + }] + + if not info_dict.get('formats'): + return None + + return info_dict def _real_extract(self, url): video_id = self._match_id(url) @@ -299,6 +374,7 @@ class PatreonIE(PatreonBaseIE): 'comment_count': ('comment_count', {int_or_none}), }) + seen_media_ids = set() entries = [] idx = 0 for include in traverse_obj(post, ('included', lambda _, v: v['type'])): @@ -320,6 +396,8 @@ class PatreonIE(PatreonBaseIE): 'url': download_url, 'alt_title': traverse_obj(media_attributes, ('file_name', {str})), }) + if media_id := traverse_obj(include, ('id', {str})): + seen_media_ids.add(media_id) elif include_type == 'user': info.update(traverse_obj(include, { @@ -340,34 +418,29 @@ class PatreonIE(PatreonBaseIE): 'channel_follower_count': ('attributes', 'patron_count', {int_or_none}), })) - # Must be all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, and Vimeo. - # patreon.com URLs redirect to www.patreon.com; this matters when requesting mux.com m3u8s - headers = {'referer': 'https://www.patreon.com/'} + if embed_url := traverse_obj(attributes, ('embed', 'url', {url_or_none})): + # Convert useless vimeo.com URLs to useful player.vimeo.com embed URLs + vimeo_id, vimeo_hash = self._search_regex( + r'//vimeo\.com/(\d+)(?:/([\da-f]+))?', embed_url, + 'vimeo id', group=(1, 2), default=(None, None)) + if vimeo_id: + embed_url = update_url_query( + f'https://player.vimeo.com/video/{vimeo_id}', + {'h': vimeo_hash or []}) + if VimeoIE.suitable(embed_url): + entry = self.url_result( + VimeoIE._smuggle_referrer(embed_url, self._HTTP_HEADERS['referer']), + VimeoIE, url_transparent=True) + else: + entry = self.url_result(smuggle_url(embed_url, self._HTTP_HEADERS)) - # handle Vimeo embeds - if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo': - v_url = urllib.parse.unquote(self._html_search_regex( - r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', - traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '') - if url_or_none(v_url) and self._request_webpage( - v_url, video_id, 'Checking Vimeo embed URL', headers=headers, - fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection - entries.append(self.url_result( - VimeoIE._smuggle_referrer(v_url, headers['referer']), - VimeoIE, url_transparent=True)) - - embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none})) - if embed_url and (urlh := self._request_webpage( - embed_url, video_id, 'Checking embed URL', headers=headers, - fatal=False, errnote=False, expected_status=403)): - # Vimeo's Cloudflare anti-bot protection will return HTTP status 200 for 404, so we need - # to check for "Sorry, we couldn&rsquo;t find that page" in the meta description tag - meta_description = clean_html(self._html_search_meta( - 'description', self._webpage_read_content(urlh, embed_url, video_id, fatal=False), default=None)) - # Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie - if ((urlh.status != 403 and meta_description != 'Sorry, we couldn’t find that page') - or VidsIoIE.suitable(embed_url)): - entries.append(self.url_result(smuggle_url(embed_url, headers))) + if urlh := self._request_webpage( + embed_url, video_id, 'Checking embed URL', headers=self._HTTP_HEADERS, + fatal=False, errnote=False, expected_status=(403, 429), # Ignore Vimeo 429's + ): + # Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie + if VidsIoIE.suitable(embed_url) or urlh.status != 403: + entries.append(entry) post_file = traverse_obj(attributes, ('post_file', {dict})) if post_file: @@ -381,13 +454,27 @@ class PatreonIE(PatreonBaseIE): }) elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8': formats, subtitles = self._extract_m3u8_formats_and_subtitles( - post_file['url'], video_id, headers=headers) + post_file['url'], video_id, headers=self._HTTP_HEADERS) + for f in formats: + f['http_headers'] = self._HTTP_HEADERS entries.append({ 'id': video_id, 'formats': formats, 'subtitles': subtitles, - 'http_headers': headers, }) + if media_id := traverse_obj(post_file, ('media_id', {int}, {str_or_none})): + seen_media_ids.add(media_id) + + for media_id in traverse_obj(attributes, ( + 'content', {find_elements(attr='data-media-id', value=r'\d+', regex=True, html=True)}, + ..., {extract_attributes}, 'data-media-id', + )): + # Inlined media may be duplicates of what was extracted above + if media_id in seen_media_ids: + continue + if media := self._extract_from_media_api(media_id): + entries.append(media) + seen_media_ids.add(media_id) can_view_post = traverse_obj(attributes, 'current_user_can_view') comments = None @@ -538,14 +625,13 @@ class PatreonCampaignIE(PatreonBaseIE): 'info_dict': { 'id': '9631148', 'title': 'Anything Else?', - 'description': 'md5:2ee1db4aed2f9460c2b295825a24aa08', + 'description': 'md5:b2f20eec4cb5520d9a4be4971f28add5', 'uploader': 'dan ', 'uploader_id': '13852412', 'uploader_url': 'https://www.patreon.com/anythingelse', 'channel': 'Anything Else?', 'channel_id': '9631148', 'channel_url': 'https://www.patreon.com/anythingelse', - 'channel_follower_count': int, 'age_limit': 0, 'thumbnail': r're:https?://.+/.+', }, @@ -590,16 +676,15 @@ class PatreonCampaignIE(PatreonBaseIE): break def _real_extract(self, url): - campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity') if campaign_id is None: - webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent}) - campaign_id = traverse_obj(self._search_nextjs_data(webpage, vanity, default=None), ( - 'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str})) - if not campaign_id: - campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), ( - ((..., 'value', 'campaign', 'data'), lambda _, v: v['type'] == 'campaign'), - 'id', {str}, any, {require('campaign ID')})) + results = self._call_api('search', vanity, query={ + 'q': vanity, + 'page[size]': '5', + })['data'] + campaign_id = traverse_obj(results, ( + lambda _, v: v['type'] == 'campaign-document' and v['attributes']['url'].lower().endswith(f'/{vanity.lower()}'), + 'id', {trim_str(start='campaign_')}, filter, any, {require('campaign ID')})) params = { 'json-api-use-default-includes': 'false', diff --git a/plugins/youtube_download/yt_dlp/extractor/pbs.py b/plugins/youtube_download/yt_dlp/extractor/pbs.py index dde734f..f9fbb2b 100644 --- a/plugins/youtube_download/yt_dlp/extractor/pbs.py +++ b/plugins/youtube_download/yt_dlp/extractor/pbs.py @@ -453,6 +453,23 @@ class PBSIE(InfoExtractor): 'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=', 'only_matching': True, }, + { + # Next.js v13+, see https://github.com/yt-dlp/yt-dlp/issues/13299 + 'url': 'https://www.pbs.org/video/caregiving', + 'info_dict': { + 'id': '3101776876', + 'ext': 'mp4', + 'title': 'Caregiving - Caregiving', + 'description': 'A documentary revealing America’s caregiving crisis through intimate stories and expert insight.', + 'display_id': 'caregiving', + 'duration': 6783, + 'thumbnail': 'https://image.pbs.org/video-assets/BSrSkcc-asset-mezzanine-16x9-nlcxQts.jpg', + 'chapters': [], + }, + 'params': { + 'skip_download': True, + }, + }, ] _ERRORS = { 101: 'We\'re sorry, but this video is not yet available.', @@ -506,6 +523,7 @@ class PBSIE(InfoExtractor): r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',", r']+\bdata-cove-id=["\'](\d+)"', # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/ r']+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)', # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/ + r'\\"videoTPMediaId\\":\\\"(\d+)\\"', # Next.js v13, e.g. https://www.pbs.org/video/caregiving r'\bhttps?://player\.pbs\.org/[\w-]+player/(\d+)', # last pattern to avoid false positives ] diff --git a/plugins/youtube_download/yt_dlp/extractor/peertube.py b/plugins/youtube_download/yt_dlp/extractor/peertube.py index 4c8205f..4445485 100644 --- a/plugins/youtube_download/yt_dlp/extractor/peertube.py +++ b/plugins/youtube_download/yt_dlp/extractor/peertube.py @@ -1322,7 +1322,7 @@ class PeerTubeIE(InfoExtractor): ) (?P{_UUID_RE}) ''' - _EMBED_REGEX = [r'''(?x)]+\bsrc=["\'](?P(?:https?:)?//{_INSTANCES_RE}/videos/embed/{cls._UUID_RE})'''] + _EMBED_REGEX = [rf'''(?x)]+\bsrc=["\'](?P(?:https?:)?//{_INSTANCES_RE}/videos/embed/{_UUID_RE})'''] _TESTS = [{ 'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d', 'md5': '8563064d245a4be5705bddb22bb00a28', diff --git a/plugins/youtube_download/yt_dlp/extractor/pornhub.py b/plugins/youtube_download/yt_dlp/extractor/pornhub.py index cdfa3f1..3fc802e 100644 --- a/plugins/youtube_download/yt_dlp/extractor/pornhub.py +++ b/plugins/youtube_download/yt_dlp/extractor/pornhub.py @@ -128,7 +128,7 @@ class PornHubIE(PornHubBaseIE): _VALID_URL = rf'''(?x) https?:// (?: - (?:[^/]+\.)? + (?:[a-zA-Z0-9.-]+\.)? {PornHubBaseIE._PORNHUB_HOST_RE} /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| (?:www\.)?thumbzilla\.com/video/ @@ -506,6 +506,7 @@ class PornHubIE(PornHubBaseIE): 'cast': ({find_elements(attr='data-label', value='pornstar')}, ..., {clean_html}), }), 'subtitles': subtitles, + 'http_headers': {'Referer': f'https://www.{host}/'}, }, info) @@ -533,7 +534,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE): class PornHubUserIE(PornHubPlaylistBaseIE): - _VALID_URL = rf'(?Phttps?://(?:[^/]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))(?:[?#&]|/(?!videos)|$)' + _VALID_URL = rf'(?Phttps?://(?:[a-zA-Z0-9.-]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))(?:[?#&]|/(?!videos)|$)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph', 'playlist_mincount': 118, diff --git a/plugins/youtube_download/yt_dlp/extractor/rtp.py b/plugins/youtube_download/yt_dlp/extractor/rtp.py index 03e9859..b699484 100644 --- a/plugins/youtube_download/yt_dlp/extractor/rtp.py +++ b/plugins/youtube_download/yt_dlp/extractor/rtp.py @@ -8,6 +8,7 @@ from ..utils import ( determine_ext, int_or_none, js_to_json, + make_archive_id, parse_duration, parse_iso8601, url_or_none, @@ -16,12 +17,12 @@ from ..utils.traversal import traverse_obj class RTPIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/(?:[^/#?]+/)?p(?P\d+)/(?Pe\d+)' + _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/(?:[^/#?]+/)?(?Pp\d+)/(?Pe\d+)(?:/[^/#?]+/(?P\d+))?' _TESTS = [{ 'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas', 'md5': 'e736ce0c665e459ddb818546220b4ef8', 'info_dict': { - 'id': 'e174042', + 'id': '395769', 'ext': 'mp3', 'title': 'Paixões Cruzadas', 'description': 'md5:af979e58ba0ab73f78435fc943fdb070', @@ -32,12 +33,15 @@ class RTPIE(InfoExtractor): 'modified_date': '20190327', 'timestamp': 1417219200, 'upload_date': '20141129', + 'episode_id': 'e174042', + 'series_id': 'p405', + '_old_archive_ids': ['rtp e174042'], }, }, { 'url': 'https://www.rtp.pt/play/zigzag/p13166/e757904/25-curiosidades-25-de-abril', 'md5': '5b4859940e3adef61247a77dfb76046a', 'info_dict': { - 'id': 'e757904', + 'id': '1226642', 'ext': 'mp4', 'title': 'Estudar ou não estudar', 'description': 'md5:3bfd7eb8bebfd5711a08df69c9c14c35', @@ -50,13 +54,16 @@ class RTPIE(InfoExtractor): 'episode_number': 2, 'episode': 'Estudar ou não estudar', 'modified_date': '20240404', + 'episode_id': 'e757904', + 'series_id': 'p13166', + '_old_archive_ids': ['rtp e757904'], }, }, { # Episode not accessible through API 'url': 'https://www.rtp.pt/play/estudoemcasa/p7776/e500050/portugues-1-ano', 'md5': '57660c0b46db9f22118c52cbd65975e4', 'info_dict': { - 'id': 'e500050', + 'id': '871639', 'ext': 'mp4', 'title': 'Português - 1.º ano', 'duration': 1669.0, @@ -64,6 +71,67 @@ class RTPIE(InfoExtractor): 'upload_date': '20201020', 'timestamp': 1603180799, 'thumbnail': 'https://cdn-images.rtp.pt/EPG/imagens/39482_59449_64850.png?v=3&w=860', + 'episode_id': 'e500050', + 'series_id': 'p7776', + '_old_archive_ids': ['rtp e500050'], + }, + 'expected_warnings': ['Episode data not found in API response; falling back to web extraction'], + }, { + # Ambiguous URL for 1st part of a multi-part episode without --no-playlist + 'url': 'https://www.rtp.pt/play/p14335/e877072/a-nossa-tarde', + 'info_dict': { + 'id': 'e877072', + 'title': 'A Nossa Tarde', + 'duration': 6545.0, + 'series': 'A Nossa Tarde', + 'series_id': 'p14335', + 'season': '2025', + 'episode_id': 'e877072', + 'timestamp': 1758560188, + 'upload_date': '20250922', + 'modified_timestamp': 1758563110, + 'modified_date': '20250922', + }, + 'playlist_count': 3, + }, { + # Ambiguous URL for 1st part of a multi-part episode with --no-playlist + 'url': 'https://www.rtp.pt/play/p14335/e877072/a-nossa-tarde', + 'md5': '2aa3c89c95e852d6f04168b95d0d0632', + 'info_dict': { + 'id': '1364711', + 'ext': 'mp4', + 'title': 'A Nossa Tarde', + 'duration': 1292.0, + 'thumbnail': r're:https://cdn-images\.rtp\.pt/multimedia/screenshots/p14335/p14335_1_20250922155118e161t0312\.jpg', + 'series': 'A Nossa Tarde', + 'series_id': 'p14335', + 'season': '2025', + 'episode_id': 'e877072', + 'timestamp': 1758560188, + 'upload_date': '20250922', + 'modified_timestamp': 1758563110, + 'modified_date': '20250922', + '_old_archive_ids': ['rtp e877072'], + }, + 'params': {'noplaylist': True}, + }, { + # Unambiguous URL for 2nd part of a multi-part episode + 'url': 'https://www.rtp.pt/play/p14335/e877072/a-nossa-tarde/1364744', + 'md5': 'b624767af558a557372a6fcd1dcdfa17', + 'info_dict': { + 'id': '1364744', + 'ext': 'mp4', + 'title': 'A Nossa Tarde', + 'duration': 3270.0, + 'thumbnail': r're:https://cdn-images\.rtp\.pt/multimedia/screenshots/p14335/p14335_2_20250922165718e161t0412\.jpg', + 'series': 'A Nossa Tarde', + 'series_id': 'p14335', + 'season': '2025', + 'episode_id': 'e877072', + 'timestamp': 1758560188, + 'upload_date': '20250922', + 'modified_timestamp': 1758563110, + 'modified_date': '20250922', }, }] @@ -92,19 +160,19 @@ class RTPIE(InfoExtractor): return None return url.replace('/drm-fps/', '/hls/').replace('/drm-dash/', '/dash/') - def _extract_formats(self, media_urls, episode_id): + def _extract_formats(self, media_urls, display_id): formats = [] subtitles = {} for media_url in set(traverse_obj(media_urls, (..., {url_or_none}, {self._cleanup_media_url}))): ext = determine_ext(media_url) if ext == 'm3u8': fmts, subs = self._extract_m3u8_formats_and_subtitles( - media_url, episode_id, m3u8_id='hls', fatal=False) + media_url, display_id, m3u8_id='hls', fatal=False) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) elif ext == 'mpd': fmts, subs = self._extract_mpd_formats_and_subtitles( - media_url, episode_id, mpd_id='dash', fatal=False) + media_url, display_id, mpd_id='dash', fatal=False) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) else: @@ -114,24 +182,12 @@ class RTPIE(InfoExtractor): }) return formats, subtitles - def _extract_from_api(self, program_id, episode_id): - auth_token = self._fetch_auth_token() - if not auth_token: - return - episode_data = traverse_obj(self._download_json( - f'https://www.rtp.pt/play/api/1/get-episode/{program_id}/{episode_id[1:]}', episode_id, - query={'include_assets': 'true', 'include_webparams': 'true'}, - headers={ - 'Accept': '*/*', - 'Authorization': f'Bearer {auth_token}', - 'User-Agent': self._USER_AGENT, - }, fatal=False), 'result', {dict}) - if not episode_data: - return - asset_urls = traverse_obj(episode_data, ('assets', 0, 'asset_url', {dict})) + def _extract_asset(self, asset_data, episode_id, episode_info, archive_compat=False): + asset_id = asset_data['asset_id'] + asset_urls = traverse_obj(asset_data, ('asset_url', {dict})) media_urls = traverse_obj(asset_urls, ( ((('hls', 'dash'), 'stream_url'), ('multibitrate', ('url_hls', 'url_dash'))),)) - formats, subtitles = self._extract_formats(media_urls, episode_id) + formats, subtitles = self._extract_formats(media_urls, asset_id) for sub_data in traverse_obj(asset_urls, ('subtitles', 'vtt_list', lambda _, v: url_or_none(v['file']))): subtitles.setdefault(sub_data.get('code') or 'pt', []).append({ @@ -140,17 +196,63 @@ class RTPIE(InfoExtractor): }) return { - 'id': episode_id, + **episode_info, + 'id': asset_id, + 'episode_id': episode_id, + # asset_id is a unique identifier for all RTP videos, while episode_id is duplicated + # across all parts of a multi-part episode. Older versions of this IE returned + # episode_id as the video id and would only download the first part of multi-part eps. + # For download archive compat, we should return the episode_id as the old archive id + # *only* when extracting single-part episodes OR the *first* part of a multi-part ep. + '_old_archive_ids': [make_archive_id(self, episode_id)] if archive_compat else None, 'formats': formats, 'subtitles': subtitles, - 'thumbnail': traverse_obj(episode_data, ('assets', 0, 'asset_thumbnail', {url_or_none})), + **traverse_obj(asset_data, { + 'thumbnail': ('asset_thumbnail', {url_or_none}), + 'duration': ('asset_duration', {parse_duration}), + 'webpage_url': ('web', 'url', {url_or_none}), + }), + } + + def _report_fallback_warning(self, missing_info_name='required info', display_id=None): + self.report_warning( + f'{missing_info_name.capitalize()} not found in API response; falling back to web extraction', + video_id=display_id) + + def _entries(self, assets, episode_id, episode_info): + # Only pass archive_compat=True for the first entry without an asset_id in its webpage_url + for idx, asset_data in enumerate(assets): + yield self._extract_asset(asset_data, episode_id, episode_info, archive_compat=not idx) + + def _extract_from_api(self, program_id, episode_id, asset_id): + auth_token = self._fetch_auth_token() + if not auth_token: + self._report_fallback_warning('auth token', episode_id) + return None + + episode_data = traverse_obj(self._download_json( + f'https://www.rtp.pt/play/api/1/get-episode/{program_id[1:]}/{episode_id[1:]}', + asset_id or episode_id, query={'include_assets': 'true', 'include_webparams': 'true'}, + headers={ + 'Accept': '*/*', + 'Authorization': f'Bearer {auth_token}', + 'User-Agent': self._USER_AGENT, + }, fatal=False), 'result', {dict}) + if not episode_data: + self._report_fallback_warning('episode data', episode_id) + return None + + episode_info = { + 'id': episode_id, # playlist id + 'episode_id': episode_id, + 'series_id': program_id, **traverse_obj(episode_data, ('episode', { 'title': (('episode_title', 'program_title'), {str}, filter, any), 'alt_title': ('episode_subtitle', {str}, filter), 'description': (('episode_description', 'episode_summary'), {str}, filter, any), 'timestamp': ('episode_air_date', {parse_iso8601(delimiter=' ')}), 'modified_timestamp': ('episode_lastchanged', {parse_iso8601(delimiter=' ')}), - 'duration': ('episode_duration_complete', {parse_duration}), + 'duration': ('episode_duration_complete', {parse_duration}), # playlist duration 'episode': ('episode_title', {str}, filter), 'episode_number': ('episode_number', {int_or_none}), 'season': ('program_season', {str}, filter), @@ -158,6 +260,30 @@ class RTPIE(InfoExtractor): })), } + assets = traverse_obj(episode_data, ('assets', lambda _, v: v['asset_id'])) + if not assets: + self._report_fallback_warning('asset IDs', episode_id) + return None + + if asset_id: + asset_data = traverse_obj(assets, (lambda _, v: v['asset_id'] == asset_id, any)) + if not asset_data: + self._report_fallback_warning(f'asset {asset_id}', episode_id) + return None + return self._extract_asset(asset_data, episode_id, episode_info) + + asset_data = assets[0] + + if self._yes_playlist( + len(assets) > 1 and episode_id, asset_data['asset_id'], + playlist_label='multi-part episode', video_label='individual part', + ): + return self.playlist_result( + self._entries(assets, episode_id, episode_info), **episode_info) + + # Pass archive_compat=True so we return _old_archive_ids for URLs without an asset_id + return self._extract_asset(asset_data, episode_id, episode_info, archive_compat=True) + _RX_OBFUSCATION = re.compile(r'''(?xs) atob\s*\(\s*decodeURIComponent\s*\(\s* (\[[0-9A-Za-z%,'"]*\]) @@ -172,25 +298,35 @@ class RTPIE(InfoExtractor): )).decode('iso-8859-1')), data) - def _extract_from_html(self, url, episode_id): - webpage = self._download_webpage(url, episode_id) + def _extract_from_html(self, url, program_id, episode_id, asset_id): + webpage = self._download_webpage(url, asset_id or episode_id) + if not asset_id: + asset_id = self._search_regex(r'\basset_id\s*:\s*"(\d+)"', webpage, 'asset ID') + old_archive_ids = [make_archive_id(self, episode_id)] + else: + old_archive_ids = None formats = [] subtitles = {} media_urls = traverse_obj(re.findall(r'(?:var\s+f\s*=|RTPPlayer\({[^}]+file:)\s*({[^}]+}|"[^"]+")', webpage), ( -1, (({self.__unobfuscate}, {js_to_json}, {json.loads}, {dict.values}, ...), {json.loads}))) - formats, subtitles = self._extract_formats(media_urls, episode_id) + formats, subtitles = self._extract_formats(media_urls, asset_id) return { - 'id': episode_id, + 'id': asset_id, + 'episode_id': episode_id, + 'series_id': program_id, 'formats': formats, 'subtitles': subtitles, 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage, default=None), 'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None), - **self._search_json_ld(webpage, episode_id, default={}), + **self._search_json_ld(webpage, asset_id, default={}), 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None), + '_old_archive_ids': old_archive_ids, } def _real_extract(self, url): - program_id, episode_id = self._match_valid_url(url).group('program_id', 'id') - return self._extract_from_api(program_id, episode_id) or self._extract_from_html(url, episode_id) + program_id, episode_id, asset_id = self._match_valid_url(url).group('program_id', 'episode_id', 'asset_id') + return ( + self._extract_from_api(program_id, episode_id, asset_id) + or self._extract_from_html(url, program_id, episode_id, asset_id)) diff --git a/plugins/youtube_download/yt_dlp/extractor/rumble.py b/plugins/youtube_download/yt_dlp/extractor/rumble.py index 757d699..c4a7971 100644 --- a/plugins/youtube_download/yt_dlp/extractor/rumble.py +++ b/plugins/youtube_download/yt_dlp/extractor/rumble.py @@ -405,7 +405,7 @@ class RumbleChannelIE(InfoExtractor): for video_url in traverse_obj( get_elements_html_by_class('videostream__link', webpage), (..., {extract_attributes}, 'href'), ): - yield self.url_result(urljoin('https://rumble.com', video_url)) + yield self.url_result(urljoin('https://rumble.com', video_url), RumbleIE) def _real_extract(self, url): url, playlist_id = self._match_valid_url(url).groups() diff --git a/plugins/youtube_download/yt_dlp/extractor/sauceplus.py b/plugins/youtube_download/yt_dlp/extractor/sauceplus.py index 75d7022..e0041a7 100644 --- a/plugins/youtube_download/yt_dlp/extractor/sauceplus.py +++ b/plugins/youtube_download/yt_dlp/extractor/sauceplus.py @@ -1,4 +1,4 @@ -from .floatplane import FloatplaneBaseIE +from .floatplane import FloatplaneBaseIE, FloatplaneChannelBaseIE class SaucePlusIE(FloatplaneBaseIE): @@ -39,3 +39,19 @@ class SaucePlusIE(FloatplaneBaseIE): def _real_initialize(self): if not self._get_cookies(self._BASE_URL).get('__Host-sp-sess'): self.raise_login_required() + + +class SaucePlusChannelIE(FloatplaneChannelBaseIE): + _VALID_URL = r'https?://(?:(?:www|beta)\.)?sauceplus\.com/channel/(?P[\w-]+)/home(?:/(?P[\w-]+))?' + _BASE_URL = 'https://www.sauceplus.com' + _RESULT_IE = SaucePlusIE + _PAGE_SIZE = 20 + _TESTS = [{ + 'url': 'https://www.sauceplus.com/channel/williamosman/home', + 'info_dict': { + 'id': 'williamosman', + 'title': 'William Osman', + 'description': 'md5:a67bc961d23c293b2c5308d84f34f26c', + }, + 'playlist_mincount': 158, + }] diff --git a/plugins/youtube_download/yt_dlp/extractor/sbs.py b/plugins/youtube_download/yt_dlp/extractor/sbs.py index 7edb521..c8be9bd 100644 --- a/plugins/youtube_download/yt_dlp/extractor/sbs.py +++ b/plugins/youtube_download/yt_dlp/extractor/sbs.py @@ -146,8 +146,8 @@ class SBSIE(InfoExtractor): 'release_year': ('releaseYear', {int_or_none}), 'duration': ('duration', ({float_or_none}, {parse_duration})), 'is_live': ('liveStream', {bool}), - 'age_limit': (('classificationID', 'contentRating'), {str.upper}, { - lambda x: self._AUS_TV_PARENTAL_GUIDELINES.get(x)}), # dict.get is unhashable in py3.7 + 'age_limit': ( + ('classificationID', 'contentRating'), {str.upper}, {self._AUS_TV_PARENTAL_GUIDELINES.get}), }, get_all=False), **traverse_obj(media, { 'categories': (('genres', ...), ('taxonomy', ('genre', 'subgenre'), 'name'), {str}), diff --git a/plugins/youtube_download/yt_dlp/extractor/soundcloud.py b/plugins/youtube_download/yt_dlp/extractor/soundcloud.py index 5c3ff28..bcb8c0f 100644 --- a/plugins/youtube_download/yt_dlp/extractor/soundcloud.py +++ b/plugins/youtube_download/yt_dlp/extractor/soundcloud.py @@ -6,6 +6,7 @@ import re from .common import InfoExtractor, SearchInfoExtractor from ..networking import HEADRequest from ..networking.exceptions import HTTPError +from ..networking.impersonate import ImpersonateTarget from ..utils import ( ExtractorError, float_or_none, @@ -118,9 +119,9 @@ class SoundcloudBaseIE(InfoExtractor): self.cache.store('soundcloud', 'client_id', client_id) def _update_client_id(self): - webpage = self._download_webpage('https://soundcloud.com/', None) + webpage = self._download_webpage('https://soundcloud.com/', None, 'Downloading main page') for src in reversed(re.findall(r']+src="([^"]+)"', webpage)): - script = self._download_webpage(src, None, fatal=False) + script = self._download_webpage(src, None, 'Downloading JS asset', fatal=False) if script: client_id = self._search_regex( r'client_id\s*:\s*"([0-9a-zA-Z]{32})"', @@ -136,13 +137,13 @@ class SoundcloudBaseIE(InfoExtractor): if non_fatal: del kwargs['fatal'] query = kwargs.get('query', {}).copy() - for _ in range(2): + for is_first_attempt in (True, False): query['client_id'] = self._CLIENT_ID kwargs['query'] = query try: return self._download_json(*args, **kwargs) except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403): + if is_first_attempt and isinstance(e.cause, HTTPError) and e.cause.status in (401, 403): self._store_client_id(None) self._update_client_id() continue @@ -152,7 +153,10 @@ class SoundcloudBaseIE(InfoExtractor): raise def _initialize_pre_login(self): - self._CLIENT_ID = self.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf' + self._CLIENT_ID = self.cache.load('soundcloud', 'client_id') + if self._CLIENT_ID: + return + self._update_client_id() def _verify_oauth_token(self, token): if self._request_webpage( @@ -317,9 +321,16 @@ class SoundcloudBaseIE(InfoExtractor): continue # XXX: if not extract_flat, 429 error must be caught where _extract_info_dict is called - stream_url = traverse_obj(self._call_api( - format_url, track_id, f'Downloading {short_identifier} format info JSON', - query=query, headers=self._HEADERS), ('url', {url_or_none})) + try: + stream_url = traverse_obj(self._call_api( + format_url, track_id, f'Downloading {short_identifier} format info JSON', + query=query, headers=self._HEADERS), ('url', {url_or_none})) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 404: + self.report_warning(f'{short_identifier} format not found', video_id=track_id) + continue + raise + if invalid_url(stream_url): continue format_urls.add(stream_url) @@ -830,6 +841,30 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudBaseIE): 'entries': self._entries(base_url, playlist_id), } + @functools.cached_property + def _browser_impersonate_target(self): + available_targets = self._downloader._get_available_impersonate_targets() + if not available_targets: + # impersonate=True gives a generic warning when no impersonation targets are available + return True + + # Any browser target older than chrome-116 is 403'd by Datadome + MIN_SUPPORTED_TARGET = ImpersonateTarget('chrome', '116', 'windows', '10') + version_as_float = lambda x: float(x.version) if x.version else 0 + + # Always try to use the newest Chrome target available + filtered = sorted([ + target[0] for target in available_targets + if target[0].client == 'chrome' and target[0].os in ('windows', 'macos') + ], key=version_as_float) + + if not filtered or version_as_float(filtered[-1]) < version_as_float(MIN_SUPPORTED_TARGET): + # All available targets are inadequate or newest available Chrome target is too old, so + # warn the user to upgrade their dependency to a version with the minimum supported target + return MIN_SUPPORTED_TARGET + + return filtered[-1] + def _entries(self, url, playlist_id): # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200. # https://developers.soundcloud.com/blog/offset-pagination-deprecated @@ -844,7 +879,9 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudBaseIE): try: response = self._call_api( url, playlist_id, query=query, headers=self._HEADERS, - note=f'Downloading track page {i + 1}') + note=f'Downloading track page {i + 1}', + # See: https://github.com/yt-dlp/yt-dlp/issues/15660 + impersonate=self._browser_impersonate_target) break except ExtractorError as e: # Downloading page may result in intermittent 502 HTTP error diff --git a/plugins/youtube_download/yt_dlp/extractor/spankbang.py b/plugins/youtube_download/yt_dlp/extractor/spankbang.py index 05f0bb1..80f9c00 100644 --- a/plugins/youtube_download/yt_dlp/extractor/spankbang.py +++ b/plugins/youtube_download/yt_dlp/extractor/spankbang.py @@ -3,6 +3,7 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, + clean_html, determine_ext, merge_dicts, parse_duration, @@ -12,6 +13,7 @@ from ..utils import ( urlencode_postdata, urljoin, ) +from ..utils.traversal import find_element, traverse_obj, trim_str class SpankBangIE(InfoExtractor): @@ -122,7 +124,7 @@ class SpankBangIE(InfoExtractor): }), headers={ 'Referer': url, 'X-Requested-With': 'XMLHttpRequest', - }) + }, impersonate=True) for format_id, format_url in stream.items(): if format_url and isinstance(format_url, list): @@ -178,9 +180,9 @@ class SpankBangPlaylistIE(InfoExtractor): def _real_extract(self, url): mobj = self._match_valid_url(url) playlist_id = mobj.group('id') - - webpage = self._download_webpage( - url, playlist_id, headers={'Cookie': 'country=US; mobile=on'}) + country = self.get_param('geo_bypass_country') or 'US' + self._set_cookie('.spankbang.com', 'country', country.upper()) + webpage = self._download_webpage(url, playlist_id, impersonate=True) entries = [self.url_result( urljoin(url, mobj.group('path')), @@ -189,8 +191,8 @@ class SpankBangPlaylistIE(InfoExtractor): r']+\bhref=(["\'])(?P/?[\da-z]+-(?P[\da-z]+)/playlist/[^"\'](?:(?!\1).)*)\1', webpage)] - title = self._html_search_regex( - r'([^<]+)\s+playlist\s*<', webpage, 'playlist title', - fatal=False) + title = traverse_obj(webpage, ( + {find_element(tag='h1', attr='data-testid', value='playlist-title')}, + {clean_html}, {trim_str(end=' Playlist')})) return self.playlist_result(entries, playlist_id, title) diff --git a/plugins/youtube_download/yt_dlp/extractor/steam.py b/plugins/youtube_download/yt_dlp/extractor/steam.py index 71d9481..fa60fb2 100644 --- a/plugins/youtube_download/yt_dlp/extractor/steam.py +++ b/plugins/youtube_download/yt_dlp/extractor/steam.py @@ -8,15 +8,12 @@ from ..utils import ( extract_attributes, join_nonempty, js_to_json, + parse_resolution, str_or_none, + url_basename, url_or_none, ) -from ..utils.traversal import ( - find_element, - find_elements, - traverse_obj, - trim_str, -) +from ..utils.traversal import find_element, traverse_obj class SteamIE(InfoExtractor): @@ -27,7 +24,7 @@ class SteamIE(InfoExtractor): 'id': '105600', 'title': 'Terraria', }, - 'playlist_mincount': 3, + 'playlist_mincount': 5, }, { 'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/', 'info_dict': { @@ -37,6 +34,39 @@ class SteamIE(InfoExtractor): 'playlist_mincount': 26, }] + def _entries(self, app_id, app_name, data_props): + for trailer in traverse_obj(data_props, ( + 'trailers', lambda _, v: str_or_none(v['id']), + )): + movie_id = str_or_none(trailer['id']) + + thumbnails = [] + for thumbnail_url in traverse_obj(trailer, ( + ('poster', 'thumbnail'), {url_or_none}, + )): + thumbnails.append({ + 'url': thumbnail_url, + **parse_resolution(url_basename(thumbnail_url)), + }) + + formats = [] + if hls_manifest := traverse_obj(trailer, ('hlsManifest', {url_or_none})): + formats.extend(self._extract_m3u8_formats( + hls_manifest, app_id, 'mp4', m3u8_id='hls', fatal=False)) + for dash_manifest in traverse_obj(trailer, ('dashManifests', ..., {url_or_none})): + formats.extend(self._extract_mpd_formats( + dash_manifest, app_id, mpd_id='dash', fatal=False)) + self._remove_duplicate_formats(formats) + + yield { + 'id': join_nonempty(app_id, movie_id), + 'title': join_nonempty(app_name, 'video', movie_id, delim=' '), + 'formats': formats, + 'series': app_name, + 'series_id': app_id, + 'thumbnails': thumbnails, + } + def _real_extract(self, url): app_id = self._match_id(url) @@ -45,32 +75,13 @@ class SteamIE(InfoExtractor): self._set_cookie('store.steampowered.com', 'lastagecheckage', '1-January-2000') webpage = self._download_webpage(url, app_id) - app_name = traverse_obj(webpage, ({find_element(cls='apphub_AppName')}, {clean_html})) + data_props = traverse_obj(webpage, ( + {find_element(cls='gamehighlight_desktopcarousel', html=True)}, + {extract_attributes}, 'data-props', {json.loads}, {dict})) + app_name = traverse_obj(data_props, ('appName', {clean_html})) - entries = [] - for data_prop in traverse_obj(webpage, ( - {find_elements(cls='highlight_player_item highlight_movie', html=True)}, - ..., {extract_attributes}, 'data-props', {json.loads}, {dict}, - )): - formats = [] - if hls_manifest := traverse_obj(data_prop, ('hlsManifest', {url_or_none})): - formats.extend(self._extract_m3u8_formats( - hls_manifest, app_id, 'mp4', m3u8_id='hls', fatal=False)) - for dash_manifest in traverse_obj(data_prop, ('dashManifests', ..., {url_or_none})): - formats.extend(self._extract_mpd_formats( - dash_manifest, app_id, mpd_id='dash', fatal=False)) - - movie_id = traverse_obj(data_prop, ('id', {trim_str(start='highlight_movie_')})) - entries.append({ - 'id': movie_id, - 'title': join_nonempty(app_name, 'video', movie_id, delim=' '), - 'formats': formats, - 'series': app_name, - 'series_id': app_id, - 'thumbnail': traverse_obj(data_prop, ('screenshot', {url_or_none})), - }) - - return self.playlist_result(entries, app_id, app_name) + return self.playlist_result( + self._entries(app_id, app_name, data_props), app_id, app_name) class SteamCommunityIE(InfoExtractor): diff --git a/plugins/youtube_download/yt_dlp/extractor/streaks.py b/plugins/youtube_download/yt_dlp/extractor/streaks.py index 60123d6..642e052 100644 --- a/plugins/youtube_download/yt_dlp/extractor/streaks.py +++ b/plugins/youtube_download/yt_dlp/extractor/streaks.py @@ -22,7 +22,7 @@ class StreaksBaseIE(InfoExtractor): _GEO_BYPASS = False _GEO_COUNTRIES = ['JP'] - def _extract_from_streaks_api(self, project_id, media_id, headers=None, query=None, ssai=False): + def _extract_from_streaks_api(self, project_id, media_id, headers=None, query=None, ssai=False, live_from_start=False): try: response = self._download_json( self._API_URL_TEMPLATE.format('playback', project_id, media_id, ''), @@ -83,6 +83,10 @@ class StreaksBaseIE(InfoExtractor): fmts, subs = self._extract_m3u8_formats_and_subtitles( src_url, media_id, 'mp4', m3u8_id='hls', fatal=False, live=is_live, query=query) + for fmt in fmts: + if live_from_start: + fmt.setdefault('downloader_options', {}).update({'ffmpeg_args': ['-live_start_index', '0']}) + fmt['is_from_start'] = True formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) diff --git a/plugins/youtube_download/yt_dlp/extractor/tarangplus.py b/plugins/youtube_download/yt_dlp/extractor/tarangplus.py index 62f254d..92aa6ac 100644 --- a/plugins/youtube_download/yt_dlp/extractor/tarangplus.py +++ b/plugins/youtube_download/yt_dlp/extractor/tarangplus.py @@ -11,6 +11,7 @@ from ..utils import ( OnDemandPagedList, clean_html, extract_attributes, + url_or_none, urljoin, ) from ..utils.traversal import ( @@ -128,7 +129,7 @@ class TarangPlusVideoIE(TarangPlusBaseIE): **traverse_obj(metadata, { 'id': ('content_id', {str}), 'title': ('title', {str}), - 'thumbnail': ('image', {str}), + 'thumbnail': ('image', {url_or_none}), }), **traverse_obj(hidden_inputs_data, { 'id': ('content_id', {str}), diff --git a/plugins/youtube_download/yt_dlp/extractor/teachable.py b/plugins/youtube_download/yt_dlp/extractor/teachable.py index 0d39be6..5be247a 100644 --- a/plugins/youtube_download/yt_dlp/extractor/teachable.py +++ b/plugins/youtube_download/yt_dlp/extractor/teachable.py @@ -102,7 +102,7 @@ class TeachableIE(TeachableBaseIE): _WORKING = False _VALID_URL = r'''(?x) (?: - {}https?://(?P[^/]+)| + {}https?://(?P[a-zA-Z0-9.-]+)| https?://(?:www\.)?(?P{}) ) /courses/[^/]+/lectures/(?P\d+) @@ -211,7 +211,7 @@ class TeachableIE(TeachableBaseIE): class TeachableCourseIE(TeachableBaseIE): _VALID_URL = r'''(?x) (?: - {}https?://(?P[^/]+)| + {}https?://(?P[a-zA-Z0-9.-]+)| https?://(?:www\.)?(?P{}) ) /(?:courses|p)/(?:enrolled/)?(?P[^/?#&]+) diff --git a/plugins/youtube_download/yt_dlp/extractor/tele5.py b/plugins/youtube_download/yt_dlp/extractor/tele5.py index a455375..b179060 100644 --- a/plugins/youtube_download/yt_dlp/extractor/tele5.py +++ b/plugins/youtube_download/yt_dlp/extractor/tele5.py @@ -9,39 +9,39 @@ class Tele5IE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?P[\w-]+)/(?P[\w-]+)(?:/(?P[\w-]+))?' _TESTS = [{ # slug_a and slug_b - 'url': 'https://tele5.de/mediathek/stargate-atlantis/quarantane', + 'url': 'https://tele5.de/mediathek/star-trek-enterprise/vox-sola', 'info_dict': { - 'id': '6852024', + 'id': '4140114', 'ext': 'mp4', - 'title': 'Quarantäne', - 'description': 'md5:6af0373bd0fcc4f13e5d47701903d675', - 'episode': 'Episode 73', - 'episode_number': 73, - 'season': 'Season 4', - 'season_number': 4, - 'series': 'Stargate Atlantis', - 'upload_date': '20240525', - 'timestamp': 1716643200, - 'duration': 2503.2, - 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/21/c81fcb45-8902-309b-badb-4e6d546b575d.jpeg', - 'creators': ['Tele5'], + 'title': 'Vox Sola', + 'description': 'md5:329d115f74324d4364efc1a11c4ea7c9', + 'duration': 2542.76, + 'thumbnail': r're:https://[^/.]+\.disco-api\.com/.+\.jpe?g', 'tags': [], + 'creators': ['Tele5'], + 'series': 'Star Trek - Enterprise', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 22', + 'episode_number': 22, + 'timestamp': 1770491100, + 'upload_date': '20260207', }, }, { # only slug_a - 'url': 'https://tele5.de/mediathek/inside-out', + 'url': 'https://tele5.de/mediathek/30-miles-from-nowhere-im-wald-hoert-dich-niemand-schreien', 'info_dict': { - 'id': '6819502', + 'id': '4102641', 'ext': 'mp4', - 'title': 'Inside out', - 'description': 'md5:7e5f32ed0be5ddbd27713a34b9293bfd', - 'series': 'Inside out', - 'upload_date': '20240523', - 'timestamp': 1716494400, - 'duration': 5343.4, - 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/15/181eba3c-f9f0-3faf-b14d-0097050a3aa4.jpeg', - 'creators': ['Tele5'], + 'title': '30 Miles from Nowhere - Im Wald hört dich niemand schreien', + 'description': 'md5:0b731539f39ee186ebcd9dd444a86fc2', + 'duration': 4849.96, + 'thumbnail': r're:https://[^/.]+\.disco-api\.com/.+\.jpe?g', 'tags': [], + 'creators': ['Tele5'], + 'series': '30 Miles from Nowhere - Im Wald hört dich niemand schreien', + 'timestamp': 1770417300, + 'upload_date': '20260206', }, }, { # playlist @@ -50,20 +50,27 @@ class Tele5IE(DiscoveryPlusBaseIE): 'id': 'mediathek-schlefaz', }, 'playlist_mincount': 3, + 'skip': 'Dead link', }] def _real_extract(self, url): parent_slug, slug_a, slug_b = self._match_valid_url(url).group('parent_slug', 'slug_a', 'slug_b') playlist_id = join_nonempty(parent_slug, slug_a, slug_b, delim='-') - query = {'environment': 'tele5', 'v': '2'} + query = { + 'include': 'default', + 'filter[environment]': 'tele5', + 'v': '2', + } + if not slug_b: endpoint = f'page/{slug_a}' query['parent_slug'] = parent_slug else: - endpoint = f'videos/{slug_b}' - query['filter[show.slug]'] = slug_a - cms_data = self._download_json(f'https://de-api.loma-cms.com/feloma/{endpoint}/', playlist_id, query=query) + endpoint = f'shows/{slug_a}' + query['filter[video.slug]'] = slug_b + + cms_data = self._download_json(f'https://public.aurora.enhanced.live/site/{endpoint}/', playlist_id, query=query) return self.playlist_result(map( functools.partial(self._get_disco_api_info, url, disco_host='eu1-prod.disco-api.com', realm='dmaxde', country='DE'), diff --git a/plugins/youtube_download/yt_dlp/extractor/thechosen.py b/plugins/youtube_download/yt_dlp/extractor/thechosen.py new file mode 100644 index 0000000..79619fe --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/thechosen.py @@ -0,0 +1,118 @@ +from .common import InfoExtractor +from .frontro import FrontroGroupBaseIE +from ..utils import ( + determine_ext, + int_or_none, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class TheChosenIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/(?:video|watch)/(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://watch.thechosen.tv/video/184683594325', + 'md5': '3f878b689588c71b38ec9943c54ff5b0', + 'info_dict': { + 'id': '184683594325', + 'ext': 'mp4', + 'title': 'Season 3 Episode 2: Two by Two', + 'description': 'md5:174c373756ecc8df46b403f4fcfbaf8c', + 'duration': 4212, + 'thumbnail': 'https://cas.global.ssl.fastly.net/hls-10-4/184683594325/thumbnail.png', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://watch.thechosen.tv/video/184683596189', + 'md5': 'd581562f9d29ce82f5b7770415334151', + 'info_dict': { + 'id': '184683596189', + 'ext': 'mp4', + 'title': 'Season 4 Episode 8: Humble', + 'description': 'md5:20a57bead43da1cf77cd5b0fe29bbc76', + 'duration': 5092, + 'thumbnail': 'https://cdn.thechosen.media/videos/cmkvu7nn500nhfm0wpgmm6180/thumbnail.jpg', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://watch.thechosen.tv/video/184683621748', + 'info_dict': { + 'id': '184683621748', + 'ext': 'mp4', + 'title': 'Season 5 Episode 2: House of Cards', + 'description': 'md5:55b389cbb4b7a01d8c2d837102905617', + 'duration': 3086, + 'thumbnail': 'https://cdn.thechosen.media/videos/cmkolt4el000afd5zd6x0aeph/thumbnail.jpg', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://watch.thechosen.tv/video/184683621750', + 'info_dict': { + 'id': '184683621750', + 'ext': 'mp4', + 'title': 'Season 5 Episode 3: Woes', + 'description': 'md5:90ca3cc41316a965fd1cd3d5b3458784', + 'duration': 3519, + 'thumbnail': 'https://cdn.thechosen.media/videos/cmkoltsl8000dfd5z3luid3mg/thumbnail.jpg', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + metadata = self._download_json(f'https://api.watch.thechosen.tv/v1/videos/{video_id}', video_id) + + formats, subtitles = [], {} + for fmt_url in traverse_obj(metadata, ('details', 'video', ..., 'url', {url_or_none})): + ext = determine_ext(fmt_url) + if ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles(fmt_url, video_id, 'mp4', fatal=False) + elif ext == 'mpd': + fmts, subs = self._extract_mpd_formats_and_subtitles(fmt_url, video_id, fatal=False) + else: + self.report_warning(f'Skipping unsupported format extension "{ext}"', video_id=video_id) + continue + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + thumbnails = [] + for thumb_id, thumb_url in traverse_obj(metadata, ( + ('thumbs', 'thumbnails'), {dict.items}, lambda _, v: url_or_none(v[1]), + )): + thumbnails.append({ + 'id': thumb_id, + 'url': thumb_url, + }) + + return { + 'id': video_id, + **traverse_obj(metadata, ({ + 'title': ('title', {str}), + 'description': ('description', {str}), + 'duration': ('duration', {int_or_none}), + })), + 'thumbnails': thumbnails, + 'formats': formats, + 'subtitles': subtitles, + } + + +class TheChosenGroupIE(FrontroGroupBaseIE): + _WORKING = False + _CHANNEL_ID = '12884901895' + _VIDEO_EXTRACTOR = TheChosenIE + _VIDEO_URL_TMPL = 'https://watch.thechosen.tv/watch/%s' + + _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/group/(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://watch.thechosen.tv/group/309237658592', + 'info_dict': { + 'id': '309237658592', + 'title': 'Season 3', + 'timestamp': 1746203969, + 'upload_date': '20250502', + 'modified_timestamp': int, + 'modified_date': str, + }, + 'playlist_count': 8, + }] diff --git a/plugins/youtube_download/yt_dlp/extractor/tiktok.py b/plugins/youtube_download/yt_dlp/extractor/tiktok.py index 02ec2b2..0660e6e 100644 --- a/plugins/youtube_download/yt_dlp/extractor/tiktok.py +++ b/plugins/youtube_download/yt_dlp/extractor/tiktok.py @@ -1,4 +1,6 @@ +import base64 import functools +import hashlib import itertools import json import random @@ -15,6 +17,7 @@ from ..utils import ( UnsupportedError, UserNotLive, determine_ext, + extract_attributes, filter_dict, format_field, int_or_none, @@ -25,13 +28,13 @@ from ..utils import ( qualities, srt_subtitles_timecode, str_or_none, - traverse_obj, truncate_string, try_call, try_get, url_or_none, urlencode_postdata, ) +from ..utils.traversal import find_element, require, traverse_obj class TikTokBaseIE(InfoExtractor): @@ -217,38 +220,107 @@ class TikTokBaseIE(InfoExtractor): raise ExtractorError('Unable to extract aweme detail info', video_id=aweme_id) return self._parse_aweme_video_app(aweme_detail) + def _solve_challenge_and_set_cookies(self, webpage): + challenge_data = traverse_obj(webpage, ( + {find_element(id='cs', html=True)}, {extract_attributes}, 'class', + filter, {lambda x: f'{x}==='}, {base64.b64decode}, {json.loads})) + + if not challenge_data: + if 'Please wait...' in webpage: + raise ExtractorError('Unable to extract challenge data') + raise ExtractorError('Unexpected response from webpage request') + + self.to_screen('Solving JS challenge using native Python implementation') + + expected_digest = traverse_obj(challenge_data, ( + 'v', 'c', {str}, {base64.b64decode}, + {require('challenge expected digest')})) + + base_hash = traverse_obj(challenge_data, ( + 'v', 'a', {str}, {base64.b64decode}, + {hashlib.sha256}, {require('challenge base hash')})) + + for i in range(1_000_001): + number = str(i).encode() + test_hash = base_hash.copy() + test_hash.update(number) + if test_hash.digest() == expected_digest: + challenge_data['d'] = base64.b64encode(number).decode() + break + else: + raise ExtractorError('Unable to solve JS challenge') + + wci_cookie_value = base64.b64encode( + json.dumps(challenge_data, separators=(',', ':')).encode()).decode() + + # At time of writing, the wci cookie name was `_wafchallengeid` + wci_cookie_name = traverse_obj(webpage, ( + {find_element(id='wci', html=True)}, {extract_attributes}, + 'class', {require('challenge cookie name')})) + + # At time of writing, the **optional** rci cookie name was `waforiginalreid` + rci_cookie_name = traverse_obj(webpage, ( + {find_element(id='rci', html=True)}, {extract_attributes}, 'class')) + rci_cookie_value = traverse_obj(webpage, ( + {find_element(id='rs', html=True)}, {extract_attributes}, 'class')) + + # Actual JS sets Max-Age=1 for the cookies, but we'll manually clear them later instead + expire_time = int(time.time()) + (self.get_param('sleep_interval_requests') or 0) + 120 + self._set_cookie('.tiktok.com', wci_cookie_name, wci_cookie_value, expire_time=expire_time) + if rci_cookie_name and rci_cookie_value: + self._set_cookie('.tiktok.com', rci_cookie_name, rci_cookie_value, expire_time=expire_time) + + return wci_cookie_name, rci_cookie_name + def _extract_web_data_and_status(self, url, video_id, fatal=True): video_data, status = {}, -1 - res = self._download_webpage_handle(url, video_id, fatal=fatal, impersonate=True) - if res is False: + def get_webpage(note='Downloading webpage'): + res = self._download_webpage_handle(url, video_id, note, fatal=fatal, impersonate=True) + if res is False: + return False + + webpage, urlh = res + if urllib.parse.urlparse(urlh.url).path == '/login': + message = 'TikTok is requiring login for access to this content' + if fatal: + self.raise_login_required(message) + self.report_warning(f'{message}. {self._login_hint()}', video_id=video_id) + return False + + return webpage + + webpage = get_webpage() + if webpage is False: return video_data, status - webpage, urlh = res - if urllib.parse.urlparse(urlh.url).path == '/login': - message = 'TikTok is requiring login for access to this content' + universal_data = self._get_universal_data(webpage, video_id) + if not universal_data: + try: + cookie_names = self._solve_challenge_and_set_cookies(webpage) + except ExtractorError as e: + if fatal: + raise + self.report_warning(e.orig_msg, video_id=video_id) + return video_data, status + + webpage = get_webpage(note='Downloading webpage with challenge cookie') + # Manually clear challenge cookies that should expire immediately after webpage request + for cookie_name in filter(None, cookie_names): + self.cookiejar.clear(domain='.tiktok.com', path='/', name=cookie_name) + if webpage is False: + return video_data, status + universal_data = self._get_universal_data(webpage, video_id) + + if not universal_data: + message = 'Unable to extract universal data for rehydration' if fatal: - self.raise_login_required(message) - self.report_warning(f'{message}. {self._login_hint()}') + raise ExtractorError(message) + self.report_warning(message, video_id=video_id) return video_data, status - if universal_data := self._get_universal_data(webpage, video_id): - self.write_debug('Found universal data for rehydration') - status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0 - video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict})) - - elif sigi_data := self._get_sigi_state(webpage, video_id): - self.write_debug('Found sigi state data') - status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0 - video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict})) - - elif next_data := self._search_nextjs_data(webpage, video_id, default={}): - self.write_debug('Found next.js data') - status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0 - video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict})) - - elif fatal: - raise ExtractorError('Unable to extract webpage video data') + status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0 + video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict})) if not traverse_obj(video_data, ('video', {dict})) and traverse_obj(video_data, ('isContentClassified', {bool})): message = 'This post may not be comfortable for some audiences. Log in for access' diff --git a/plugins/youtube_download/yt_dlp/extractor/truth.py b/plugins/youtube_download/yt_dlp/extractor/truth.py index 51d28d1..673948e 100644 --- a/plugins/youtube_download/yt_dlp/extractor/truth.py +++ b/plugins/youtube_download/yt_dlp/extractor/truth.py @@ -51,7 +51,8 @@ class TruthIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - status = self._download_json(f'https://truthsocial.com/api/v1/statuses/{video_id}', video_id) + status = self._download_json( + f'https://truthsocial.com/api/v1/statuses/{video_id}', video_id, impersonate=True) uploader_id = strip_or_none(traverse_obj(status, ('account', 'username'))) return { 'id': video_id, diff --git a/plugins/youtube_download/yt_dlp/extractor/tver.py b/plugins/youtube_download/yt_dlp/extractor/tver.py index ffcc6a7..97f4d4f 100644 --- a/plugins/youtube_download/yt_dlp/extractor/tver.py +++ b/plugins/youtube_download/yt_dlp/extractor/tver.py @@ -4,6 +4,7 @@ from .streaks import StreaksBaseIE from ..utils import ( ExtractorError, GeoRestrictedError, + clean_html, int_or_none, join_nonempty, make_archive_id, @@ -11,7 +12,9 @@ from ..utils import ( str_or_none, strip_or_none, time_seconds, + unified_timestamp, update_url_query, + url_or_none, ) from ..utils.traversal import require, traverse_obj @@ -257,3 +260,113 @@ class TVerIE(StreaksBaseIE): 'id': video_id, '_old_archive_ids': [make_archive_id('BrightcoveNew', brightcove_id)] if brightcove_id else None, } + + +class TVerOlympicIE(StreaksBaseIE): + IE_NAME = 'tver:olympic' + + _API_BASE = 'https://olympic-data.tver.jp/api' + _VALID_URL = r'https?://(?:www\.)?tver\.jp/olympic/milanocortina2026/(?Plive|video)/play/(?P\w+)' + _TESTS = [{ + 'url': 'https://tver.jp/olympic/milanocortina2026/video/play/3b1d4462150b42558d9cc8aabb5238d0/', + 'info_dict': { + 'id': '3b1d4462150b42558d9cc8aabb5238d0', + 'ext': 'mp4', + 'title': '【開会式】ぎゅっと凝縮ハイライト', + 'display_id': 'ref:3b1d4462150b42558d9cc8aabb5238d0', + 'duration': 712.045, + 'live_status': 'not_live', + 'modified_date': r're:\d{8}', + 'modified_timestamp': int, + 'tags': 'count:1', + 'thumbnail': r're:https://.+\.(?:jpg|png)', + 'timestamp': 1770420187, + 'upload_date': '20260206', + 'uploader_id': 'tver-olympic', + }, + }, { + 'url': 'https://tver.jp/olympic/milanocortina2026/live/play/glts313itwvj/', + 'info_dict': { + 'id': 'glts313itwvj', + 'ext': 'mp4', + 'title': '開会式ハイライト', + 'channel_id': 'ntv', + 'display_id': 'ref:sp_260207_spc_01_dvr', + 'duration': 7680, + 'live_status': 'was_live', + 'modified_date': r're:\d{8}', + 'modified_timestamp': int, + 'thumbnail': r're:https://.+\.(?:jpg|png)', + 'timestamp': 1770420300, + 'upload_date': '20260206', + 'uploader_id': 'tver-olympic-live', + }, + }] + + def _real_extract(self, url): + video_type, video_id = self._match_valid_url(url).group('type', 'id') + live_from_start = self.get_param('live_from_start') + + if video_type == 'live': + project_id = 'tver-olympic-live' + api_key = 'a35ebb1ca7d443758dc7fcc5d99b1f72' + olympic_data = traverse_obj(self._download_json( + f'{self._API_BASE}/live/{video_id}', video_id), ('contents', 'live', {dict})) + media_id = traverse_obj(olympic_data, ('video_id', {str})) + + now = time_seconds() + start_timestamp_str = traverse_obj(olympic_data, ('onair_start_date', {str})) + start_timestamp = unified_timestamp(start_timestamp_str, tz_offset=9) + if not start_timestamp: + raise ExtractorError('Unable to extract on-air start time') + end_timestamp = traverse_obj(olympic_data, ( + 'onair_end_date', {unified_timestamp(tz_offset=9)}, {require('on-air end time')})) + + if now < start_timestamp: + self.raise_no_formats( + f'This program is scheduled to start at {start_timestamp_str} JST', expected=True) + + return { + 'id': video_id, + 'live_status': 'is_upcoming', + 'release_timestamp': start_timestamp, + } + elif start_timestamp <= now < end_timestamp: + live_status = 'is_live' + if live_from_start: + media_id += '_dvr' + elif end_timestamp <= now: + dvr_end_timestamp = traverse_obj(olympic_data, ( + 'dvr_end_date', {unified_timestamp(tz_offset=9)})) + if dvr_end_timestamp and now < dvr_end_timestamp: + live_status = 'was_live' + media_id += '_dvr' + else: + raise ExtractorError( + 'This program is no longer available', expected=True) + else: + project_id = 'tver-olympic' + api_key = '4b55a4db3cce4ad38df6dd8543e3e46a' + media_id = video_id + live_status = 'not_live' + olympic_data = traverse_obj(self._download_json( + f'{self._API_BASE}/video/{video_id}', video_id), ('contents', 'video', {dict})) + + return { + **self._extract_from_streaks_api(project_id, f'ref:{media_id}', { + 'Origin': 'https://tver.jp', + 'Referer': 'https://tver.jp/', + 'X-Streaks-Api-Key': api_key, + }, live_from_start=live_from_start), + **traverse_obj(olympic_data, { + 'title': ('title', {clean_html}, filter), + 'alt_title': ('sub_title', {clean_html}, filter), + 'channel': ('channel', {clean_html}, filter), + 'channel_id': ('channel_id', {clean_html}, filter), + 'description': (('description', 'description_l', 'description_s'), {clean_html}, filter, any), + 'timestamp': ('onair_start_date', {unified_timestamp(tz_offset=9)}), + 'thumbnail': (('picture_l_url', 'picture_m_url', 'picture_s_url'), {url_or_none}, any), + }), + 'id': video_id, + 'live_status': live_status, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/tvo.py b/plugins/youtube_download/yt_dlp/extractor/tvo.py new file mode 100644 index 0000000..c4bce3b --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/tvo.py @@ -0,0 +1,152 @@ +import json +import urllib.parse + +from .brightcove import BrightcoveNewIE +from .common import InfoExtractor +from ..utils import ( + clean_html, + int_or_none, + parse_duration, + parse_iso8601, + smuggle_url, + str_or_none, + url_or_none, +) +from ..utils.traversal import ( + require, + traverse_obj, + trim_str, +) + + +class TvoIE(InfoExtractor): + IE_NAME = 'TVO' + _VALID_URL = r'https?://(?:www\.)?tvo\.org/video(?:/documentaries)?/(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://www.tvo.org/video/how-can-ontario-survive-the-trade-war', + 'info_dict': { + 'id': '6377531034112', + 'ext': 'mp4', + 'title': 'How Can Ontario Survive the Trade War?', + 'description': 'md5:e7455d9cd4b6b1270141922044161457', + 'display_id': 'how-can-ontario-survive-the-trade-war', + 'duration': 3531, + 'episode': 'How Can Ontario Survive the Trade War?', + 'episode_id': 'how-can-ontario-survive-the-trade-war', + 'episode_number': 1, + 'season': 'Season 1', + 'season_number': 1, + 'series': 'TVO at AMO', + 'series_id': 'tvo-at-amo', + 'tags': 'count:17', + 'thumbnail': r're:https?://.+', + 'timestamp': 1756944016, + 'upload_date': '20250904', + 'uploader_id': '18140038001', + }, + }, { + 'url': 'https://www.tvo.org/video/documentaries/the-pitch', + 'info_dict': { + 'id': '6382500333112', + 'ext': 'mp4', + 'title': 'The Pitch', + 'categories': ['Documentaries'], + 'description': 'md5:9d4246b70dce772a3a396c4bd84c8506', + 'display_id': 'the-pitch', + 'duration': 5923, + 'episode': 'The Pitch', + 'episode_id': 'the-pitch', + 'episode_number': 1, + 'season': 'Season 1', + 'season_number': 1, + 'series': 'The Pitch', + 'series_id': 'the-pitch', + 'tags': 'count:8', + 'thumbnail': r're:https?://.+', + 'timestamp': 1762693216, + 'upload_date': '20251109', + 'uploader_id': '18140038001', + }, + }, { + 'url': 'https://www.tvo.org/video/documentaries/valentines-day', + 'info_dict': { + 'id': '6387298331112', + 'ext': 'mp4', + 'title': 'Valentine\'s Day', + 'categories': ['Documentaries'], + 'description': 'md5:b142149beb2d3a855244816c50cd2f14', + 'display_id': 'valentines-day', + 'duration': 3121, + 'episode': 'Valentine\'s Day', + 'episode_id': 'valentines-day', + 'episode_number': 2, + 'season': 'Season 1', + 'season_number': 1, + 'series': 'How We Celebrate', + 'series_id': 'how-we-celebrate', + 'tags': 'count:6', + 'thumbnail': r're:https?://.+', + 'timestamp': 1770386416, + 'upload_date': '20260206', + 'uploader_id': '18140038001', + }, + }] + BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/18140038001/default_default/index.html?videoId=%s' + + def _real_extract(self, url): + display_id = self._match_id(url) + video_data = self._download_json( + 'https://hmy0rc1bo2.execute-api.ca-central-1.amazonaws.com/graphql', + display_id, headers={'Content-Type': 'application/json'}, + data=json.dumps({ + 'operationName': 'getVideo', + 'variables': {'slug': urllib.parse.urlparse(url).path.rstrip('/')}, + 'query': '''query getVideo($slug: String) { + getTVOOrgVideo(slug: $slug) { + contentCategory + description + length + program { + nodeUrl + title + } + programOrder + publishedAt + season + tags + thumbnail + title + videoSource { + brightcoveRefId + } + } + }''', + }, separators=(',', ':')).encode(), + )['data']['getTVOOrgVideo'] + + brightcove_id = traverse_obj(video_data, ( + 'videoSource', 'brightcoveRefId', {str_or_none}, {require('Brightcove ID')})) + + return { + '_type': 'url_transparent', + 'ie_key': BrightcoveNewIE.ie_key(), + 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['CA']}), + 'display_id': display_id, + 'episode_id': display_id, + **traverse_obj(video_data, { + 'title': ('title', {clean_html}, filter), + 'categories': ('contentCategory', {clean_html}, filter, all, filter), + 'description': ('description', {clean_html}, filter), + 'duration': ('length', {parse_duration}), + 'episode': ('title', {clean_html}, filter), + 'episode_number': ('programOrder', {int_or_none}), + 'season_number': ('season', {int_or_none}), + 'tags': ('tags', ..., {clean_html}, filter), + 'thumbnail': ('thumbnail', {url_or_none}), + 'timestamp': ('publishedAt', {parse_iso8601}), + }), + **traverse_obj(video_data, ('program', { + 'series': ('title', {clean_html}, filter), + 'series_id': ('nodeUrl', {clean_html}, {trim_str(start='/programs/')}, filter), + })), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/twitch.py b/plugins/youtube_download/yt_dlp/extractor/twitch.py index 24e510a..13d3f06 100644 --- a/plugins/youtube_download/yt_dlp/extractor/twitch.py +++ b/plugins/youtube_download/yt_dlp/extractor/twitch.py @@ -43,10 +43,10 @@ class TwitchBaseIE(InfoExtractor): _OPERATION_HASHES = { 'CollectionSideBar': '016e1e4ccee0eb4698eb3bf1a04dc1c077fb746c78c82bac9a8f0289658fbd1a', 'FilterableVideoTower_Videos': '67004f7881e65c297936f32c75246470629557a393788fb5a69d6d9a25a8fd5f', - 'ClipsCards__User': '90c33f5e6465122fba8f9371e2a97076f9ed06c6fed3788d002ab9eba8f91d88', - 'ShareClipRenderStatus': '1844261bb449fa51e6167040311da4a7a5f1c34fe71c71a3e0c4f551bc30c698', + 'ClipsCards__User': '1cd671bfa12cec480499c087319f26d21925e9695d1f80225aae6a4354f23088', + 'ShareClipRenderStatus': '0a02bb974443b576f5579aab0fef1d4b7f44e58a8a256f0c5adfead0db70640f', 'ChannelCollectionsContent': '5247910a19b1cd2b760939bf4cba4dcbd3d13bdf8c266decd16956f6ef814077', - 'StreamMetadata': 'b57f9b910f8cd1a4659d894fe7550ccc81ec9052c01e438b290fd66a040b9b93', + 'StreamMetadata': 'ad022ca32220d5523d03a23cbcb5beaa1e0999889c1f8f78f9f2520dafb5cae6', 'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01', 'VideoPreviewOverlay': '9515480dee68a77e667cb19de634739d33f243572b007e98e67184b1a5d8369f', 'VideoMetadata': '45111672eea2e507f8ba44d101a61862f9c56b11dee09a15634cb75cb9b9084d', diff --git a/plugins/youtube_download/yt_dlp/extractor/twitter.py b/plugins/youtube_download/yt_dlp/extractor/twitter.py index 063b837..c75c5f1 100644 --- a/plugins/youtube_download/yt_dlp/extractor/twitter.py +++ b/plugins/youtube_download/yt_dlp/extractor/twitter.py @@ -131,11 +131,15 @@ class TwitterBaseIE(InfoExtractor): video_id, headers=headers, query=query, expected_status=allowed_status, note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON') - if result.get('errors'): - errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str})))) - if errors and 'not authorized' in errors: - self.raise_login_required(remove_end(errors, '.')) - raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}') + if error_msg := ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str})))): + # Errors with the message 'Dependency: Unspecified' are a false positive + # See https://github.com/yt-dlp/yt-dlp/issues/15963 + if error_msg.lower() == 'dependency: unspecified': + self.write_debug(f'Ignoring Twitter API error: "{error_msg}"') + elif 'not authorized' in error_msg.lower(): + self.raise_login_required(remove_end(error_msg, '.')) + else: + raise ExtractorError(f'Error(s) while querying API: {error_msg or "Unknown error"}') return result @@ -1078,7 +1082,7 @@ class TwitterIE(TwitterBaseIE): raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True) elif typename == 'TweetUnavailable': reason = result.get('reason') - if reason == 'NsfwLoggedOut': + if reason in ('NsfwLoggedOut', 'NsfwViewerHasNoStatedAge'): self.raise_login_required('NSFW tweet requires authentication') elif reason == 'Protected': self.raise_login_required('You are not authorized to view this protected tweet') diff --git a/plugins/youtube_download/yt_dlp/extractor/unsupported.py b/plugins/youtube_download/yt_dlp/extractor/unsupported.py index 333c7e9..1e6f8c5 100644 --- a/plugins/youtube_download/yt_dlp/extractor/unsupported.py +++ b/plugins/youtube_download/yt_dlp/extractor/unsupported.py @@ -66,6 +66,11 @@ class KnownDRMIE(UnsupportedInfoExtractor): r'rtlmost\.hu', r'plus\.rtl\.de(?!/podcast/)', r'mediasetinfinity\.es', + r'tv5mondeplus\.com', + r'tv\.rakuten\.co\.jp', + r'watch\.telusoriginals\.com', + r'video\.unext\.jp', + r'www\.web\.nhk', ) _TESTS = [{ @@ -226,6 +231,27 @@ class KnownDRMIE(UnsupportedInfoExtractor): }, { 'url': 'https://www.mediasetinfinity.es/', 'only_matching': True, + }, { + # https://github.com/yt-dlp/yt-dlp/issues/14743 + 'url': 'https://www.tv5mondeplus.com/', + 'only_matching': True, + }, { + # https://github.com/yt-dlp/yt-dlp/issues/8821 + 'url': 'https://tv.rakuten.co.jp/content/519554/', + 'only_matching': True, + }, { + # https://github.com/yt-dlp/yt-dlp/issues/9851 + 'url': 'https://watch.telusoriginals.com/play?assetID=fruit-is-ripe', + 'only_matching': True, + }, { + # https://github.com/yt-dlp/yt-dlp/issues/13220 + # https://github.com/yt-dlp/yt-dlp/issues/14564 + 'url': 'https://video.unext.jp/play/SID0062010/ED00337407', + 'only_matching': True, + }, { + # https://github.com/yt-dlp/yt-dlp/issues/14620 + 'url': 'https://www.web.nhk/tv/an/72hours/pl/series-tep-W3W8WRN8M3/ep/QW8ZY6146V', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/plugins/youtube_download/yt_dlp/extractor/vimeo.py b/plugins/youtube_download/yt_dlp/extractor/vimeo.py index 67cda74..ad5811a 100644 --- a/plugins/youtube_download/yt_dlp/extractor/vimeo.py +++ b/plugins/youtube_download/yt_dlp/extractor/vimeo.py @@ -49,7 +49,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'Cannot download embed-only video without embedding URL. Please call yt-dlp ' 'with the URL of the page that embeds this video.') - _DEFAULT_CLIENT = 'web' + _DEFAULT_CLIENT = 'macos' _DEFAULT_AUTHED_CLIENT = 'web' _CLIENT_HEADERS = { 'Accept': 'application/vnd.vimeo.*+json; version=3.4.10', @@ -87,6 +87,21 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'upload', 'transcode', 'is_playable', 'has_audio', ), }, + 'macos': { + 'CACHE_KEY': 'oauth-token-macos', + 'CACHE_ONLY': False, + 'VIEWER_JWT': False, + 'REQUIRES_AUTH': False, + 'AUTH': 'NDc1N2JlN2Y5ZjZmMjU3NzE3NTRkZTg1NmY2YzU2MTI0OTFlNjJiYjpwVUNDWUlBZmZqSHhQcndBYWxGMzgyYys2NkN5d1JrREJZZXdPcEdsU05tdjFlVVo2aE1lYk9GcWE3ZW9KVldlYnFlOWh5Vno5UWtpUGJ5empYZFBpYkFwV0FFTnB5VWV4ZEh3aHZnRUNEL0VySnBzTmFraDdNbS9nMXhWanhIcw==', + 'USER_AGENT': 'Vimeo/1.6.3 (com.vimeo.mac; build:251121.142637.0; macOS 13.7.8) Alamofire/5.9.0 VimeoNetworking/5.0.0', + 'VIDEOS_FIELDS': ( + 'uri', 'name', 'description', 'type', 'link', 'player_embed_url', 'duration', 'width', + 'language', 'height', 'embed', 'created_time', 'modified_time', 'release_time', 'content_rating', + 'content_rating_class', 'rating_mod_locked', 'license', 'privacy', 'pictures', 'tags', 'stats', + 'categories', 'uploader', 'metadata', 'user', 'files', 'download', 'app', 'play', 'status', + 'resource_key', 'badge', 'upload', 'transcode', 'is_playable', 'has_audio', + ), + }, 'web': { 'CACHE_ONLY': False, 'VIEWER_JWT': True, diff --git a/plugins/youtube_download/yt_dlp/extractor/visir.py b/plugins/youtube_download/yt_dlp/extractor/visir.py new file mode 100644 index 0000000..0733a24 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/visir.py @@ -0,0 +1,116 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + UnsupportedError, + clean_html, + int_or_none, + js_to_json, + month_by_name, + url_or_none, + urljoin, +) +from ..utils.traversal import find_element, traverse_obj + + +class VisirIE(InfoExtractor): + IE_DESC = 'Vísir' + + _VALID_URL = r'https?://(?:www\.)?visir\.is/(?Pk|player)/(?P[\da-f-]+)(?:/(?P[\w.-]+))?' + _EMBED_REGEX = [rf']+src=["\'](?P{_VALID_URL})'] + _TESTS = [{ + 'url': 'https://www.visir.is/k/eabb8f7f-ad87-46fb-9469-a0f1dc0fc4bc-1769022963988', + 'info_dict': { + 'id': 'eabb8f7f-ad87-46fb-9469-a0f1dc0fc4bc-1769022963988', + 'ext': 'mp4', + 'title': 'Sveppi og Siggi Þór mestu skaphundarnir', + 'categories': ['island-i-dag'], + 'description': 'md5:e06bd6a0cd8bdde328ad8cf00d3d4df6', + 'duration': 792, + 'thumbnail': r're:https?://www\.visir\.is/.+', + 'upload_date': '20260121', + 'view_count': int, + }, + }, { + 'url': 'https://www.visir.is/k/b0a88e02-eceb-4270-855c-8328b76b9d81-1763979306704/tonlistarborgin-reykjavik', + 'info_dict': { + 'id': 'b0a88e02-eceb-4270-855c-8328b76b9d81-1763979306704', + 'ext': 'mp4', + 'title': 'Tónlistarborgin Reykjavík', + 'categories': ['tonlist'], + 'description': 'md5:47237589dc95dbde55dfbb163396f88a', + 'display_id': 'tonlistarborgin-reykjavik', + 'duration': 81, + 'thumbnail': r're:https?://www\.visir\.is/.+', + 'upload_date': '20251124', + 'view_count': int, + }, + }, { + 'url': 'https://www.visir.is/player/0cd5709e-6870-46d0-aaaf-0ae637de94f1-1770060083580', + 'info_dict': { + 'id': '0cd5709e-6870-46d0-aaaf-0ae637de94f1-1770060083580', + 'ext': 'mp4', + 'title': 'Sportpakkinn 2. febrúar 2026', + 'categories': ['sportpakkinn'], + 'display_id': 'sportpakkinn-2.-februar-2026', + 'duration': 293, + 'thumbnail': r're:https?://www\.visir\.is/.+', + 'upload_date': '20260202', + 'view_count': int, + }, + }] + _WEBPAGE_TESTS = [{ + 'url': 'https://www.visir.is/g/20262837896d/segir-von-brigdin-med-prinsessuna-rista-djupt', + 'info_dict': { + 'id': '9ad5e58a-f26f-49f7-8b1d-68f0629485b7-1770059257365', + 'ext': 'mp4', + 'title': 'Norðmenn tala ekki um annað en prinsessuna', + 'categories': ['frettir'], + 'description': 'md5:53e2623ae79e1355778c14f5b557a0cd', + 'display_id': 'nordmenn-tala-ekki-um-annad-en-prinsessuna', + 'duration': 138, + 'thumbnail': r're:https?://www\.visir\.is/.+', + 'upload_date': '20260202', + 'view_count': int, + }, + }] + + def _real_extract(self, url): + video_type, video_id, display_id = self._match_valid_url(url).group('type', 'id', 'slug') + webpage = self._download_webpage(url, video_id) + if video_type == 'player': + real_url = self._og_search_url(webpage) + if not self.suitable(real_url) or self._match_valid_url(real_url).group('type') == 'player': + raise UnsupportedError(real_url) + return self.url_result(real_url, self.ie_key()) + + upload_date = None + date_elements = traverse_obj(webpage, ( + {find_element(cls='article-item__date')}, {clean_html}, filter, {str.split})) + if date_elements and len(date_elements) == 3: + day, month, year = date_elements + day = int_or_none(day.rstrip('.')) + month = month_by_name(month, 'is') + if day and month and re.fullmatch(r'[0-9]{4}', year): + upload_date = f'{year}{month:02d}{day:02d}' + + player = self._search_json( + r'App\.Player\.Init\(', webpage, video_id, 'player', transform_source=js_to_json) + m3u8_url = traverse_obj(player, ('File', {urljoin('https://vod.visir.is/')})) + + return { + 'id': video_id, + 'display_id': display_id, + 'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4'), + 'upload_date': upload_date, + **traverse_obj(webpage, ({find_element(cls='article-item press-ads')}, { + 'description': ({find_element(cls='-large')}, {clean_html}, filter), + 'view_count': ({find_element(cls='article-item__viewcount')}, {clean_html}, {int_or_none}), + })), + **traverse_obj(player, { + 'title': ('Title', {clean_html}), + 'categories': ('Categoryname', {clean_html}, filter, all, filter), + 'duration': ('MediaDuration', {int_or_none}), + 'thumbnail': ('Image', {url_or_none}), + }), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/vk.py b/plugins/youtube_download/yt_dlp/extractor/vk.py index 52749f6..5d4f480 100644 --- a/plugins/youtube_download/yt_dlp/extractor/vk.py +++ b/plugins/youtube_download/yt_dlp/extractor/vk.py @@ -1,6 +1,7 @@ import collections import hashlib import re +import urllib.parse from .common import InfoExtractor from .dailymotion import DailymotionIE @@ -8,6 +9,7 @@ from .odnoklassniki import OdnoklassnikiIE from .sibnet import SibnetEmbedIE from .vimeo import VimeoIE from .youtube import YoutubeIE +from ..jsinterp import JSInterpreter from ..utils import ( ExtractorError, UserNotLive, @@ -36,16 +38,38 @@ class VKBaseIE(InfoExtractor): def _download_webpage_handle(self, url_or_request, video_id, *args, fatal=True, **kwargs): response = super()._download_webpage_handle(url_or_request, video_id, *args, fatal=fatal, **kwargs) - challenge_url, cookie = response[1].url if response else '', None - if challenge_url.startswith('https://vk.com/429.html?'): - cookie = self._get_cookies(challenge_url).get('hash429') - if not cookie: + if response is False: return response - hash429 = hashlib.md5(cookie.value.encode('ascii')).hexdigest() + webpage, urlh = response + challenge_url = urlh.url + if urllib.parse.urlparse(challenge_url).path != '/challenge.html': + return response + + self.to_screen(join_nonempty( + video_id and f'[{video_id}]', + 'Received a JS challenge response', + delim=' ')) + + challenge_hash = traverse_obj(challenge_url, ( + {parse_qs}, 'hash429', -1, {require('challenge hash')})) + + func_code = self._search_regex( + r'(?s)var\s+salt\s*=\s*\(\s*function\s*\(\)\s*(\{.+?\})\s*\)\(\);\s*var\s+hash', + webpage, 'JS challenge salt function') + + jsi = JSInterpreter(f'function salt() {func_code}') + salt = jsi.extract_function('salt')([]) + self.write_debug(f'Generated salt with native JS interpreter: {salt}') + + key_hash = hashlib.md5(f'{challenge_hash}:{salt}'.encode()).hexdigest() + self.write_debug(f'JS challenge key hash: {key_hash}') + + # Request with the challenge key and the response should set a 'solution429' cookie self._request_webpage( - update_url_query(challenge_url, {'key': hash429}), video_id, fatal=fatal, - note='Resolving WAF challenge', errnote='Failed to bypass WAF challenge') + update_url_query(challenge_url, {'key': key_hash}), video_id, + 'Submitting JS challenge solution', 'Unable to solve JS challenge', fatal=True) + return super()._download_webpage_handle(url_or_request, video_id, *args, fatal=True, **kwargs) def _perform_login(self, username, password): diff --git a/plugins/youtube_download/yt_dlp/extractor/volejtv.py b/plugins/youtube_download/yt_dlp/extractor/volejtv.py index 42ef9b1..a71d332 100644 --- a/plugins/youtube_download/yt_dlp/extractor/volejtv.py +++ b/plugins/youtube_download/yt_dlp/extractor/volejtv.py @@ -1,40 +1,167 @@ +import functools + from .common import InfoExtractor +from ..utils import ( + InAdvancePagedList, + int_or_none, + join_nonempty, + orderedSet, + str_or_none, + strftime_or_none, + unified_timestamp, + url_or_none, +) +from ..utils.traversal import ( + require, + traverse_obj, +) -class VolejTVIE(InfoExtractor): - _VALID_URL = r'https?://volej\.tv/video/(?P\d+)' +class VolejTVBaseIE(InfoExtractor): + TBR_HEIGHT_MAPPING = { + '6000': 1080, + '2400': 720, + '1500': 480, + '800': 360, + } + + def _call_api(self, endpoint, display_id, query=None): + return self._download_json( + f'https://api-volejtv-prod.apps.okd4.devopsie.cloud/api/{endpoint}', + display_id, query=query) + + +class VolejTVIE(VolejTVBaseIE): + IE_NAME = 'volejtv:match' + _VALID_URL = r'https?://volej\.tv/match/(?P\d+)' _TESTS = [{ - 'url': 'https://volej.tv/video/725742/', + 'url': 'https://volej.tv/match/270579', 'info_dict': { - 'id': '725742', + 'id': '270579', 'ext': 'mp4', - 'description': 'Zápas VK Královo Pole vs VK Prostějov 10.12.2022 v 19:00 na Volej.TV', - 'thumbnail': 'https://volej.tv/images/og/16/17186/og.png', - 'title': 'VK Královo Pole vs VK Prostějov', + 'title': 'SWE-CZE (2024-06-16)', + 'categories': ['ženy'], + 'series': 'ZLATÁ EVROPSKÁ VOLEJBALOVÁ LIGA', + 'season': '2023-2024', + 'timestamp': 1718553600, + 'upload_date': '20240616', }, }, { - 'url': 'https://volej.tv/video/725605/', + 'url': 'https://volej.tv/match/487520', 'info_dict': { - 'id': '725605', + 'id': '487520', 'ext': 'mp4', - 'thumbnail': 'https://volej.tv/images/og/15/17185/og.png', - 'title': 'VK Lvi Praha vs VK Euro Sitex Příbram', - 'description': 'Zápas VK Lvi Praha vs VK Euro Sitex Příbram 11.12.2022 v 19:00 na Volej.TV', + 'thumbnail': r're:https://.+\.(png|jpeg)', + 'title': 'FRA-CZE (2024-09-06)', + 'categories': ['mládež'], + 'series': 'Mistrovství Evropy do 20 let', + 'season': '2024-2025', + 'timestamp': 1725627600, + 'upload_date': '20240906', + }, }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - json_data = self._search_json( - r'<\s*!\[CDATA[^=]+=', webpage, 'CDATA', video_id) - formats, subtitle = self._extract_m3u8_formats_and_subtitles( - json_data['urls']['hls'], video_id) - return { + json_data = self._call_api(f'match/{video_id}', video_id) + + formats = [] + for video in traverse_obj(json_data, ('videos', 0, 'qualities', lambda _, v: url_or_none(v['cloud_front_path']))): + formats.append(traverse_obj(video, { + 'url': 'cloud_front_path', + 'tbr': ('quality', {int_or_none}), + 'format_id': ('id', {str_or_none}), + 'height': ('quality', {self.TBR_HEIGHT_MAPPING.get}), + })) + + data = { 'id': video_id, - 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage), - 'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage), - 'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage), + **traverse_obj(json_data, { + 'series': ('competition_name', {str}), + 'season': ('season', {str}), + 'timestamp': ('match_time', {unified_timestamp}), + 'categories': ('category', ('title'), {str}, filter, all, filter), + 'thumbnail': ('poster', {url_or_none}), + }), 'formats': formats, - 'subtitles': subtitle, } + + teams = orderedSet(traverse_obj(json_data, ('teams', ..., 'shortcut', {str}))) + if len(teams) > 2 and 'FIN' in teams: + teams.remove('FIN') + + data['title'] = join_nonempty( + join_nonempty(*teams, delim='-'), + strftime_or_none(data.get('timestamp'), '(%Y-%m-%d)'), + delim=' ') + + return data + + +class VolejTVPlaylistBaseIE(VolejTVBaseIE): + """Subclasses must set _API_FILTER, _PAGE_SIZE""" + + def _get_page(self, playlist_id, page): + return self._call_api( + f'match/{self._API_FILTER}/{playlist_id}', playlist_id, + query={'page': page + 1, 'take': self._PAGE_SIZE, 'order': 'DESC'}) + + def _entries(self, playlist_id, first_page_data, page): + entries = first_page_data if page == 0 else self._get_page(playlist_id, page) + for match_id in traverse_obj(entries, ('data', ..., 'id')): + yield self.url_result(f'https://volej.tv/match/{match_id}', VolejTVIE) + + +class VolejTVClubPlaylistIE(VolejTVPlaylistBaseIE): + IE_NAME = 'volejtv:club' + _VALID_URL = r'https?://volej\.tv/klub/(?P\d+)' + _TESTS = [{ + 'url': 'https://volej.tv/klub/1173', + 'info_dict': { + 'id': '1173', + 'title': 'VK Jihostroj České Budějovice', + }, + 'playlist_mincount': 30, + }] + _API_FILTER = 'by-team-id-paginated' + _PAGE_SIZE = 6 + + def _real_extract(self, url): + playlist_id = self._match_id(url) + title = self._call_api(f'team/show/{playlist_id}', playlist_id)['title'] + first_page_data = self._get_page(playlist_id, 0) + total_pages = traverse_obj(first_page_data, ('meta', 'pageCount', {int}, {require('page count')})) + return self.playlist_result(InAdvancePagedList( + functools.partial(self._entries, playlist_id, first_page_data), + total_pages, self._PAGE_SIZE), playlist_id, title) + + +class VolejTVCategoryPlaylistIE(VolejTVPlaylistBaseIE): + IE_NAME = 'volejtv:category' + _VALID_URL = r'https?://volej\.tv/kategorie/(?P[^/$?]+)' + _TESTS = [{ + 'url': 'https://volej.tv/kategorie/chance-cesky-pohar', + 'info_dict': { + 'id': 'chance-cesky-pohar', + 'title': 'Chance Český pohár', + }, + 'playlist_mincount': 30, + }] + _API_FILTER = 'by-category-id-paginated' + _PAGE_SIZE = 10 + + def _get_category(self, playlist_id): + categories = self._call_api('category', playlist_id) + for category in traverse_obj(categories, (lambda _, v: v['slug'] and v['id'] and v['title'])): + if category['slug'] == playlist_id: + return category['id'], category['title'] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + category_id, title = self._get_category(playlist_id) + first_page_data = self._get_page(category_id, 0) + total_pages = traverse_obj(first_page_data, ('meta', 'pageCount', {int}, {require('page count')})) + return self.playlist_result(InAdvancePagedList( + functools.partial(self._entries, category_id, first_page_data), + total_pages, self._PAGE_SIZE), playlist_id, title) diff --git a/plugins/youtube_download/yt_dlp/extractor/wat.py b/plugins/youtube_download/yt_dlp/extractor/wat.py index c1c3af8..9963fe9 100644 --- a/plugins/youtube_download/yt_dlp/extractor/wat.py +++ b/plugins/youtube_download/yt_dlp/extractor/wat.py @@ -76,7 +76,7 @@ class WatIE(InfoExtractor): if error_code == 'GEOBLOCKED': self.raise_geo_restricted(error_desc, video_info.get('geoList')) elif error_code == 'DELIVERY_ERROR': - if traverse_obj(video_data, ('delivery', 'code')) == 500: + if traverse_obj(video_data, ('delivery', 'code')) in (403, 500): self.report_drm(video_id) error_desc = join_nonempty( error_desc, traverse_obj(video_data, ('delivery', 'error', {str})), delim=': ') diff --git a/plugins/youtube_download/yt_dlp/extractor/whyp.py b/plugins/youtube_download/yt_dlp/extractor/whyp.py index fef89c3..e4476db 100644 --- a/plugins/youtube_download/yt_dlp/extractor/whyp.py +++ b/plugins/youtube_download/yt_dlp/extractor/whyp.py @@ -1,6 +1,8 @@ from .common import InfoExtractor from ..utils import ( float_or_none, + int_or_none, + parse_iso8601, str_or_none, traverse_obj, url_or_none, @@ -11,17 +13,19 @@ class WhypIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?whyp\.it/tracks/(?P\d+)' _TESTS = [{ 'url': 'https://www.whyp.it/tracks/18337/home-page-example-track-b4kq7', - 'md5': 'c1187b42ebf8605284e3dc92aeb33d16', + 'md5': '02fd96427acd9547445979bf0496b013', 'info_dict': { - 'url': 'https://cdn.whyp.it/50eb17cc-e9ff-4e18-b89b-dc9206a95cb1.mp3', 'id': '18337', - 'title': 'Home Page Example Track', - 'description': 'md5:bd758000fb93f3159339c852b5b9133c', - 'ext': 'mp3', - 'duration': 52.82, + 'title': 'Example Track', + 'display_id': 'example-track', + 'description': 'md5:e0b1bcf1d267dc1a0f15efff09c8f297', + 'ext': 'flac', + 'duration': 135.63, + 'timestamp': 1643216583, + 'upload_date': '20220126', 'uploader': 'Brad', 'uploader_id': '1', - 'thumbnail': 'https://cdn.whyp.it/a537bb36-3373-4c61-96c8-27fc1b2f427a.jpg', + 'thumbnail': 'https://cdn.whyp.it/6ad0bbd9-577d-42bb-9b61-2a4f57f647eb.jpg', }, }, { 'url': 'https://www.whyp.it/tracks/18337', @@ -34,17 +38,23 @@ class WhypIE(InfoExtractor): data = self._search_nuxt_data(webpage, unique_id)['rawTrack'] return { - 'url': data['audio_url'], 'id': unique_id, + 'formats': [{ + 'url': data[f'{prefix}_url'], + 'format_id': prefix, + 'filesize': int_or_none(data.get(f'{prefix}_size')), + 'vcodec': 'none', + 'quality': 10 if prefix == 'lossless' else -1, + 'http_headers': {'Referer': 'https://whyp.it/'}, + } for prefix in ('audio', 'lossy', 'lossless') if url_or_none(data.get(f'{prefix}_url'))], **traverse_obj(data, { - 'title': 'title', + 'title': ('title', {str}), + 'display_id': ('slug', {str}), 'description': 'description', 'duration': ('duration', {float_or_none}), - 'uploader': ('user', 'username'), + 'timestamp': ('created_at', {parse_iso8601}), + 'uploader': ('user', 'username', {str}), 'uploader_id': ('user', 'id', {str_or_none}), 'thumbnail': ('artwork_url', {url_or_none}), }), - 'ext': 'mp3', - 'vcodec': 'none', - 'http_headers': {'Referer': 'https://whyp.it/'}, } diff --git a/plugins/youtube_download/yt_dlp/extractor/xhamster.py b/plugins/youtube_download/yt_dlp/extractor/xhamster.py index 8b2893b..5d600c0 100644 --- a/plugins/youtube_download/yt_dlp/extractor/xhamster.py +++ b/plugins/youtube_download/yt_dlp/extractor/xhamster.py @@ -3,6 +3,7 @@ import re import urllib.parse from .common import InfoExtractor +from ..jsinterp import int_to_int32 from ..utils import ( ExtractorError, clean_html, @@ -20,73 +21,69 @@ from ..utils import ( ) -def to_signed_32(n): - return n % ((-1 if n < 0 else 1) * 2**32) - - class _ByteGenerator: def __init__(self, algo_id, seed): try: self._algorithm = getattr(self, f'_algo{algo_id}') except AttributeError: raise ExtractorError(f'Unknown algorithm ID "{algo_id}"') - self._s = to_signed_32(seed) + self._s = int_to_int32(seed) def _algo1(self, s): # LCG (a=1664525, c=1013904223, m=2^32) # Ref: https://en.wikipedia.org/wiki/Linear_congruential_generator - s = self._s = to_signed_32(s * 1664525 + 1013904223) + s = self._s = int_to_int32(s * 1664525 + 1013904223) return s def _algo2(self, s): # xorshift32 # Ref: https://en.wikipedia.org/wiki/Xorshift - s = to_signed_32(s ^ (s << 13)) - s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 17)) - s = self._s = to_signed_32(s ^ (s << 5)) + s = int_to_int32(s ^ (s << 13)) + s = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 17)) + s = self._s = int_to_int32(s ^ (s << 5)) return s def _algo3(self, s): # Weyl Sequence (k≈2^32*φ, m=2^32) + MurmurHash3 (fmix32) # Ref: https://en.wikipedia.org/wiki/Weyl_sequence # https://commons.apache.org/proper/commons-codec/jacoco/org.apache.commons.codec.digest/MurmurHash3.java.html - s = self._s = to_signed_32(s + 0x9e3779b9) - s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 16)) - s = to_signed_32(s * to_signed_32(0x85ebca77)) - s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 13)) - s = to_signed_32(s * to_signed_32(0xc2b2ae3d)) - return to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 16)) + s = self._s = int_to_int32(s + 0x9e3779b9) + s = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 16)) + s = int_to_int32(s * int_to_int32(0x85ebca77)) + s = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 13)) + s = int_to_int32(s * int_to_int32(0xc2b2ae3d)) + return int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 16)) def _algo4(self, s): # Custom scrambling function involving a left rotation (ROL) - s = self._s = to_signed_32(s + 0x6d2b79f5) - s = to_signed_32((s << 7) | ((s & 0xFFFFFFFF) >> 25)) # ROL 7 - s = to_signed_32(s + 0x9e3779b9) - s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 11)) - return to_signed_32(s * 0x27d4eb2d) + s = self._s = int_to_int32(s + 0x6d2b79f5) + s = int_to_int32((s << 7) | ((s & 0xFFFFFFFF) >> 25)) # ROL 7 + s = int_to_int32(s + 0x9e3779b9) + s = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 11)) + return int_to_int32(s * 0x27d4eb2d) def _algo5(self, s): # xorshift variant with a final addition - s = to_signed_32(s ^ (s << 7)) - s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 9)) - s = to_signed_32(s ^ (s << 8)) - s = self._s = to_signed_32(s + 0xa5a5a5a5) + s = int_to_int32(s ^ (s << 7)) + s = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 9)) + s = int_to_int32(s ^ (s << 8)) + s = self._s = int_to_int32(s + 0xa5a5a5a5) return s def _algo6(self, s): # LCG (a=0x2c9277b5, c=0xac564b05) with a variable right shift scrambler - s = self._s = to_signed_32(s * to_signed_32(0x2c9277b5) + to_signed_32(0xac564b05)) - s2 = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 18)) + s = self._s = int_to_int32(s * int_to_int32(0x2c9277b5) + int_to_int32(0xac564b05)) + s2 = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 18)) shift = (s & 0xFFFFFFFF) >> 27 & 31 - return to_signed_32((s2 & 0xFFFFFFFF) >> shift) + return int_to_int32((s2 & 0xFFFFFFFF) >> shift) def _algo7(self, s): # Weyl Sequence (k=0x9e3779b9) + custom multiply-xor-shift mixing function - s = self._s = to_signed_32(s + to_signed_32(0x9e3779b9)) - e = to_signed_32(s ^ (s << 5)) - e = to_signed_32(e * to_signed_32(0x7feb352d)) - e = to_signed_32(e ^ ((e & 0xFFFFFFFF) >> 15)) - return to_signed_32(e * to_signed_32(0x846ca68b)) + s = self._s = int_to_int32(s + int_to_int32(0x9e3779b9)) + e = int_to_int32(s ^ (s << 5)) + e = int_to_int32(e * int_to_int32(0x7feb352d)) + e = int_to_int32(e ^ ((e & 0xFFFFFFFF) >> 15)) + return int_to_int32(e * int_to_int32(0x846ca68b)) def __next__(self): return self._algorithm(self._s) & 0xFF @@ -213,16 +210,9 @@ class XHamsterIE(InfoExtractor): 'only_matching': True, }] - def _decipher_format_url(self, format_url, format_id): - parsed_url = urllib.parse.urlparse(format_url) - - hex_string, path_remainder = self._search_regex( - r'^/(?P[0-9a-fA-F]{12,})(?P[/,].+)$', parsed_url.path, 'url components', - default=(None, None), group=('hex', 'rem')) - if not hex_string: - self.report_warning(f'Skipping format "{format_id}": unsupported URL format') - return None + _VALID_HEX_RE = r'[0-9a-fA-F]{12,}' + def _decipher_hex_string(self, hex_string, format_id): byte_data = bytes.fromhex(hex_string) seed = int.from_bytes(byte_data[1:5], byteorder='little', signed=True) @@ -232,7 +222,33 @@ class XHamsterIE(InfoExtractor): self.report_warning(f'Skipping format "{format_id}": {e.msg}') return None - deciphered = bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1') + return bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1') + + def _decipher_format_url(self, format_url, format_id): + # format_url can be hex ciphertext or a URL with a hex ciphertext segment + if re.fullmatch(self._VALID_HEX_RE, format_url): + return self._decipher_hex_string(format_url, format_id) + elif not url_or_none(format_url): + if re.fullmatch(r'[0-9a-fA-F]+', format_url): + # Hex strings that are too short are expected, so we don't want to warn + self.write_debug(f'Skipping dummy ciphertext for "{format_id}": {format_url}') + else: + # Something has likely changed on the site's end, so we need to warn + self.report_warning(f'Skipping format "{format_id}": invalid ciphertext') + return None + + parsed_url = urllib.parse.urlparse(format_url) + + hex_string, path_remainder = self._search_regex( + rf'^/(?P{self._VALID_HEX_RE})(?P[/,].+)$', parsed_url.path, 'url components', + default=(None, None), group=('hex', 'rem')) + if not hex_string: + self.report_warning(f'Skipping format "{format_id}": unsupported URL format') + return None + + deciphered = self._decipher_hex_string(hex_string, format_id) + if not deciphered: + return None return parsed_url._replace(path=f'/{deciphered}{path_remainder}').geturl() @@ -252,7 +268,7 @@ class XHamsterIE(InfoExtractor): display_id = mobj.group('display_id') or mobj.group('display_id_2') desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url) - webpage, urlh = self._download_webpage_handle(desktop_url, video_id) + webpage, urlh = self._download_webpage_handle(desktop_url, video_id, impersonate=True) error = self._html_search_regex( r']+id=["\']videoClosed["\'][^>]*>(.+?)', diff --git a/plugins/youtube_download/yt_dlp/extractor/youtube/__init__.py b/plugins/youtube_download/yt_dlp/extractor/youtube/__init__.py index 892d860..b0ad186 100644 --- a/plugins/youtube_download/yt_dlp/extractor/youtube/__init__.py +++ b/plugins/youtube_download/yt_dlp/extractor/youtube/__init__.py @@ -16,7 +16,7 @@ from ._redirect import ( YoutubeYtBeIE, YoutubeYtUserIE, ) -from ._search import YoutubeMusicSearchURLIE, YoutubeSearchDateIE, YoutubeSearchIE, YoutubeSearchURLIE +from ._search import YoutubeMusicSearchURLIE, YoutubeSearchIE, YoutubeSearchURLIE from ._tab import YoutubePlaylistIE, YoutubeTabBaseInfoExtractor, YoutubeTabIE from ._video import YoutubeIE @@ -39,7 +39,6 @@ for _cls in [ YoutubeYtBeIE, YoutubeYtUserIE, YoutubeMusicSearchURLIE, - YoutubeSearchDateIE, YoutubeSearchIE, YoutubeSearchURLIE, YoutubePlaylistIE, diff --git a/plugins/youtube_download/yt_dlp/extractor/youtube/_base.py b/plugins/youtube_download/yt_dlp/extractor/youtube/_base.py index 2692509..d3e4ab1 100644 --- a/plugins/youtube_download/yt_dlp/extractor/youtube/_base.py +++ b/plugins/youtube_download/yt_dlp/extractor/youtube/_base.py @@ -99,12 +99,11 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB', - 'clientVersion': '2.20250925.01.00', + 'clientVersion': '2.20260114.08.00', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'SUPPORTS_COOKIES': True, - 'SUPPORTS_AD_PLAYBACK_CONTEXT': True, **WEB_PO_TOKEN_POLICIES, }, # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats @@ -112,20 +111,19 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB', - 'clientVersion': '2.20250925.01.00', + 'clientVersion': '2.20260114.08.00', 'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'SUPPORTS_COOKIES': True, - 'SUPPORTS_AD_PLAYBACK_CONTEXT': True, **WEB_PO_TOKEN_POLICIES, }, 'web_embedded': { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB_EMBEDDED_PLAYER', - 'clientVersion': '1.20250923.21.00', + 'clientVersion': '1.20260115.01.00', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 56, @@ -136,7 +134,7 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB_REMIX', - 'clientVersion': '1.20250922.03.00', + 'clientVersion': '1.20260114.03.00', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 67, @@ -166,7 +164,7 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB_CREATOR', - 'clientVersion': '1.20250922.03.00', + 'clientVersion': '1.20260114.05.00', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 62, @@ -195,9 +193,9 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID', - 'clientVersion': '20.10.38', + 'clientVersion': '21.02.35', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.youtube/20.10.38 (Linux; U; Android 11) gzip', + 'userAgent': 'com.google.android.youtube/21.02.35 (Linux; U; Android 11) gzip', 'osName': 'Android', 'osVersion': '11', }, @@ -223,21 +221,8 @@ INNERTUBE_CLIENTS = { }, 'PLAYER_PO_TOKEN_POLICY': PlayerPoTokenPolicy(required=False, recommended=True), }, - # Doesn't require a PoToken for some reason - 'android_sdkless': { - 'INNERTUBE_CONTEXT': { - 'client': { - 'clientName': 'ANDROID', - 'clientVersion': '20.10.38', - 'userAgent': 'com.google.android.youtube/20.10.38 (Linux; U; Android 11) gzip', - 'osName': 'Android', - 'osVersion': '11', - }, - }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 3, - 'REQUIRE_JS_PLAYER': False, - }, - # YouTube Kids videos aren't returned on this client for some reason + # "Made for kids" videos aren't available with this client + # Using a clientVersion>1.65 may return SABR streams only 'android_vr': { 'INNERTUBE_CONTEXT': { 'client': { @@ -260,10 +245,10 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'IOS', - 'clientVersion': '20.10.4', + 'clientVersion': '21.02.3', 'deviceMake': 'Apple', 'deviceModel': 'iPhone16,2', - 'userAgent': 'com.google.ios.youtube/20.10.4 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)', + 'userAgent': 'com.google.ios.youtube/21.02.3 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)', 'osName': 'iPhone', 'osVersion': '18.3.2.22D82', }, @@ -291,7 +276,7 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'MWEB', - 'clientVersion': '2.20250925.01.00', + 'clientVersion': '2.20260115.01.00', # mweb previously did not require PO Token with this UA 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)', }, @@ -322,24 +307,24 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'TVHTML5', - 'clientVersion': '7.20250923.13.00', - 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version', + 'clientVersion': '7.20260114.12.00', + # See: https://github.com/youtube/cobalt/blob/main/cobalt/browser/user_agent/user_agent_platform_info.cc#L506 + 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/25.lts.30.1034943-gold (unlike Gecko), Unknown_TV_Unknown_0/Unknown (Unknown, Unknown)', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, 'SUPPORTS_COOKIES': True, - # See: https://github.com/youtube/cobalt/blob/main/cobalt/browser/user_agent/user_agent_platform_info.cc#L506 - 'AUTHENTICATED_USER_AGENT': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/25.lts.30.1034943-gold (unlike Gecko), Unknown_TV_Unknown_0/Unknown (Unknown, Unknown)', }, 'tv_downgraded': { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'TVHTML5', - 'clientVersion': '5.20251105', + 'clientVersion': '5.20260114', 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, + 'REQUIRE_AUTH': True, 'SUPPORTS_COOKIES': True, }, 'tv_simply': { @@ -365,20 +350,6 @@ INNERTUBE_CLIENTS = { }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 75, }, - # This client now requires sign-in for every video - # It was previously an age-gate workaround for videos that were `playable_in_embed` - # It may still be useful if signed into an EU account that is not age-verified - 'tv_embedded': { - 'INNERTUBE_CONTEXT': { - 'client': { - 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', - 'clientVersion': '2.0', - }, - }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 85, - 'REQUIRE_AUTH': True, - 'SUPPORTS_COOKIES': True, - }, } @@ -397,7 +368,7 @@ def short_client_name(client_name): def _fix_embedded_ytcfg(ytcfg): ytcfg['INNERTUBE_CONTEXT'].setdefault('thirdParty', {}).update({ - 'embedUrl': 'https://www.youtube.com/', # Can be any valid URL + 'embedUrl': 'https://www.reddit.com/', # Can be any valid non-YouTube URL }) @@ -418,7 +389,6 @@ def build_innertube_clients(): ytcfg.setdefault('SUPPORTS_COOKIES', False) ytcfg.setdefault('SUPPORTS_AD_PLAYBACK_CONTEXT', False) ytcfg.setdefault('PLAYER_PARAMS', None) - ytcfg.setdefault('AUTHENTICATED_USER_AGENT', None) ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en') _, base_client, variant = _split_innertube_client(client) @@ -703,14 +673,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=' def _get_default_ytcfg(self, client='web'): - ytcfg = copy.deepcopy(INNERTUBE_CLIENTS[client]) - - # Currently, only the tv client needs to use an alternative user-agent when logged-in - if ytcfg.get('AUTHENTICATED_USER_AGENT') and self.is_authenticated: - client_context = ytcfg.setdefault('INNERTUBE_CONTEXT', {}).setdefault('client', {}) - client_context['userAgent'] = ytcfg['AUTHENTICATED_USER_AGENT'] - - return ytcfg + return copy.deepcopy(INNERTUBE_CLIENTS[client]) def _get_innertube_host(self, client='web'): return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST'] @@ -994,16 +957,25 @@ class YoutubeBaseInfoExtractor(InfoExtractor): url = { 'mweb': 'https://m.youtube.com', 'web': 'https://www.youtube.com', + 'web_safari': 'https://www.youtube.com', 'web_music': 'https://music.youtube.com', + 'web_creator': 'https://studio.youtube.com', 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1', 'tv': 'https://www.youtube.com/tv', }.get(client) if not url: return {} + + default_ytcfg = self._get_default_ytcfg(client) + + if default_ytcfg['REQUIRE_AUTH'] and not self.is_authenticated: + return {} + webpage = self._download_webpage_with_retries( url, video_id, note=f'Downloading {client.replace("_", " ").strip()} client config', - headers=traverse_obj(self._get_default_ytcfg(client), { + headers=traverse_obj(default_ytcfg, { 'User-Agent': ('INNERTUBE_CONTEXT', 'client', 'userAgent', {str}), + 'Referer': ('INNERTUBE_CONTEXT', 'thirdParty', 'embedUrl', {str}), })) ytcfg = self.extract_ytcfg(video_id, webpage) or {} diff --git a/plugins/youtube_download/yt_dlp/extractor/youtube/_search.py b/plugins/youtube_download/yt_dlp/extractor/youtube/_search.py index be10a20..7fe5d9d 100644 --- a/plugins/youtube_download/yt_dlp/extractor/youtube/_search.py +++ b/plugins/youtube_download/yt_dlp/extractor/youtube/_search.py @@ -28,21 +28,6 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor): }] -class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor): - IE_NAME = YoutubeSearchIE.IE_NAME + ':date' - _SEARCH_KEY = 'ytsearchdate' - IE_DESC = 'YouTube search, newest videos first' - _SEARCH_PARAMS = 'CAISAhAB8AEB' # Videos only, sorted by date - _TESTS = [{ - 'url': 'ytsearchdate5:youtube-dl test video', - 'playlist_count': 5, - 'info_dict': { - 'id': 'youtube-dl test video', - 'title': 'youtube-dl test video', - }, - }] - - class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): IE_DESC = 'YouTube search URLs with sorting and filter support' IE_NAME = YoutubeSearchIE.IE_NAME + '_url' diff --git a/plugins/youtube_download/yt_dlp/extractor/youtube/_tab.py b/plugins/youtube_download/yt_dlp/extractor/youtube/_tab.py index 450b4aa..78d5e5b 100644 --- a/plugins/youtube_download/yt_dlp/extractor/youtube/_tab.py +++ b/plugins/youtube_download/yt_dlp/extractor/youtube/_tab.py @@ -81,7 +81,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): 'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer')) title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header_renderer, 'reelTitleText') - description = self._get_text(renderer, 'descriptionSnippet') + description = self._get_text(renderer, 'descriptionSnippet', ('detailedMetadataSnippets', ..., 'snippetText')) duration = int_or_none(renderer.get('lengthSeconds')) if duration is None: @@ -2148,7 +2148,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:]) elif item_id[:2] == 'MP': # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist mdata = self._extract_tab_endpoint( - f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music') + f'https://music.youtube.com/browse/{item_id}', item_id, default_client='web_music') murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), get_all=False, expected_type=str) if not murl: diff --git a/plugins/youtube_download/yt_dlp/extractor/youtube/_video.py b/plugins/youtube_download/yt_dlp/extractor/youtube/_video.py index 5c0f5a6..f12636d 100644 --- a/plugins/youtube_download/yt_dlp/extractor/youtube/_video.py +++ b/plugins/youtube_download/yt_dlp/extractor/youtube/_video.py @@ -10,7 +10,6 @@ import re import sys import threading import time -import traceback import urllib.parse from ._base import ( @@ -63,6 +62,7 @@ from ...utils import ( unescapeHTML, unified_strdate, unsmuggle_url, + update_url, update_url_query, url_or_none, urljoin, @@ -139,17 +139,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ] _RETURN_TYPE = 'video' # XXX: How to handle multifeed? - _PLAYER_INFO_RE = ( - r'/s/player/(?P[a-zA-Z0-9_-]{8,})/(?:tv-)?player', - r'/(?P[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', - r'\b(?Pvfl[a-zA-Z0-9_-]+)\b.*?\.js$', - ) _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt') - _DEFAULT_CLIENTS = ('tv', 'android_sdkless', 'web') - _DEFAULT_JSLESS_CLIENTS = ('android_sdkless', 'web_safari', 'web') - _DEFAULT_AUTHED_CLIENTS = ('tv_downgraded', 'web_safari', 'web') + _DEFAULT_CLIENTS = ('android_vr', 'web_safari') + _DEFAULT_JSLESS_CLIENTS = ('android_vr',) + _DEFAULT_AUTHED_CLIENTS = ('tv_downgraded', 'web_safari') # Premium does not require POT (except for subtitles) - _DEFAULT_PREMIUM_CLIENTS = ('tv_downgraded', 'web_creator', 'web') + _DEFAULT_PREMIUM_CLIENTS = ('tv_downgraded', 'web_creator') + _WEBPAGE_CLIENTS = ('web', 'web_safari') + _DEFAULT_WEBPAGE_CLIENT = 'web_safari' _GEO_BYPASS = False @@ -1443,7 +1440,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'view_count': int, }, 'params': { - 'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc', 'skip_download': True, }, @@ -1690,7 +1686,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'playable_in_embed': True, 'availability': 'public', 'live_status': 'not_live', - 'comment_count': 15, # XXX: minimum + 'comment_count': 15, # XXX: minimum, but investigate if this changes + 'comments': 'count:15', }, 'params': { 'skip_download': True, @@ -1723,7 +1720,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'playable_in_embed': True, 'availability': 'unlisted', 'live_status': 'not_live', - 'comment_count': 9, # XXX: minimum + 'comment_count': 9, # XXX: minimum, but investigate if this changes + 'comments': 'count:9', + }, + 'params': { + 'skip_download': True, + 'getcomments': True, }, }] _WEBPAGE_TESTS = [{ @@ -1881,13 +1883,37 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'tce': 'player_ias_tce.vflset/en_US/base.js', 'es5': 'player_es5.vflset/en_US/base.js', 'es6': 'player_es6.vflset/en_US/base.js', + 'es6_tcc': 'player_es6_tcc.vflset/en_US/base.js', + 'es6_tce': 'player_es6_tce.vflset/en_US/base.js', 'tv': 'tv-player-ias.vflset/tv-player-ias.js', 'tv_es6': 'tv-player-es6.vflset/tv-player-es6.js', 'phone': 'player-plasma-ias-phone-en_US.vflset/base.js', - 'tablet': 'player-plasma-ias-tablet-en_US.vflset/base.js', + 'house': 'house_brand_player.vflset/en_US/base.js', # Used by Google Drive } _INVERSE_PLAYER_JS_VARIANT_MAP = {v: k for k, v in _PLAYER_JS_VARIANT_MAP.items()} + @functools.cached_property + def _player_js_version(self): + return self._configuration_arg('player_js_version', [None])[0] or self._DEFAULT_PLAYER_JS_VERSION + + @functools.cached_property + def _webpage_client(self): + webpage_client = self._configuration_arg('webpage_client', [self._DEFAULT_WEBPAGE_CLIENT])[0] + if webpage_client not in self._WEBPAGE_CLIENTS: + self.report_warning( + f'Invalid webpage_client "{webpage_client}" requested; ' + f'falling back to {self._DEFAULT_WEBPAGE_CLIENT}', only_once=True) + webpage_client = self._DEFAULT_WEBPAGE_CLIENT + return webpage_client + + @functools.cached_property + def _skipped_webpage_data(self): + skipped = set(self._configuration_arg('webpage_skip')) + # If forcing a player version, the webpage player response must be skipped + if self._player_js_version != 'actual': + skipped.add('player_response') + return skipped + @classmethod def suitable(cls, url): from yt_dlp.utils import parse_qs @@ -1913,13 +1939,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): start_time = time.time() formats = [f for f in formats if f.get('is_from_start')] - def refetch_manifest(format_id, delay): + def refetch_manifest(itag, client_name, delay): nonlocal formats, start_time, is_live if time.time() <= start_time + delay: return _, _, _, _, prs, player_url = self._initial_extract( - url, smuggled_data, webpage_url, 'web', video_id) + url, smuggled_data, webpage_url, self._webpage_client, video_id) video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict) microformats = traverse_obj( prs, (..., 'microformat', 'playerMicroformatRenderer'), @@ -1928,20 +1954,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor): is_live = live_status == 'is_live' start_time = time.time() - def mpd_feed(format_id, delay): + def mpd_feed(itag, client_name, delay): """ @returns (manifest_url, manifest_stream_number, is_live) or None """ for retry in self.RetryManager(fatal=False): with lock: - refetch_manifest(format_id, delay) + refetch_manifest(itag, client_name, delay) - f = next((f for f in formats if f['format_id'] == format_id), None) + f = next((f for f in formats if f.get('_itag') == itag and f.get('_client') == client_name), None) if not f: if not is_live: retry.error = f'{video_id}: Video is no longer live' else: - retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}' + retry.error = f'Cannot find refreshed manifest for format {itag}{bug_reports_message()}' continue # Formats from ended premieres will be missing a manifest_url @@ -1954,7 +1980,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): for f in formats: f['is_live'] = is_live - gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'], + gen = functools.partial(self._live_dash_fragments, video_id, f['_itag'], f['_client'], live_start_time, mpd_feed, not is_live and f.copy()) if is_live: f['fragments'] = gen @@ -1963,7 +1989,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): f['fragments'] = LazyList(gen({})) del f['is_from_start'] - def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx): + def _live_dash_fragments(self, video_id, itag, client_name, live_start_time, mpd_feed, manifestless_orig_fmt, ctx): FETCH_SPAN, MAX_DURATION = 5, 432000 mpd_url, stream_number, is_live = None, None, True @@ -1987,7 +2013,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): old_mpd_url = mpd_url last_error = ctx.pop('last_error', None) expire_fast = immediate or (last_error and isinstance(last_error, HTTPError) and last_error.status == 403) - mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000) + mpd_url, stream_number, is_live = (mpd_feed(itag, client_name, 5 if expire_fast else 18000) or (mpd_url, stream_number, False)) if not refresh_sequence: if expire_fast and not is_live: @@ -2013,7 +2039,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1)) return True, _last_seq - self.write_debug(f'[{video_id}] Generating fragments for format {format_id}') + self.write_debug(f'[{video_id}] Generating fragments for format {itag}') while is_live: fetch_time = time.time() if no_fragment_score > 30: @@ -2075,15 +2101,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): time.sleep(max(0, FETCH_SPAN + fetch_time - time.time())) def _get_player_js_version(self): - player_js_version = self._configuration_arg('player_js_version', [''])[0] or self._DEFAULT_PLAYER_JS_VERSION - if player_js_version == 'actual': + if self._player_js_version == 'actual': return None, None - if not re.fullmatch(r'[0-9]{5,}@[0-9a-f]{8,}', player_js_version): + if not re.fullmatch(r'[0-9]{5,}@[0-9a-f]{8,}', self._player_js_version): self.report_warning( - f'Invalid player JS version "{player_js_version}" specified. ' + f'Invalid player JS version "{self._player_js_version}" specified. ' f'It should be "actual" or in the format of STS@HASH', only_once=True) return None, None - return player_js_version.split('@') + return self._player_js_version.split('@') def _construct_player_url(self, *, player_id=None, player_url=None): assert player_id or player_url, '_construct_player_url must take one of player_id or player_url' @@ -2174,13 +2199,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): @classmethod def _extract_player_info(cls, player_url): - for player_re in cls._PLAYER_INFO_RE: - id_m = re.search(player_re, player_url) - if id_m: - break - else: - raise ExtractorError(f'Cannot identify player {player_url!r}') - return id_m.group('id') + if m := re.search(r'/s/player/(?P[a-fA-F0-9]{8,})/', player_url): + return m.group('id') + raise ExtractorError(f'Cannot identify player {player_url!r}') def _load_player(self, video_id, player_url, fatal=True): player_js_key = self._player_js_cache_key(player_url) @@ -2193,64 +2214,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._code_cache[player_js_key] = code return self._code_cache.get(player_js_key) - def _sig_spec_cache_id(self, player_url, spec_id): - return join_nonempty(self._player_js_cache_key(player_url), str(spec_id)) + def _load_player_data_from_cache(self, name, player_url, *cache_keys, use_disk_cache=False): + cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url), *map(str_or_none, cache_keys)) + if cache_id in self._player_cache: + return self._player_cache[cache_id] - def _load_sig_spec_from_cache(self, spec_cache_id): - # This is almost identical to _load_player_data_from_cache - # I hate it - if spec_cache_id in self._player_cache: - return self._player_cache[spec_cache_id] - spec = self.cache.load('youtube-sigfuncs', spec_cache_id, min_ver='2025.07.21') - if spec: - self._player_cache[spec_cache_id] = spec - return spec + if not use_disk_cache: + return None - def _store_sig_spec_to_cache(self, spec_cache_id, spec): - if spec_cache_id not in self._player_cache: - self._player_cache[spec_cache_id] = spec - self.cache.store('youtube-sigfuncs', spec_cache_id, spec) - - def _load_player_data_from_cache(self, name, player_url): - cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url)) - - if data := self._player_cache.get(cache_id): - return data - - data = self.cache.load(*cache_id, min_ver='2025.07.21') + data = self.cache.load(cache_id[0], join_nonempty(*cache_id[1:]), min_ver='2025.07.21') if data: self._player_cache[cache_id] = data return data - def _cached(self, func, *cache_id): - def inner(*args, **kwargs): - if cache_id not in self._player_cache: - try: - self._player_cache[cache_id] = func(*args, **kwargs) - except ExtractorError as e: - self._player_cache[cache_id] = e - except Exception as e: - self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e) - - ret = self._player_cache[cache_id] - if isinstance(ret, Exception): - raise ret - return ret - return inner - - def _store_player_data_to_cache(self, name, player_url, data): - cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url)) + def _store_player_data_to_cache(self, data, name, player_url, *cache_keys, use_disk_cache=False): + cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url), *map(str_or_none, cache_keys)) if cache_id not in self._player_cache: - self.cache.store(*cache_id, data) self._player_cache[cache_id] = data + if use_disk_cache: + self.cache.store(cache_id[0], join_nonempty(*cache_id[1:]), data) def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False): """ Extract signatureTimestamp (sts) Required to tell API what sig/player version is in use. """ - CACHE_ENABLED = False # TODO: enable when preprocessed player JS cache is solved/enabled player_sts_override = self._get_player_js_version()[0] if player_sts_override: @@ -2267,15 +2256,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.report_warning(error_msg) return None - if CACHE_ENABLED and (sts := self._load_player_data_from_cache('sts', player_url)): + # TODO: Pass `use_disk_cache=True` when preprocessed player JS cache is solved + if sts := self._load_player_data_from_cache('sts', player_url): return sts if code := self._load_player(video_id, player_url, fatal=fatal): sts = int_or_none(self._search_regex( r'(?:signatureTimestamp|sts)\s*:\s*(?P[0-9]{5})', code, 'JS player signature timestamp', group='sts', fatal=fatal)) - if CACHE_ENABLED and sts: - self._store_player_data_to_cache('sts', player_url, sts) + if sts: + # TODO: Pass `use_disk_cache=True` when preprocessed player JS cache is solved + self._store_player_data_to_cache(sts, 'sts', player_url) return sts @@ -2707,12 +2698,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return {'contentCheckOk': True, 'racyCheckOk': True} @classmethod - def _generate_player_context(cls, sts=None, use_ad_playback_context=False): + def _generate_player_context(cls, sts=None, use_ad_playback_context=False, encrypted_context=None): context = { 'html5Preference': 'HTML5_PREF_WANTS', } if sts is not None: context['signatureTimestamp'] = sts + if encrypted_context: + context['encryptedHostFlags'] = encrypted_context playback_context = { 'contentPlaybackContext': context, @@ -2793,7 +2786,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'serializedExperimentFlags', {urllib.parse.parse_qs})) if 'true' in traverse_obj(experiments, (..., 'html5_generate_content_po_token', -1)): self.write_debug( - f'{video_id}: Detected experiment to bind GVS PO Token to video id.', only_once=True) + f'{video_id}: Detected experiment to bind GVS PO Token ' + f'to video ID for {client} client', only_once=True) gvs_bind_to_video_id = True # GVS WebPO Token is bound to visitor_data / Visitor ID when logged out. @@ -2956,7 +2950,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._configuration_arg('use_ad_playback_context', ['false'])[0] != 'false' and traverse_obj(INNERTUBE_CLIENTS, (client, 'SUPPORTS_AD_PLAYBACK_CONTEXT', {bool}))) - yt_query.update(self._generate_player_context(sts, use_ad_playback_context)) + # web_embedded player requests may need to include encryptedHostFlags in its contentPlaybackContext. + # This can be detected with the embeds_enable_encrypted_host_flags_enforcement experiemnt flag, + # but there is no harm in including encryptedHostFlags with all web_embedded player requests. + encrypted_context = None + if _split_innertube_client(client)[2] == 'embedded': + encrypted_context = traverse_obj(player_ytcfg, ( + 'WEB_PLAYER_CONTEXT_CONFIGS', 'WEB_PLAYER_CONTEXT_CONFIG_ID_EMBEDDED_PLAYER', 'encryptedHostFlags')) + + yt_query.update( + self._generate_player_context( + sts=sts, + use_ad_playback_context=use_ad_playback_context, + encrypted_context=encrypted_context)) return self._extract_response( item_id=video_id, ep='player', query=yt_query, @@ -3075,7 +3081,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): tried_iframe_fallback = True pr = None - if client == webpage_client and 'player_response' not in self._configuration_arg('webpage_skip'): + if client == webpage_client and 'player_response' not in self._skipped_webpage_data: pr = initial_pr visitor_data = visitor_data or self._extract_visitor_data(webpage_ytcfg, initial_pr, player_ytcfg) @@ -3141,6 +3147,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: prs.append(pr) + if ( + # Is this a "made for kids" video that can't be downloaded with android_vr? + client == 'android_vr' and self._is_unplayable(pr) + and webpage and 'made for kids' in webpage + # ...and is a JS runtime is available? + and any(p.is_available() for p in self._jsc_director.providers.values()) + ): + append_client('web_embedded') + # web_embedded can work around age-gate and age-verification for some embeddable videos if self._is_agegated(pr) and variant != 'web_embedded': append_client(f'web_embedded.{base_client}') @@ -3157,9 +3172,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.to_screen( f'{video_id}: This video is age-restricted and YouTube is requiring ' 'account age-verification; some formats may be missing', only_once=True) - # tv_embedded can work around the age-verification requirement for embeddable videos # web_creator may work around age-verification for all videos but requires PO token - append_client('tv_embedded', 'web_creator') + append_client('web_creator') status = traverse_obj(pr, ('playabilityStatus', 'status', {str})) if status not in ('OK', 'LIVE_STREAM_OFFLINE', 'AGE_CHECK_REQUIRED', 'AGE_VERIFICATION_REQUIRED'): @@ -3233,7 +3247,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres', ]) + skip_player_js = 'js' in self._configuration_arg('player_skip') format_types = self._configuration_arg('formats') + skip_bad_formats = 'incomplete' not in format_types all_formats = 'duplicate' in format_types if self._configuration_arg('include_duplicate_formats'): all_formats = True @@ -3278,6 +3294,98 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return language_code, DEFAULT_LANG_VALUE return language_code, -1 + def get_manifest_n_challenge(manifest_url): + if not url_or_none(manifest_url): + return None + # Same pattern that the player JS uses to read/replace the n challenge value + return self._search_regex( + r'/n/([^/]+)/', urllib.parse.urlparse(manifest_url).path, + 'n challenge', default=None) + + n_challenges = set() + s_challenges = set() + + def solve_js_challenges(): + # Solve all n/sig challenges in bulk and store the results in self._player_cache + challenge_requests = [] + if n_challenges: + challenge_requests.append(JsChallengeRequest( + type=JsChallengeType.N, + video_id=video_id, + input=NChallengeInput(challenges=list(n_challenges), player_url=player_url))) + if s_challenges: + cached_sigfuncs = set() + for spec_id in s_challenges: + if self._load_player_data_from_cache('sigfuncs', player_url, spec_id, use_disk_cache=True): + cached_sigfuncs.add(spec_id) + s_challenges.difference_update(cached_sigfuncs) + + challenge_requests.append(JsChallengeRequest( + type=JsChallengeType.SIG, + video_id=video_id, + input=SigChallengeInput( + challenges=[''.join(map(chr, range(spec_id))) for spec_id in s_challenges], + player_url=player_url))) + + if challenge_requests: + for _challenge_request, challenge_response in self._jsc_director.bulk_solve(challenge_requests): + if challenge_response.type == JsChallengeType.SIG: + for challenge, result in challenge_response.output.results.items(): + spec_id = len(challenge) + self._store_player_data_to_cache( + [ord(c) for c in result], 'sigfuncs', + player_url, spec_id, use_disk_cache=True) + if spec_id in s_challenges: + s_challenges.remove(spec_id) + + elif challenge_response.type == JsChallengeType.N: + for challenge, result in challenge_response.output.results.items(): + self._store_player_data_to_cache(result, 'n', player_url, challenge) + if challenge in n_challenges: + n_challenges.remove(challenge) + + # Raise warning if any challenge requests remain + # Depending on type of challenge request + help_message = ( + 'Ensure you have a supported JavaScript runtime and ' + 'challenge solver script distribution installed. ' + 'Review any warnings presented before this message. ' + f'For more details, refer to {_EJS_WIKI_URL}') + if s_challenges: + self.report_warning( + f'Signature solving failed: Some formats may be missing. {help_message}', + video_id=video_id, only_once=True) + if n_challenges: + self.report_warning( + f'n challenge solving failed: Some formats may be missing. {help_message}', + video_id=video_id, only_once=True) + + # Clear challenge sets so that any subsequent call of this function is a no-op + s_challenges.clear() + n_challenges.clear() + + # 1st pass to collect all n/sig challenges so they can later be solved at once in bulk + for streaming_data in traverse_obj(player_responses, (..., 'streamingData', {dict})): + # HTTPS formats + for fmt_stream in traverse_obj(streaming_data, (('formats', 'adaptiveFormats'), ..., {dict})): + fmt_url = fmt_stream.get('url') + s_challenge = None + if not fmt_url: + sc = urllib.parse.parse_qs(fmt_stream.get('signatureCipher')) + fmt_url = traverse_obj(sc, ('url', 0, {url_or_none})) + s_challenge = traverse_obj(sc, ('s', 0)) + + if s_challenge: + s_challenges.add(len(s_challenge)) + + if n_challenge := traverse_obj(fmt_url, ({parse_qs}, 'n', 0)): + n_challenges.add(n_challenge) + + # Manifest formats + n_challenges.update(traverse_obj( + streaming_data, (('hlsManifestUrl', 'dashManifestUrl'), {get_manifest_n_challenge}))) + + # Final pass to extract formats and solve n/sig challenges as needed for pr in player_responses: streaming_data = traverse_obj(pr, 'streamingData') if not streaming_data: @@ -3385,10 +3493,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def process_https_formats(): proto = 'https' https_fmts = [] - skip_player_js = 'js' in self._configuration_arg('player_skip') for fmt_stream in streaming_formats: - if fmt_stream.get('targetDurationSec'): + # Live adaptive https formats are not supported: skip unless extractor-arg given + if fmt_stream.get('targetDurationSec') and skip_bad_formats: continue # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment @@ -3422,19 +3530,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # See: https://github.com/yt-dlp/yt-dlp/issues/14883 get_language_code_and_preference(fmt_stream) sc = urllib.parse.parse_qs(fmt_stream.get('signatureCipher')) - fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0])) - encrypted_sig = try_get(sc, lambda x: x['s'][0]) + fmt_url = traverse_obj(sc, ('url', 0, {url_or_none})) + encrypted_sig = traverse_obj(sc, ('s', 0)) if not all((sc, fmt_url, skip_player_js or player_url, encrypted_sig)): - msg = f'Some {client_name} client https formats have been skipped as they are missing a URL. ' + msg_tmpl = ( + '{}Some {} client https formats have been skipped as they are missing a URL. ' + '{}. See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details') if client_name in ('web', 'web_safari'): - msg += 'YouTube is forcing SABR streaming for this client. ' + self.write_debug(msg_tmpl.format( + f'{video_id}: ', client_name, + 'YouTube is forcing SABR streaming for this client'), only_once=True) else: - msg += ( + msg = ( f'YouTube may have enabled the SABR-only streaming experiment for ' - f'{"your account" if self.is_authenticated else "the current session"}. ' - ) - msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details' - self.report_warning(msg, video_id, only_once=True) + f'{"your account" if self.is_authenticated else "the current session"}') + self.report_warning(msg_tmpl.format('', client_name, msg), video_id, only_once=True) continue fmt = process_format_stream( @@ -3444,19 +3554,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): continue # signature - # Attempt to load sig spec from cache if encrypted_sig: if skip_player_js: continue - spec_cache_id = self._sig_spec_cache_id(player_url, len(encrypted_sig)) - spec = self._load_sig_spec_from_cache(spec_cache_id) - if spec: - self.write_debug(f'Using cached signature function {spec_cache_id}', only_once=True) - fmt_url += '&{}={}'.format(traverse_obj(sc, ('sp', -1)) or 'signature', - solve_sig(encrypted_sig, spec)) - else: - fmt['_jsc_s_challenge'] = encrypted_sig - fmt['_jsc_s_sc'] = sc + solve_js_challenges() + spec = self._load_player_data_from_cache( + 'sigfuncs', player_url, len(encrypted_sig), use_disk_cache=True) + if not spec: + continue + fmt_url += '&{}={}'.format( + traverse_obj(sc, ('sp', -1)) or 'signature', + solve_sig(encrypted_sig, spec)) # n challenge query = parse_qs(fmt_url) @@ -3464,10 +3572,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if skip_player_js: continue n_challenge = query['n'][0] - if n_challenge in self._player_cache: - fmt_url = update_url_query(fmt_url, {'n': self._player_cache[n_challenge]}) - else: - fmt['_jsc_n_challenge'] = n_challenge + solve_js_challenges() + n_result = self._load_player_data_from_cache('n', player_url, n_challenge) + if not n_result: + continue + fmt_url = update_url_query(fmt_url, {'n': n_result}) if po_token: fmt_url = update_url_query(fmt_url, {'pot': po_token}) @@ -3484,80 +3593,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): https_fmts.append(fmt) - # Bulk process sig/n handling - # Retrieve all JSC Sig and n requests for this player response in one go - n_challenges = {} - s_challenges = {} - for fmt in https_fmts: - # This will de-duplicate requests - n_challenge = fmt.pop('_jsc_n_challenge', None) - if n_challenge is not None: - n_challenges.setdefault(n_challenge, []).append(fmt) - - s_challenge = fmt.pop('_jsc_s_challenge', None) - if s_challenge is not None: - s_challenges.setdefault(len(s_challenge), {}).setdefault(s_challenge, []).append(fmt) - - challenge_requests = [] - if n_challenges: - challenge_requests.append(JsChallengeRequest( - type=JsChallengeType.N, - video_id=video_id, - input=NChallengeInput(challenges=list(n_challenges.keys()), player_url=player_url))) - if s_challenges: - challenge_requests.append(JsChallengeRequest( - type=JsChallengeType.SIG, - video_id=video_id, - input=SigChallengeInput(challenges=[''.join(map(chr, range(spec_id))) for spec_id in s_challenges], player_url=player_url))) - - if challenge_requests: - for _challenge_request, challenge_response in self._jsc_director.bulk_solve(challenge_requests): - if challenge_response.type == JsChallengeType.SIG: - for challenge, result in challenge_response.output.results.items(): - spec_id = len(challenge) - spec = [ord(c) for c in result] - self._store_sig_spec_to_cache(self._sig_spec_cache_id(player_url, spec_id), spec) - s_challenge_data = s_challenges.pop(spec_id, {}) - if not s_challenge_data: - continue - for s_challenge, fmts in s_challenge_data.items(): - solved_challenge = solve_sig(s_challenge, spec) - for fmt in fmts: - sc = fmt.pop('_jsc_s_sc') - fmt['url'] += '&{}={}'.format( - traverse_obj(sc, ('sp', -1)) or 'signature', - solved_challenge) - - elif challenge_response.type == JsChallengeType.N: - for challenge, result in challenge_response.output.results.items(): - fmts = n_challenges.pop(challenge, []) - for fmt in fmts: - self._player_cache[challenge] = result - fmt['url'] = update_url_query(fmt['url'], {'n': result}) - - # Raise warning if any challenge requests remain - # Depending on type of challenge request - - help_message = ( - 'Ensure you have a supported JavaScript runtime and ' - 'challenge solver script distribution installed. ' - 'Review any warnings presented before this message. ' - f'For more details, refer to {_EJS_WIKI_URL}') - - if s_challenges: - self.report_warning( - f'Signature solving failed: Some formats may be missing. {help_message}', - video_id=video_id, only_once=True) - if n_challenges: - self.report_warning( - f'n challenge solving failed: Some formats may be missing. {help_message}', - video_id=video_id, only_once=True) - - for cfmts in list(s_challenges.values()) + list(n_challenges.values()): - for fmt in cfmts: - if fmt in https_fmts: - https_fmts.remove(fmt) - for fmt in https_fmts: if (all_formats or 'dashy' in format_types) and fmt['filesize']: yield { @@ -3573,7 +3608,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): yield from process_https_formats() needs_live_processing = self._needs_live_processing(live_status, duration) - skip_bad_formats = 'incomplete' not in format_types skip_manifests = set(self._configuration_arg('skip')) if (needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway @@ -3640,17 +3674,34 @@ class YoutubeIE(YoutubeBaseInfoExtractor): hls_manifest_url = 'hls' not in skip_manifests and streaming_data.get('hlsManifestUrl') if hls_manifest_url: + manifest_path = urllib.parse.urlparse(hls_manifest_url).path + if m := re.fullmatch(r'(?P.+)(?P/(?:file|playlist)/index\.m3u8)', manifest_path): + manifest_path, manifest_suffix = m.group('path', 'suffix') + else: + manifest_suffix = '' + + solved_n = False + n_challenge = get_manifest_n_challenge(hls_manifest_url) + if n_challenge and not skip_player_js: + solve_js_challenges() + n_result = self._load_player_data_from_cache('n', player_url, n_challenge) + if n_result: + manifest_path = manifest_path.replace(f'/n/{n_challenge}', f'/n/{n_result}') + solved_n = n_result in manifest_path + pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg( client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HLS] require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided) po_token = gvs_pots.get(client_name, fetch_po_token_func(required=require_po_token or pot_policy.recommended)) if po_token: - hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}' + manifest_path = manifest_path.rstrip('/') + f'/pot/{po_token}' if client_name not in gvs_pots: gvs_pots[client_name] = po_token + if require_po_token and not po_token and 'missing_pot' not in self._configuration_arg('formats'): self._report_pot_format_skipped(video_id, client_name, 'hls') - else: + elif solved_n or not n_challenge: + hls_manifest_url = update_url(hls_manifest_url, path=f'{manifest_path}{manifest_suffix}') fmts, subs = self._extract_m3u8_formats_and_subtitles( hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live') for sub in traverse_obj(subs, (..., ..., {dict})): @@ -3665,17 +3716,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor): dash_manifest_url = 'dash' not in skip_manifests and streaming_data.get('dashManifestUrl') if dash_manifest_url: + manifest_path = urllib.parse.urlparse(dash_manifest_url).path + + solved_n = False + n_challenge = get_manifest_n_challenge(dash_manifest_url) + if n_challenge and not skip_player_js: + solve_js_challenges() + n_result = self._load_player_data_from_cache('n', player_url, n_challenge) + if n_result: + manifest_path = manifest_path.replace(f'/n/{n_challenge}', f'/n/{n_result}') + solved_n = n_result in manifest_path + pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg( client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.DASH] require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided) po_token = gvs_pots.get(client_name, fetch_po_token_func(required=require_po_token or pot_policy.recommended)) if po_token: - dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}' + manifest_path = manifest_path.rstrip('/') + f'/pot/{po_token}' if client_name not in gvs_pots: gvs_pots[client_name] = po_token + if require_po_token and not po_token and 'missing_pot' not in self._configuration_arg('formats'): self._report_pot_format_skipped(video_id, client_name, 'dash') - else: + elif solved_n or not n_challenge: + dash_manifest_url = update_url(dash_manifest_url, path=manifest_path) formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False) for sub in traverse_obj(subs, (..., ..., {dict})): # TODO: If DASH video requires a PO Token, do the subs also require pot? @@ -3683,11 +3747,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): sub[STREAMING_DATA_CLIENT_NAME] = client_name subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH for f in formats: - if process_manifest_format(f, 'dash', client_name, f['format_id'], require_po_token and not po_token): + # Save original itag value as format_id because process_manifest_format mutates f + format_id = f['format_id'] + if process_manifest_format(f, 'dash', client_name, format_id, require_po_token and not po_token): f['filesize'] = int_or_none(self._search_regex( r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None)) if needs_live_processing: f['is_from_start'] = True + f['_itag'] = format_id + f['_client'] = client_name yield f yield subtitles @@ -3800,7 +3868,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _download_initial_data(self, video_id, webpage, webpage_client, webpage_ytcfg): initial_data = None - if webpage and 'initial_data' not in self._configuration_arg('webpage_skip'): + if webpage and 'initial_data' not in self._skipped_webpage_data: initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False) if not traverse_obj(initial_data, 'contents'): self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.') @@ -3848,10 +3916,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): base_url = self.http_scheme() + '//www.youtube.com/' webpage_url = base_url + 'watch?v=' + video_id - webpage_client = 'web' webpage, webpage_ytcfg, initial_data, is_premium_subscriber, player_responses, player_url = self._initial_extract( - url, smuggled_data, webpage_url, webpage_client, video_id) + url, smuggled_data, webpage_url, self._webpage_client, video_id) playability_statuses = traverse_obj( player_responses, (..., 'playabilityStatus'), expected_type=dict) @@ -4053,16 +4120,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor): needs_live_processing = self._needs_live_processing(live_status, duration) - def is_bad_format(fmt): - if needs_live_processing and not fmt.get('is_from_start'): - return True - elif (live_status == 'is_live' and needs_live_processing != 'is_live' - and fmt.get('protocol') == 'http_dash_segments'): - return True + def adjust_incomplete_format(fmt, note_suffix='(Last 2 hours)', pref_adjustment=-10): + fmt['preference'] = (fmt.get('preference') or -1) + pref_adjustment + fmt['format_note'] = join_nonempty(fmt.get('format_note'), note_suffix, delim=' ') - for fmt in filter(is_bad_format, formats): - fmt['preference'] = (fmt.get('preference') or -1) - 10 - fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ') + # Adjust preference and format note for incomplete live/post-live formats + if live_status in ('is_live', 'post_live'): + for fmt in formats: + protocol = fmt.get('protocol') + # Currently, protocol isn't set for adaptive https formats, but this could change + is_adaptive = protocol in (None, 'http', 'https') + if live_status == 'post_live' and is_adaptive: + # Post-live adaptive formats cause HttpFD to raise "Did not get any data blocks" + # These formats are *only* useful to external applications, so we can hide them + # Set their preference <= -1000 so that FormatSorter flags them as 'hidden' + adjust_incomplete_format(fmt, note_suffix='(ended)', pref_adjustment=-5000) + # Is it live with --live-from-start? Or is it post-live and its duration is >2hrs? + elif needs_live_processing: + if not fmt.get('is_from_start'): + # Post-live m3u8 formats for >2hr streams + adjust_incomplete_format(fmt) + elif live_status == 'is_live': + if protocol == 'http_dash_segments': + # Live DASH formats without --live-from-start + adjust_incomplete_format(fmt) + elif is_adaptive: + # Incomplete live adaptive https formats + adjust_incomplete_format(fmt, note_suffix='(incomplete)', pref_adjustment=-20) if needs_live_processing: self._prepare_live_from_start_formats( diff --git a/plugins/youtube_download/yt_dlp/extractor/youtube/jsc/_builtin/bun.py b/plugins/youtube_download/yt_dlp/extractor/youtube/jsc/_builtin/bun.py index 8b0a6e5..3f6dff0 100644 --- a/plugins/youtube_download/yt_dlp/extractor/youtube/jsc/_builtin/bun.py +++ b/plugins/youtube_download/yt_dlp/extractor/youtube/jsc/_builtin/bun.py @@ -45,6 +45,8 @@ class BunJCP(EJSBaseJCP, BuiltinIEContentProvider): JS_RUNTIME_NAME = 'bun' BUN_NPM_LIB_FILENAME = 'yt.solver.bun.lib.js' SUPPORTED_PROXY_SCHEMES = ['http', 'https'] + _BUN_MAX_SUPPORTED_VERSION = (1, 3, 14) + _BUN_DEPRECATION_URL = 'https://github.com/yt-dlp/yt-dlp/issues/16766' def _iter_script_sources(self): yield from super()._iter_script_sources() @@ -112,6 +114,19 @@ class BunJCP(EJSBaseJCP, BuiltinIEContentProvider): return options def _run_js_runtime(self, stdin: str, /) -> str: + is_unsupported_version = self.runtime_info.version_tuple > self._BUN_MAX_SUPPORTED_VERSION + if is_unsupported_version: + self.logger.warning( + f'bun version {".".join(map(str, self.runtime_info.version_tuple))} is not supported! ' + f'{".".join(map(str, self._BUN_MAX_SUPPORTED_VERSION))} is the last supported bun version. ' + f'{self.ie._downloader._format_err("DO NOT", self.ie._downloader.Styles.ERROR)} ' + f'open a bug report even if you encounter any errors!', + once=True) + else: + self.logger.info( + f'bun support has been deprecated. See {self._BUN_DEPRECATION_URL} for details', + once=True) + # https://bun.com/docs/cli/run options = ['--no-addons', '--prefer-offline'] if self._lib_script.variant == ScriptVariant.BUN_NPM: @@ -136,7 +151,7 @@ class BunJCP(EJSBaseJCP, BuiltinIEContentProvider): msg = f'Error running bun process (returncode: {proc.returncode})' if stderr: msg = f'{msg}: {stderr.strip()}' - raise JsChallengeProviderError(msg) + raise JsChallengeProviderError(msg, expected=is_unsupported_version) return stdout def _clean_stderr(self, stderr): diff --git a/plugins/youtube_download/yt_dlp/extractor/youtube/jsc/_builtin/quickjs.py b/plugins/youtube_download/yt_dlp/extractor/youtube/jsc/_builtin/quickjs.py index f87725b..73786cd 100644 --- a/plugins/youtube_download/yt_dlp/extractor/youtube/jsc/_builtin/quickjs.py +++ b/plugins/youtube_download/yt_dlp/extractor/youtube/jsc/_builtin/quickjs.py @@ -21,12 +21,20 @@ from yt_dlp.utils import Popen class QuickJSJCP(EJSBaseJCP, BuiltinIEContentProvider): PROVIDER_NAME = 'quickjs' JS_RUNTIME_NAME = 'quickjs' + _QJS_MIN_RECOMMENDED = { + 'quickjs': (2025, 4, 26), + 'quickjs-ng': (0, 12, 0), + } + _QJS_WARNING_TMPL = ( + '{name} versions older than {version} are missing important optimizations ' + 'and will solve the JS challenges very slowly. Consider upgrading.') def _run_js_runtime(self, stdin: str, /) -> str: - if self.runtime_info.name == 'quickjs-ng': - self.logger.warning('QuickJS-NG is missing some optimizations making this very slow. Consider using upstream QuickJS instead.') - elif self.runtime_info.version_tuple < (2025, 4, 26): - self.logger.warning('Older QuickJS versions are missing optimizations making this very slow. Consider upgrading.') + min_recommended_version = self._QJS_MIN_RECOMMENDED[self.runtime_info.name] + if self.runtime_info.version_tuple < min_recommended_version: + self.logger.warning(self._QJS_WARNING_TMPL.format( + name=self.runtime_info.name, + version='.'.join(map(str, min_recommended_version)))) # QuickJS does not support reading from stdin, so we have to use a temp file temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.js', delete=False, encoding='utf-8') diff --git a/plugins/youtube_download/yt_dlp/extractor/youtube/jsc/_builtin/vendor/_info.py b/plugins/youtube_download/yt_dlp/extractor/youtube/jsc/_builtin/vendor/_info.py index 5e42ce3..0667549 100644 --- a/plugins/youtube_download/yt_dlp/extractor/youtube/jsc/_builtin/vendor/_info.py +++ b/plugins/youtube_download/yt_dlp/extractor/youtube/jsc/_builtin/vendor/_info.py @@ -1,10 +1,10 @@ -# This file is generated by devscripts/update_ejs.py. DO NOT MODIFY! +# This file is generated by devscripts/update_requirements.py. DO NOT MODIFY! -VERSION = '0.3.2' +VERSION = '0.8.0' HASHES = { 'yt.solver.bun.lib.js': '6ff45e94de9f0ea936a183c48173cfa9ce526ee4b7544cd556428427c1dd53c8073ef0174e79b320252bf0e7c64b0032cc1cf9c4358f3fda59033b7caa01c241', - 'yt.solver.core.js': '0cd96b2d3f319dfa62cae689efa7d930ef1706e95f5921794db5089b2262957ec0a17d73938d8975ea35d0309cbfb4c8e4418d5e219837215eee242890c8b64d', - 'yt.solver.core.min.js': '370d627703002b4a73b10027702734a3de9484f6b56b739942be1dc2b60fee49dee2aa86ed117d1c8ae1ac55181d326481f1fe2e2e8d5211154d48e2a55dac51', + 'yt.solver.core.js': 'c163a6f376db6ce3da47d516a28a8f2a0554ae95c58dc766f0a6e2b3894f2cef1ee07fa84beb442fa471aac4f300985added1657c7c94c4d1cfefe68920ab599', + 'yt.solver.core.min.js': 'ee5b307d07f55e91e4723edf5ac205cc877a474187849d757dc1322e38427b157a9d706d510c1723d3670f98e5a3f8cbcde77874a80406bd7204bc9fea30f283', 'yt.solver.deno.lib.js': '9c8ee3ab6c23e443a5a951e3ac73c6b8c1c8fb34335e7058a07bf99d349be5573611de00536dcd03ecd3cf34014c4e9b536081de37af3637c5390c6a6fd6a0f0', 'yt.solver.lib.js': '1ee3753a8222fc855f5c39db30a9ccbb7967dbe1fb810e86dc9a89aa073a0907f294c720e9b65427d560a35aa1ce6af19ef854d9126a05ca00afe03f72047733', 'yt.solver.lib.min.js': '8420c259ad16e99ce004e4651ac1bcabb53b4457bf5668a97a9359be9a998a789fee8ab124ee17f91a2ea8fd84e0f2b2fc8eabcaf0b16a186ba734cf422ad053', diff --git a/plugins/youtube_download/yt_dlp/extractor/youtube/jsc/_builtin/vendor/yt.solver.core.js b/plugins/youtube_download/yt_dlp/extractor/youtube/jsc/_builtin/vendor/yt.solver.core.js index 1cfa0d6..bbf5555 100644 --- a/plugins/youtube_download/yt_dlp/extractor/youtube/jsc/_builtin/vendor/yt.solver.core.js +++ b/plugins/youtube_download/yt_dlp/extractor/youtube/jsc/_builtin/vendor/yt.solver.core.js @@ -39,189 +39,8 @@ var jsc = (function (meriyah, astring) { function isOneOf(value, ...of) { return of.includes(value); } - function _optionalChain$2(ops) { - let lastAccessLHS = undefined; - let value = ops[0]; - let i = 1; - while (i < ops.length) { - const op = ops[i]; - const fn = ops[i + 1]; - i += 2; - if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { - return undefined; - } - if (op === 'access' || op === 'optionalAccess') { - lastAccessLHS = value; - value = fn(value); - } else if (op === 'call' || op === 'optionalCall') { - value = fn((...args) => value.call(lastAccessLHS, ...args)); - lastAccessLHS = undefined; - } - } - return value; - } - const logicalExpression = { - type: 'ExpressionStatement', - expression: { - type: 'LogicalExpression', - left: { type: 'Identifier' }, - right: { - type: 'SequenceExpression', - expressions: [ - { - type: 'AssignmentExpression', - left: { type: 'Identifier' }, - operator: '=', - right: { - type: 'CallExpression', - callee: { type: 'Identifier' }, - arguments: { - or: [ - [ - { type: 'Literal' }, - { - type: 'CallExpression', - callee: { - type: 'Identifier', - name: 'decodeURIComponent', - }, - arguments: [{ type: 'Identifier' }], - optional: false, - }, - ], - [ - { - type: 'CallExpression', - callee: { - type: 'Identifier', - name: 'decodeURIComponent', - }, - arguments: [{ type: 'Identifier' }], - optional: false, - }, - ], - ], - }, - optional: false, - }, - }, - { type: 'CallExpression' }, - ], - }, - operator: '&&', - }, - }; - const identifier$1 = { - or: [ - { - type: 'ExpressionStatement', - expression: { - type: 'AssignmentExpression', - operator: '=', - left: { type: 'Identifier' }, - right: { type: 'FunctionExpression', params: [{}, {}, {}] }, - }, - }, - { type: 'FunctionDeclaration', params: [{}, {}, {}] }, - { - type: 'VariableDeclaration', - declarations: { - anykey: [ - { - type: 'VariableDeclarator', - init: { type: 'FunctionExpression', params: [{}, {}, {}] }, - }, - ], - }, - }, - ], - }; - function extract$1(node) { - if (!matchesStructure(node, identifier$1)) { - return null; - } - let block; - if ( - node.type === 'ExpressionStatement' && - node.expression.type === 'AssignmentExpression' && - node.expression.right.type === 'FunctionExpression' - ) { - block = node.expression.right.body; - } else if (node.type === 'VariableDeclaration') { - for (const decl of node.declarations) { - if ( - decl.type === 'VariableDeclarator' && - _optionalChain$2([ - decl, - 'access', - (_) => _.init, - 'optionalAccess', - (_2) => _2.type, - ]) === 'FunctionExpression' && - _optionalChain$2([ - decl, - 'access', - (_3) => _3.init, - 'optionalAccess', - (_4) => _4.params, - 'access', - (_5) => _5.length, - ]) === 3 - ) { - block = decl.init.body; - break; - } - } - } else if (node.type === 'FunctionDeclaration') { - block = node.body; - } else { - return null; - } - const relevantExpression = _optionalChain$2([ - block, - 'optionalAccess', - (_6) => _6.body, - 'access', - (_7) => _7.at, - 'call', - (_8) => _8(-2), - ]); - if (!matchesStructure(relevantExpression, logicalExpression)) { - return null; - } - if ( - _optionalChain$2([ - relevantExpression, - 'optionalAccess', - (_9) => _9.type, - ]) !== 'ExpressionStatement' || - relevantExpression.expression.type !== 'LogicalExpression' || - relevantExpression.expression.right.type !== 'SequenceExpression' || - relevantExpression.expression.right.expressions[0].type !== - 'AssignmentExpression' - ) { - return null; - } - const call = relevantExpression.expression.right.expressions[0].right; - if (call.type !== 'CallExpression' || call.callee.type !== 'Identifier') { - return null; - } - return { - type: 'ArrowFunctionExpression', - params: [{ type: 'Identifier', name: 'sig' }], - body: { - type: 'CallExpression', - callee: { type: 'Identifier', name: call.callee.name }, - arguments: - call.arguments.length === 1 - ? [{ type: 'Identifier', name: 'sig' }] - : [call.arguments[0], { type: 'Identifier', name: 'sig' }], - optional: false, - }, - async: false, - expression: false, - generator: false, - }; + function generateArrowFunction(data) { + return meriyah.parse(data).body[0].expression; } function _optionalChain$1(ops) { let lastAccessLHS = undefined; @@ -246,156 +65,117 @@ var jsc = (function (meriyah, astring) { } const identifier = { or: [ + { + type: 'ExpressionStatement', + expression: { + type: 'AssignmentExpression', + operator: '=', + left: { or: [{ type: 'Identifier' }, { type: 'MemberExpression' }] }, + right: { type: 'FunctionExpression', async: false }, + }, + }, + { type: 'FunctionDeclaration', async: false, id: { type: 'Identifier' } }, { type: 'VariableDeclaration', - kind: 'var', declarations: { anykey: [ { type: 'VariableDeclarator', - id: { type: 'Identifier' }, - init: { - type: 'ArrayExpression', - elements: [{ type: 'Identifier' }], - }, + init: { type: 'FunctionExpression', async: false }, }, ], }, }, - { - type: 'ExpressionStatement', - expression: { - type: 'AssignmentExpression', - left: { type: 'Identifier' }, - operator: '=', - right: { - type: 'ArrayExpression', - elements: [{ type: 'Identifier' }], - }, - }, - }, ], }; - const catchBlockBody = [ - { - type: 'ReturnStatement', - argument: { - type: 'BinaryExpression', - left: { - type: 'MemberExpression', - object: { type: 'Identifier' }, - computed: true, - property: { type: 'Literal' }, - optional: false, - }, - right: { type: 'Identifier' }, - operator: '+', + const asdasd = { + type: 'ExpressionStatement', + expression: { + type: 'CallExpression', + callee: { + type: 'MemberExpression', + object: { type: 'Identifier' }, + property: {}, + optional: false, }, + arguments: [ + { type: 'Literal', value: 'alr' }, + { type: 'Literal', value: 'yes' }, + ], + optional: false, }, - ]; + }; function extract(node) { if (!matchesStructure(node, identifier)) { - let name = null; - let block = null; - switch (node.type) { - case 'ExpressionStatement': { - if ( - node.expression.type === 'AssignmentExpression' && - node.expression.left.type === 'Identifier' && - node.expression.right.type === 'FunctionExpression' && - node.expression.right.params.length === 1 - ) { - name = node.expression.left.name; - block = node.expression.right.body; - } - break; - } - case 'FunctionDeclaration': { - if (node.params.length === 1) { - name = _optionalChain$1([ - node, - 'access', - (_) => _.id, - 'optionalAccess', - (_2) => _2.name, - ]); - block = node.body; - } - break; - } - } - if (!block || !name) { - return null; - } - const tryNode = block.body.at(-2); - if ( - _optionalChain$1([tryNode, 'optionalAccess', (_3) => _3.type]) !== - 'TryStatement' || - _optionalChain$1([ - tryNode, - 'access', - (_4) => _4.handler, - 'optionalAccess', - (_5) => _5.type, - ]) !== 'CatchClause' - ) { - return null; - } - const catchBody = tryNode.handler.body.body; - if (matchesStructure(catchBody, catchBlockBody)) { - return makeSolverFuncFromName(name); - } return null; } - if (node.type === 'VariableDeclaration') { - for (const declaration of node.declarations) { - if ( - declaration.type !== 'VariableDeclarator' || - !declaration.init || - declaration.init.type !== 'ArrayExpression' || - declaration.init.elements.length !== 1 - ) { - continue; - } - const [firstElement] = declaration.init.elements; - if (firstElement && firstElement.type === 'Identifier') { - return makeSolverFuncFromName(firstElement.name); - } + const options = []; + if (node.type === 'FunctionDeclaration') { + if ( + node.id && + _optionalChain$1([ + node, + 'access', + (_) => _.body, + 'optionalAccess', + (_2) => _2.body, + ]) + ) { + options.push({ + name: node.id, + statements: _optionalChain$1([ + node, + 'access', + (_3) => _3.body, + 'optionalAccess', + (_4) => _4.body, + ]), + }); } } else if (node.type === 'ExpressionStatement') { - const expr = node.expression; - if ( - expr.type === 'AssignmentExpression' && - expr.left.type === 'Identifier' && - expr.operator === '=' && - expr.right.type === 'ArrayExpression' && - expr.right.elements.length === 1 - ) { - const [firstElement] = expr.right.elements; - if (firstElement && firstElement.type === 'Identifier') { - return makeSolverFuncFromName(firstElement.name); + if (node.expression.type !== 'AssignmentExpression') { + return null; + } + const name = node.expression.left; + const body = _optionalChain$1([ + node.expression.right, + 'optionalAccess', + (_5) => _5.body, + 'optionalAccess', + (_6) => _6.body, + ]); + if (name && body) { + options.push({ name: name, statements: body }); + } + } else if (node.type === 'VariableDeclaration') { + for (const declaration of node.declarations) { + const name = declaration.id; + const body = _optionalChain$1([ + declaration.init, + 'optionalAccess', + (_7) => _7.body, + 'optionalAccess', + (_8) => _8.body, + ]); + if (name && body) { + options.push({ name: name, statements: body }); } } } + for (const { name: name, statements: statements } of options) { + if (matchesStructure(statements, { anykey: [asdasd] })) { + return createSolver(name); + } + } return null; } - function makeSolverFuncFromName(name) { - return { - type: 'ArrowFunctionExpression', - params: [{ type: 'Identifier', name: 'n' }], - body: { - type: 'CallExpression', - callee: { type: 'Identifier', name: name }, - arguments: [{ type: 'Identifier', name: 'n' }], - optional: false, - }, - async: false, - expression: false, - generator: false, - }; + function createSolver(expression) { + return generateArrowFunction( + `\n({sig, n}) => {\n const url = (${astring.generate(expression)})("https://youtube.com/watch?v=yt-dlp-wins", "s", sig ? encodeURIComponent(sig) : undefined);\n url.set("n", n);\n const proto = Object.getPrototypeOf(url);\n const keys = Object.keys(proto).concat(Object.getOwnPropertyNames(proto));\n for (const key of keys) {\n if (!["constructor", "set", "get", "clone"].includes(key)) {\n url[key]();\n break;\n }\n }\n const s = url.get("s");\n return {\n sig: s ? decodeURIComponent(s) : null,\n n: url.get("n") ?? null,\n };\n}\n`, + ); } const setupNodes = meriyah.parse( - `\nif (typeof globalThis.XMLHttpRequest === "undefined") {\n globalThis.XMLHttpRequest = { prototype: {} };\n}\nconst window = Object.create(null);\nif (typeof URL === "undefined") {\n window.location = {\n hash: "",\n host: "www.youtube.com",\n hostname: "www.youtube.com",\n href: "https://www.youtube.com/watch?v=yt-dlp-wins",\n origin: "https://www.youtube.com",\n password: "",\n pathname: "/watch",\n port: "",\n protocol: "https:",\n search: "?v=yt-dlp-wins",\n username: "",\n };\n} else {\n window.location = new URL("https://www.youtube.com/watch?v=yt-dlp-wins");\n}\nif (typeof globalThis.document === "undefined") {\n globalThis.document = Object.create(null);\n}\nif (typeof globalThis.navigator === "undefined") {\n globalThis.navigator = Object.create(null);\n}\nif (typeof globalThis.self === "undefined") {\n globalThis.self = globalThis;\n}\n`, + `\nif (typeof globalThis.XMLHttpRequest === "undefined") {\n globalThis.XMLHttpRequest = { prototype: {} };\n}\nif (typeof URL === "undefined") {\n globalThis.location = {\n hash: "",\n host: "www.youtube.com",\n hostname: "www.youtube.com",\n href: "https://www.youtube.com/watch?v=yt-dlp-wins",\n origin: "https://www.youtube.com",\n password: "",\n pathname: "/watch",\n port: "",\n protocol: "https:",\n search: "?v=yt-dlp-wins",\n username: "",\n };\n} else {\n globalThis.location = new URL("https://www.youtube.com/watch?v=yt-dlp-wins");\n}\nif (typeof globalThis.document === "undefined") {\n globalThis.document = Object.create(null);\n}\nif (typeof globalThis.navigator === "undefined") {\n globalThis.navigator = Object.create(null);\n}\nif (typeof globalThis.self === "undefined") {\n globalThis.self = globalThis;\n}\nif (typeof globalThis.window === "undefined") {\n globalThis.window = globalThis;\n}\n`, ).body; function _optionalChain(ops) { let lastAccessLHS = undefined; @@ -419,8 +199,31 @@ var jsc = (function (meriyah, astring) { return value; } function preprocessPlayer(data) { - const ast = meriyah.parse(data); - const body = ast.body; + const program = meriyah.parse(data); + const plainStatements = modifyPlayer(program); + const solutions = getSolutions(plainStatements); + for (const [name, options] of Object.entries(solutions)) { + plainStatements.push({ + type: 'ExpressionStatement', + expression: { + type: 'AssignmentExpression', + operator: '=', + left: { + type: 'MemberExpression', + computed: false, + object: { type: 'Identifier', name: '_result' }, + property: { type: 'Identifier', name: name }, + optional: false, + }, + right: multiTry(options), + }, + }); + } + program.body.splice(0, 0, ...setupNodes); + return astring.generate(program); + } + function modifyPlayer(program) { + const body = program.body; const block = (() => { switch (body.length) { case 1: { @@ -453,16 +256,7 @@ var jsc = (function (meriyah, astring) { } throw 'unexpected structure'; })(); - const found = { n: [], sig: [] }; - const plainExpressions = block.body.filter((node) => { - const n = extract(node); - if (n) { - found.n.push(n); - } - const sig = extract$1(node); - if (sig) { - found.sig.push(sig); - } + block.body = block.body.filter((node) => { if (node.type === 'ExpressionStatement') { if (node.expression.type === 'AssignmentExpression') { return true; @@ -471,41 +265,65 @@ var jsc = (function (meriyah, astring) { } return true; }); - block.body = plainExpressions; - for (const [name, options] of Object.entries(found)) { - const unique = new Set(options.map((x) => JSON.stringify(x))); - if (unique.size !== 1) { - const message = `found ${unique.size} ${name} function possibilities`; - throw ( - message + - (unique.size - ? `: ${options.map((x) => astring.generate(x)).join(', ')}` - : '') - ); + return block.body; + } + function getSolutions(statements) { + const found = { n: [], sig: [] }; + for (const statement of statements) { + const result = extract(statement); + if (result) { + found.n.push(makeSolver(result, { type: 'Identifier', name: 'n' })); + found.sig.push(makeSolver(result, { type: 'Identifier', name: 'sig' })); } - plainExpressions.push({ - type: 'ExpressionStatement', - expression: { - type: 'AssignmentExpression', - operator: '=', - left: { - type: 'MemberExpression', - computed: false, - object: { type: 'Identifier', name: '_result' }, - property: { type: 'Identifier', name: name }, - }, - right: options[0], - }, - }); } - ast.body.splice(0, 0, ...setupNodes); - return astring.generate(ast); + return found; + } + function makeSolver(result, ident) { + return { + type: 'ArrowFunctionExpression', + params: [ident], + body: { + type: 'MemberExpression', + object: { + type: 'CallExpression', + callee: result, + arguments: [ + { + type: 'ObjectExpression', + properties: [ + { + type: 'Property', + key: ident, + value: ident, + kind: 'init', + computed: false, + method: false, + shorthand: true, + }, + ], + }, + ], + optional: false, + }, + computed: false, + property: ident, + optional: false, + }, + async: false, + expression: true, + generator: false, + }; } function getFromPrepared(code) { const resultObj = { n: null, sig: null }; Function('_result', code)(resultObj); return resultObj; } + function multiTry(generators) { + return generateArrowFunction( + `\n(_input) => {\n const _results = new Set();\n const errors = [];\n for (const _generator of ${astring.generate({ type: 'ArrayExpression', elements: generators })}) {\n try {\n _results.add(_generator(_input));\n } catch (e) {\n errors.push(e);\n }\n }\n if (!_results.size) {\n throw \`no solutions: \${errors.join(", ")}\`;\n }\n if (_results.size !== 1) {\n throw \`invalid solutions: \${[..._results].map(x => JSON.stringify(x)).join(", ")}\`;\n }\n return _results.values().next().value;\n}\n`, + ); + } function main(input) { const preprocessedPlayer = input.type === 'player' diff --git a/plugins/youtube_download/yt_dlp/extractor/youtube/pot/_director.py b/plugins/youtube_download/yt_dlp/extractor/youtube/pot/_director.py index 26e7a6a..9e467c8 100644 --- a/plugins/youtube_download/yt_dlp/extractor/youtube/pot/_director.py +++ b/plugins/youtube_download/yt_dlp/extractor/youtube/pot/_director.py @@ -6,6 +6,7 @@ import dataclasses import datetime as dt import hashlib import json +import re import traceback import typing import urllib.parse @@ -63,9 +64,9 @@ class YoutubeIEContentProviderLogger(IEContentProviderLogger): if self.log_level <= self.LogLevel.DEBUG: self.__ie.write_debug(self._format_msg(message), only_once=once) - def info(self, message: str): + def info(self, message: str, *, once=False): if self.log_level <= self.LogLevel.INFO: - self.__ie.to_screen(self._format_msg(message)) + self.__ie.to_screen(self._format_msg(message), only_once=once) def warning(self, message: str, *, once=False): if self.log_level <= self.LogLevel.WARNING: @@ -433,9 +434,13 @@ def provider_display_list(providers: Iterable[IEContentProvider]): def clean_pot(po_token: str): # Clean and validate the PO Token. This will strip invalid characters off # (e.g. additional url params the user may accidentally include) + mobj = re.match(r'([^?&#]+)', urllib.parse.unquote(po_token)) + if not mobj: + raise ValueError('Invalid PO Token') + try: return base64.urlsafe_b64encode( - base64.urlsafe_b64decode(urllib.parse.unquote(po_token))).decode() + base64.urlsafe_b64decode(mobj.group(1))).decode() except (binascii.Error, ValueError): raise ValueError('Invalid PO Token') diff --git a/plugins/youtube_download/yt_dlp/extractor/youtube/pot/_provider.py b/plugins/youtube_download/yt_dlp/extractor/youtube/pot/_provider.py index 3aa4673..0ba7c30 100644 --- a/plugins/youtube_download/yt_dlp/extractor/youtube/pot/_provider.py +++ b/plugins/youtube_download/yt_dlp/extractor/youtube/pot/_provider.py @@ -40,7 +40,7 @@ class IEContentProviderLogger(abc.ABC): pass @abc.abstractmethod - def info(self, message: str): + def info(self, message: str, *, once=False): pass @abc.abstractmethod diff --git a/plugins/youtube_download/yt_dlp/extractor/zapiks.py b/plugins/youtube_download/yt_dlp/extractor/zapiks.py index fcfebcd..fbd2292 100644 --- a/plugins/youtube_download/yt_dlp/extractor/zapiks.py +++ b/plugins/youtube_download/yt_dlp/extractor/zapiks.py @@ -1,110 +1,205 @@ +import json import re +import urllib.parse from .common import InfoExtractor from ..utils import ( + clean_html, + extract_attributes, int_or_none, parse_duration, - parse_iso8601, - xpath_text, - xpath_with_ns, + parse_resolution, + str_or_none, + unified_timestamp, + url_or_none, +) +from ..utils.traversal import ( + find_element, + find_elements, + traverse_obj, ) class ZapiksIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P.+?)\.html|index\.php\?.*\bmedia_id=(?P\d+))' - _EMBED_REGEX = [r']+src="(?Phttps?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"'] + _VALID_URL = [ + r'https?://(?:www\.)?zapiks\.(?:com|fr)/(?P[\w-]+)\.html', + r'https?://(?:www\.)?zapiks\.fr/index\.php\?(?:[^#]+&)?media_id=(?P\d+)', + ] + _EMBED_REGEX = [r']+\bsrc=["\'](?P(?:https?:)?//(?:www\.)?zapiks\.fr/index\.php\?(?:[^#"\']+&(?:amp;)?)?media_id=\d+)'] _TESTS = [{ - 'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html', + 'url': 'https://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html', 'md5': 'aeb3c473b2d564b2d46d664d28d5f050', 'info_dict': { 'id': '80798', 'ext': 'mp4', 'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!', - 'description': 'md5:7054d6f6f620c6519be1fe710d4da847', - 'thumbnail': r're:https?://zpks\.com/.+\.jpg', + 'description': 'md5:db07a553c1550e2905bceafa923000fd', + 'display_id': 'ep2s3-bon-appetit-eh-be-viva', 'duration': 528, + 'tags': 'count:5', + 'thumbnail': r're:https?://zpks\.com/.+', 'timestamp': 1359044972, 'upload_date': '20130124', + 'uploader': 'BonAppetit', + 'uploader_id': 'bonappetit', 'view_count': int, }, }, { - 'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html', - 'only_matching': True, - }, { - 'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html', - 'only_matching': True, - }, { - 'url': 'http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046&width=640&height=360&autoStart=false&language=fr', - 'only_matching': True, - }] - _WEBPAGE_TESTS = [{ - 'url': 'https://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html', + 'url': 'https://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html', + 'md5': '196fe42901639d868956b1dcaa48de15', 'info_dict': { 'id': '118046', 'ext': 'mp4', 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !', - 'thumbnail': r're:https?://zpks\.com/.+\.jpg', + 'display_id': 'ep3s5-bon-appetit-baqueira-m-1', + 'duration': 642, + 'tags': 'count:8', + 'thumbnail': r're:https?://zpks\.com/.+', + 'timestamp': 1424370543, + 'upload_date': '20150219', + 'uploader': 'BonAppetit', + 'uploader_id': 'bonappetit', + 'view_count': int, + }, + }, { + 'url': 'https://www.zapiks.fr/index.php?action=playerIframe&media_id=164049', + 'md5': 'fb81a7c9b7b84c00ba111028aee593b8', + 'info_dict': { + 'id': '164049', + 'ext': 'mp4', + 'title': 'Courchevel Hiver 2025/2026', + 'display_id': 'courchevel-hiver-2025-2026', + 'duration': 38, + 'tags': 'count:1', + 'thumbnail': r're:https?://zpks\.com/.+', + 'timestamp': 1769019147, + 'upload_date': '20260121', + 'uploader': 'jamrek', + 'uploader_id': 'jamrek', + 'view_count': int, + }, + }, { + # https://www.youtube.com/watch?v=UBAABvegu2M + 'url': 'https://www.zapiks.com/live-fwt18-vallnord-arcalis-.html', + 'info_dict': { + 'id': 'UBAABvegu2M', + 'ext': 'mp4', + 'title': 'Replay Live - FWT18 Vallnord-Arcalís Andorra - Freeride World Tour 2018', + 'age_limit': 0, + 'availability': 'public', + 'categories': ['Sports'], + 'channel': 'FIS Freeride World Tour by Peak Performance', + 'channel_follower_count': int, + 'channel_id': 'UCraJ3GNFfw6LXFuCV6McByg', + 'channel_url': 'https://www.youtube.com/channel/UCraJ3GNFfw6LXFuCV6McByg', + 'comment_count': int, + 'description': 'md5:2d9fefef758d5ad0d5a987d46aff7572', + 'duration': 11328, + 'heatmap': 'count:100', + 'like_count': int, + 'live_status': 'was_live', + 'media_type': 'livestream', + 'playable_in_embed': True, + 'release_date': '20180306', + 'release_timestamp': 1520321809, + 'tags': 'count:27', + 'thumbnail': r're:https?://i\.ytimg\.com/.+', + 'timestamp': 1520336958, + 'upload_date': '20180306', + 'uploader': 'FIS Freeride World Tour by Peak Performance', + 'uploader_id': '@FISFreerideWorldTour', + 'uploader_url': 'https://www.youtube.com/@FISFreerideWorldTour', + 'view_count': int, + }, + 'add_ie': ['Youtube'], + }, { + # https://vimeo.com/235746460 + 'url': 'https://www.zapiks.fr/waking-dream-2017-full-movie.html', + 'info_dict': { + 'id': '235746460', + 'ext': 'mp4', + 'title': '"WAKING DREAM" (2017) Full Movie by Sam Favret & Julien Herry', + 'duration': 1649, + 'thumbnail': r're:https?://i\.vimeocdn\.com/video/.+', + 'uploader': 'Favret Sam', + 'uploader_id': 'samfavret', + 'uploader_url': 'https://vimeo.com/samfavret', + }, + 'add_ie': ['Vimeo'], + 'expected_warnings': ['Failed to parse XML: not well-formed'], + }] + _WEBPAGE_TESTS = [{ + # https://www.zapiks.fr/ep3s5-bon-appetit-baqueira-m-1.html + # https://www.zapiks.fr/index.php?action=playerIframe&media_id=118046 + 'url': 'https://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html', + 'md5': '196fe42901639d868956b1dcaa48de15', + 'info_dict': { + 'id': '118046', + 'ext': 'mp4', + 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !', + 'description': 'md5:b45295c3897c4c01d7c04e8484c26aaf', + 'display_id': 'ep3s5-bon-appetit-baqueira-m-1', + 'duration': 642, + 'tags': 'count:8', + 'thumbnail': r're:https?://zpks\.com/.+', + 'timestamp': 1424370543, + 'upload_date': '20150219', + 'uploader': 'BonAppetit', + 'uploader_id': 'bonappetit', + 'view_count': int, }, }] + _UPLOADER_ID_RE = re.compile(r'/pro(?:fil)?/(?P[^/?#]+)/?') def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id - + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) + if embed_url := traverse_obj(webpage, ( + {find_element(cls='embed-container')}, {find_element(tag='iframe', html=True)}, + {extract_attributes}, 'src', {self._proto_relative_url}, {url_or_none}, + )): + if not self.suitable(embed_url): + return self.url_result(embed_url) - if not video_id: - video_id = self._search_regex( - r'data-media-id="(\d+)"', webpage, 'video id') - - playlist = self._download_xml( - f'http://www.zapiks.fr/view/index.php?action=playlist&media_id={video_id}&lang=en', - display_id) - - NS_MAP = { - 'jwplayer': 'http://rss.jwpcdn.com/', - } - - def ns(path): - return xpath_with_ns(path, NS_MAP) - - item = playlist.find('./channel/item') - - title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage) - description = self._og_search_description(webpage, default=None) - thumbnail = xpath_text( - item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None) - duration = parse_duration(self._html_search_meta( - 'duration', webpage, 'duration', default=None)) - timestamp = parse_iso8601(self._html_search_meta( - 'uploadDate', webpage, 'upload date', default=None), ' ') - - view_count = int_or_none(self._search_regex( - r'UserPlays:(\d+)', webpage, 'view count', default=None)) - comment_count = int_or_none(self._search_regex( - r'UserComments:(\d+)', webpage, 'comment count', default=None)) + video_responsive = traverse_obj(webpage, ( + {find_element(cls='video-responsive', html=True)}, {extract_attributes}, {dict})) + data_media_url = traverse_obj(video_responsive, ('data-media-url', {url_or_none})) + if data_media_url and urllib.parse.urlparse(url).path == '/index.php': + return self.url_result(data_media_url, ZapiksIE) + data_playlist = traverse_obj(video_responsive, ('data-playlist', {json.loads}, ..., any)) formats = [] - for source in item.findall(ns('./jwplayer:source')): - format_id = source.attrib['label'] - f = { - 'url': source.attrib['file'], + for source in traverse_obj(data_playlist, ( + 'sources', lambda _, v: url_or_none(v['file']), + )): + format_id = traverse_obj(source, ('label', {str_or_none})) + formats.append({ 'format_id': format_id, - } - m = re.search(r'^(?P\d+)[pP]', format_id) - if m: - f['height'] = int(m.group('height')) - formats.append(f) + 'url': source['file'], + **parse_resolution(format_id), + }) return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, - 'view_count': view_count, - 'comment_count': comment_count, + 'display_id': display_id, + 'duration': parse_duration(self._html_search_meta('duration', webpage, default=None)), 'formats': formats, + 'timestamp': unified_timestamp(self._html_search_meta('uploadDate', webpage, default=None)), + **traverse_obj(webpage, { + 'description': ({find_element(cls='description-text')}, {clean_html}, filter), + 'tags': ( + {find_elements(cls='bs-label', html=True)}, + ..., {extract_attributes}, 'title', {clean_html}, filter), + 'view_count': ( + {find_element(cls='video-content-view-counter')}, {clean_html}, + {lambda x: re.sub(r'(?:vues|views|\s+)', '', x)}, {int_or_none}), + }), + **traverse_obj(webpage, ({find_element(cls='video-content-user-link', html=True)}, { + 'uploader': ({clean_html}, filter), + 'uploader_id': ({extract_attributes}, 'href', {self._UPLOADER_ID_RE.fullmatch}, 'id'), + })), + **traverse_obj(data_playlist, { + 'id': ('mediaid', {str_or_none}), + 'title': ('title', {clean_html}, filter), + 'thumbnail': ('image', {url_or_none}), + }), } diff --git a/plugins/youtube_download/yt_dlp/jsinterp.py b/plugins/youtube_download/yt_dlp/jsinterp.py index d22d176..5104f63 100644 --- a/plugins/youtube_download/yt_dlp/jsinterp.py +++ b/plugins/youtube_download/yt_dlp/jsinterp.py @@ -18,6 +18,14 @@ from .utils import ( ) +def int_to_int32(n): + """Converts an integer to a signed 32-bit integer""" + n &= 0xFFFFFFFF + if n & 0x80000000: + return n - 0x100000000 + return n + + def _js_bit_op(op): def zeroise(x): if x in (None, JS_Undefined): @@ -28,7 +36,7 @@ def _js_bit_op(op): return int(float(x)) def wrapped(a, b): - return op(zeroise(a), zeroise(b)) & 0xffffffff + return int_to_int32(op(int_to_int32(zeroise(a)), int_to_int32(zeroise(b)))) return wrapped @@ -368,6 +376,10 @@ class JSInterpreter: if not _OPERATORS.get(op): return right_val + # TODO: This is only correct for str+str and str+number; fix for str+array, str+object, etc + if op == '+' and (isinstance(left_val, str) or isinstance(right_val, str)): + return f'{left_val}{right_val}' + try: return _OPERATORS[op](left_val, right_val) except Exception as e: @@ -377,7 +389,7 @@ class JSInterpreter: if idx == 'length': return len(obj) try: - return obj[int(idx)] if isinstance(obj, list) else obj[idx] + return obj[int(idx)] if isinstance(obj, list) else obj[str(idx)] except Exception as e: if allow_undefined: return JS_Undefined diff --git a/plugins/youtube_download/yt_dlp/networking/_curlcffi.py b/plugins/youtube_download/yt_dlp/networking/_curlcffi.py index e6baf48..3e43024 100644 --- a/plugins/youtube_download/yt_dlp/networking/_curlcffi.py +++ b/plugins/youtube_download/yt_dlp/networking/_curlcffi.py @@ -25,7 +25,7 @@ from .exceptions import ( ) from .impersonate import ImpersonateRequestHandler, ImpersonateTarget from ..dependencies import curl_cffi, certifi -from ..utils import int_or_none +from ..utils import int_or_none, version_tuple if curl_cffi is None: raise ImportError('curl_cffi is not installed') @@ -33,9 +33,9 @@ if curl_cffi is None: curl_cffi_version = tuple(map(int, re.split(r'[^\d]+', curl_cffi.__version__)[:3])) -if curl_cffi_version != (0, 5, 10) and not (0, 10) <= curl_cffi_version < (0, 14): +if curl_cffi_version != (0, 5, 10) and not (0, 10) <= curl_cffi_version < (0, 16): curl_cffi._yt_dlp__version = f'{curl_cffi.__version__} (unsupported)' - raise ImportError('Only curl_cffi versions 0.5.10, 0.10.x, 0.11.x, 0.12.x, 0.13.x are supported') + raise ImportError('Only curl_cffi versions 0.5.10 and 0.10.x through 0.15.x are supported') import curl_cffi.requests from curl_cffi.const import CurlECode, CurlOpt @@ -123,31 +123,31 @@ BROWSER_TARGETS: dict[tuple[int, ...], dict[str, ImpersonateTarget]] = { 'chrome110': ImpersonateTarget('chrome', '110', 'windows', '10'), 'edge99': ImpersonateTarget('edge', '99', 'windows', '10'), 'edge101': ImpersonateTarget('edge', '101', 'windows', '10'), + }, + (0, 6): { + 'chrome116': ImpersonateTarget('chrome', '116', 'windows', '10'), + 'chrome119': ImpersonateTarget('chrome', '119', 'macos', '14'), + 'chrome120': ImpersonateTarget('chrome', '120', 'macos', '14'), + 'safari170': ImpersonateTarget('safari', '17.0', 'macos', '14'), + 'safari172_ios': ImpersonateTarget('safari', '17.2', 'ios', '17.2'), + # safari153 and safari155 were available in 0.5.10, but fingerprints were wrong until 0.6.0 + # Ref: https://github.com/lwthiker/curl-impersonate/issues/215 'safari153': ImpersonateTarget('safari', '15.3', 'macos', '11'), 'safari155': ImpersonateTarget('safari', '15.5', 'macos', '12'), }, (0, 7): { - 'chrome116': ImpersonateTarget('chrome', '116', 'windows', '10'), - 'chrome119': ImpersonateTarget('chrome', '119', 'macos', '14'), - 'chrome120': ImpersonateTarget('chrome', '120', 'macos', '14'), 'chrome123': ImpersonateTarget('chrome', '123', 'macos', '14'), 'chrome124': ImpersonateTarget('chrome', '124', 'macos', '14'), - 'safari170': ImpersonateTarget('safari', '17.0', 'macos', '14'), - 'safari172_ios': ImpersonateTarget('safari', '17.2', 'ios', '17.2'), + }, + (0, 8): { + 'safari180': ImpersonateTarget('safari', '18.0', 'macos', '15'), + 'safari180_ios': ImpersonateTarget('safari', '18.0', 'ios', '18.0'), }, (0, 9): { - 'safari153': ImpersonateTarget('safari', '15.3', 'macos', '14'), - 'safari155': ImpersonateTarget('safari', '15.5', 'macos', '14'), - 'chrome119': ImpersonateTarget('chrome', '119', 'macos', '14'), - 'chrome120': ImpersonateTarget('chrome', '120', 'macos', '14'), - 'chrome123': ImpersonateTarget('chrome', '123', 'macos', '14'), - 'chrome124': ImpersonateTarget('chrome', '124', 'macos', '14'), 'chrome131': ImpersonateTarget('chrome', '131', 'macos', '14'), 'chrome131_android': ImpersonateTarget('chrome', '131', 'android', '14'), 'chrome133a': ImpersonateTarget('chrome', '133', 'macos', '15'), 'firefox133': ImpersonateTarget('firefox', '133', 'macos', '14'), - 'safari180': ImpersonateTarget('safari', '18.0', 'macos', '15'), - 'safari180_ios': ImpersonateTarget('safari', '18.0', 'ios', '18.0'), }, (0, 10): { 'firefox135': ImpersonateTarget('firefox', '135', 'macos', '14'), @@ -162,6 +162,18 @@ BROWSER_TARGETS: dict[tuple[int, ...], dict[str, ImpersonateTarget]] = { 'safari260': ImpersonateTarget('safari', '26.0', 'macos', '26'), 'safari260_ios': ImpersonateTarget('safari', '26.0', 'ios', '26.0'), }, + (0, 14): { + 'chrome142': ImpersonateTarget('chrome', '142', 'macos', '26'), + 'safari2601': ImpersonateTarget('safari', '26.0.1', 'macos', '26'), + }, + (0, 15): { + 'chrome145': ImpersonateTarget('chrome', '145', 'macos', '26'), + 'chrome146': ImpersonateTarget('chrome', '146', 'macos', '26'), + # firefox144 was added in 0.14.0, but its UA was wrong until 0.15.0 + # Ref: https://github.com/lexiforest/curl-impersonate/issues/234 + 'firefox144': ImpersonateTarget('firefox', '144', 'macos', '26'), + 'firefox147': ImpersonateTarget('firefox', '147', 'macos', '26'), + }, } # Needed for curl_cffi < 0.11 @@ -175,6 +187,13 @@ _TARGETS_COMPAT_LOOKUP = { 'safari180_ios': 'safari18_0_ios', } +# These targets are known to be insufficient, unreliable or blocked +# See: https://github.com/yt-dlp/yt-dlp/issues/16012 +_DEPRIORITIZED_TARGETS = { + ImpersonateTarget('chrome', '133', 'macos', '15'), # chrome133a + ImpersonateTarget('chrome', '136', 'macos', '15'), # chrome136 +} + @register_rh class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin): @@ -192,12 +211,14 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin): for version, targets in BROWSER_TARGETS.items() if curl_cffi_version >= version ), key=lambda x: ( + # deprioritize unreliable targets so they are not selected by default + x[1] not in _DEPRIORITIZED_TARGETS, # deprioritize mobile targets since they give very different behavior x[1].os not in ('ios', 'android'), # prioritize tor < edge < firefox < safari < chrome ('tor', 'edge', 'firefox', 'safari', 'chrome').index(x[1].client), # prioritize newest version - float(x[1].version) if x[1].version else 0, + version_tuple(x[1].version or '0'), # group by os name x[1].os, ), reverse=True)).items() diff --git a/plugins/youtube_download/yt_dlp/options.py b/plugins/youtube_download/yt_dlp/options.py index 14b582f..ab25540 100644 --- a/plugins/youtube_download/yt_dlp/options.py +++ b/plugins/youtube_download/yt_dlp/options.py @@ -511,7 +511,7 @@ def create_parser(): general.add_option( '--live-from-start', action='store_true', dest='live_from_start', - help='Download livestreams from the start. Currently experimental and only supported for YouTube and Twitch') + help='Download livestreams from the start. Currently experimental and only supported for YouTube, Twitch, and TVer') general.add_option( '--no-live-from-start', action='store_false', dest='live_from_start', @@ -574,7 +574,8 @@ def create_parser(): '2021': ['2022', 'no-certifi', 'filename-sanitization'], '2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'], '2023': ['2024', 'prefer-vp9-sort'], - '2024': ['mtime-by-default'], + '2024': ['2025', 'mtime-by-default'], + '2025': [], }, }, help=( 'Options that can help keep compatibility with youtube-dl or youtube-dlc ' @@ -882,6 +883,10 @@ def create_parser(): dest='format_sort', default=[], type='str', action='callback', callback=_list_from_options_callback, callback_kwargs={'append': -1}, help='Sort the formats by the fields given, see "Sorting Formats" for more details') + video_format.add_option( + '--format-sort-reset', + dest='format_sort', action='store_const', const=[], + help='Disregard previous user specified sort order and reset to the default') video_format.add_option( '--format-sort-force', '--S-force', action='store_true', dest='format_sort_force', metavar='FORMAT', default=False, @@ -1175,7 +1180,7 @@ def create_parser(): workarounds.add_option( '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure', - help='Use an unencrypted connection to retrieve information about the video (Currently supported only for YouTube)') + help='Use an unencrypted connection to retrieve information about the video') workarounds.add_option( '--user-agent', metavar='UA', dest='user_agent', diff --git a/plugins/youtube_download/yt_dlp/update.py b/plugins/youtube_download/yt_dlp/update.py index c957e0d..c22843d 100644 --- a/plugins/youtube_download/yt_dlp/update.py +++ b/plugins/youtube_download/yt_dlp/update.py @@ -316,7 +316,7 @@ class Updater: return json.loads(self.ydl.urlopen(Request(url, headers={ 'Accept': 'application/vnd.github+json', 'User-Agent': 'yt-dlp', - 'X-GitHub-Api-Version': '2022-11-28', + 'X-GitHub-Api-Version': '2026-03-10', })).read().decode()) def _get_version_info(self, tag: str) -> tuple[str | None, str | None]: diff --git a/plugins/youtube_download/yt_dlp/utils/_jsruntime.py b/plugins/youtube_download/yt_dlp/utils/_jsruntime.py index 4ea230d..b77bb9f 100644 --- a/plugins/youtube_download/yt_dlp/utils/_jsruntime.py +++ b/plugins/youtube_download/yt_dlp/utils/_jsruntime.py @@ -5,6 +5,7 @@ import dataclasses import functools import os.path import sys +import sysconfig from ._utils import _get_exe_version_output, detect_exe_version, version_tuple @@ -13,6 +14,13 @@ _FALLBACK_PATHEXT = ('.COM', '.EXE', '.BAT', '.CMD') def _find_exe(basename: str) -> str: + # Check in Python "scripts" path, e.g. for pipx-installed binaries + binary = os.path.join( + sysconfig.get_path('scripts'), + basename + sysconfig.get_config_var('EXE')) + if os.access(binary, os.F_OK | os.X_OK) and not os.path.isdir(binary): + return binary + if os.name != 'nt': return basename @@ -33,12 +41,12 @@ def _find_exe(basename: str) -> str: else: exts = tuple(ext for ext in pathext.split(os.pathsep) if ext) - visited = [] + visited = set() for path in map(os.path.realpath, paths): normed = os.path.normcase(path) if normed in visited: continue - visited.append(normed) + visited.add(normed) for ext in exts: binary = os.path.join(path, f'{basename}{ext}') @@ -79,7 +87,7 @@ class JsRuntime(abc.ABC): class DenoJsRuntime(JsRuntime): - MIN_SUPPORTED_VERSION = (2, 0, 0) + MIN_SUPPORTED_VERSION = (2, 3, 0) def _info(self): path = _determine_runtime_path(self._path, 'deno') @@ -94,7 +102,7 @@ class DenoJsRuntime(JsRuntime): class BunJsRuntime(JsRuntime): - MIN_SUPPORTED_VERSION = (1, 0, 31) + MIN_SUPPORTED_VERSION = (1, 2, 11) def _info(self): path = _determine_runtime_path(self._path, 'bun') @@ -109,7 +117,7 @@ class BunJsRuntime(JsRuntime): class NodeJsRuntime(JsRuntime): - MIN_SUPPORTED_VERSION = (20, 0, 0) + MIN_SUPPORTED_VERSION = (22, 0, 0) def _info(self): path = _determine_runtime_path(self._path, 'node') diff --git a/plugins/youtube_download/yt_dlp/utils/_utils.py b/plugins/youtube_download/yt_dlp/utils/_utils.py index 695be84..74bb6dc 100644 --- a/plugins/youtube_download/yt_dlp/utils/_utils.py +++ b/plugins/youtube_download/yt_dlp/utils/_utils.py @@ -75,6 +75,9 @@ MONTH_NAMES = { 'fr': [ 'janvier', 'février', 'mars', 'avril', 'mai', 'juin', 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'], + 'is': [ + 'janúar', 'febrúar', 'mars', 'apríl', 'maí', 'júní', + 'júlí', 'ágúst', 'september', 'október', 'nóvember', 'desember'], # these follow the genitive grammatical case (dopełniacz) # some websites might be using nominative, which will require another month list # https://en.wikibooks.org/wiki/Polish/Noun_cases diff --git a/plugins/youtube_download/yt_dlp/utils/jslib/devalue.py b/plugins/youtube_download/yt_dlp/utils/jslib/devalue.py index ac5bd68..f3a6e18 100644 --- a/plugins/youtube_download/yt_dlp/utils/jslib/devalue.py +++ b/plugins/youtube_download/yt_dlp/utils/jslib/devalue.py @@ -62,10 +62,10 @@ def parse_iter(parsed: typing.Any, /, *, revivers: dict[str, collections.abc.Cal if isinstance(source, tuple): name, source, reviver = source try: - resolved[source] = target[index] = reviver(target[index]) + target[index] = reviver(target[index]) except Exception as error: yield TypeError(f'failed to parse {source} as {name!r}: {error}') - resolved[source] = target[index] = None + target[index] = None continue if source in resolved: diff --git a/plugins/youtube_download/yt_dlp/utils/networking.py b/plugins/youtube_download/yt_dlp/utils/networking.py index 0a48543..58015c0 100644 --- a/plugins/youtube_download/yt_dlp/utils/networking.py +++ b/plugins/youtube_download/yt_dlp/utils/networking.py @@ -17,7 +17,7 @@ from .traversal import traverse_obj def random_user_agent(): USER_AGENT_TMPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{} Safari/537.36' # Target versions released within the last ~6 months - CHROME_MAJOR_VERSION_RANGE = (137, 143) + CHROME_MAJOR_VERSION_RANGE = (142, 148) return USER_AGENT_TMPL.format(f'{random.randint(*CHROME_MAJOR_VERSION_RANGE)}.0.0.0') diff --git a/plugins/youtube_download/yt_dlp/version.py b/plugins/youtube_download/yt_dlp/version.py index 2d4d809..6d866f4 100644 --- a/plugins/youtube_download/yt_dlp/version.py +++ b/plugins/youtube_download/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2025.12.08' +__version__ = '2026.03.17' -RELEASE_GIT_HEAD = '7a52ff29d86efc8f3adeba977b2009ce40b8e52e' +RELEASE_GIT_HEAD = '04d6974f502bbdfaed72c624344f262e30ad9708' VARIANT = None @@ -12,4 +12,4 @@ CHANNEL = 'stable' ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2025.12.08' +_pkg_version = '2026.03.17' diff --git a/src/core/widgets/icon_grid_widget.py b/src/core/widgets/icon_grid_widget.py index 525ade8..fcd4f13 100644 --- a/src/core/widgets/icon_grid_widget.py +++ b/src/core/widgets/icon_grid_widget.py @@ -59,7 +59,7 @@ class IconGridWidget(Gtk.IconView): ): self._handler_ids = [ - self.connect("button_release_event", grid_icon_single_click), + self.connect("button-release-event", grid_icon_single_click), self.connect("item-activated", grid_icon_double_click), self.connect("selection-changed", grid_set_selected_items), self.connect("drag-data-get", grid_on_drag_set), diff --git a/user_config/bin/solarfm b/user_config/bin/solarfm index 9921548..51ea1da 100755 --- a/user_config/bin/solarfm +++ b/user_config/bin/solarfm @@ -6,6 +6,8 @@ # set -o errexit ## To exit on error # set -o errunset ## To exit if a variable is referenced but not set +export PYTHONDONTWRITEBYTECODE=1 + function main() { call_path=`pwd`