Upgraded youtube_download plugin
This commit is contained in:
@@ -16,7 +16,7 @@ from ._redirect import (
|
||||
YoutubeYtBeIE,
|
||||
YoutubeYtUserIE,
|
||||
)
|
||||
from ._search import YoutubeMusicSearchURLIE, YoutubeSearchDateIE, YoutubeSearchIE, YoutubeSearchURLIE
|
||||
from ._search import YoutubeMusicSearchURLIE, YoutubeSearchIE, YoutubeSearchURLIE
|
||||
from ._tab import YoutubePlaylistIE, YoutubeTabBaseInfoExtractor, YoutubeTabIE
|
||||
from ._video import YoutubeIE
|
||||
|
||||
@@ -39,7 +39,6 @@ for _cls in [
|
||||
YoutubeYtBeIE,
|
||||
YoutubeYtUserIE,
|
||||
YoutubeMusicSearchURLIE,
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeSearchURLIE,
|
||||
YoutubePlaylistIE,
|
||||
|
||||
@@ -99,12 +99,11 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'WEB',
|
||||
'clientVersion': '2.20250925.01.00',
|
||||
'clientVersion': '2.20260114.08.00',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
|
||||
'SUPPORTS_COOKIES': True,
|
||||
'SUPPORTS_AD_PLAYBACK_CONTEXT': True,
|
||||
**WEB_PO_TOKEN_POLICIES,
|
||||
},
|
||||
# Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
|
||||
@@ -112,20 +111,19 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'WEB',
|
||||
'clientVersion': '2.20250925.01.00',
|
||||
'clientVersion': '2.20260114.08.00',
|
||||
'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
|
||||
'SUPPORTS_COOKIES': True,
|
||||
'SUPPORTS_AD_PLAYBACK_CONTEXT': True,
|
||||
**WEB_PO_TOKEN_POLICIES,
|
||||
},
|
||||
'web_embedded': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'WEB_EMBEDDED_PLAYER',
|
||||
'clientVersion': '1.20250923.21.00',
|
||||
'clientVersion': '1.20260115.01.00',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
|
||||
@@ -136,7 +134,7 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'WEB_REMIX',
|
||||
'clientVersion': '1.20250922.03.00',
|
||||
'clientVersion': '1.20260114.03.00',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
|
||||
@@ -166,7 +164,7 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'WEB_CREATOR',
|
||||
'clientVersion': '1.20250922.03.00',
|
||||
'clientVersion': '1.20260114.05.00',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
|
||||
@@ -195,9 +193,9 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'ANDROID',
|
||||
'clientVersion': '20.10.38',
|
||||
'clientVersion': '21.02.35',
|
||||
'androidSdkVersion': 30,
|
||||
'userAgent': 'com.google.android.youtube/20.10.38 (Linux; U; Android 11) gzip',
|
||||
'userAgent': 'com.google.android.youtube/21.02.35 (Linux; U; Android 11) gzip',
|
||||
'osName': 'Android',
|
||||
'osVersion': '11',
|
||||
},
|
||||
@@ -223,21 +221,8 @@ INNERTUBE_CLIENTS = {
|
||||
},
|
||||
'PLAYER_PO_TOKEN_POLICY': PlayerPoTokenPolicy(required=False, recommended=True),
|
||||
},
|
||||
# Doesn't require a PoToken for some reason
|
||||
'android_sdkless': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'ANDROID',
|
||||
'clientVersion': '20.10.38',
|
||||
'userAgent': 'com.google.android.youtube/20.10.38 (Linux; U; Android 11) gzip',
|
||||
'osName': 'Android',
|
||||
'osVersion': '11',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
|
||||
'REQUIRE_JS_PLAYER': False,
|
||||
},
|
||||
# YouTube Kids videos aren't returned on this client for some reason
|
||||
# "Made for kids" videos aren't available with this client
|
||||
# Using a clientVersion>1.65 may return SABR streams only
|
||||
'android_vr': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
@@ -260,10 +245,10 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'IOS',
|
||||
'clientVersion': '20.10.4',
|
||||
'clientVersion': '21.02.3',
|
||||
'deviceMake': 'Apple',
|
||||
'deviceModel': 'iPhone16,2',
|
||||
'userAgent': 'com.google.ios.youtube/20.10.4 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)',
|
||||
'userAgent': 'com.google.ios.youtube/21.02.3 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)',
|
||||
'osName': 'iPhone',
|
||||
'osVersion': '18.3.2.22D82',
|
||||
},
|
||||
@@ -291,7 +276,7 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'MWEB',
|
||||
'clientVersion': '2.20250925.01.00',
|
||||
'clientVersion': '2.20260115.01.00',
|
||||
# mweb previously did not require PO Token with this UA
|
||||
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
|
||||
},
|
||||
@@ -322,24 +307,24 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'TVHTML5',
|
||||
'clientVersion': '7.20250923.13.00',
|
||||
'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
|
||||
'clientVersion': '7.20260114.12.00',
|
||||
# See: https://github.com/youtube/cobalt/blob/main/cobalt/browser/user_agent/user_agent_platform_info.cc#L506
|
||||
'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/25.lts.30.1034943-gold (unlike Gecko), Unknown_TV_Unknown_0/Unknown (Unknown, Unknown)',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
|
||||
'SUPPORTS_COOKIES': True,
|
||||
# See: https://github.com/youtube/cobalt/blob/main/cobalt/browser/user_agent/user_agent_platform_info.cc#L506
|
||||
'AUTHENTICATED_USER_AGENT': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/25.lts.30.1034943-gold (unlike Gecko), Unknown_TV_Unknown_0/Unknown (Unknown, Unknown)',
|
||||
},
|
||||
'tv_downgraded': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'TVHTML5',
|
||||
'clientVersion': '5.20251105',
|
||||
'clientVersion': '5.20260114',
|
||||
'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
|
||||
'REQUIRE_AUTH': True,
|
||||
'SUPPORTS_COOKIES': True,
|
||||
},
|
||||
'tv_simply': {
|
||||
@@ -365,20 +350,6 @@ INNERTUBE_CLIENTS = {
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 75,
|
||||
},
|
||||
# This client now requires sign-in for every video
|
||||
# It was previously an age-gate workaround for videos that were `playable_in_embed`
|
||||
# It may still be useful if signed into an EU account that is not age-verified
|
||||
'tv_embedded': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
|
||||
'clientVersion': '2.0',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
|
||||
'REQUIRE_AUTH': True,
|
||||
'SUPPORTS_COOKIES': True,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -397,7 +368,7 @@ def short_client_name(client_name):
|
||||
|
||||
def _fix_embedded_ytcfg(ytcfg):
|
||||
ytcfg['INNERTUBE_CONTEXT'].setdefault('thirdParty', {}).update({
|
||||
'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
|
||||
'embedUrl': 'https://www.reddit.com/', # Can be any valid non-YouTube URL
|
||||
})
|
||||
|
||||
|
||||
@@ -418,7 +389,6 @@ def build_innertube_clients():
|
||||
ytcfg.setdefault('SUPPORTS_COOKIES', False)
|
||||
ytcfg.setdefault('SUPPORTS_AD_PLAYBACK_CONTEXT', False)
|
||||
ytcfg.setdefault('PLAYER_PARAMS', None)
|
||||
ytcfg.setdefault('AUTHENTICATED_USER_AGENT', None)
|
||||
ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
|
||||
|
||||
_, base_client, variant = _split_innertube_client(client)
|
||||
@@ -703,14 +673,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
|
||||
|
||||
def _get_default_ytcfg(self, client='web'):
|
||||
ytcfg = copy.deepcopy(INNERTUBE_CLIENTS[client])
|
||||
|
||||
# Currently, only the tv client needs to use an alternative user-agent when logged-in
|
||||
if ytcfg.get('AUTHENTICATED_USER_AGENT') and self.is_authenticated:
|
||||
client_context = ytcfg.setdefault('INNERTUBE_CONTEXT', {}).setdefault('client', {})
|
||||
client_context['userAgent'] = ytcfg['AUTHENTICATED_USER_AGENT']
|
||||
|
||||
return ytcfg
|
||||
return copy.deepcopy(INNERTUBE_CLIENTS[client])
|
||||
|
||||
def _get_innertube_host(self, client='web'):
|
||||
return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
|
||||
@@ -994,16 +957,25 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
url = {
|
||||
'mweb': 'https://m.youtube.com',
|
||||
'web': 'https://www.youtube.com',
|
||||
'web_safari': 'https://www.youtube.com',
|
||||
'web_music': 'https://music.youtube.com',
|
||||
'web_creator': 'https://studio.youtube.com',
|
||||
'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',
|
||||
'tv': 'https://www.youtube.com/tv',
|
||||
}.get(client)
|
||||
if not url:
|
||||
return {}
|
||||
|
||||
default_ytcfg = self._get_default_ytcfg(client)
|
||||
|
||||
if default_ytcfg['REQUIRE_AUTH'] and not self.is_authenticated:
|
||||
return {}
|
||||
|
||||
webpage = self._download_webpage_with_retries(
|
||||
url, video_id, note=f'Downloading {client.replace("_", " ").strip()} client config',
|
||||
headers=traverse_obj(self._get_default_ytcfg(client), {
|
||||
headers=traverse_obj(default_ytcfg, {
|
||||
'User-Agent': ('INNERTUBE_CONTEXT', 'client', 'userAgent', {str}),
|
||||
'Referer': ('INNERTUBE_CONTEXT', 'thirdParty', 'embedUrl', {str}),
|
||||
}))
|
||||
|
||||
ytcfg = self.extract_ytcfg(video_id, webpage) or {}
|
||||
|
||||
@@ -28,21 +28,6 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
|
||||
}]
|
||||
|
||||
|
||||
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
|
||||
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
|
||||
_SEARCH_KEY = 'ytsearchdate'
|
||||
IE_DESC = 'YouTube search, newest videos first'
|
||||
_SEARCH_PARAMS = 'CAISAhAB8AEB' # Videos only, sorted by date
|
||||
_TESTS = [{
|
||||
'url': 'ytsearchdate5:youtube-dl test video',
|
||||
'playlist_count': 5,
|
||||
'info_dict': {
|
||||
'id': 'youtube-dl test video',
|
||||
'title': 'youtube-dl test video',
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube search URLs with sorting and filter support'
|
||||
IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
|
||||
|
||||
@@ -81,7 +81,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))
|
||||
|
||||
title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header_renderer, 'reelTitleText')
|
||||
description = self._get_text(renderer, 'descriptionSnippet')
|
||||
description = self._get_text(renderer, 'descriptionSnippet', ('detailedMetadataSnippets', ..., 'snippetText'))
|
||||
|
||||
duration = int_or_none(renderer.get('lengthSeconds'))
|
||||
if duration is None:
|
||||
@@ -2148,7 +2148,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
|
||||
elif item_id[:2] == 'MP': # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
|
||||
mdata = self._extract_tab_endpoint(
|
||||
f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
|
||||
f'https://music.youtube.com/browse/{item_id}', item_id, default_client='web_music')
|
||||
murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
|
||||
get_all=False, expected_type=str)
|
||||
if not murl:
|
||||
|
||||
@@ -10,7 +10,6 @@ import re
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
import urllib.parse
|
||||
|
||||
from ._base import (
|
||||
@@ -63,6 +62,7 @@ from ...utils import (
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
@@ -139,17 +139,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
]
|
||||
_RETURN_TYPE = 'video' # XXX: How to handle multifeed?
|
||||
|
||||
_PLAYER_INFO_RE = (
|
||||
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/(?:tv-)?player',
|
||||
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
|
||||
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
|
||||
)
|
||||
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt')
|
||||
_DEFAULT_CLIENTS = ('tv', 'android_sdkless', 'web')
|
||||
_DEFAULT_JSLESS_CLIENTS = ('android_sdkless', 'web_safari', 'web')
|
||||
_DEFAULT_AUTHED_CLIENTS = ('tv_downgraded', 'web_safari', 'web')
|
||||
_DEFAULT_CLIENTS = ('android_vr', 'web_safari')
|
||||
_DEFAULT_JSLESS_CLIENTS = ('android_vr',)
|
||||
_DEFAULT_AUTHED_CLIENTS = ('tv_downgraded', 'web_safari')
|
||||
# Premium does not require POT (except for subtitles)
|
||||
_DEFAULT_PREMIUM_CLIENTS = ('tv_downgraded', 'web_creator', 'web')
|
||||
_DEFAULT_PREMIUM_CLIENTS = ('tv_downgraded', 'web_creator')
|
||||
_WEBPAGE_CLIENTS = ('web', 'web_safari')
|
||||
_DEFAULT_WEBPAGE_CLIENT = 'web_safari'
|
||||
|
||||
_GEO_BYPASS = False
|
||||
|
||||
@@ -1443,7 +1440,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'extractor_args': {'youtube': {'player_client': ['tv_embedded']}},
|
||||
'format': '251-drc',
|
||||
'skip_download': True,
|
||||
},
|
||||
@@ -1690,7 +1686,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'playable_in_embed': True,
|
||||
'availability': 'public',
|
||||
'live_status': 'not_live',
|
||||
'comment_count': 15, # XXX: minimum
|
||||
'comment_count': 15, # XXX: minimum, but investigate if this changes
|
||||
'comments': 'count:15',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -1723,7 +1720,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'playable_in_embed': True,
|
||||
'availability': 'unlisted',
|
||||
'live_status': 'not_live',
|
||||
'comment_count': 9, # XXX: minimum
|
||||
'comment_count': 9, # XXX: minimum, but investigate if this changes
|
||||
'comments': 'count:9',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'getcomments': True,
|
||||
},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
@@ -1881,13 +1883,37 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'tce': 'player_ias_tce.vflset/en_US/base.js',
|
||||
'es5': 'player_es5.vflset/en_US/base.js',
|
||||
'es6': 'player_es6.vflset/en_US/base.js',
|
||||
'es6_tcc': 'player_es6_tcc.vflset/en_US/base.js',
|
||||
'es6_tce': 'player_es6_tce.vflset/en_US/base.js',
|
||||
'tv': 'tv-player-ias.vflset/tv-player-ias.js',
|
||||
'tv_es6': 'tv-player-es6.vflset/tv-player-es6.js',
|
||||
'phone': 'player-plasma-ias-phone-en_US.vflset/base.js',
|
||||
'tablet': 'player-plasma-ias-tablet-en_US.vflset/base.js',
|
||||
'house': 'house_brand_player.vflset/en_US/base.js', # Used by Google Drive
|
||||
}
|
||||
_INVERSE_PLAYER_JS_VARIANT_MAP = {v: k for k, v in _PLAYER_JS_VARIANT_MAP.items()}
|
||||
|
||||
@functools.cached_property
|
||||
def _player_js_version(self):
|
||||
return self._configuration_arg('player_js_version', [None])[0] or self._DEFAULT_PLAYER_JS_VERSION
|
||||
|
||||
@functools.cached_property
|
||||
def _webpage_client(self):
|
||||
webpage_client = self._configuration_arg('webpage_client', [self._DEFAULT_WEBPAGE_CLIENT])[0]
|
||||
if webpage_client not in self._WEBPAGE_CLIENTS:
|
||||
self.report_warning(
|
||||
f'Invalid webpage_client "{webpage_client}" requested; '
|
||||
f'falling back to {self._DEFAULT_WEBPAGE_CLIENT}', only_once=True)
|
||||
webpage_client = self._DEFAULT_WEBPAGE_CLIENT
|
||||
return webpage_client
|
||||
|
||||
@functools.cached_property
|
||||
def _skipped_webpage_data(self):
|
||||
skipped = set(self._configuration_arg('webpage_skip'))
|
||||
# If forcing a player version, the webpage player response must be skipped
|
||||
if self._player_js_version != 'actual':
|
||||
skipped.add('player_response')
|
||||
return skipped
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
from yt_dlp.utils import parse_qs
|
||||
@@ -1913,13 +1939,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
start_time = time.time()
|
||||
formats = [f for f in formats if f.get('is_from_start')]
|
||||
|
||||
def refetch_manifest(format_id, delay):
|
||||
def refetch_manifest(itag, client_name, delay):
|
||||
nonlocal formats, start_time, is_live
|
||||
if time.time() <= start_time + delay:
|
||||
return
|
||||
|
||||
_, _, _, _, prs, player_url = self._initial_extract(
|
||||
url, smuggled_data, webpage_url, 'web', video_id)
|
||||
url, smuggled_data, webpage_url, self._webpage_client, video_id)
|
||||
video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
|
||||
microformats = traverse_obj(
|
||||
prs, (..., 'microformat', 'playerMicroformatRenderer'),
|
||||
@@ -1928,20 +1954,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
is_live = live_status == 'is_live'
|
||||
start_time = time.time()
|
||||
|
||||
def mpd_feed(format_id, delay):
|
||||
def mpd_feed(itag, client_name, delay):
|
||||
"""
|
||||
@returns (manifest_url, manifest_stream_number, is_live) or None
|
||||
"""
|
||||
for retry in self.RetryManager(fatal=False):
|
||||
with lock:
|
||||
refetch_manifest(format_id, delay)
|
||||
refetch_manifest(itag, client_name, delay)
|
||||
|
||||
f = next((f for f in formats if f['format_id'] == format_id), None)
|
||||
f = next((f for f in formats if f.get('_itag') == itag and f.get('_client') == client_name), None)
|
||||
if not f:
|
||||
if not is_live:
|
||||
retry.error = f'{video_id}: Video is no longer live'
|
||||
else:
|
||||
retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
|
||||
retry.error = f'Cannot find refreshed manifest for format {itag}{bug_reports_message()}'
|
||||
continue
|
||||
|
||||
# Formats from ended premieres will be missing a manifest_url
|
||||
@@ -1954,7 +1980,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
for f in formats:
|
||||
f['is_live'] = is_live
|
||||
gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
|
||||
gen = functools.partial(self._live_dash_fragments, video_id, f['_itag'], f['_client'],
|
||||
live_start_time, mpd_feed, not is_live and f.copy())
|
||||
if is_live:
|
||||
f['fragments'] = gen
|
||||
@@ -1963,7 +1989,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
f['fragments'] = LazyList(gen({}))
|
||||
del f['is_from_start']
|
||||
|
||||
def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
|
||||
def _live_dash_fragments(self, video_id, itag, client_name, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
|
||||
FETCH_SPAN, MAX_DURATION = 5, 432000
|
||||
|
||||
mpd_url, stream_number, is_live = None, None, True
|
||||
@@ -1987,7 +2013,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
old_mpd_url = mpd_url
|
||||
last_error = ctx.pop('last_error', None)
|
||||
expire_fast = immediate or (last_error and isinstance(last_error, HTTPError) and last_error.status == 403)
|
||||
mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
|
||||
mpd_url, stream_number, is_live = (mpd_feed(itag, client_name, 5 if expire_fast else 18000)
|
||||
or (mpd_url, stream_number, False))
|
||||
if not refresh_sequence:
|
||||
if expire_fast and not is_live:
|
||||
@@ -2013,7 +2039,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
|
||||
return True, _last_seq
|
||||
|
||||
self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
|
||||
self.write_debug(f'[{video_id}] Generating fragments for format {itag}')
|
||||
while is_live:
|
||||
fetch_time = time.time()
|
||||
if no_fragment_score > 30:
|
||||
@@ -2075,15 +2101,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
|
||||
|
||||
def _get_player_js_version(self):
|
||||
player_js_version = self._configuration_arg('player_js_version', [''])[0] or self._DEFAULT_PLAYER_JS_VERSION
|
||||
if player_js_version == 'actual':
|
||||
if self._player_js_version == 'actual':
|
||||
return None, None
|
||||
if not re.fullmatch(r'[0-9]{5,}@[0-9a-f]{8,}', player_js_version):
|
||||
if not re.fullmatch(r'[0-9]{5,}@[0-9a-f]{8,}', self._player_js_version):
|
||||
self.report_warning(
|
||||
f'Invalid player JS version "{player_js_version}" specified. '
|
||||
f'Invalid player JS version "{self._player_js_version}" specified. '
|
||||
f'It should be "actual" or in the format of STS@HASH', only_once=True)
|
||||
return None, None
|
||||
return player_js_version.split('@')
|
||||
return self._player_js_version.split('@')
|
||||
|
||||
def _construct_player_url(self, *, player_id=None, player_url=None):
|
||||
assert player_id or player_url, '_construct_player_url must take one of player_id or player_url'
|
||||
@@ -2174,13 +2199,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
@classmethod
|
||||
def _extract_player_info(cls, player_url):
|
||||
for player_re in cls._PLAYER_INFO_RE:
|
||||
id_m = re.search(player_re, player_url)
|
||||
if id_m:
|
||||
break
|
||||
else:
|
||||
raise ExtractorError(f'Cannot identify player {player_url!r}')
|
||||
return id_m.group('id')
|
||||
if m := re.search(r'/s/player/(?P<id>[a-fA-F0-9]{8,})/', player_url):
|
||||
return m.group('id')
|
||||
raise ExtractorError(f'Cannot identify player {player_url!r}')
|
||||
|
||||
def _load_player(self, video_id, player_url, fatal=True):
|
||||
player_js_key = self._player_js_cache_key(player_url)
|
||||
@@ -2193,64 +2214,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
self._code_cache[player_js_key] = code
|
||||
return self._code_cache.get(player_js_key)
|
||||
|
||||
def _sig_spec_cache_id(self, player_url, spec_id):
|
||||
return join_nonempty(self._player_js_cache_key(player_url), str(spec_id))
|
||||
def _load_player_data_from_cache(self, name, player_url, *cache_keys, use_disk_cache=False):
|
||||
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url), *map(str_or_none, cache_keys))
|
||||
if cache_id in self._player_cache:
|
||||
return self._player_cache[cache_id]
|
||||
|
||||
def _load_sig_spec_from_cache(self, spec_cache_id):
|
||||
# This is almost identical to _load_player_data_from_cache
|
||||
# I hate it
|
||||
if spec_cache_id in self._player_cache:
|
||||
return self._player_cache[spec_cache_id]
|
||||
spec = self.cache.load('youtube-sigfuncs', spec_cache_id, min_ver='2025.07.21')
|
||||
if spec:
|
||||
self._player_cache[spec_cache_id] = spec
|
||||
return spec
|
||||
if not use_disk_cache:
|
||||
return None
|
||||
|
||||
def _store_sig_spec_to_cache(self, spec_cache_id, spec):
|
||||
if spec_cache_id not in self._player_cache:
|
||||
self._player_cache[spec_cache_id] = spec
|
||||
self.cache.store('youtube-sigfuncs', spec_cache_id, spec)
|
||||
|
||||
def _load_player_data_from_cache(self, name, player_url):
|
||||
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
|
||||
|
||||
if data := self._player_cache.get(cache_id):
|
||||
return data
|
||||
|
||||
data = self.cache.load(*cache_id, min_ver='2025.07.21')
|
||||
data = self.cache.load(cache_id[0], join_nonempty(*cache_id[1:]), min_ver='2025.07.21')
|
||||
if data:
|
||||
self._player_cache[cache_id] = data
|
||||
|
||||
return data
|
||||
|
||||
def _cached(self, func, *cache_id):
|
||||
def inner(*args, **kwargs):
|
||||
if cache_id not in self._player_cache:
|
||||
try:
|
||||
self._player_cache[cache_id] = func(*args, **kwargs)
|
||||
except ExtractorError as e:
|
||||
self._player_cache[cache_id] = e
|
||||
except Exception as e:
|
||||
self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
|
||||
|
||||
ret = self._player_cache[cache_id]
|
||||
if isinstance(ret, Exception):
|
||||
raise ret
|
||||
return ret
|
||||
return inner
|
||||
|
||||
def _store_player_data_to_cache(self, name, player_url, data):
|
||||
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
|
||||
def _store_player_data_to_cache(self, data, name, player_url, *cache_keys, use_disk_cache=False):
|
||||
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url), *map(str_or_none, cache_keys))
|
||||
if cache_id not in self._player_cache:
|
||||
self.cache.store(*cache_id, data)
|
||||
self._player_cache[cache_id] = data
|
||||
if use_disk_cache:
|
||||
self.cache.store(cache_id[0], join_nonempty(*cache_id[1:]), data)
|
||||
|
||||
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
|
||||
"""
|
||||
Extract signatureTimestamp (sts)
|
||||
Required to tell API what sig/player version is in use.
|
||||
"""
|
||||
CACHE_ENABLED = False # TODO: enable when preprocessed player JS cache is solved/enabled
|
||||
|
||||
player_sts_override = self._get_player_js_version()[0]
|
||||
if player_sts_override:
|
||||
@@ -2267,15 +2256,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
self.report_warning(error_msg)
|
||||
return None
|
||||
|
||||
if CACHE_ENABLED and (sts := self._load_player_data_from_cache('sts', player_url)):
|
||||
# TODO: Pass `use_disk_cache=True` when preprocessed player JS cache is solved
|
||||
if sts := self._load_player_data_from_cache('sts', player_url):
|
||||
return sts
|
||||
|
||||
if code := self._load_player(video_id, player_url, fatal=fatal):
|
||||
sts = int_or_none(self._search_regex(
|
||||
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
|
||||
'JS player signature timestamp', group='sts', fatal=fatal))
|
||||
if CACHE_ENABLED and sts:
|
||||
self._store_player_data_to_cache('sts', player_url, sts)
|
||||
if sts:
|
||||
# TODO: Pass `use_disk_cache=True` when preprocessed player JS cache is solved
|
||||
self._store_player_data_to_cache(sts, 'sts', player_url)
|
||||
|
||||
return sts
|
||||
|
||||
@@ -2707,12 +2698,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return {'contentCheckOk': True, 'racyCheckOk': True}
|
||||
|
||||
@classmethod
|
||||
def _generate_player_context(cls, sts=None, use_ad_playback_context=False):
|
||||
def _generate_player_context(cls, sts=None, use_ad_playback_context=False, encrypted_context=None):
|
||||
context = {
|
||||
'html5Preference': 'HTML5_PREF_WANTS',
|
||||
}
|
||||
if sts is not None:
|
||||
context['signatureTimestamp'] = sts
|
||||
if encrypted_context:
|
||||
context['encryptedHostFlags'] = encrypted_context
|
||||
|
||||
playback_context = {
|
||||
'contentPlaybackContext': context,
|
||||
@@ -2793,7 +2786,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'serializedExperimentFlags', {urllib.parse.parse_qs}))
|
||||
if 'true' in traverse_obj(experiments, (..., 'html5_generate_content_po_token', -1)):
|
||||
self.write_debug(
|
||||
f'{video_id}: Detected experiment to bind GVS PO Token to video id.', only_once=True)
|
||||
f'{video_id}: Detected experiment to bind GVS PO Token '
|
||||
f'to video ID for {client} client', only_once=True)
|
||||
gvs_bind_to_video_id = True
|
||||
|
||||
# GVS WebPO Token is bound to visitor_data / Visitor ID when logged out.
|
||||
@@ -2956,7 +2950,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
self._configuration_arg('use_ad_playback_context', ['false'])[0] != 'false'
|
||||
and traverse_obj(INNERTUBE_CLIENTS, (client, 'SUPPORTS_AD_PLAYBACK_CONTEXT', {bool})))
|
||||
|
||||
yt_query.update(self._generate_player_context(sts, use_ad_playback_context))
|
||||
# web_embedded player requests may need to include encryptedHostFlags in its contentPlaybackContext.
|
||||
# This can be detected with the embeds_enable_encrypted_host_flags_enforcement experiment flag,
|
||||
# but there is no harm in including encryptedHostFlags with all web_embedded player requests.
|
||||
encrypted_context = None
|
||||
if _split_innertube_client(client)[2] == 'embedded':
|
||||
encrypted_context = traverse_obj(player_ytcfg, (
|
||||
'WEB_PLAYER_CONTEXT_CONFIGS', 'WEB_PLAYER_CONTEXT_CONFIG_ID_EMBEDDED_PLAYER', 'encryptedHostFlags'))
|
||||
|
||||
yt_query.update(
|
||||
self._generate_player_context(
|
||||
sts=sts,
|
||||
use_ad_playback_context=use_ad_playback_context,
|
||||
encrypted_context=encrypted_context))
|
||||
|
||||
return self._extract_response(
|
||||
item_id=video_id, ep='player', query=yt_query,
|
||||
@@ -3075,7 +3081,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
tried_iframe_fallback = True
|
||||
|
||||
pr = None
|
||||
if client == webpage_client and 'player_response' not in self._configuration_arg('webpage_skip'):
|
||||
if client == webpage_client and 'player_response' not in self._skipped_webpage_data:
|
||||
pr = initial_pr
|
||||
|
||||
visitor_data = visitor_data or self._extract_visitor_data(webpage_ytcfg, initial_pr, player_ytcfg)
|
||||
@@ -3141,6 +3147,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
else:
|
||||
prs.append(pr)
|
||||
|
||||
if (
|
||||
# Is this a "made for kids" video that can't be downloaded with android_vr?
|
||||
client == 'android_vr' and self._is_unplayable(pr)
|
||||
and webpage and 'made for kids' in webpage
|
||||
# ...and is a JS runtime available?
|
||||
and any(p.is_available() for p in self._jsc_director.providers.values())
|
||||
):
|
||||
append_client('web_embedded')
|
||||
|
||||
# web_embedded can work around age-gate and age-verification for some embeddable videos
|
||||
if self._is_agegated(pr) and variant != 'web_embedded':
|
||||
append_client(f'web_embedded.{base_client}')
|
||||
@@ -3157,9 +3172,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
self.to_screen(
|
||||
f'{video_id}: This video is age-restricted and YouTube is requiring '
|
||||
'account age-verification; some formats may be missing', only_once=True)
|
||||
# tv_embedded can work around the age-verification requirement for embeddable videos
|
||||
# web_creator may work around age-verification for all videos but requires PO token
|
||||
append_client('tv_embedded', 'web_creator')
|
||||
append_client('web_creator')
|
||||
|
||||
status = traverse_obj(pr, ('playabilityStatus', 'status', {str}))
|
||||
if status not in ('OK', 'LIVE_STREAM_OFFLINE', 'AGE_CHECK_REQUIRED', 'AGE_VERIFICATION_REQUIRED'):
|
||||
@@ -3233,7 +3247,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
|
||||
'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres',
|
||||
])
|
||||
skip_player_js = 'js' in self._configuration_arg('player_skip')
|
||||
format_types = self._configuration_arg('formats')
|
||||
skip_bad_formats = 'incomplete' not in format_types
|
||||
all_formats = 'duplicate' in format_types
|
||||
if self._configuration_arg('include_duplicate_formats'):
|
||||
all_formats = True
|
||||
@@ -3278,6 +3294,98 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return language_code, DEFAULT_LANG_VALUE
|
||||
return language_code, -1
|
||||
|
||||
def get_manifest_n_challenge(manifest_url):
|
||||
if not url_or_none(manifest_url):
|
||||
return None
|
||||
# Same pattern that the player JS uses to read/replace the n challenge value
|
||||
return self._search_regex(
|
||||
r'/n/([^/]+)/', urllib.parse.urlparse(manifest_url).path,
|
||||
'n challenge', default=None)
|
||||
|
||||
n_challenges = set()
|
||||
s_challenges = set()
|
||||
|
||||
def solve_js_challenges():
|
||||
# Solve all n/sig challenges in bulk and store the results in self._player_cache
|
||||
challenge_requests = []
|
||||
if n_challenges:
|
||||
challenge_requests.append(JsChallengeRequest(
|
||||
type=JsChallengeType.N,
|
||||
video_id=video_id,
|
||||
input=NChallengeInput(challenges=list(n_challenges), player_url=player_url)))
|
||||
if s_challenges:
|
||||
cached_sigfuncs = set()
|
||||
for spec_id in s_challenges:
|
||||
if self._load_player_data_from_cache('sigfuncs', player_url, spec_id, use_disk_cache=True):
|
||||
cached_sigfuncs.add(spec_id)
|
||||
s_challenges.difference_update(cached_sigfuncs)
|
||||
|
||||
challenge_requests.append(JsChallengeRequest(
|
||||
type=JsChallengeType.SIG,
|
||||
video_id=video_id,
|
||||
input=SigChallengeInput(
|
||||
challenges=[''.join(map(chr, range(spec_id))) for spec_id in s_challenges],
|
||||
player_url=player_url)))
|
||||
|
||||
if challenge_requests:
|
||||
for _challenge_request, challenge_response in self._jsc_director.bulk_solve(challenge_requests):
|
||||
if challenge_response.type == JsChallengeType.SIG:
|
||||
for challenge, result in challenge_response.output.results.items():
|
||||
spec_id = len(challenge)
|
||||
self._store_player_data_to_cache(
|
||||
[ord(c) for c in result], 'sigfuncs',
|
||||
player_url, spec_id, use_disk_cache=True)
|
||||
if spec_id in s_challenges:
|
||||
s_challenges.remove(spec_id)
|
||||
|
||||
elif challenge_response.type == JsChallengeType.N:
|
||||
for challenge, result in challenge_response.output.results.items():
|
||||
self._store_player_data_to_cache(result, 'n', player_url, challenge)
|
||||
if challenge in n_challenges:
|
||||
n_challenges.remove(challenge)
|
||||
|
||||
# Raise warning if any challenge requests remain
|
||||
# Depending on type of challenge request
|
||||
help_message = (
|
||||
'Ensure you have a supported JavaScript runtime and '
|
||||
'challenge solver script distribution installed. '
|
||||
'Review any warnings presented before this message. '
|
||||
f'For more details, refer to {_EJS_WIKI_URL}')
|
||||
if s_challenges:
|
||||
self.report_warning(
|
||||
f'Signature solving failed: Some formats may be missing. {help_message}',
|
||||
video_id=video_id, only_once=True)
|
||||
if n_challenges:
|
||||
self.report_warning(
|
||||
f'n challenge solving failed: Some formats may be missing. {help_message}',
|
||||
video_id=video_id, only_once=True)
|
||||
|
||||
# Clear challenge sets so that any subsequent call of this function is a no-op
|
||||
s_challenges.clear()
|
||||
n_challenges.clear()
|
||||
|
||||
# 1st pass to collect all n/sig challenges so they can later be solved at once in bulk
|
||||
for streaming_data in traverse_obj(player_responses, (..., 'streamingData', {dict})):
|
||||
# HTTPS formats
|
||||
for fmt_stream in traverse_obj(streaming_data, (('formats', 'adaptiveFormats'), ..., {dict})):
|
||||
fmt_url = fmt_stream.get('url')
|
||||
s_challenge = None
|
||||
if not fmt_url:
|
||||
sc = urllib.parse.parse_qs(fmt_stream.get('signatureCipher'))
|
||||
fmt_url = traverse_obj(sc, ('url', 0, {url_or_none}))
|
||||
s_challenge = traverse_obj(sc, ('s', 0))
|
||||
|
||||
if s_challenge:
|
||||
s_challenges.add(len(s_challenge))
|
||||
|
||||
if n_challenge := traverse_obj(fmt_url, ({parse_qs}, 'n', 0)):
|
||||
n_challenges.add(n_challenge)
|
||||
|
||||
# Manifest formats
|
||||
n_challenges.update(traverse_obj(
|
||||
streaming_data, (('hlsManifestUrl', 'dashManifestUrl'), {get_manifest_n_challenge})))
|
||||
|
||||
# Final pass to extract formats and solve n/sig challenges as needed
|
||||
for pr in player_responses:
|
||||
streaming_data = traverse_obj(pr, 'streamingData')
|
||||
if not streaming_data:
|
||||
@@ -3385,10 +3493,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
def process_https_formats():
|
||||
proto = 'https'
|
||||
https_fmts = []
|
||||
skip_player_js = 'js' in self._configuration_arg('player_skip')
|
||||
|
||||
for fmt_stream in streaming_formats:
|
||||
if fmt_stream.get('targetDurationSec'):
|
||||
# Live adaptive https formats are not supported: skip unless extractor-arg given
|
||||
if fmt_stream.get('targetDurationSec') and skip_bad_formats:
|
||||
continue
|
||||
|
||||
# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
|
||||
@@ -3422,19 +3530,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/14883
|
||||
get_language_code_and_preference(fmt_stream)
|
||||
sc = urllib.parse.parse_qs(fmt_stream.get('signatureCipher'))
|
||||
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
|
||||
encrypted_sig = try_get(sc, lambda x: x['s'][0])
|
||||
fmt_url = traverse_obj(sc, ('url', 0, {url_or_none}))
|
||||
encrypted_sig = traverse_obj(sc, ('s', 0))
|
||||
if not all((sc, fmt_url, skip_player_js or player_url, encrypted_sig)):
|
||||
msg = f'Some {client_name} client https formats have been skipped as they are missing a URL. '
|
||||
msg_tmpl = (
|
||||
'{}Some {} client https formats have been skipped as they are missing a URL. '
|
||||
'{}. See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details')
|
||||
if client_name in ('web', 'web_safari'):
|
||||
msg += 'YouTube is forcing SABR streaming for this client. '
|
||||
self.write_debug(msg_tmpl.format(
|
||||
f'{video_id}: ', client_name,
|
||||
'YouTube is forcing SABR streaming for this client'), only_once=True)
|
||||
else:
|
||||
msg += (
|
||||
msg = (
|
||||
f'YouTube may have enabled the SABR-only streaming experiment for '
|
||||
f'{"your account" if self.is_authenticated else "the current session"}. '
|
||||
)
|
||||
msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details'
|
||||
self.report_warning(msg, video_id, only_once=True)
|
||||
f'{"your account" if self.is_authenticated else "the current session"}')
|
||||
self.report_warning(msg_tmpl.format('', client_name, msg), video_id, only_once=True)
|
||||
continue
|
||||
|
||||
fmt = process_format_stream(
|
||||
@@ -3444,19 +3554,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
continue
|
||||
|
||||
# signature
|
||||
# Attempt to load sig spec from cache
|
||||
if encrypted_sig:
|
||||
if skip_player_js:
|
||||
continue
|
||||
spec_cache_id = self._sig_spec_cache_id(player_url, len(encrypted_sig))
|
||||
spec = self._load_sig_spec_from_cache(spec_cache_id)
|
||||
if spec:
|
||||
self.write_debug(f'Using cached signature function {spec_cache_id}', only_once=True)
|
||||
fmt_url += '&{}={}'.format(traverse_obj(sc, ('sp', -1)) or 'signature',
|
||||
solve_sig(encrypted_sig, spec))
|
||||
else:
|
||||
fmt['_jsc_s_challenge'] = encrypted_sig
|
||||
fmt['_jsc_s_sc'] = sc
|
||||
solve_js_challenges()
|
||||
spec = self._load_player_data_from_cache(
|
||||
'sigfuncs', player_url, len(encrypted_sig), use_disk_cache=True)
|
||||
if not spec:
|
||||
continue
|
||||
fmt_url += '&{}={}'.format(
|
||||
traverse_obj(sc, ('sp', -1)) or 'signature',
|
||||
solve_sig(encrypted_sig, spec))
|
||||
|
||||
# n challenge
|
||||
query = parse_qs(fmt_url)
|
||||
@@ -3464,10 +3572,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if skip_player_js:
|
||||
continue
|
||||
n_challenge = query['n'][0]
|
||||
if n_challenge in self._player_cache:
|
||||
fmt_url = update_url_query(fmt_url, {'n': self._player_cache[n_challenge]})
|
||||
else:
|
||||
fmt['_jsc_n_challenge'] = n_challenge
|
||||
solve_js_challenges()
|
||||
n_result = self._load_player_data_from_cache('n', player_url, n_challenge)
|
||||
if not n_result:
|
||||
continue
|
||||
fmt_url = update_url_query(fmt_url, {'n': n_result})
|
||||
|
||||
if po_token:
|
||||
fmt_url = update_url_query(fmt_url, {'pot': po_token})
|
||||
@@ -3484,80 +3593,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
https_fmts.append(fmt)
|
||||
|
||||
# Bulk process sig/n handling
|
||||
# Retrieve all JSC Sig and n requests for this player response in one go
|
||||
n_challenges = {}
|
||||
s_challenges = {}
|
||||
for fmt in https_fmts:
|
||||
# This will de-duplicate requests
|
||||
n_challenge = fmt.pop('_jsc_n_challenge', None)
|
||||
if n_challenge is not None:
|
||||
n_challenges.setdefault(n_challenge, []).append(fmt)
|
||||
|
||||
s_challenge = fmt.pop('_jsc_s_challenge', None)
|
||||
if s_challenge is not None:
|
||||
s_challenges.setdefault(len(s_challenge), {}).setdefault(s_challenge, []).append(fmt)
|
||||
|
||||
challenge_requests = []
|
||||
if n_challenges:
|
||||
challenge_requests.append(JsChallengeRequest(
|
||||
type=JsChallengeType.N,
|
||||
video_id=video_id,
|
||||
input=NChallengeInput(challenges=list(n_challenges.keys()), player_url=player_url)))
|
||||
if s_challenges:
|
||||
challenge_requests.append(JsChallengeRequest(
|
||||
type=JsChallengeType.SIG,
|
||||
video_id=video_id,
|
||||
input=SigChallengeInput(challenges=[''.join(map(chr, range(spec_id))) for spec_id in s_challenges], player_url=player_url)))
|
||||
|
||||
if challenge_requests:
|
||||
for _challenge_request, challenge_response in self._jsc_director.bulk_solve(challenge_requests):
|
||||
if challenge_response.type == JsChallengeType.SIG:
|
||||
for challenge, result in challenge_response.output.results.items():
|
||||
spec_id = len(challenge)
|
||||
spec = [ord(c) for c in result]
|
||||
self._store_sig_spec_to_cache(self._sig_spec_cache_id(player_url, spec_id), spec)
|
||||
s_challenge_data = s_challenges.pop(spec_id, {})
|
||||
if not s_challenge_data:
|
||||
continue
|
||||
for s_challenge, fmts in s_challenge_data.items():
|
||||
solved_challenge = solve_sig(s_challenge, spec)
|
||||
for fmt in fmts:
|
||||
sc = fmt.pop('_jsc_s_sc')
|
||||
fmt['url'] += '&{}={}'.format(
|
||||
traverse_obj(sc, ('sp', -1)) or 'signature',
|
||||
solved_challenge)
|
||||
|
||||
elif challenge_response.type == JsChallengeType.N:
|
||||
for challenge, result in challenge_response.output.results.items():
|
||||
fmts = n_challenges.pop(challenge, [])
|
||||
for fmt in fmts:
|
||||
self._player_cache[challenge] = result
|
||||
fmt['url'] = update_url_query(fmt['url'], {'n': result})
|
||||
|
||||
# Raise warning if any challenge requests remain
|
||||
# Depending on type of challenge request
|
||||
|
||||
help_message = (
|
||||
'Ensure you have a supported JavaScript runtime and '
|
||||
'challenge solver script distribution installed. '
|
||||
'Review any warnings presented before this message. '
|
||||
f'For more details, refer to {_EJS_WIKI_URL}')
|
||||
|
||||
if s_challenges:
|
||||
self.report_warning(
|
||||
f'Signature solving failed: Some formats may be missing. {help_message}',
|
||||
video_id=video_id, only_once=True)
|
||||
if n_challenges:
|
||||
self.report_warning(
|
||||
f'n challenge solving failed: Some formats may be missing. {help_message}',
|
||||
video_id=video_id, only_once=True)
|
||||
|
||||
for cfmts in list(s_challenges.values()) + list(n_challenges.values()):
|
||||
for fmt in cfmts:
|
||||
if fmt in https_fmts:
|
||||
https_fmts.remove(fmt)
|
||||
|
||||
for fmt in https_fmts:
|
||||
if (all_formats or 'dashy' in format_types) and fmt['filesize']:
|
||||
yield {
|
||||
@@ -3573,7 +3608,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
yield from process_https_formats()
|
||||
|
||||
needs_live_processing = self._needs_live_processing(live_status, duration)
|
||||
skip_bad_formats = 'incomplete' not in format_types
|
||||
|
||||
skip_manifests = set(self._configuration_arg('skip'))
|
||||
if (needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway
|
||||
@@ -3640,17 +3674,34 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
hls_manifest_url = 'hls' not in skip_manifests and streaming_data.get('hlsManifestUrl')
|
||||
if hls_manifest_url:
|
||||
manifest_path = urllib.parse.urlparse(hls_manifest_url).path
|
||||
if m := re.fullmatch(r'(?P<path>.+)(?P<suffix>/(?:file|playlist)/index\.m3u8)', manifest_path):
|
||||
manifest_path, manifest_suffix = m.group('path', 'suffix')
|
||||
else:
|
||||
manifest_suffix = ''
|
||||
|
||||
solved_n = False
|
||||
n_challenge = get_manifest_n_challenge(hls_manifest_url)
|
||||
if n_challenge and not skip_player_js:
|
||||
solve_js_challenges()
|
||||
n_result = self._load_player_data_from_cache('n', player_url, n_challenge)
|
||||
if n_result:
|
||||
manifest_path = manifest_path.replace(f'/n/{n_challenge}', f'/n/{n_result}')
|
||||
solved_n = n_result in manifest_path
|
||||
|
||||
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(
|
||||
client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HLS]
|
||||
require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided)
|
||||
po_token = gvs_pots.get(client_name, fetch_po_token_func(required=require_po_token or pot_policy.recommended))
|
||||
if po_token:
|
||||
hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}'
|
||||
manifest_path = manifest_path.rstrip('/') + f'/pot/{po_token}'
|
||||
if client_name not in gvs_pots:
|
||||
gvs_pots[client_name] = po_token
|
||||
|
||||
if require_po_token and not po_token and 'missing_pot' not in self._configuration_arg('formats'):
|
||||
self._report_pot_format_skipped(video_id, client_name, 'hls')
|
||||
else:
|
||||
elif solved_n or not n_challenge:
|
||||
hls_manifest_url = update_url(hls_manifest_url, path=f'{manifest_path}{manifest_suffix}')
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
|
||||
for sub in traverse_obj(subs, (..., ..., {dict})):
|
||||
@@ -3665,17 +3716,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
dash_manifest_url = 'dash' not in skip_manifests and streaming_data.get('dashManifestUrl')
|
||||
if dash_manifest_url:
|
||||
manifest_path = urllib.parse.urlparse(dash_manifest_url).path
|
||||
|
||||
solved_n = False
|
||||
n_challenge = get_manifest_n_challenge(dash_manifest_url)
|
||||
if n_challenge and not skip_player_js:
|
||||
solve_js_challenges()
|
||||
n_result = self._load_player_data_from_cache('n', player_url, n_challenge)
|
||||
if n_result:
|
||||
manifest_path = manifest_path.replace(f'/n/{n_challenge}', f'/n/{n_result}')
|
||||
solved_n = n_result in manifest_path
|
||||
|
||||
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(
|
||||
client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.DASH]
|
||||
require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided)
|
||||
po_token = gvs_pots.get(client_name, fetch_po_token_func(required=require_po_token or pot_policy.recommended))
|
||||
if po_token:
|
||||
dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}'
|
||||
manifest_path = manifest_path.rstrip('/') + f'/pot/{po_token}'
|
||||
if client_name not in gvs_pots:
|
||||
gvs_pots[client_name] = po_token
|
||||
|
||||
if require_po_token and not po_token and 'missing_pot' not in self._configuration_arg('formats'):
|
||||
self._report_pot_format_skipped(video_id, client_name, 'dash')
|
||||
else:
|
||||
elif solved_n or not n_challenge:
|
||||
dash_manifest_url = update_url(dash_manifest_url, path=manifest_path)
|
||||
formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
|
||||
for sub in traverse_obj(subs, (..., ..., {dict})):
|
||||
# TODO: If DASH video requires a PO Token, do the subs also require pot?
|
||||
@@ -3683,11 +3747,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
sub[STREAMING_DATA_CLIENT_NAME] = client_name
|
||||
subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
|
||||
for f in formats:
|
||||
if process_manifest_format(f, 'dash', client_name, f['format_id'], require_po_token and not po_token):
|
||||
# Save original itag value as format_id because process_manifest_format mutates f
|
||||
format_id = f['format_id']
|
||||
if process_manifest_format(f, 'dash', client_name, format_id, require_po_token and not po_token):
|
||||
f['filesize'] = int_or_none(self._search_regex(
|
||||
r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
|
||||
if needs_live_processing:
|
||||
f['is_from_start'] = True
|
||||
f['_itag'] = format_id
|
||||
f['_client'] = client_name
|
||||
yield f
|
||||
yield subtitles
|
||||
|
||||
@@ -3800,7 +3868,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _download_initial_data(self, video_id, webpage, webpage_client, webpage_ytcfg):
|
||||
initial_data = None
|
||||
if webpage and 'initial_data' not in self._configuration_arg('webpage_skip'):
|
||||
if webpage and 'initial_data' not in self._skipped_webpage_data:
|
||||
initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
|
||||
if not traverse_obj(initial_data, 'contents'):
|
||||
self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
|
||||
@@ -3848,10 +3916,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
base_url = self.http_scheme() + '//www.youtube.com/'
|
||||
webpage_url = base_url + 'watch?v=' + video_id
|
||||
webpage_client = 'web'
|
||||
|
||||
webpage, webpage_ytcfg, initial_data, is_premium_subscriber, player_responses, player_url = self._initial_extract(
|
||||
url, smuggled_data, webpage_url, webpage_client, video_id)
|
||||
url, smuggled_data, webpage_url, self._webpage_client, video_id)
|
||||
|
||||
playability_statuses = traverse_obj(
|
||||
player_responses, (..., 'playabilityStatus'), expected_type=dict)
|
||||
@@ -4053,16 +4120,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
needs_live_processing = self._needs_live_processing(live_status, duration)
|
||||
|
||||
def is_bad_format(fmt):
|
||||
if needs_live_processing and not fmt.get('is_from_start'):
|
||||
return True
|
||||
elif (live_status == 'is_live' and needs_live_processing != 'is_live'
|
||||
and fmt.get('protocol') == 'http_dash_segments'):
|
||||
return True
|
||||
def adjust_incomplete_format(fmt, note_suffix='(Last 2 hours)', pref_adjustment=-10):
|
||||
fmt['preference'] = (fmt.get('preference') or -1) + pref_adjustment
|
||||
fmt['format_note'] = join_nonempty(fmt.get('format_note'), note_suffix, delim=' ')
|
||||
|
||||
for fmt in filter(is_bad_format, formats):
|
||||
fmt['preference'] = (fmt.get('preference') or -1) - 10
|
||||
fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')
|
||||
# Adjust preference and format note for incomplete live/post-live formats
|
||||
if live_status in ('is_live', 'post_live'):
|
||||
for fmt in formats:
|
||||
protocol = fmt.get('protocol')
|
||||
# Currently, protocol isn't set for adaptive https formats, but this could change
|
||||
is_adaptive = protocol in (None, 'http', 'https')
|
||||
if live_status == 'post_live' and is_adaptive:
|
||||
# Post-live adaptive formats cause HttpFD to raise "Did not get any data blocks"
|
||||
# These formats are *only* useful to external applications, so we can hide them
|
||||
# Set their preference <= -1000 so that FormatSorter flags them as 'hidden'
|
||||
adjust_incomplete_format(fmt, note_suffix='(ended)', pref_adjustment=-5000)
|
||||
# Is it live with --live-from-start? Or is it post-live and its duration is >2hrs?
|
||||
elif needs_live_processing:
|
||||
if not fmt.get('is_from_start'):
|
||||
# Post-live m3u8 formats for >2hr streams
|
||||
adjust_incomplete_format(fmt)
|
||||
elif live_status == 'is_live':
|
||||
if protocol == 'http_dash_segments':
|
||||
# Live DASH formats without --live-from-start
|
||||
adjust_incomplete_format(fmt)
|
||||
elif is_adaptive:
|
||||
# Incomplete live adaptive https formats
|
||||
adjust_incomplete_format(fmt, note_suffix='(incomplete)', pref_adjustment=-20)
|
||||
|
||||
if needs_live_processing:
|
||||
self._prepare_live_from_start_formats(
|
||||
|
||||
@@ -45,6 +45,8 @@ class BunJCP(EJSBaseJCP, BuiltinIEContentProvider):
|
||||
JS_RUNTIME_NAME = 'bun'
|
||||
BUN_NPM_LIB_FILENAME = 'yt.solver.bun.lib.js'
|
||||
SUPPORTED_PROXY_SCHEMES = ['http', 'https']
|
||||
_BUN_MAX_SUPPORTED_VERSION = (1, 3, 14)
|
||||
_BUN_DEPRECATION_URL = 'https://github.com/yt-dlp/yt-dlp/issues/16766'
|
||||
|
||||
def _iter_script_sources(self):
|
||||
yield from super()._iter_script_sources()
|
||||
@@ -112,6 +114,19 @@ class BunJCP(EJSBaseJCP, BuiltinIEContentProvider):
|
||||
return options
|
||||
|
||||
def _run_js_runtime(self, stdin: str, /) -> str:
|
||||
is_unsupported_version = self.runtime_info.version_tuple > self._BUN_MAX_SUPPORTED_VERSION
|
||||
if is_unsupported_version:
|
||||
self.logger.warning(
|
||||
f'bun version {".".join(map(str, self.runtime_info.version_tuple))} is not supported! '
|
||||
f'{".".join(map(str, self._BUN_MAX_SUPPORTED_VERSION))} is the last supported bun version. '
|
||||
f'{self.ie._downloader._format_err("DO NOT", self.ie._downloader.Styles.ERROR)} '
|
||||
f'open a bug report even if you encounter any errors!',
|
||||
once=True)
|
||||
else:
|
||||
self.logger.info(
|
||||
f'bun support has been deprecated. See {self._BUN_DEPRECATION_URL} for details',
|
||||
once=True)
|
||||
|
||||
# https://bun.com/docs/cli/run
|
||||
options = ['--no-addons', '--prefer-offline']
|
||||
if self._lib_script.variant == ScriptVariant.BUN_NPM:
|
||||
@@ -136,7 +151,7 @@ class BunJCP(EJSBaseJCP, BuiltinIEContentProvider):
|
||||
msg = f'Error running bun process (returncode: {proc.returncode})'
|
||||
if stderr:
|
||||
msg = f'{msg}: {stderr.strip()}'
|
||||
raise JsChallengeProviderError(msg)
|
||||
raise JsChallengeProviderError(msg, expected=is_unsupported_version)
|
||||
return stdout
|
||||
|
||||
def _clean_stderr(self, stderr):
|
||||
|
||||
@@ -21,12 +21,20 @@ from yt_dlp.utils import Popen
|
||||
class QuickJSJCP(EJSBaseJCP, BuiltinIEContentProvider):
|
||||
PROVIDER_NAME = 'quickjs'
|
||||
JS_RUNTIME_NAME = 'quickjs'
|
||||
_QJS_MIN_RECOMMENDED = {
|
||||
'quickjs': (2025, 4, 26),
|
||||
'quickjs-ng': (0, 12, 0),
|
||||
}
|
||||
_QJS_WARNING_TMPL = (
|
||||
'{name} versions older than {version} are missing important optimizations '
|
||||
'and will solve the JS challenges very slowly. Consider upgrading.')
|
||||
|
||||
def _run_js_runtime(self, stdin: str, /) -> str:
|
||||
if self.runtime_info.name == 'quickjs-ng':
|
||||
self.logger.warning('QuickJS-NG is missing some optimizations making this very slow. Consider using upstream QuickJS instead.')
|
||||
elif self.runtime_info.version_tuple < (2025, 4, 26):
|
||||
self.logger.warning('Older QuickJS versions are missing optimizations making this very slow. Consider upgrading.')
|
||||
min_recommended_version = self._QJS_MIN_RECOMMENDED[self.runtime_info.name]
|
||||
if self.runtime_info.version_tuple < min_recommended_version:
|
||||
self.logger.warning(self._QJS_WARNING_TMPL.format(
|
||||
name=self.runtime_info.name,
|
||||
version='.'.join(map(str, min_recommended_version))))
|
||||
|
||||
# QuickJS does not support reading from stdin, so we have to use a temp file
|
||||
temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.js', delete=False, encoding='utf-8')
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
# This file is generated by devscripts/update_ejs.py. DO NOT MODIFY!
|
||||
# This file is generated by devscripts/update_requirements.py. DO NOT MODIFY!
|
||||
|
||||
VERSION = '0.3.2'
|
||||
VERSION = '0.8.0'
|
||||
HASHES = {
|
||||
'yt.solver.bun.lib.js': '6ff45e94de9f0ea936a183c48173cfa9ce526ee4b7544cd556428427c1dd53c8073ef0174e79b320252bf0e7c64b0032cc1cf9c4358f3fda59033b7caa01c241',
|
||||
'yt.solver.core.js': '0cd96b2d3f319dfa62cae689efa7d930ef1706e95f5921794db5089b2262957ec0a17d73938d8975ea35d0309cbfb4c8e4418d5e219837215eee242890c8b64d',
|
||||
'yt.solver.core.min.js': '370d627703002b4a73b10027702734a3de9484f6b56b739942be1dc2b60fee49dee2aa86ed117d1c8ae1ac55181d326481f1fe2e2e8d5211154d48e2a55dac51',
|
||||
'yt.solver.core.js': 'c163a6f376db6ce3da47d516a28a8f2a0554ae95c58dc766f0a6e2b3894f2cef1ee07fa84beb442fa471aac4f300985added1657c7c94c4d1cfefe68920ab599',
|
||||
'yt.solver.core.min.js': 'ee5b307d07f55e91e4723edf5ac205cc877a474187849d757dc1322e38427b157a9d706d510c1723d3670f98e5a3f8cbcde77874a80406bd7204bc9fea30f283',
|
||||
'yt.solver.deno.lib.js': '9c8ee3ab6c23e443a5a951e3ac73c6b8c1c8fb34335e7058a07bf99d349be5573611de00536dcd03ecd3cf34014c4e9b536081de37af3637c5390c6a6fd6a0f0',
|
||||
'yt.solver.lib.js': '1ee3753a8222fc855f5c39db30a9ccbb7967dbe1fb810e86dc9a89aa073a0907f294c720e9b65427d560a35aa1ce6af19ef854d9126a05ca00afe03f72047733',
|
||||
'yt.solver.lib.min.js': '8420c259ad16e99ce004e4651ac1bcabb53b4457bf5668a97a9359be9a998a789fee8ab124ee17f91a2ea8fd84e0f2b2fc8eabcaf0b16a186ba734cf422ad053',
|
||||
|
||||
@@ -39,189 +39,8 @@ var jsc = (function (meriyah, astring) {
|
||||
function isOneOf(value, ...of) {
|
||||
return of.includes(value);
|
||||
}
|
||||
function _optionalChain$2(ops) {
|
||||
let lastAccessLHS = undefined;
|
||||
let value = ops[0];
|
||||
let i = 1;
|
||||
while (i < ops.length) {
|
||||
const op = ops[i];
|
||||
const fn = ops[i + 1];
|
||||
i += 2;
|
||||
if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) {
|
||||
return undefined;
|
||||
}
|
||||
if (op === 'access' || op === 'optionalAccess') {
|
||||
lastAccessLHS = value;
|
||||
value = fn(value);
|
||||
} else if (op === 'call' || op === 'optionalCall') {
|
||||
value = fn((...args) => value.call(lastAccessLHS, ...args));
|
||||
lastAccessLHS = undefined;
|
||||
}
|
||||
}
|
||||
return value;
|
||||
}
|
||||
const logicalExpression = {
|
||||
type: 'ExpressionStatement',
|
||||
expression: {
|
||||
type: 'LogicalExpression',
|
||||
left: { type: 'Identifier' },
|
||||
right: {
|
||||
type: 'SequenceExpression',
|
||||
expressions: [
|
||||
{
|
||||
type: 'AssignmentExpression',
|
||||
left: { type: 'Identifier' },
|
||||
operator: '=',
|
||||
right: {
|
||||
type: 'CallExpression',
|
||||
callee: { type: 'Identifier' },
|
||||
arguments: {
|
||||
or: [
|
||||
[
|
||||
{ type: 'Literal' },
|
||||
{
|
||||
type: 'CallExpression',
|
||||
callee: {
|
||||
type: 'Identifier',
|
||||
name: 'decodeURIComponent',
|
||||
},
|
||||
arguments: [{ type: 'Identifier' }],
|
||||
optional: false,
|
||||
},
|
||||
],
|
||||
[
|
||||
{
|
||||
type: 'CallExpression',
|
||||
callee: {
|
||||
type: 'Identifier',
|
||||
name: 'decodeURIComponent',
|
||||
},
|
||||
arguments: [{ type: 'Identifier' }],
|
||||
optional: false,
|
||||
},
|
||||
],
|
||||
],
|
||||
},
|
||||
optional: false,
|
||||
},
|
||||
},
|
||||
{ type: 'CallExpression' },
|
||||
],
|
||||
},
|
||||
operator: '&&',
|
||||
},
|
||||
};
|
||||
const identifier$1 = {
|
||||
or: [
|
||||
{
|
||||
type: 'ExpressionStatement',
|
||||
expression: {
|
||||
type: 'AssignmentExpression',
|
||||
operator: '=',
|
||||
left: { type: 'Identifier' },
|
||||
right: { type: 'FunctionExpression', params: [{}, {}, {}] },
|
||||
},
|
||||
},
|
||||
{ type: 'FunctionDeclaration', params: [{}, {}, {}] },
|
||||
{
|
||||
type: 'VariableDeclaration',
|
||||
declarations: {
|
||||
anykey: [
|
||||
{
|
||||
type: 'VariableDeclarator',
|
||||
init: { type: 'FunctionExpression', params: [{}, {}, {}] },
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
function extract$1(node) {
|
||||
if (!matchesStructure(node, identifier$1)) {
|
||||
return null;
|
||||
}
|
||||
let block;
|
||||
if (
|
||||
node.type === 'ExpressionStatement' &&
|
||||
node.expression.type === 'AssignmentExpression' &&
|
||||
node.expression.right.type === 'FunctionExpression'
|
||||
) {
|
||||
block = node.expression.right.body;
|
||||
} else if (node.type === 'VariableDeclaration') {
|
||||
for (const decl of node.declarations) {
|
||||
if (
|
||||
decl.type === 'VariableDeclarator' &&
|
||||
_optionalChain$2([
|
||||
decl,
|
||||
'access',
|
||||
(_) => _.init,
|
||||
'optionalAccess',
|
||||
(_2) => _2.type,
|
||||
]) === 'FunctionExpression' &&
|
||||
_optionalChain$2([
|
||||
decl,
|
||||
'access',
|
||||
(_3) => _3.init,
|
||||
'optionalAccess',
|
||||
(_4) => _4.params,
|
||||
'access',
|
||||
(_5) => _5.length,
|
||||
]) === 3
|
||||
) {
|
||||
block = decl.init.body;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (node.type === 'FunctionDeclaration') {
|
||||
block = node.body;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
const relevantExpression = _optionalChain$2([
|
||||
block,
|
||||
'optionalAccess',
|
||||
(_6) => _6.body,
|
||||
'access',
|
||||
(_7) => _7.at,
|
||||
'call',
|
||||
(_8) => _8(-2),
|
||||
]);
|
||||
if (!matchesStructure(relevantExpression, logicalExpression)) {
|
||||
return null;
|
||||
}
|
||||
if (
|
||||
_optionalChain$2([
|
||||
relevantExpression,
|
||||
'optionalAccess',
|
||||
(_9) => _9.type,
|
||||
]) !== 'ExpressionStatement' ||
|
||||
relevantExpression.expression.type !== 'LogicalExpression' ||
|
||||
relevantExpression.expression.right.type !== 'SequenceExpression' ||
|
||||
relevantExpression.expression.right.expressions[0].type !==
|
||||
'AssignmentExpression'
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
const call = relevantExpression.expression.right.expressions[0].right;
|
||||
if (call.type !== 'CallExpression' || call.callee.type !== 'Identifier') {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
type: 'ArrowFunctionExpression',
|
||||
params: [{ type: 'Identifier', name: 'sig' }],
|
||||
body: {
|
||||
type: 'CallExpression',
|
||||
callee: { type: 'Identifier', name: call.callee.name },
|
||||
arguments:
|
||||
call.arguments.length === 1
|
||||
? [{ type: 'Identifier', name: 'sig' }]
|
||||
: [call.arguments[0], { type: 'Identifier', name: 'sig' }],
|
||||
optional: false,
|
||||
},
|
||||
async: false,
|
||||
expression: false,
|
||||
generator: false,
|
||||
};
|
||||
function generateArrowFunction(data) {
|
||||
return meriyah.parse(data).body[0].expression;
|
||||
}
|
||||
function _optionalChain$1(ops) {
|
||||
let lastAccessLHS = undefined;
|
||||
@@ -246,156 +65,117 @@ var jsc = (function (meriyah, astring) {
|
||||
}
|
||||
const identifier = {
|
||||
or: [
|
||||
{
|
||||
type: 'ExpressionStatement',
|
||||
expression: {
|
||||
type: 'AssignmentExpression',
|
||||
operator: '=',
|
||||
left: { or: [{ type: 'Identifier' }, { type: 'MemberExpression' }] },
|
||||
right: { type: 'FunctionExpression', async: false },
|
||||
},
|
||||
},
|
||||
{ type: 'FunctionDeclaration', async: false, id: { type: 'Identifier' } },
|
||||
{
|
||||
type: 'VariableDeclaration',
|
||||
kind: 'var',
|
||||
declarations: {
|
||||
anykey: [
|
||||
{
|
||||
type: 'VariableDeclarator',
|
||||
id: { type: 'Identifier' },
|
||||
init: {
|
||||
type: 'ArrayExpression',
|
||||
elements: [{ type: 'Identifier' }],
|
||||
},
|
||||
init: { type: 'FunctionExpression', async: false },
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
{
|
||||
type: 'ExpressionStatement',
|
||||
expression: {
|
||||
type: 'AssignmentExpression',
|
||||
left: { type: 'Identifier' },
|
||||
operator: '=',
|
||||
right: {
|
||||
type: 'ArrayExpression',
|
||||
elements: [{ type: 'Identifier' }],
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
const catchBlockBody = [
|
||||
{
|
||||
type: 'ReturnStatement',
|
||||
argument: {
|
||||
type: 'BinaryExpression',
|
||||
left: {
|
||||
type: 'MemberExpression',
|
||||
object: { type: 'Identifier' },
|
||||
computed: true,
|
||||
property: { type: 'Literal' },
|
||||
optional: false,
|
||||
},
|
||||
right: { type: 'Identifier' },
|
||||
operator: '+',
|
||||
const asdasd = {
|
||||
type: 'ExpressionStatement',
|
||||
expression: {
|
||||
type: 'CallExpression',
|
||||
callee: {
|
||||
type: 'MemberExpression',
|
||||
object: { type: 'Identifier' },
|
||||
property: {},
|
||||
optional: false,
|
||||
},
|
||||
arguments: [
|
||||
{ type: 'Literal', value: 'alr' },
|
||||
{ type: 'Literal', value: 'yes' },
|
||||
],
|
||||
optional: false,
|
||||
},
|
||||
];
|
||||
};
|
||||
function extract(node) {
|
||||
if (!matchesStructure(node, identifier)) {
|
||||
let name = null;
|
||||
let block = null;
|
||||
switch (node.type) {
|
||||
case 'ExpressionStatement': {
|
||||
if (
|
||||
node.expression.type === 'AssignmentExpression' &&
|
||||
node.expression.left.type === 'Identifier' &&
|
||||
node.expression.right.type === 'FunctionExpression' &&
|
||||
node.expression.right.params.length === 1
|
||||
) {
|
||||
name = node.expression.left.name;
|
||||
block = node.expression.right.body;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'FunctionDeclaration': {
|
||||
if (node.params.length === 1) {
|
||||
name = _optionalChain$1([
|
||||
node,
|
||||
'access',
|
||||
(_) => _.id,
|
||||
'optionalAccess',
|
||||
(_2) => _2.name,
|
||||
]);
|
||||
block = node.body;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!block || !name) {
|
||||
return null;
|
||||
}
|
||||
const tryNode = block.body.at(-2);
|
||||
if (
|
||||
_optionalChain$1([tryNode, 'optionalAccess', (_3) => _3.type]) !==
|
||||
'TryStatement' ||
|
||||
_optionalChain$1([
|
||||
tryNode,
|
||||
'access',
|
||||
(_4) => _4.handler,
|
||||
'optionalAccess',
|
||||
(_5) => _5.type,
|
||||
]) !== 'CatchClause'
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
const catchBody = tryNode.handler.body.body;
|
||||
if (matchesStructure(catchBody, catchBlockBody)) {
|
||||
return makeSolverFuncFromName(name);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
if (node.type === 'VariableDeclaration') {
|
||||
for (const declaration of node.declarations) {
|
||||
if (
|
||||
declaration.type !== 'VariableDeclarator' ||
|
||||
!declaration.init ||
|
||||
declaration.init.type !== 'ArrayExpression' ||
|
||||
declaration.init.elements.length !== 1
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
const [firstElement] = declaration.init.elements;
|
||||
if (firstElement && firstElement.type === 'Identifier') {
|
||||
return makeSolverFuncFromName(firstElement.name);
|
||||
}
|
||||
const options = [];
|
||||
if (node.type === 'FunctionDeclaration') {
|
||||
if (
|
||||
node.id &&
|
||||
_optionalChain$1([
|
||||
node,
|
||||
'access',
|
||||
(_) => _.body,
|
||||
'optionalAccess',
|
||||
(_2) => _2.body,
|
||||
])
|
||||
) {
|
||||
options.push({
|
||||
name: node.id,
|
||||
statements: _optionalChain$1([
|
||||
node,
|
||||
'access',
|
||||
(_3) => _3.body,
|
||||
'optionalAccess',
|
||||
(_4) => _4.body,
|
||||
]),
|
||||
});
|
||||
}
|
||||
} else if (node.type === 'ExpressionStatement') {
|
||||
const expr = node.expression;
|
||||
if (
|
||||
expr.type === 'AssignmentExpression' &&
|
||||
expr.left.type === 'Identifier' &&
|
||||
expr.operator === '=' &&
|
||||
expr.right.type === 'ArrayExpression' &&
|
||||
expr.right.elements.length === 1
|
||||
) {
|
||||
const [firstElement] = expr.right.elements;
|
||||
if (firstElement && firstElement.type === 'Identifier') {
|
||||
return makeSolverFuncFromName(firstElement.name);
|
||||
if (node.expression.type !== 'AssignmentExpression') {
|
||||
return null;
|
||||
}
|
||||
const name = node.expression.left;
|
||||
const body = _optionalChain$1([
|
||||
node.expression.right,
|
||||
'optionalAccess',
|
||||
(_5) => _5.body,
|
||||
'optionalAccess',
|
||||
(_6) => _6.body,
|
||||
]);
|
||||
if (name && body) {
|
||||
options.push({ name: name, statements: body });
|
||||
}
|
||||
} else if (node.type === 'VariableDeclaration') {
|
||||
for (const declaration of node.declarations) {
|
||||
const name = declaration.id;
|
||||
const body = _optionalChain$1([
|
||||
declaration.init,
|
||||
'optionalAccess',
|
||||
(_7) => _7.body,
|
||||
'optionalAccess',
|
||||
(_8) => _8.body,
|
||||
]);
|
||||
if (name && body) {
|
||||
options.push({ name: name, statements: body });
|
||||
}
|
||||
}
|
||||
}
|
||||
for (const { name: name, statements: statements } of options) {
|
||||
if (matchesStructure(statements, { anykey: [asdasd] })) {
|
||||
return createSolver(name);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
function makeSolverFuncFromName(name) {
|
||||
return {
|
||||
type: 'ArrowFunctionExpression',
|
||||
params: [{ type: 'Identifier', name: 'n' }],
|
||||
body: {
|
||||
type: 'CallExpression',
|
||||
callee: { type: 'Identifier', name: name },
|
||||
arguments: [{ type: 'Identifier', name: 'n' }],
|
||||
optional: false,
|
||||
},
|
||||
async: false,
|
||||
expression: false,
|
||||
generator: false,
|
||||
};
|
||||
function createSolver(expression) {
|
||||
return generateArrowFunction(
|
||||
`\n({sig, n}) => {\n const url = (${astring.generate(expression)})("https://youtube.com/watch?v=yt-dlp-wins", "s", sig ? encodeURIComponent(sig) : undefined);\n url.set("n", n);\n const proto = Object.getPrototypeOf(url);\n const keys = Object.keys(proto).concat(Object.getOwnPropertyNames(proto));\n for (const key of keys) {\n if (!["constructor", "set", "get", "clone"].includes(key)) {\n url[key]();\n break;\n }\n }\n const s = url.get("s");\n return {\n sig: s ? decodeURIComponent(s) : null,\n n: url.get("n") ?? null,\n };\n}\n`,
|
||||
);
|
||||
}
|
||||
const setupNodes = meriyah.parse(
|
||||
`\nif (typeof globalThis.XMLHttpRequest === "undefined") {\n globalThis.XMLHttpRequest = { prototype: {} };\n}\nconst window = Object.create(null);\nif (typeof URL === "undefined") {\n window.location = {\n hash: "",\n host: "www.youtube.com",\n hostname: "www.youtube.com",\n href: "https://www.youtube.com/watch?v=yt-dlp-wins",\n origin: "https://www.youtube.com",\n password: "",\n pathname: "/watch",\n port: "",\n protocol: "https:",\n search: "?v=yt-dlp-wins",\n username: "",\n };\n} else {\n window.location = new URL("https://www.youtube.com/watch?v=yt-dlp-wins");\n}\nif (typeof globalThis.document === "undefined") {\n globalThis.document = Object.create(null);\n}\nif (typeof globalThis.navigator === "undefined") {\n globalThis.navigator = Object.create(null);\n}\nif (typeof globalThis.self === "undefined") {\n globalThis.self = globalThis;\n}\n`,
|
||||
`\nif (typeof globalThis.XMLHttpRequest === "undefined") {\n globalThis.XMLHttpRequest = { prototype: {} };\n}\nif (typeof URL === "undefined") {\n globalThis.location = {\n hash: "",\n host: "www.youtube.com",\n hostname: "www.youtube.com",\n href: "https://www.youtube.com/watch?v=yt-dlp-wins",\n origin: "https://www.youtube.com",\n password: "",\n pathname: "/watch",\n port: "",\n protocol: "https:",\n search: "?v=yt-dlp-wins",\n username: "",\n };\n} else {\n globalThis.location = new URL("https://www.youtube.com/watch?v=yt-dlp-wins");\n}\nif (typeof globalThis.document === "undefined") {\n globalThis.document = Object.create(null);\n}\nif (typeof globalThis.navigator === "undefined") {\n globalThis.navigator = Object.create(null);\n}\nif (typeof globalThis.self === "undefined") {\n globalThis.self = globalThis;\n}\nif (typeof globalThis.window === "undefined") {\n globalThis.window = globalThis;\n}\n`,
|
||||
).body;
|
||||
function _optionalChain(ops) {
|
||||
let lastAccessLHS = undefined;
|
||||
@@ -419,8 +199,31 @@ var jsc = (function (meriyah, astring) {
|
||||
return value;
|
||||
}
|
||||
function preprocessPlayer(data) {
|
||||
const ast = meriyah.parse(data);
|
||||
const body = ast.body;
|
||||
const program = meriyah.parse(data);
|
||||
const plainStatements = modifyPlayer(program);
|
||||
const solutions = getSolutions(plainStatements);
|
||||
for (const [name, options] of Object.entries(solutions)) {
|
||||
plainStatements.push({
|
||||
type: 'ExpressionStatement',
|
||||
expression: {
|
||||
type: 'AssignmentExpression',
|
||||
operator: '=',
|
||||
left: {
|
||||
type: 'MemberExpression',
|
||||
computed: false,
|
||||
object: { type: 'Identifier', name: '_result' },
|
||||
property: { type: 'Identifier', name: name },
|
||||
optional: false,
|
||||
},
|
||||
right: multiTry(options),
|
||||
},
|
||||
});
|
||||
}
|
||||
program.body.splice(0, 0, ...setupNodes);
|
||||
return astring.generate(program);
|
||||
}
|
||||
function modifyPlayer(program) {
|
||||
const body = program.body;
|
||||
const block = (() => {
|
||||
switch (body.length) {
|
||||
case 1: {
|
||||
@@ -453,16 +256,7 @@ var jsc = (function (meriyah, astring) {
|
||||
}
|
||||
throw 'unexpected structure';
|
||||
})();
|
||||
const found = { n: [], sig: [] };
|
||||
const plainExpressions = block.body.filter((node) => {
|
||||
const n = extract(node);
|
||||
if (n) {
|
||||
found.n.push(n);
|
||||
}
|
||||
const sig = extract$1(node);
|
||||
if (sig) {
|
||||
found.sig.push(sig);
|
||||
}
|
||||
block.body = block.body.filter((node) => {
|
||||
if (node.type === 'ExpressionStatement') {
|
||||
if (node.expression.type === 'AssignmentExpression') {
|
||||
return true;
|
||||
@@ -471,41 +265,65 @@ var jsc = (function (meriyah, astring) {
|
||||
}
|
||||
return true;
|
||||
});
|
||||
block.body = plainExpressions;
|
||||
for (const [name, options] of Object.entries(found)) {
|
||||
const unique = new Set(options.map((x) => JSON.stringify(x)));
|
||||
if (unique.size !== 1) {
|
||||
const message = `found ${unique.size} ${name} function possibilities`;
|
||||
throw (
|
||||
message +
|
||||
(unique.size
|
||||
? `: ${options.map((x) => astring.generate(x)).join(', ')}`
|
||||
: '')
|
||||
);
|
||||
return block.body;
|
||||
}
|
||||
function getSolutions(statements) {
|
||||
const found = { n: [], sig: [] };
|
||||
for (const statement of statements) {
|
||||
const result = extract(statement);
|
||||
if (result) {
|
||||
found.n.push(makeSolver(result, { type: 'Identifier', name: 'n' }));
|
||||
found.sig.push(makeSolver(result, { type: 'Identifier', name: 'sig' }));
|
||||
}
|
||||
plainExpressions.push({
|
||||
type: 'ExpressionStatement',
|
||||
expression: {
|
||||
type: 'AssignmentExpression',
|
||||
operator: '=',
|
||||
left: {
|
||||
type: 'MemberExpression',
|
||||
computed: false,
|
||||
object: { type: 'Identifier', name: '_result' },
|
||||
property: { type: 'Identifier', name: name },
|
||||
},
|
||||
right: options[0],
|
||||
},
|
||||
});
|
||||
}
|
||||
ast.body.splice(0, 0, ...setupNodes);
|
||||
return astring.generate(ast);
|
||||
return found;
|
||||
}
|
||||
function makeSolver(result, ident) {
|
||||
return {
|
||||
type: 'ArrowFunctionExpression',
|
||||
params: [ident],
|
||||
body: {
|
||||
type: 'MemberExpression',
|
||||
object: {
|
||||
type: 'CallExpression',
|
||||
callee: result,
|
||||
arguments: [
|
||||
{
|
||||
type: 'ObjectExpression',
|
||||
properties: [
|
||||
{
|
||||
type: 'Property',
|
||||
key: ident,
|
||||
value: ident,
|
||||
kind: 'init',
|
||||
computed: false,
|
||||
method: false,
|
||||
shorthand: true,
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
optional: false,
|
||||
},
|
||||
computed: false,
|
||||
property: ident,
|
||||
optional: false,
|
||||
},
|
||||
async: false,
|
||||
expression: true,
|
||||
generator: false,
|
||||
};
|
||||
}
|
||||
function getFromPrepared(code) {
|
||||
const resultObj = { n: null, sig: null };
|
||||
Function('_result', code)(resultObj);
|
||||
return resultObj;
|
||||
}
|
||||
function multiTry(generators) {
|
||||
return generateArrowFunction(
|
||||
`\n(_input) => {\n const _results = new Set();\n const errors = [];\n for (const _generator of ${astring.generate({ type: 'ArrayExpression', elements: generators })}) {\n try {\n _results.add(_generator(_input));\n } catch (e) {\n errors.push(e);\n }\n }\n if (!_results.size) {\n throw \`no solutions: \${errors.join(", ")}\`;\n }\n if (_results.size !== 1) {\n throw \`invalid solutions: \${[..._results].map(x => JSON.stringify(x)).join(", ")}\`;\n }\n return _results.values().next().value;\n}\n`,
|
||||
);
|
||||
}
|
||||
function main(input) {
|
||||
const preprocessedPlayer =
|
||||
input.type === 'player'
|
||||
|
||||
@@ -6,6 +6,7 @@ import dataclasses
|
||||
import datetime as dt
|
||||
import hashlib
|
||||
import json
|
||||
import re
|
||||
import traceback
|
||||
import typing
|
||||
import urllib.parse
|
||||
@@ -63,9 +64,9 @@ class YoutubeIEContentProviderLogger(IEContentProviderLogger):
|
||||
if self.log_level <= self.LogLevel.DEBUG:
|
||||
self.__ie.write_debug(self._format_msg(message), only_once=once)
|
||||
|
||||
def info(self, message: str):
|
||||
def info(self, message: str, *, once=False):
|
||||
if self.log_level <= self.LogLevel.INFO:
|
||||
self.__ie.to_screen(self._format_msg(message))
|
||||
self.__ie.to_screen(self._format_msg(message), only_once=once)
|
||||
|
||||
def warning(self, message: str, *, once=False):
|
||||
if self.log_level <= self.LogLevel.WARNING:
|
||||
@@ -433,9 +434,13 @@ def provider_display_list(providers: Iterable[IEContentProvider]):
|
||||
def clean_pot(po_token: str):
|
||||
# Clean and validate the PO Token. This will strip invalid characters off
|
||||
# (e.g. additional url params the user may accidentally include)
|
||||
mobj = re.match(r'([^?&#]+)', urllib.parse.unquote(po_token))
|
||||
if not mobj:
|
||||
raise ValueError('Invalid PO Token')
|
||||
|
||||
try:
|
||||
return base64.urlsafe_b64encode(
|
||||
base64.urlsafe_b64decode(urllib.parse.unquote(po_token))).decode()
|
||||
base64.urlsafe_b64decode(mobj.group(1))).decode()
|
||||
except (binascii.Error, ValueError):
|
||||
raise ValueError('Invalid PO Token')
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@ class IEContentProviderLogger(abc.ABC):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def info(self, message: str):
|
||||
def info(self, message: str, *, once=False):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
|
||||
Reference in New Issue
Block a user