restructured manifest and plugins loading; updated plugins

This commit is contained in:
2025-12-29 22:50:05 -06:00
parent c74f97aca7
commit 21120cd61e
324 changed files with 18088 additions and 15974 deletions

View File

@@ -1,4 +1,8 @@
"""Deprecated - New code should avoid these"""
import base64
import hashlib
import hmac
import json
import warnings
from ..compat.compat_utils import passthrough_module
@@ -28,4 +32,18 @@ def intlist_to_bytes(xs):
return struct.pack('%dB' % len(xs), *xs)
def jwt_encode_hs256(payload_data, key, headers={}):
header_data = {
'alg': 'HS256',
'typ': 'JWT',
}
if headers:
header_data.update(headers)
header_b64 = base64.b64encode(json.dumps(header_data).encode())
payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
signature_b64 = base64.b64encode(h.digest())
return header_b64 + b'.' + payload_b64 + b'.' + signature_b64
compiled_regex_type = type(re.compile(''))

View File

@@ -1,6 +1,4 @@
"""No longer used and new code should not use. Exists only for API compat."""
import asyncio
import atexit
import platform
import struct
import sys
@@ -34,77 +32,6 @@ has_certifi = bool(certifi)
has_websockets = bool(websockets)
class WebSocketsWrapper:
"""Wraps websockets module to use in non-async scopes"""
pool = None
def __init__(self, url, headers=None, connect=True, **ws_kwargs):
self.loop = asyncio.new_event_loop()
# XXX: "loop" is deprecated
self.conn = websockets.connect(
url, extra_headers=headers, ping_interval=None,
close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'), **ws_kwargs)
if connect:
self.__enter__()
atexit.register(self.__exit__, None, None, None)
def __enter__(self):
if not self.pool:
self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
return self
def send(self, *args):
self.run_with_loop(self.pool.send(*args), self.loop)
def recv(self, *args):
return self.run_with_loop(self.pool.recv(*args), self.loop)
def __exit__(self, type, value, traceback):
try:
return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
finally:
self.loop.close()
self._cancel_all_tasks(self.loop)
# taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
# for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class
@staticmethod
def run_with_loop(main, loop):
if not asyncio.iscoroutine(main):
raise ValueError(f'a coroutine was expected, got {main!r}')
try:
return loop.run_until_complete(main)
finally:
loop.run_until_complete(loop.shutdown_asyncgens())
if hasattr(loop, 'shutdown_default_executor'):
loop.run_until_complete(loop.shutdown_default_executor())
@staticmethod
def _cancel_all_tasks(loop):
to_cancel = asyncio.all_tasks(loop)
if not to_cancel:
return
for task in to_cancel:
task.cancel()
# XXX: "loop" is removed in Python 3.10+
loop.run_until_complete(
asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
for task in to_cancel:
if task.cancelled():
continue
if task.exception() is not None:
loop.call_exception_handler({
'message': 'unhandled exception during asyncio.run() shutdown',
'exception': task.exception(),
'task': task,
})
def load_plugins(name, suffix, namespace):
from ..plugins import load_plugins
ret = load_plugins(name, suffix)

View File

@@ -47,12 +47,13 @@ import xml.etree.ElementTree
from . import traversal
from ..compat import (
compat_datetime_from_timestamp,
compat_etree_fromstring,
compat_expanduser,
compat_HTMLParseError,
)
from ..dependencies import xattr
from ..globals import IN_CLI
from ..globals import IN_CLI, WINDOWS_VT_MODE
__name__ = __name__.rsplit('.', 1)[0] # noqa: A001 # Pretend to be the parent module
@@ -94,7 +95,7 @@ TIMEZONE_NAMES = {
# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y'), strict=True))
DATE_FORMATS = (
'%d %B %Y',
@@ -1285,7 +1286,7 @@ def unified_timestamp(date_str, day_first=True):
timetuple = email.utils.parsedate_tz(date_str)
if timetuple:
return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
return calendar.timegm(timetuple) + pm_delta * 3600 - int(timezone.total_seconds())
@partial_application
@@ -1376,6 +1377,7 @@ def datetime_round(dt_, precision='day'):
if precision == 'microsecond':
return dt_
time_scale = 1_000_000
unit_seconds = {
'day': 86400,
'hour': 3600,
@@ -1383,8 +1385,8 @@ def datetime_round(dt_, precision='day'):
'second': 1,
}
roundto = lambda x, n: ((x + n / 2) // n) * n
timestamp = roundto(calendar.timegm(dt_.timetuple()), unit_seconds[precision])
return dt.datetime.fromtimestamp(timestamp, dt.timezone.utc)
timestamp = roundto(calendar.timegm(dt_.timetuple()) + dt_.microsecond / time_scale, unit_seconds[precision])
return compat_datetime_from_timestamp(timestamp)
def hyphenate_date(date_str):
@@ -1875,6 +1877,11 @@ def parse_resolution(s, *, lenient=False):
if mobj:
return {'height': int(mobj.group(1)) * 540}
if lenient:
mobj = re.search(r'(?<!\d)(\d{2,5})w(?![a-zA-Z0-9])', s)
if mobj:
return {'width': int(mobj.group(1))}
return {}
@@ -2051,18 +2058,13 @@ def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
datetime_object = None
try:
if isinstance(timestamp, (int, float)): # unix timestamp
# Using naive datetime here can break timestamp() in Windows
# Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
# Also, dt.datetime.fromtimestamp breaks for negative timestamps
# Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
datetime_object = (dt.datetime.fromtimestamp(0, dt.timezone.utc)
+ dt.timedelta(seconds=timestamp))
datetime_object = compat_datetime_from_timestamp(timestamp)
elif isinstance(timestamp, str): # assume YYYYMMDD
datetime_object = dt.datetime.strptime(timestamp, '%Y%m%d')
date_format = re.sub( # Support %s on windows
r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
return datetime_object.strftime(date_format)
except (ValueError, TypeError, AttributeError):
except (ValueError, TypeError, AttributeError, OverflowError, OSError):
return default
@@ -2413,7 +2415,7 @@ class PlaylistEntries:
if self.is_incomplete:
assert self.is_exhausted
self._entries = [self.MissingEntry] * max(requested_entries or [0])
for i, entry in zip(requested_entries, entries):
for i, entry in zip(requested_entries, entries): # noqa: B905
self._entries[i - 1] = entry
elif isinstance(entries, (list, PagedList, LazyList)):
self._entries = entries
@@ -2943,6 +2945,7 @@ def mimetype2ext(mt, default=NO_DEFAULT):
'x-ms-asf': 'asf',
'x-ms-wmv': 'wmv',
'x-msvideo': 'avi',
'vnd.dlna.mpeg-tts': 'mpeg',
# application (streaming playlists)
'dash+xml': 'mpd',
@@ -2961,6 +2964,7 @@ def mimetype2ext(mt, default=NO_DEFAULT):
'audio/x-matroska': 'mka',
'audio/x-mpegurl': 'm3u',
'aacp': 'aac',
'flac': 'flac',
'midi': 'mid',
'ogg': 'ogg',
'wav': 'wav',
@@ -3105,21 +3109,15 @@ def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
def urlhandle_detect_ext(url_handle, default=NO_DEFAULT):
getheader = url_handle.headers.get
cd = getheader('Content-Disposition')
if cd:
m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
if m:
e = determine_ext(m.group('filename'), default_ext=None)
if e:
return e
if cd := getheader('Content-Disposition'):
if m := re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd):
if ext := determine_ext(m.group('filename'), default_ext=None):
return ext
meta_ext = getheader('x-amz-meta-name')
if meta_ext:
e = meta_ext.rpartition('.')[2]
if e:
return e
return mimetype2ext(getheader('Content-Type'), default=default)
return (
determine_ext(getheader('x-amz-meta-name'), default_ext=None)
or getheader('x-amz-meta-file-type')
or mimetype2ext(getheader('Content-Type'), default=default))
def encode_data_uri(data, mime_type):
@@ -3186,7 +3184,7 @@ def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
return len(remove_terminal_sequences(string).replace('\t', ''))
def get_max_lens(table):
return [max(width(str(v)) for v in col) for col in zip(*table)]
return [max(width(str(v)) for v in col) for col in zip(*table, strict=True)]
def filter_using_list(row, filter_array):
return [col for take, col in itertools.zip_longest(filter_array, row, fillvalue=True) if take]
@@ -3542,7 +3540,7 @@ def dfxp2srt(dfxp_data):
continue
default_style.update(style)
for para, index in zip(paras, itertools.count(1)):
for para, index in zip(paras, itertools.count(1), strict=False):
begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
end_time = parse_dfxp_time_expr(para.attrib.get('end'))
dur = parse_dfxp_time_expr(para.attrib.get('dur'))
@@ -4739,38 +4737,49 @@ def time_seconds(**kwargs):
return time.time() + dt.timedelta(**kwargs).total_seconds()
# create a JSON Web Signature (jws) with HS256 algorithm
# the resulting format is in JWS Compact Serialization
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers={}):
def jwt_encode(payload_data, key, *, alg='HS256', headers=None):
assert alg in ('HS256',), f'Unsupported algorithm "{alg}"'
def jwt_json_bytes(obj):
return json.dumps(obj, separators=(',', ':')).encode()
def jwt_b64encode(bytestring):
return base64.urlsafe_b64encode(bytestring).rstrip(b'=')
header_data = {
'alg': 'HS256',
'alg': alg,
'typ': 'JWT',
}
if headers:
header_data.update(headers)
header_b64 = base64.b64encode(json.dumps(header_data).encode())
payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
# Allow re-ordering of keys if both 'alg' and 'typ' are present
if 'alg' in headers and 'typ' in headers:
header_data = headers
else:
header_data.update(headers)
header_b64 = jwt_b64encode(jwt_json_bytes(header_data))
payload_b64 = jwt_b64encode(jwt_json_bytes(payload_data))
# HS256 is the only algorithm currently supported
h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
signature_b64 = base64.b64encode(h.digest())
return header_b64 + b'.' + payload_b64 + b'.' + signature_b64
signature_b64 = jwt_b64encode(h.digest())
return (header_b64 + b'.' + payload_b64 + b'.' + signature_b64).decode()
# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
header_b64, payload_b64, signature_b64 = jwt.split('.')
_header_b64, payload_b64, _signature_b64 = jwt.split('.')
# add trailing ='s that may have been stripped, superfluous ='s are ignored
return json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
WINDOWS_VT_MODE = False if os.name == 'nt' else None
@functools.cache
def supports_terminal_sequences(stream):
if os.name == 'nt':
if not WINDOWS_VT_MODE:
if not WINDOWS_VT_MODE.value:
return False
elif not os.getenv('TERM'):
return False
@@ -4807,8 +4816,7 @@ def windows_enable_vt_mode():
finally:
os.close(handle)
global WINDOWS_VT_MODE
WINDOWS_VT_MODE = True
WINDOWS_VT_MODE.value = True
supports_terminal_sequences.cache_clear()
@@ -4846,7 +4854,7 @@ def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
return [
merge_dicts(
{'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
dict(zip(_keys, max_dimensions)), thumbnail)
dict(zip(_keys, max_dimensions, strict=True)), thumbnail)
for thumbnail in thumbnails
]

View File

@@ -0,0 +1 @@
# Utility functions for handling web input based on commonly used JavaScript libraries

View File

@@ -0,0 +1,167 @@
from __future__ import annotations
import array
import base64
import datetime as dt
import math
import re
from .._utils import parse_iso8601
TYPE_CHECKING = False
if TYPE_CHECKING:
import collections.abc
import typing
T = typing.TypeVar('T')
_ARRAY_TYPE_LOOKUP = {
'Int8Array': 'b',
'Uint8Array': 'B',
'Uint8ClampedArray': 'B',
'Int16Array': 'h',
'Uint16Array': 'H',
'Int32Array': 'i',
'Uint32Array': 'I',
'Float32Array': 'f',
'Float64Array': 'd',
'BigInt64Array': 'l',
'BigUint64Array': 'L',
'ArrayBuffer': 'B',
}
def parse_iter(parsed: typing.Any, /, *, revivers: dict[str, collections.abc.Callable[[list], typing.Any]] | None = None):
# based on https://github.com/Rich-Harris/devalue/blob/f3fd2aa93d79f21746555671f955a897335edb1b/src/parse.js
resolved = {
-1: None,
-2: None,
-3: math.nan,
-4: math.inf,
-5: -math.inf,
-6: -0.0,
}
if isinstance(parsed, int) and not isinstance(parsed, bool):
if parsed not in resolved or parsed == -2:
raise ValueError('invalid integer input')
return resolved[parsed]
elif not isinstance(parsed, list):
raise ValueError('expected int or list as input')
elif not parsed:
raise ValueError('expected a non-empty list as input')
if revivers is None:
revivers = {}
return_value = [None]
stack: list[tuple] = [(return_value, 0, 0)]
while stack:
target, index, source = stack.pop()
if isinstance(source, tuple):
name, source, reviver = source
try:
resolved[source] = target[index] = reviver(target[index])
except Exception as error:
yield TypeError(f'failed to parse {source} as {name!r}: {error}')
resolved[source] = target[index] = None
continue
if source in resolved:
target[index] = resolved[source]
continue
# guard against Python negative indexing
if source < 0:
yield IndexError(f'invalid index: {source!r}')
continue
try:
value = parsed[source]
except IndexError as error:
yield error
continue
if isinstance(value, list):
if value and isinstance(value[0], str):
# TODO: implement zips `strict=True`
if reviver := revivers.get(value[0]):
if value[1] == source:
# XXX: avoid infinite loop
yield IndexError(f'{value[0]!r} cannot point to itself (index: {source})')
continue
# inverse order: resolve index, revive value
stack.append((target, index, (value[0], value[1], reviver)))
stack.append((target, index, value[1]))
continue
elif value[0] == 'Date':
try:
result = dt.datetime.fromtimestamp(parse_iso8601(value[1]), tz=dt.timezone.utc)
except Exception:
yield ValueError(f'invalid date: {value[1]!r}')
result = None
elif value[0] == 'Set':
result = [None] * (len(value) - 1)
for offset, new_source in enumerate(value[1:]):
stack.append((result, offset, new_source))
elif value[0] == 'Map':
result = []
for key, new_source in zip(*(iter(value[1:]),) * 2, strict=True):
pair = [None, None]
stack.append((pair, 0, key))
stack.append((pair, 1, new_source))
result.append(pair)
elif value[0] == 'RegExp':
# XXX: use jsinterp to translate regex flags
# currently ignores `value[2]`
result = re.compile(value[1])
elif value[0] == 'Object':
result = value[1]
elif value[0] == 'BigInt':
result = int(value[1])
elif value[0] == 'null':
result = {}
for key, new_source in zip(*(iter(value[1:]),) * 2, strict=True):
stack.append((result, key, new_source))
elif value[0] in _ARRAY_TYPE_LOOKUP:
typecode = _ARRAY_TYPE_LOOKUP[value[0]]
data = base64.b64decode(value[1])
result = array.array(typecode, data).tolist()
else:
yield TypeError(f'invalid type at {source}: {value[0]!r}')
result = None
else:
result = len(value) * [None]
for offset, new_source in enumerate(value):
stack.append((result, offset, new_source))
elif isinstance(value, dict):
result = {}
for key, new_source in value.items():
stack.append((result, key, new_source))
else:
result = value
target[index] = resolved[source] = result
return return_value[0]
def parse(parsed: typing.Any, /, *, revivers: dict[str, collections.abc.Callable[[typing.Any], typing.Any]] | None = None):
generator = parse_iter(parsed, revivers=revivers)
while True:
try:
raise generator.send(None)
except StopIteration as error:
return error.value

View File

@@ -10,52 +10,15 @@ import urllib.request
if typing.TYPE_CHECKING:
T = typing.TypeVar('T')
from ._utils import NO_DEFAULT, remove_start
from ._utils import NO_DEFAULT, remove_start, format_field
from .traversal import traverse_obj
def random_user_agent():
_USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
_CHROME_VERSIONS = (
'90.0.4430.212',
'90.0.4430.24',
'90.0.4430.70',
'90.0.4430.72',
'90.0.4430.85',
'90.0.4430.93',
'91.0.4472.101',
'91.0.4472.106',
'91.0.4472.114',
'91.0.4472.124',
'91.0.4472.164',
'91.0.4472.19',
'91.0.4472.77',
'92.0.4515.107',
'92.0.4515.115',
'92.0.4515.131',
'92.0.4515.159',
'92.0.4515.43',
'93.0.4556.0',
'93.0.4577.15',
'93.0.4577.63',
'93.0.4577.82',
'94.0.4606.41',
'94.0.4606.54',
'94.0.4606.61',
'94.0.4606.71',
'94.0.4606.81',
'94.0.4606.85',
'95.0.4638.17',
'95.0.4638.50',
'95.0.4638.54',
'95.0.4638.69',
'95.0.4638.74',
'96.0.4664.18',
'96.0.4664.45',
'96.0.4664.55',
'96.0.4664.93',
'97.0.4692.20',
)
return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
USER_AGENT_TMPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{} Safari/537.36'
# Target versions released within the last ~6 months
CHROME_MAJOR_VERSION_RANGE = (134, 140)
return USER_AGENT_TMPL.format(f'{random.randint(*CHROME_MAJOR_VERSION_RANGE)}.0.0.0')
class HTTPHeaderDict(dict):
@@ -278,3 +241,16 @@ def normalize_url(url):
query=escape_rfc3986(url_parsed.query),
fragment=escape_rfc3986(url_parsed.fragment),
).geturl()
def select_proxy(url, proxies):
"""Unified proxy selector for all backends"""
url_components = urllib.parse.urlparse(url)
if 'no' in proxies:
hostport = url_components.hostname + format_field(url_components.port, None, ':%s')
if urllib.request.proxy_bypass_environment(hostport, {'no': proxies['no']}):
return
elif urllib.request.proxy_bypass(hostport): # check system settings
return
return traverse_obj(proxies, url_components.scheme or 'http', 'all')