restructured manifest and plugins loading; updated plugins

2025-12-29 22:50:05 -06:00
parent c74f97aca7
commit 21120cd61e
324 changed files with 18088 additions and 15974 deletions
--- a/plugins/youtube_download/yt_dlp/utils/_deprecated.py
+++ b/plugins/youtube_download/yt_dlp/utils/_deprecated.py
@@ -1,4 +1,8 @@
 """Deprecated - New code should avoid these"""
+import base64
+import hashlib
+import hmac
+import json
 import warnings

 from ..compat.compat_utils import passthrough_module
@@ -28,4 +32,18 @@ def intlist_to_bytes(xs):
    return struct.pack('%dB' % len(xs), *xs)


+def jwt_encode_hs256(payload_data, key, headers={}):
+    header_data = {
+        'alg': 'HS256',
+        'typ': 'JWT',
+    }
+    if headers:
+        header_data.update(headers)
+    header_b64 = base64.b64encode(json.dumps(header_data).encode())
+    payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
+    h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
+    signature_b64 = base64.b64encode(h.digest())
+    return header_b64 + b'.' + payload_b64 + b'.' + signature_b64
+
+
 compiled_regex_type = type(re.compile(''))
--- a/plugins/youtube_download/yt_dlp/utils/_legacy.py
+++ b/plugins/youtube_download/yt_dlp/utils/_legacy.py
@@ -1,6 +1,4 @@
 """No longer used and new code should not use. Exists only for API compat."""
-import asyncio
-import atexit
 import platform
 import struct
 import sys
@@ -34,77 +32,6 @@ has_certifi = bool(certifi)
 has_websockets = bool(websockets)


-class WebSocketsWrapper:
-    """Wraps websockets module to use in non-async scopes"""
-    pool = None
-
-    def __init__(self, url, headers=None, connect=True, **ws_kwargs):
-        self.loop = asyncio.new_event_loop()
-        # XXX: "loop" is deprecated
-        self.conn = websockets.connect(
-            url, extra_headers=headers, ping_interval=None,
-            close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'), **ws_kwargs)
-        if connect:
-            self.__enter__()
-        atexit.register(self.__exit__, None, None, None)
-
-    def __enter__(self):
-        if not self.pool:
-            self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
-        return self
-
-    def send(self, *args):
-        self.run_with_loop(self.pool.send(*args), self.loop)
-
-    def recv(self, *args):
-        return self.run_with_loop(self.pool.recv(*args), self.loop)
-
-    def __exit__(self, type, value, traceback):
-        try:
-            return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
-        finally:
-            self.loop.close()
-            self._cancel_all_tasks(self.loop)
-
-    # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
-    # for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class
-    @staticmethod
-    def run_with_loop(main, loop):
-        if not asyncio.iscoroutine(main):
-            raise ValueError(f'a coroutine was expected, got {main!r}')
-
-        try:
-            return loop.run_until_complete(main)
-        finally:
-            loop.run_until_complete(loop.shutdown_asyncgens())
-            if hasattr(loop, 'shutdown_default_executor'):
-                loop.run_until_complete(loop.shutdown_default_executor())
-
-    @staticmethod
-    def _cancel_all_tasks(loop):
-        to_cancel = asyncio.all_tasks(loop)
-
-        if not to_cancel:
-            return
-
-        for task in to_cancel:
-            task.cancel()
-
-        # XXX: "loop" is removed in Python 3.10+
-        loop.run_until_complete(
-            asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
-
-        for task in to_cancel:
-            if task.cancelled():
-                continue
-            if task.exception() is not None:
-                loop.call_exception_handler({
-                    'message': 'unhandled exception during asyncio.run() shutdown',
-                    'exception': task.exception(),
-                    'task': task,
-                })
-
-
 def load_plugins(name, suffix, namespace):
    from ..plugins import load_plugins
    ret = load_plugins(name, suffix)
--- a/plugins/youtube_download/yt_dlp/utils/_utils.py
+++ b/plugins/youtube_download/yt_dlp/utils/_utils.py
@@ -47,12 +47,13 @@ import xml.etree.ElementTree
 from . import traversal

 from ..compat import (
+    compat_datetime_from_timestamp,
    compat_etree_fromstring,
    compat_expanduser,
    compat_HTMLParseError,
 )
 from ..dependencies import xattr
-from ..globals import IN_CLI
+from ..globals import IN_CLI, WINDOWS_VT_MODE

 __name__ = __name__.rsplit('.', 1)[0]  # noqa: A001 # Pretend to be the parent module

@@ -94,7 +95,7 @@ TIMEZONE_NAMES = {
 # needed for sanitizing filenames in restricted mode
 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
-                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
+                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y'), strict=True))

 DATE_FORMATS = (
    '%d %B %Y',
@@ -1285,7 +1286,7 @@ def unified_timestamp(date_str, day_first=True):

    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
-        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
+        return calendar.timegm(timetuple) + pm_delta * 3600 - int(timezone.total_seconds())


 @partial_application
@@ -1376,6 +1377,7 @@ def datetime_round(dt_, precision='day'):
    if precision == 'microsecond':
        return dt_

+    time_scale = 1_000_000
    unit_seconds = {
        'day': 86400,
        'hour': 3600,
@@ -1383,8 +1385,8 @@ def datetime_round(dt_, precision='day'):
        'second': 1,
    }
    roundto = lambda x, n: ((x + n / 2) // n) * n
-    timestamp = roundto(calendar.timegm(dt_.timetuple()), unit_seconds[precision])
-    return dt.datetime.fromtimestamp(timestamp, dt.timezone.utc)
+    timestamp = roundto(calendar.timegm(dt_.timetuple()) + dt_.microsecond / time_scale, unit_seconds[precision])
+    return compat_datetime_from_timestamp(timestamp)


 def hyphenate_date(date_str):
@@ -1875,6 +1877,11 @@ def parse_resolution(s, *, lenient=False):
    if mobj:
        return {'height': int(mobj.group(1)) * 540}

+    if lenient:
+        mobj = re.search(r'(?<!\d)(\d{2,5})w(?![a-zA-Z0-9])', s)
+        if mobj:
+            return {'width': int(mobj.group(1))}
+
    return {}


@@ -2051,18 +2058,13 @@ def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
    datetime_object = None
    try:
        if isinstance(timestamp, (int, float)):  # unix timestamp
-            # Using naive datetime here can break timestamp() in Windows
-            # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
-            # Also, dt.datetime.fromtimestamp breaks for negative timestamps
-            # Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
-            datetime_object = (dt.datetime.fromtimestamp(0, dt.timezone.utc)
-                               + dt.timedelta(seconds=timestamp))
+            datetime_object = compat_datetime_from_timestamp(timestamp)
        elif isinstance(timestamp, str):  # assume YYYYMMDD
            datetime_object = dt.datetime.strptime(timestamp, '%Y%m%d')
        date_format = re.sub(  # Support %s on windows
            r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
        return datetime_object.strftime(date_format)
-    except (ValueError, TypeError, AttributeError):
+    except (ValueError, TypeError, AttributeError, OverflowError, OSError):
        return default


@@ -2413,7 +2415,7 @@ class PlaylistEntries:
        if self.is_incomplete:
            assert self.is_exhausted
            self._entries = [self.MissingEntry] * max(requested_entries or [0])
-            for i, entry in zip(requested_entries, entries):
+            for i, entry in zip(requested_entries, entries):  # noqa: B905
                self._entries[i - 1] = entry
        elif isinstance(entries, (list, PagedList, LazyList)):
            self._entries = entries
@@ -2943,6 +2945,7 @@ def mimetype2ext(mt, default=NO_DEFAULT):
        'x-ms-asf': 'asf',
        'x-ms-wmv': 'wmv',
        'x-msvideo': 'avi',
+        'vnd.dlna.mpeg-tts': 'mpeg',

        # application (streaming playlists)
        'dash+xml': 'mpd',
@@ -2961,6 +2964,7 @@ def mimetype2ext(mt, default=NO_DEFAULT):
        'audio/x-matroska': 'mka',
        'audio/x-mpegurl': 'm3u',
        'aacp': 'aac',
+        'flac': 'flac',
        'midi': 'mid',
        'ogg': 'ogg',
        'wav': 'wav',
@@ -3105,21 +3109,15 @@ def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
 def urlhandle_detect_ext(url_handle, default=NO_DEFAULT):
    getheader = url_handle.headers.get

-    cd = getheader('Content-Disposition')
-    if cd:
-        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
-        if m:
-            e = determine_ext(m.group('filename'), default_ext=None)
-            if e:
-                return e
+    if cd := getheader('Content-Disposition'):
+        if m := re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd):
+            if ext := determine_ext(m.group('filename'), default_ext=None):
+                return ext

-    meta_ext = getheader('x-amz-meta-name')
-    if meta_ext:
-        e = meta_ext.rpartition('.')[2]
-        if e:
-            return e
-
-    return mimetype2ext(getheader('Content-Type'), default=default)
+    return (
+        determine_ext(getheader('x-amz-meta-name'), default_ext=None)
+        or getheader('x-amz-meta-file-type')
+        or mimetype2ext(getheader('Content-Type'), default=default))


 def encode_data_uri(data, mime_type):
@@ -3186,7 +3184,7 @@ def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
        return len(remove_terminal_sequences(string).replace('\t', ''))

    def get_max_lens(table):
-        return [max(width(str(v)) for v in col) for col in zip(*table)]
+        return [max(width(str(v)) for v in col) for col in zip(*table, strict=True)]

    def filter_using_list(row, filter_array):
        return [col for take, col in itertools.zip_longest(filter_array, row, fillvalue=True) if take]
@@ -3542,7 +3540,7 @@ def dfxp2srt(dfxp_data):
            continue
        default_style.update(style)

-    for para, index in zip(paras, itertools.count(1)):
+    for para, index in zip(paras, itertools.count(1), strict=False):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
@@ -4739,38 +4737,49 @@ def time_seconds(**kwargs):
    return time.time() + dt.timedelta(**kwargs).total_seconds()


-# create a JSON Web Signature (jws) with HS256 algorithm
-# the resulting format is in JWS Compact Serialization
 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
 # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
-def jwt_encode_hs256(payload_data, key, headers={}):
+def jwt_encode(payload_data, key, *, alg='HS256', headers=None):
+    assert alg in ('HS256',), f'Unsupported algorithm "{alg}"'
+
+    def jwt_json_bytes(obj):
+        return json.dumps(obj, separators=(',', ':')).encode()
+
+    def jwt_b64encode(bytestring):
+        return base64.urlsafe_b64encode(bytestring).rstrip(b'=')
+
    header_data = {
-        'alg': 'HS256',
+        'alg': alg,
        'typ': 'JWT',
    }
    if headers:
-        header_data.update(headers)
-    header_b64 = base64.b64encode(json.dumps(header_data).encode())
-    payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
+        # Allow re-ordering of keys if both 'alg' and 'typ' are present
+        if 'alg' in headers and 'typ' in headers:
+            header_data = headers
+        else:
+            header_data.update(headers)
+
+    header_b64 = jwt_b64encode(jwt_json_bytes(header_data))
+    payload_b64 = jwt_b64encode(jwt_json_bytes(payload_data))
+
+    # HS256 is the only algorithm currently supported
    h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
-    signature_b64 = base64.b64encode(h.digest())
-    return header_b64 + b'.' + payload_b64 + b'.' + signature_b64
+    signature_b64 = jwt_b64encode(h.digest())
+
+    return (header_b64 + b'.' + payload_b64 + b'.' + signature_b64).decode()


 # can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
 def jwt_decode_hs256(jwt):
-    header_b64, payload_b64, signature_b64 = jwt.split('.')
+    _header_b64, payload_b64, _signature_b64 = jwt.split('.')
    # add trailing ='s that may have been stripped, superfluous ='s are ignored
    return json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))


-WINDOWS_VT_MODE = False if os.name == 'nt' else None
-
-
 @functools.cache
 def supports_terminal_sequences(stream):
    if os.name == 'nt':
-        if not WINDOWS_VT_MODE:
+        if not WINDOWS_VT_MODE.value:
            return False
    elif not os.getenv('TERM'):
        return False
@@ -4807,8 +4816,7 @@ def windows_enable_vt_mode():
    finally:
        os.close(handle)

-    global WINDOWS_VT_MODE
-    WINDOWS_VT_MODE = True
+    WINDOWS_VT_MODE.value = True
    supports_terminal_sequences.cache_clear()


@@ -4846,7 +4854,7 @@ def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
    return [
        merge_dicts(
            {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
-            dict(zip(_keys, max_dimensions)), thumbnail)
+            dict(zip(_keys, max_dimensions, strict=True)), thumbnail)
        for thumbnail in thumbnails
    ]

--- a/plugins/youtube_download/yt_dlp/utils/jslib/__init__.py
+++ b/plugins/youtube_download/yt_dlp/utils/jslib/__init__.py
@@ -0,0 +1 @@
+# Utility functions for handling web input based on commonly used JavaScript libraries
--- a/plugins/youtube_download/yt_dlp/utils/jslib/devalue.py
+++ b/plugins/youtube_download/yt_dlp/utils/jslib/devalue.py
@@ -0,0 +1,167 @@
+from __future__ import annotations
+
+import array
+import base64
+import datetime as dt
+import math
+import re
+
+from .._utils import parse_iso8601
+
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    import collections.abc
+    import typing
+
+    T = typing.TypeVar('T')
+
+
+_ARRAY_TYPE_LOOKUP = {
+    'Int8Array': 'b',
+    'Uint8Array': 'B',
+    'Uint8ClampedArray': 'B',
+    'Int16Array': 'h',
+    'Uint16Array': 'H',
+    'Int32Array': 'i',
+    'Uint32Array': 'I',
+    'Float32Array': 'f',
+    'Float64Array': 'd',
+    'BigInt64Array': 'l',
+    'BigUint64Array': 'L',
+    'ArrayBuffer': 'B',
+}
+
+
+def parse_iter(parsed: typing.Any, /, *, revivers: dict[str, collections.abc.Callable[[list], typing.Any]] | None = None):
+    # based on https://github.com/Rich-Harris/devalue/blob/f3fd2aa93d79f21746555671f955a897335edb1b/src/parse.js
+    resolved = {
+        -1: None,
+        -2: None,
+        -3: math.nan,
+        -4: math.inf,
+        -5: -math.inf,
+        -6: -0.0,
+    }
+
+    if isinstance(parsed, int) and not isinstance(parsed, bool):
+        if parsed not in resolved or parsed == -2:
+            raise ValueError('invalid integer input')
+        return resolved[parsed]
+    elif not isinstance(parsed, list):
+        raise ValueError('expected int or list as input')
+    elif not parsed:
+        raise ValueError('expected a non-empty list as input')
+
+    if revivers is None:
+        revivers = {}
+    return_value = [None]
+    stack: list[tuple] = [(return_value, 0, 0)]
+
+    while stack:
+        target, index, source = stack.pop()
+        if isinstance(source, tuple):
+            name, source, reviver = source
+            try:
+                resolved[source] = target[index] = reviver(target[index])
+            except Exception as error:
+                yield TypeError(f'failed to parse {source} as {name!r}: {error}')
+                resolved[source] = target[index] = None
+            continue
+
+        if source in resolved:
+            target[index] = resolved[source]
+            continue
+
+        # guard against Python negative indexing
+        if source < 0:
+            yield IndexError(f'invalid index: {source!r}')
+            continue
+
+        try:
+            value = parsed[source]
+        except IndexError as error:
+            yield error
+            continue
+
+        if isinstance(value, list):
+            if value and isinstance(value[0], str):
+                # TODO: implement zips `strict=True`
+                if reviver := revivers.get(value[0]):
+                    if value[1] == source:
+                        # XXX: avoid infinite loop
+                        yield IndexError(f'{value[0]!r} cannot point to itself (index: {source})')
+                        continue
+                    # inverse order: resolve index, revive value
+                    stack.append((target, index, (value[0], value[1], reviver)))
+                    stack.append((target, index, value[1]))
+                    continue
+
+                elif value[0] == 'Date':
+                    try:
+                        result = dt.datetime.fromtimestamp(parse_iso8601(value[1]), tz=dt.timezone.utc)
+                    except Exception:
+                        yield ValueError(f'invalid date: {value[1]!r}')
+                        result = None
+
+                elif value[0] == 'Set':
+                    result = [None] * (len(value) - 1)
+                    for offset, new_source in enumerate(value[1:]):
+                        stack.append((result, offset, new_source))
+
+                elif value[0] == 'Map':
+                    result = []
+                    for key, new_source in zip(*(iter(value[1:]),) * 2, strict=True):
+                        pair = [None, None]
+                        stack.append((pair, 0, key))
+                        stack.append((pair, 1, new_source))
+                        result.append(pair)
+
+                elif value[0] == 'RegExp':
+                    # XXX: use jsinterp to translate regex flags
+                    #      currently ignores `value[2]`
+                    result = re.compile(value[1])
+
+                elif value[0] == 'Object':
+                    result = value[1]
+
+                elif value[0] == 'BigInt':
+                    result = int(value[1])
+
+                elif value[0] == 'null':
+                    result = {}
+                    for key, new_source in zip(*(iter(value[1:]),) * 2, strict=True):
+                        stack.append((result, key, new_source))
+
+                elif value[0] in _ARRAY_TYPE_LOOKUP:
+                    typecode = _ARRAY_TYPE_LOOKUP[value[0]]
+                    data = base64.b64decode(value[1])
+                    result = array.array(typecode, data).tolist()
+
+                else:
+                    yield TypeError(f'invalid type at {source}: {value[0]!r}')
+                    result = None
+            else:
+                result = len(value) * [None]
+                for offset, new_source in enumerate(value):
+                    stack.append((result, offset, new_source))
+
+        elif isinstance(value, dict):
+            result = {}
+            for key, new_source in value.items():
+                stack.append((result, key, new_source))
+
+        else:
+            result = value
+
+        target[index] = resolved[source] = result
+
+    return return_value[0]
+
+
+def parse(parsed: typing.Any, /, *, revivers: dict[str, collections.abc.Callable[[typing.Any], typing.Any]] | None = None):
+    generator = parse_iter(parsed, revivers=revivers)
+    while True:
+        try:
+            raise generator.send(None)
+        except StopIteration as error:
+            return error.value
--- a/plugins/youtube_download/yt_dlp/utils/networking.py
+++ b/plugins/youtube_download/yt_dlp/utils/networking.py
@@ -10,52 +10,15 @@ import urllib.request
 if typing.TYPE_CHECKING:
    T = typing.TypeVar('T')

-from ._utils import NO_DEFAULT, remove_start
+from ._utils import NO_DEFAULT, remove_start, format_field
+from .traversal import traverse_obj


 def random_user_agent():
-    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
-    _CHROME_VERSIONS = (
-        '90.0.4430.212',
-        '90.0.4430.24',
-        '90.0.4430.70',
-        '90.0.4430.72',
-        '90.0.4430.85',
-        '90.0.4430.93',
-        '91.0.4472.101',
-        '91.0.4472.106',
-        '91.0.4472.114',
-        '91.0.4472.124',
-        '91.0.4472.164',
-        '91.0.4472.19',
-        '91.0.4472.77',
-        '92.0.4515.107',
-        '92.0.4515.115',
-        '92.0.4515.131',
-        '92.0.4515.159',
-        '92.0.4515.43',
-        '93.0.4556.0',
-        '93.0.4577.15',
-        '93.0.4577.63',
-        '93.0.4577.82',
-        '94.0.4606.41',
-        '94.0.4606.54',
-        '94.0.4606.61',
-        '94.0.4606.71',
-        '94.0.4606.81',
-        '94.0.4606.85',
-        '95.0.4638.17',
-        '95.0.4638.50',
-        '95.0.4638.54',
-        '95.0.4638.69',
-        '95.0.4638.74',
-        '96.0.4664.18',
-        '96.0.4664.45',
-        '96.0.4664.55',
-        '96.0.4664.93',
-        '97.0.4692.20',
-    )
-    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
+    USER_AGENT_TMPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{} Safari/537.36'
+    # Target versions released within the last ~6 months
+    CHROME_MAJOR_VERSION_RANGE = (134, 140)
+    return USER_AGENT_TMPL.format(f'{random.randint(*CHROME_MAJOR_VERSION_RANGE)}.0.0.0')


 class HTTPHeaderDict(dict):
@@ -278,3 +241,16 @@ def normalize_url(url):
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment),
    ).geturl()
+
+
+def select_proxy(url, proxies):
+    """Unified proxy selector for all backends"""
+    url_components = urllib.parse.urlparse(url)
+    if 'no' in proxies:
+        hostport = url_components.hostname + format_field(url_components.port, None, ':%s')
+        if urllib.request.proxy_bypass_environment(hostport, {'no': proxies['no']}):
+            return
+        elif urllib.request.proxy_bypass(hostport):  # check system settings
+            return
+
+    return traverse_obj(proxies, url_components.scheme or 'http', 'all')
				`@@ -0,0 +1 @@`
				`# Utility functions for handling web input based on commonly used JavaScript libraries`