Updated ytdlp version
parent 5264103f31
commit ee3e042b1b
@@ -13,6 +13,7 @@ import optparse
import os
import re
import sys
import traceback

from .compat import compat_shlex_quote
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
@@ -56,11 +57,11 @@ from .utils import (
    read_stdin,
    render_table,
    setproctitle,
    std_headers,
    traverse_obj,
    variadic,
    write_string,
)
from .utils.networking import std_headers
from .YoutubeDL import YoutubeDL

_IN_CLI = False
@@ -187,8 +188,8 @@ def validate_options(opts):
        raise ValueError(f'{max_name} "{max_val}" must be greater than or equal to {min_name} "{min_val}"')

    # Usernames and passwords
    validate(not opts.usenetrc or (opts.username is None and opts.password is None),
             '.netrc', msg='using {name} conflicts with giving username/password')
    validate(sum(map(bool, (opts.usenetrc, opts.netrc_cmd, opts.username))) <= 1, '.netrc',
             msg='{name}, netrc command and username/password are mutually exclusive options')
    validate(opts.password is None or opts.username is not None, 'account username', msg='{name} missing')
    validate(opts.ap_password is None or opts.ap_username is not None,
             'TV Provider account username', msg='{name} missing')
@@ -318,31 +319,50 @@ def validate_options(opts):
    if outtmpl_default == '':
        opts.skip_download = None
        del opts.outtmpl['default']
    if outtmpl_default and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio:
        raise ValueError(
            'Cannot download a video and extract audio into the same file! '
            f'Use "{outtmpl_default}.%(ext)s" instead of "{outtmpl_default}" as the output template')

    def parse_chapters(name, value):
        chapters, ranges = [], []
    def parse_chapters(name, value, advanced=False):
        parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x)
        for regex in value or []:
            if regex.startswith('*'):
                for range_ in map(str.strip, regex[1:].split(',')):
                    mobj = range_ != '-' and re.fullmatch(r'([^-]+)?\s*-\s*([^-]+)?', range_)
                    dur = mobj and (parse_timestamp(mobj.group(1) or '0'), parse_timestamp(mobj.group(2) or 'inf'))
                    if None in (dur or [None]):
                        raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "*start-end"')
                    ranges.append(dur)
                continue
            try:
                chapters.append(re.compile(regex))
            except re.error as err:
                raise ValueError(f'invalid {name} regex "{regex}" - {err}')
        return chapters, ranges
        TIMESTAMP_RE = r'''(?x)(?:
            (?P<start_sign>-?)(?P<start>[^-]+)
        )?\s*-\s*(?:
            (?P<end_sign>-?)(?P<end>[^-]+)
        )?'''

    opts.remove_chapters, opts.remove_ranges = parse_chapters('--remove-chapters', opts.remove_chapters)
    opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges))
        chapters, ranges, from_url = [], [], False
        for regex in value or []:
            if advanced and regex == '*from-url':
                from_url = True
                continue
            elif not regex.startswith('*'):
                try:
                    chapters.append(re.compile(regex))
                except re.error as err:
                    raise ValueError(f'invalid {name} regex "{regex}" - {err}')
                continue

            for range_ in map(str.strip, regex[1:].split(',')):
                mobj = range_ != '-' and re.fullmatch(TIMESTAMP_RE, range_)
                dur = mobj and [parse_timestamp(mobj.group('start') or '0'), parse_timestamp(mobj.group('end') or 'inf')]
                signs = mobj and (mobj.group('start_sign'), mobj.group('end_sign'))

                err = None
                if None in (dur or [None]):
                    err = 'Must be of the form "*start-end"'
                elif not advanced and any(signs):
                    err = 'Negative timestamps are not allowed'
                else:
                    dur[0] *= -1 if signs[0] else 1
                    dur[1] *= -1 if signs[1] else 1
                    if dur[1] == float('-inf'):
                        err = '"-inf" is not a valid end'
                if err:
                    raise ValueError(f'invalid {name} time range "{regex}". {err}')
                ranges.append(dur)

        return chapters, ranges, from_url

    opts.remove_chapters, opts.remove_ranges, _ = parse_chapters('--remove-chapters', opts.remove_chapters)
    opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges, True))
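Note: the new TIMESTAMP_RE above accepts signed endpoints, which is what lets --download-sections address offsets from the end of the video. A minimal sketch of how a negative range parses (uses only the regex from the hunk above; not part of the diff):

    import re

    TIMESTAMP_RE = r'''(?x)(?:
        (?P<start_sign>-?)(?P<start>[^-]+)
    )?\s*-\s*(?:
        (?P<end_sign>-?)(?P<end>[^-]+)
    )?'''

    # "*-30--10" (the leading '*' marks a section range) selects from 30s
    # before the end of the video until 10s before the end
    mobj = re.fullmatch(TIMESTAMP_RE, '-30--10')
    assert (mobj.group('start_sign'), mobj.group('start')) == ('-', '30')
    assert (mobj.group('end_sign'), mobj.group('end')) == ('-', '10')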

    # Cookies from browser
    if opts.cookiesfrombrowser:
@@ -400,14 +420,19 @@ def validate_options(opts):
        except Exception as err:
            raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}')

    geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country
    if geo_bypass_code is not None:
        opts.geo_bypass_country, opts.geo_bypass_ip_block = None, None
    if opts.geo_bypass.lower() not in ('default', 'never'):
        try:
            GeoUtils.random_ipv4(geo_bypass_code)
            GeoUtils.random_ipv4(opts.geo_bypass)
        except Exception:
            raise ValueError('unsupported geo-bypass country or ip-block')
            raise ValueError(f'Unsupported --xff "{opts.geo_bypass}"')
        if len(opts.geo_bypass) == 2:
            opts.geo_bypass_country = opts.geo_bypass
        else:
            opts.geo_bypass_ip_block = opts.geo_bypass
    opts.geo_bypass = opts.geo_bypass.lower() != 'never'

    opts.match_filter = match_filter_func(opts.match_filter)
    opts.match_filter = match_filter_func(opts.match_filter, opts.breaking_match_filter)

    if opts.download_archive is not None:
        opts.download_archive = expand_path(opts.download_archive)
@@ -434,6 +459,10 @@ def validate_options(opts):
    elif ed and proto == 'default':
        default_downloader = ed.get_basename()

    for policy in opts.color.values():
        if policy not in ('always', 'auto', 'no_color', 'never'):
            raise ValueError(f'"{policy}" is not a valid color policy')

    warnings, deprecation_warnings = [], []

    # Common mistake: -f best
@@ -708,7 +737,8 @@ def parse_options(argv=None):
        'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename',
        'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
    ))
    opts.quiet = opts.quiet or any_getting or opts.print_json or bool(opts.forceprint)
    if opts.quiet is None:
        opts.quiet = any_getting or opts.print_json or bool(opts.forceprint)

    playlist_pps = [pp for pp in postprocessors if pp.get('when') == 'playlist']
    write_playlist_infojson = (opts.writeinfojson and not opts.clean_infojson
@@ -734,6 +764,7 @@ def parse_options(argv=None):
    return ParsedOptions(parser, opts, urls, {
        'usenetrc': opts.usenetrc,
        'netrc_location': opts.netrc_location,
        'netrc_cmd': opts.netrc_cmd,
        'username': opts.username,
        'password': opts.password,
        'twofactor': opts.twofactor,
@@ -891,7 +922,7 @@ def parse_options(argv=None):
        'playlist_items': opts.playlist_items,
        'xattr_set_filesize': opts.xattr_set_filesize,
        'match_filter': opts.match_filter,
        'no_color': opts.no_color,
        'color': opts.color,
        'ffmpeg_location': opts.ffmpeg_location,
        'hls_prefer_native': opts.hls_prefer_native,
        'hls_use_mpegts': opts.hls_use_mpegts,
@@ -935,14 +966,18 @@ def _real_main(argv=None):
    if opts.rm_cachedir:
        ydl.cache.remove()

    updater = Updater(ydl)
    if opts.update_self and updater.update() and actual_use:
        if updater.cmd:
            return updater.restart()
        # This code is reachable only for zip variant in py < 3.10
        # It makes sense to exit here, but the old behavior is to continue
        ydl.report_warning('Restart yt-dlp to use the updated version')
        # return 100, 'ERROR: The program must exit for the update to complete'
    try:
        updater = Updater(ydl, opts.update_self)
        if opts.update_self and updater.update() and actual_use:
            if updater.cmd:
                return updater.restart()
            # This code is reachable only for zip variant in py < 3.10
            # It makes sense to exit here, but the old behavior is to continue
            ydl.report_warning('Restart yt-dlp to use the updated version')
            # return 100, 'ERROR: The program must exit for the update to complete'
    except Exception:
        traceback.print_exc()
        ydl._download_retcode = 100

    if not actual_use:
        if pre_process:
@@ -956,6 +991,8 @@ def _real_main(argv=None):
    parser.destroy()
    try:
        if opts.load_info_filename is not None:
            if all_urls:
                ydl.report_warning('URLs are ignored due to --load-info-json')
            return ydl.download_with_info_file(expand_path(opts.load_info_filename))
        else:
            return ydl.download(all_urls)

@@ -1,30 +1,8 @@
import ast
import os
import sys
from pathlib import Path

from PyInstaller.utils.hooks import collect_submodules


def find_attribute_accesses(node, name, path=()):
    if isinstance(node, ast.Attribute):
        path = [*path, node.attr]
        if isinstance(node.value, ast.Name) and node.value.id == name:
            yield path[::-1]
    for child in ast.iter_child_nodes(node):
        yield from find_attribute_accesses(child, name, path)


def collect_used_submodules(name, level):
    for dirpath, _, filenames in os.walk(Path(__file__).parent.parent):
        for filename in filenames:
            if not filename.endswith('.py'):
                continue
            with open(Path(dirpath) / filename, encoding='utf8') as f:
                for submodule in find_attribute_accesses(ast.parse(f.read()), name):
                    yield '.'.join(submodule[:level])


def pycryptodome_module():
    try:
        import Cryptodome  # noqa: F401
@@ -40,13 +18,10 @@ def pycryptodome_module():


def get_hidden_imports():
    yield 'yt_dlp.compat._legacy'
    yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated')
    yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated')
    yield pycryptodome_module()
    yield from collect_submodules('websockets')

    crypto = pycryptodome_module()
    for sm in set(collect_used_submodules('Cryptodome', 2)):
        yield f'{crypto}.{sm}'

    # These are auto-detected, but explicitly add them just in case
    yield from ('mutagen', 'brotli', 'certifi')
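Note: for context, the AST walker in this hunk turned attribute chains into dotted module paths so that only the Cryptodome submodules actually referenced in the tree were registered as hidden imports. A rough sketch of what it yields (assumes find_attribute_accesses from the hunk above is in scope; not part of the diff):

    import ast

    tree = ast.parse('Cryptodome.Cipher.AES.new(key, Cryptodome.Cipher.AES.MODE_CBC, iv)')
    accesses = {'.'.join(path[:2]) for path in find_attribute_accesses(tree, 'Cryptodome')}
    assert accesses == {'Cipher.AES'}  # -> hidden import 'Cryptodome.Cipher.AES'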
@@ -5,14 +5,14 @@ from .compat import compat_ord
from .dependencies import Cryptodome
from .utils import bytes_to_intlist, intlist_to_bytes

if Cryptodome:
if Cryptodome.AES:
    def aes_cbc_decrypt_bytes(data, key, iv):
        """ Decrypt bytes with AES-CBC using pycryptodome """
        return Cryptodome.Cipher.AES.new(key, Cryptodome.Cipher.AES.MODE_CBC, iv).decrypt(data)
        return Cryptodome.AES.new(key, Cryptodome.AES.MODE_CBC, iv).decrypt(data)

    def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
        """ Decrypt bytes with AES-GCM using pycryptodome """
        return Cryptodome.Cipher.AES.new(key, Cryptodome.Cipher.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag)
        return Cryptodome.AES.new(key, Cryptodome.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag)

else:
    def aes_cbc_decrypt_bytes(data, key, iv):

@@ -1,5 +1,4 @@
import contextlib
import errno
import json
import os
import re
@@ -39,11 +38,7 @@ class Cache:

        fn = self._get_cache_fn(section, key, dtype)
        try:
            try:
                os.makedirs(os.path.dirname(fn))
            except OSError as ose:
                if ose.errno != errno.EEXIST:
                    raise
            os.makedirs(os.path.dirname(fn), exist_ok=True)
            self._ydl.write_debug(f'Saving {section}.{key} to cache')
            write_json_file({'yt-dlp_version': __version__, 'data': data}, fn)
        except Exception:

plugins/youtube_download/yt_dlp/casefold.py (new file, +5)
@@ -0,0 +1,5 @@
import warnings

warnings.warn(DeprecationWarning(f'{__name__} is deprecated'))

casefold = str.casefold

@@ -1,14 +1,11 @@
import os
import sys
import warnings
import xml.etree.ElementTree as etree

from ._deprecated import *  # noqa: F401, F403
from .compat_utils import passthrough_module

# XXX: Implement this the same way as other DeprecationWarnings without circular import
passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn(
    DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=5))
passthrough_module(__name__, '._deprecated')
del passthrough_module


# HTMLParseError has been deprecated in Python 3.3 and removed in
@@ -70,3 +67,13 @@ if compat_os_name in ('nt', 'ce'):
        return userhome + path[i:]
else:
    compat_expanduser = os.path.expanduser


def urllib_req_to_req(urllib_request):
    """Convert urllib Request to a networking Request"""
    from ..networking import Request
    from ..utils.networking import HTTPHeaderDict
    return Request(
        urllib_request.get_full_url(), data=urllib_request.data, method=urllib_request.get_method(),
        headers=HTTPHeaderDict(urllib_request.headers, urllib_request.unredirected_hdrs),
        extensions={'timeout': urllib_request.timeout} if hasattr(urllib_request, 'timeout') else None)
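Note: a small usage sketch for the new urllib_req_to_req bridge (hypothetical values; the import path assumes this tree's module layout):

    import urllib.request
    from yt_dlp.compat import urllib_req_to_req

    legacy_req = urllib.request.Request(
        'https://example.com/api', data=b'{}', method='POST',
        headers={'Content-Type': 'application/json'})
    req = urllib_req_to_req(legacy_req)
    # URL, payload, method and headers (including unredirected ones) carry
    # over; a `timeout` attribute, when present, becomes extensions['timeout']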
@@ -1,4 +1,12 @@
"""Deprecated - New code should avoid these"""
import warnings

from .compat_utils import passthrough_module

# XXX: Implement this the same way as other DeprecationWarnings without circular import
passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn(
    DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
del passthrough_module

import base64
import urllib.error
@@ -8,7 +16,6 @@ compat_str = str

compat_b64decode = base64.b64decode

compat_HTTPError = urllib.error.HTTPError
compat_urlparse = urllib.parse
compat_parse_qs = urllib.parse.parse_qs
compat_urllib_parse_unquote = urllib.parse.unquote

@@ -1,5 +1,6 @@
""" Do not use! """

import base64
import collections
import ctypes
import getpass
@@ -15,12 +16,12 @@ import shlex
import shutil
import socket
import struct
import subprocess
import tokenize
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree as etree
from subprocess import DEVNULL

# isort: split
import asyncio  # noqa: F401
@@ -29,10 +30,11 @@ from asyncio import run as compat_asyncio_run  # noqa: F401
from re import Pattern as compat_Pattern  # noqa: F401
from re import match as compat_Match  # noqa: F401

from . import compat_expanduser, compat_HTMLParseError, compat_realpath
from .compat_utils import passthrough_module
from ..dependencies import Cryptodome_AES as compat_pycrypto_AES  # noqa: F401
from ..dependencies import brotli as compat_brotli  # noqa: F401
from ..dependencies import websockets as compat_websockets  # noqa: F401
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES  # noqa: F401

passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))

@@ -47,41 +49,48 @@ def compat_setenv(key, value, env=os.environ):
    env[key] = value


compat_base64_b64decode = base64.b64decode
compat_basestring = str
compat_casefold = str.casefold
compat_chr = chr
compat_collections_abc = collections.abc
compat_cookiejar = http.cookiejar
compat_cookiejar_Cookie = http.cookiejar.Cookie
compat_cookies = http.cookies
compat_cookies_SimpleCookie = http.cookies.SimpleCookie
compat_etree_Element = etree.Element
compat_etree_register_namespace = etree.register_namespace
compat_cookiejar = compat_http_cookiejar = http.cookiejar
compat_cookiejar_Cookie = compat_http_cookiejar_Cookie = http.cookiejar.Cookie
compat_cookies = compat_http_cookies = http.cookies
compat_cookies_SimpleCookie = compat_http_cookies_SimpleCookie = http.cookies.SimpleCookie
compat_etree_Element = compat_xml_etree_ElementTree_Element = etree.Element
compat_etree_register_namespace = compat_xml_etree_register_namespace = etree.register_namespace
compat_filter = filter
compat_get_terminal_size = shutil.get_terminal_size
compat_getenv = os.getenv
compat_getpass = getpass.getpass
compat_getpass = compat_getpass_getpass = getpass.getpass
compat_html_entities = html.entities
compat_html_entities_html5 = html.entities.html5
compat_HTMLParser = html.parser.HTMLParser
compat_html_parser_HTMLParseError = compat_HTMLParseError
compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
compat_http_client = http.client
compat_http_server = http.server
compat_HTTPError = urllib.error.HTTPError
compat_input = input
compat_integer_types = (int, )
compat_itertools_count = itertools.count
compat_kwargs = lambda kwargs: kwargs
compat_map = map
compat_numeric_types = (int, float, complex)
compat_os_path_expanduser = compat_expanduser
compat_os_path_realpath = compat_realpath
compat_print = print
compat_shlex_split = shlex.split
compat_socket_create_connection = socket.create_connection
compat_Struct = struct.Struct
compat_struct_pack = struct.pack
compat_struct_unpack = struct.unpack
compat_subprocess_get_DEVNULL = lambda: DEVNULL
compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
compat_tokenize_tokenize = tokenize.tokenize
compat_urllib_error = urllib.error
compat_urllib_HTTPError = urllib.error.HTTPError
compat_urllib_parse = urllib.parse
compat_urllib_parse_parse_qs = urllib.parse.parse_qs
compat_urllib_parse_quote = urllib.parse.quote
compat_urllib_parse_quote_plus = urllib.parse.quote_plus
compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
@@ -90,8 +99,10 @@ compat_urllib_parse_urlunparse = urllib.parse.urlunparse
compat_urllib_request = urllib.request
compat_urllib_request_DataHandler = urllib.request.DataHandler
compat_urllib_response = urllib.response
compat_urlretrieve = urllib.request.urlretrieve
compat_xml_parse_error = etree.ParseError
compat_urlretrieve = compat_urllib_request_urlretrieve = urllib.request.urlretrieve
compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseError
compat_xpath = lambda xpath: xpath
compat_zip = zip
workaround_optparse_bug9161 = lambda: None

legacy = []

@@ -48,7 +48,7 @@ def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=la
    """Passthrough parent module into a child module, creating the parent if necessary"""
    def __getattr__(attr):
        if _is_package(parent):
            with contextlib.suppress(ImportError):
            with contextlib.suppress(ModuleNotFoundError):
                return importlib.import_module(f'.{attr}', parent.__name__)

        ret = from_child(attr)

plugins/youtube_download/yt_dlp/compat/types.py (new file, +13)
@@ -0,0 +1,13 @@
# flake8: noqa: F405
from types import *  # noqa: F403

from .compat_utils import passthrough_module

passthrough_module(__name__, 'types')
del passthrough_module

try:
    # NB: pypy has builtin NoneType, so checking NameError won't work
    from types import NoneType  # >= 3.10
except ImportError:
    NoneType = type(None)

plugins/youtube_download/yt_dlp/compat/urllib/__init__.py (new file, +10)
@@ -0,0 +1,10 @@
# flake8: noqa: F405
from urllib import *  # noqa: F403

del request
from . import request  # noqa: F401

from ..compat_utils import passthrough_module

passthrough_module(__name__, 'urllib')
del passthrough_module

plugins/youtube_download/yt_dlp/compat/urllib/request.py (new file, +40)
@@ -0,0 +1,40 @@
# flake8: noqa: F405
from urllib.request import *  # noqa: F403

from ..compat_utils import passthrough_module

passthrough_module(__name__, 'urllib.request')
del passthrough_module


from .. import compat_os_name

if compat_os_name == 'nt':
    # On older python versions, proxies are extracted from Windows registry erroneously. [1]
    # If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2]
    # It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade
    # it to http on these older python versions to avoid issues
    # This also applies for ftp proxy type, as ftp:// proxy scheme is not supported.
    # 1: https://github.com/python/cpython/issues/86793
    # 2: https://github.com/python/cpython/blob/51f1ae5ceb0673316c4e4b0175384e892e33cc6e/Lib/urllib/request.py#L2683-L2698
    import sys
    from urllib.request import getproxies_environment, getproxies_registry

    def getproxies_registry_patched():
        proxies = getproxies_registry()
        if (
            sys.version_info >= (3, 10, 5)  # https://docs.python.org/3.10/whatsnew/changelog.html#python-3-10-5-final
            or (3, 9, 13) <= sys.version_info < (3, 10)  # https://docs.python.org/3.9/whatsnew/changelog.html#python-3-9-13-final
        ):
            return proxies

        for scheme in ('https', 'ftp'):
            if scheme in proxies and proxies[scheme].startswith(f'{scheme}://'):
                proxies[scheme] = 'http' + proxies[scheme][len(scheme):]

        return proxies

    def getproxies():
        return getproxies_environment() or getproxies_registry_patched()

del compat_os_name
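Note: the effect of getproxies_registry_patched() on an affected Python, sketched with a hypothetical registry result (not part of the diff):

    proxies = {'https': 'https://myproxy.example:8080', 'ftp': 'ftp://myproxy.example:2121'}
    for scheme in ('https', 'ftp'):
        if scheme in proxies and proxies[scheme].startswith(f'{scheme}://'):
            proxies[scheme] = 'http' + proxies[scheme][len(scheme):]
    assert proxies == {
        'https': 'http://myproxy.example:8080',
        'ftp': 'http://myproxy.example:2121',
    }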
@@ -1,7 +1,9 @@
import base64
import collections
import contextlib
import http.cookiejar
import http.cookies
import io
import json
import os
import re
@@ -11,6 +13,7 @@ import subprocess
import sys
import tempfile
import time
import urllib.request
from datetime import datetime, timedelta, timezone
from enum import Enum, auto
from hashlib import pbkdf2_hmac
@@ -20,6 +23,7 @@ from .aes import (
    aes_gcm_decrypt_and_verify_bytes,
    unpad_pkcs7,
)
from .compat import functools
from .dependencies import (
    _SECRETSTORAGE_UNAVAILABLE_REASON,
    secretstorage,
@@ -28,36 +32,24 @@ from .dependencies import (
from .minicurses import MultilinePrinter, QuietMultilinePrinter
from .utils import (
    Popen,
    YoutubeDLCookieJar,
    error_to_str,
    expand_path,
    is_path_like,
    sanitize_url,
    str_or_none,
    try_call,
    write_string,
)
from .utils._utils import _YDLLogger
from .utils.networking import normalize_url

CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}


class YDLLogger:
    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        if self._ydl:
            self._ydl.write_debug(message)

    def info(self, message):
        if self._ydl:
            self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        if self._ydl:
            self._ydl.report_warning(message, only_once)

    def error(self, message):
        if self._ydl:
            self._ydl.report_error(message)
class YDLLogger(_YDLLogger):
    def warning(self, message, only_once=False):  # compat
        return super().warning(message, once=only_once)

class ProgressBar(MultilinePrinter):
    _DELAY, _timer = 0.1, 0
@@ -105,7 +97,7 @@ def load_cookies(cookie_file, browser_specification, ydl):

        jar = YoutubeDLCookieJar(cookie_file)
        if not is_filename or os.access(cookie_file, os.R_OK):
            jar.load(ignore_discard=True, ignore_expires=True)
            jar.load()
        cookie_jars.append(jar)

    return _merge_cookie_jars(cookie_jars)
@@ -146,7 +138,7 @@ def _extract_firefox_cookies(profile, container, logger):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        with open(containers_path) as containers:
        with open(containers_path, encoding='utf8') as containers:
            identities = json.load(containers).get('identities', [])
            container_id = next((context.get('userContextId') for context in identities if container in (
                context.get('name'),
@@ -346,7 +338,9 @@ class ChromeCookieDecryptor:
    Linux:
    - cookies are either v10 or v11
    - v10: AES-CBC encrypted with a fixed key
        - also attempts empty password if decryption fails
    - v11: AES-CBC encrypted with an OS protected key (keyring)
        - also attempts empty password if decryption fails
    - v11 keys can be stored in various places depending on the active desktop environment [2]

    Mac:
@@ -361,7 +355,7 @@ class ChromeCookieDecryptor:

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

@@ -383,32 +377,49 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._v10_key = self.derive_key(b'peanuts')
        password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        self._v11_key = None if password is None else self.derive_key(password)
        self._empty_key = self.derive_key(b'')
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
        self._browser_keyring_name = browser_keyring_name
        self._keyring = keyring

    @functools.cached_property
    def _v11_key(self):
        password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
        return None if password is None else self.derive_key(password)

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """

        following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
        with an empty password. The failure detection is not the same as what chromium uses so the
        results won't be perfect

        References:
            - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
                - a bugfix to try an empty password as a fallback
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
            return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)

        elif version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)
            return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger)

        else:
            self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
            self._cookie_counts['other'] += 1
            return None
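Note: the v10/v11 paths now try a small key cascade instead of a single key. A self-contained sketch of the keys involved (mirrors derive_key above; not part of the diff):

    from hashlib import pbkdf2_hmac

    def derive_key(password):  # as in LinuxChromeCookieDecryptor.derive_key
        return pbkdf2_hmac('sha1', password, b'saltysalt', 1, 16)

    v10_key = derive_key(b'peanuts')  # Chromium's fixed Linux password
    empty_key = derive_key(b'')       # fallback added by this change
    # _decrypt_aes_cbc_multi((v10_key, empty_key), ...) keeps the first
    # plaintext that survives PKCS#7 unpadding and UTF-8 decoding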
@@ -423,7 +434,7 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
@@ -436,12 +447,12 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
            return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger)

        else:
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
            return encrypted_value


@@ -461,7 +472,7 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            # kNonceLength
            nonce_length = 96 // 8
            # boringssl
@@ -478,23 +489,27 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()


def _extract_safari_cookies(profile, logger):
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')

    if not os.path.isfile(cookies_path):
        logger.debug('Trying secondary cookie location')
        cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
    if profile:
        cookies_path = os.path.expanduser(profile)
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')
            raise FileNotFoundError('custom safari cookies database not found')

    else:
        cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')

        if not os.path.isfile(cookies_path):
            logger.debug('Trying secondary cookie location')
            cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
            if not os.path.isfile(cookies_path):
                raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as f:
        cookies_data = f.read()
@@ -657,19 +672,27 @@ class _LinuxDesktopEnvironment(Enum):
    """
    OTHER = auto()
    CINNAMON = auto()
    DEEPIN = auto()
    GNOME = auto()
    KDE = auto()
    KDE3 = auto()
    KDE4 = auto()
    KDE5 = auto()
    KDE6 = auto()
    PANTHEON = auto()
    UKUI = auto()
    UNITY = auto()
    XFCE = auto()
    LXQT = auto()


class _LinuxKeyring(Enum):
    """
    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()
    KWALLET = auto()  # KDE4
    KWALLET5 = auto()
    KWALLET6 = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()

@@ -677,7 +700,7 @@ class _LinuxKeyring(Enum):
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()


def _get_linux_desktop_environment(env):
def _get_linux_desktop_environment(env, logger):
    """
    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
@@ -692,51 +715,97 @@ def _get_linux_desktop_environment(env):
            return _LinuxDesktopEnvironment.GNOME
        else:
            return _LinuxDesktopEnvironment.UNITY
    elif xdg_current_desktop == 'Deepin':
        return _LinuxDesktopEnvironment.DEEPIN
    elif xdg_current_desktop == 'GNOME':
        return _LinuxDesktopEnvironment.GNOME
    elif xdg_current_desktop == 'X-Cinnamon':
        return _LinuxDesktopEnvironment.CINNAMON
    elif xdg_current_desktop == 'KDE':
        return _LinuxDesktopEnvironment.KDE
        kde_version = env.get('KDE_SESSION_VERSION', None)
        if kde_version == '5':
            return _LinuxDesktopEnvironment.KDE5
        elif kde_version == '6':
            return _LinuxDesktopEnvironment.KDE6
        elif kde_version == '4':
            return _LinuxDesktopEnvironment.KDE4
        else:
            logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
            return _LinuxDesktopEnvironment.KDE4
    elif xdg_current_desktop == 'Pantheon':
        return _LinuxDesktopEnvironment.PANTHEON
    elif xdg_current_desktop == 'XFCE':
        return _LinuxDesktopEnvironment.XFCE
    elif xdg_current_desktop == 'UKUI':
        return _LinuxDesktopEnvironment.UKUI
    elif xdg_current_desktop == 'LXQt':
        return _LinuxDesktopEnvironment.LXQT
    else:
        logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')

    elif desktop_session is not None:
        if desktop_session in ('mate', 'gnome'):
        if desktop_session == 'deepin':
            return _LinuxDesktopEnvironment.DEEPIN
        elif desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        elif 'kde' in desktop_session:
            return _LinuxDesktopEnvironment.KDE
        elif 'xfce' in desktop_session:
        elif desktop_session in ('kde4', 'kde-plasma'):
            return _LinuxDesktopEnvironment.KDE4
        elif desktop_session == 'kde':
            if 'KDE_SESSION_VERSION' in env:
                return _LinuxDesktopEnvironment.KDE4
            else:
                return _LinuxDesktopEnvironment.KDE3
        elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
            return _LinuxDesktopEnvironment.XFCE
        elif desktop_session == 'ukui':
            return _LinuxDesktopEnvironment.UKUI
        else:
            logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')

    else:
        if 'GNOME_DESKTOP_SESSION_ID' in env:
            return _LinuxDesktopEnvironment.GNOME
        elif 'KDE_FULL_SESSION' in env:
            return _LinuxDesktopEnvironment.KDE
            if 'KDE_SESSION_VERSION' in env:
                return _LinuxDesktopEnvironment.KDE4
            else:
                return _LinuxDesktopEnvironment.KDE3
    return _LinuxDesktopEnvironment.OTHER


def _choose_linux_keyring(logger):
    """
    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    SelectBackend in [1]

    There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
    `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
    does not appear to be called anywhere other than in tests, so the user would have to create this file manually
    and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    desktop_environment = _get_linux_desktop_environment(os.environ, logger)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
    if desktop_environment == _LinuxDesktopEnvironment.KDE4:
        linux_keyring = _LinuxKeyring.KWALLET
    elif desktop_environment == _LinuxDesktopEnvironment.OTHER:
    elif desktop_environment == _LinuxDesktopEnvironment.KDE5:
        linux_keyring = _LinuxKeyring.KWALLET5
    elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
        linux_keyring = _LinuxKeyring.KWALLET6
    elif desktop_environment in (
        _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
    ):
        linux_keyring = _LinuxKeyring.BASICTEXT
    else:
        linux_keyring = _LinuxKeyring.GNOMEKEYRING
    return linux_keyring
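Note: a quick sanity sketch of the new desktop-environment detection feeding keyring choice (hypothetical env dict; YDLLogger from this module assumed; not part of the diff):

    env = {'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '5'}
    de = _get_linux_desktop_environment(env, YDLLogger())
    assert de == _LinuxDesktopEnvironment.KDE5
    # _choose_linux_keyring then maps: KDE4 -> KWALLET, KDE5 -> KWALLET5,
    # KDE6 -> KWALLET6, KDE3/LXQT/OTHER -> BASICTEXT, else GNOMEKEYRING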


def _get_kwallet_network_wallet(logger):
def _get_kwallet_network_wallet(keyring, logger):
    """ The name of the wallet used to store network passwords.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
@@ -744,10 +813,22 @@ def _get_kwallet_network_wallet(logger):
    """
    default_wallet = 'kdewallet'
    try:
        if keyring == _LinuxKeyring.KWALLET:
            service_name = 'org.kde.kwalletd'
            wallet_path = '/modules/kwalletd'
        elif keyring == _LinuxKeyring.KWALLET5:
            service_name = 'org.kde.kwalletd5'
            wallet_path = '/modules/kwalletd5'
        elif keyring == _LinuxKeyring.KWALLET6:
            service_name = 'org.kde.kwalletd6'
            wallet_path = '/modules/kwalletd6'
        else:
            raise ValueError(keyring)

        stdout, _, returncode = Popen.run([
            'dbus-send', '--session', '--print-reply=literal',
            '--dest=org.kde.kwalletd5',
            '/modules/kwalletd5',
            f'--dest={service_name}',
            wallet_path,
            'org.kde.KWallet.networkWallet'
        ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

@@ -762,8 +843,8 @@ def _get_kwallet_network_wallet(logger):
        return default_wallet


def _get_kwallet_password(browser_keyring_name, logger):
    logger.debug('using kwallet-query to obtain password from kwallet')
def _get_kwallet_password(browser_keyring_name, keyring, logger):
    logger.debug(f'using kwallet-query to obtain password from {keyring.name}')

    if shutil.which('kwallet-query') is None:
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
@@ -771,7 +852,7 @@ def _get_kwallet_password(browser_keyring_name, logger):
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)
    network_wallet = _get_kwallet_network_wallet(keyring, logger)

    try:
        stdout, _, returncode = Popen.run([
@@ -793,8 +874,9 @@ def _get_kwallet_password(browser_keyring_name, logger):
            # checks hasEntry. To verify this:
            # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
            # while starting chrome.
            # this may be a bug as the intended behaviour is to generate a random password and store
            # it, but that doesn't matter here.
            # this was identified as a bug later and fixed in
            # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
            # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
            return b''
        else:
            logger.debug('password found')
@@ -832,8 +914,8 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
        return _get_kwallet_password(browser_keyring_name, keyring, logger)
    elif keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.BASICTEXT:
@@ -861,6 +943,10 @@ def _get_mac_keyring_password(browser_keyring_name, logger):


def _get_windows_v10_key(browser_root, logger):
    """
    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
    """
    path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if path is None:
        logger.error('could not find local state file')
@@ -869,11 +955,13 @@ def _get_windows_v10_key(browser_root, logger):
    with open(path, encoding='utf8') as f:
        data = json.load(f)
    try:
        # kOsCryptEncryptedKeyPrefName in [1]
        base64_key = data['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None
    encrypted_key = base64.b64decode(base64_key)
    # kDPAPIKeyPrefix in [1]
    prefix = b'DPAPI'
    if not encrypted_key.startswith(prefix):
        logger.error('invalid key')
@@ -885,13 +973,15 @@ def pbkdf2_sha1(password, salt, iterations, key_length):
    return pbkdf2_hmac('sha1', password, salt, iterations, key_length)


def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
    try:
        return plaintext.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
    for key in keys:
        plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
        try:
            return plaintext.decode()
        except UnicodeDecodeError:
            pass
    logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
    return None
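Note: usage shape of the new multi-key helper (hypothetical variable names; not part of the diff):

    plaintext = _decrypt_aes_cbc_multi(ciphertext, (v11_key, empty_key), logger)
    if plaintext is None:
        ...  # every key failed UTF-8 validation; the warning above fired once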


def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
@@ -1085,3 +1175,150 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):

            else:
                morsel = None


class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard, ignore_expires):
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, ignore_discard, ignore_expires)

    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True

    def get_cookie_header(self, url):
        """Generate a Cookie HTTP header for a given url"""
        cookie_req = urllib.request.Request(normalize_url(sanitize_url(url)))
        self.add_cookie_header(cookie_req)
        return cookie_req.get_header('Cookie')
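Note: get_cookie_header in practice (hypothetical file and cookie values; not part of the diff):

    jar = YoutubeDLCookieJar('cookies.txt')  # a Netscape-format cookie file
    jar.load()
    header = jar.get_cookie_header('https://example.com/videos')
    # -> e.g. 'sid=abc123; theme=dark', or None when nothing matches the URL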

    def get_cookies_for_url(self, url):
        """Generate a list of Cookie objects for a given url"""
        # Policy `_now` attribute must be set before calling `_cookies_for_request`
        # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
        self._policy._now = self._now = int(time.time())
        return self._cookies_for_request(urllib.request.Request(normalize_url(sanitize_url(url))))

    def clear(self, *args, **kwargs):
        with contextlib.suppress(KeyError):
            return super().clear(*args, **kwargs)

@@ -1,6 +1,3 @@
import types

from ..compat import functools
from ..compat.compat_utils import passthrough_module

try:
@@ -9,22 +6,33 @@ except ImportError:
    try:
        import Crypto as _parent
    except (ImportError, SyntaxError):  # Old Crypto gives SyntaxError in newer Python
        _parent = types.ModuleType('no_Cryptodome')
        _parent = passthrough_module(__name__, 'no_Cryptodome')
        __bool__ = lambda: False

passthrough_module(__name__, _parent, (..., '__version__'))
del passthrough_module

__version__ = ''
AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None
try:
    if _parent.__name__ == 'Cryptodome':
        from Cryptodome import __version__
        from Cryptodome.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5
        from Cryptodome.Hash import CMAC, SHA1
        from Cryptodome.PublicKey import RSA
    elif _parent.__name__ == 'Crypto':
        from Crypto import __version__
        from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5  # noqa: F401
        from Crypto.Hash import CMAC, SHA1  # noqa: F401
        from Crypto.PublicKey import RSA  # noqa: F401
except ImportError:
    __version__ = f'broken {__version__}'.strip()

@property
@functools.cache
def _yt_dlp__identifier():
    if _parent.__name__ == 'Crypto':
        from Crypto.Cipher import AES
        try:
            # In pycrypto, mode defaults to ECB. See:
            # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode
            AES.new(b'abcdefghijklmnop')
        except TypeError:
            return 'pycrypto'
    return _parent.__name__

_yt_dlp__identifier = _parent.__name__
if AES and _yt_dlp__identifier == 'Crypto':
    try:
        # In pycrypto, mode defaults to ECB. See:
        # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode
        AES.new(b'abcdefghijklmnop')
    except TypeError:
        _yt_dlp__identifier = 'pycrypto'

@@ -73,7 +73,7 @@ available_dependencies = {k: v for k, v in all_dependencies.items() if v}


# Deprecated
Cryptodome_AES = Cryptodome.Cipher.AES if Cryptodome else None
Cryptodome_AES = Cryptodome.AES


__all__ = [

@@ -30,7 +30,7 @@ from .hls import HlsFD
from .http import HttpFD
from .ism import IsmFD
from .mhtml import MhtmlFD
from .niconico import NiconicoDmcFD
from .niconico import NiconicoDmcFD, NiconicoLiveFD
from .rtmp import RtmpFD
from .rtsp import RtspFD
from .websocket import WebSocketFragmentFD
@@ -50,6 +50,7 @@ PROTOCOL_MAP = {
    'ism': IsmFD,
    'mhtml': MhtmlFD,
    'niconico_dmc': NiconicoDmcFD,
    'niconico_live': NiconicoLiveFD,
    'fc2_live': FC2LiveFD,
    'websocket_frag': WebSocketFragmentFD,
    'youtube_live_chat': YoutubeLiveChatFD,

@@ -49,10 +49,10 @@ class FileDownloader:
    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    ratelimit:          Download speed limit, in bytes/sec.
    continuedl:         Attempt to continue downloads if possible
    throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
    retries:            Number of times to retry for HTTP error 5xx
    file_access_retries:   Number of times to retry on file access error
    retries:            Number of times to retry for expected network errors.
                        Default is 0 for API, but 10 for CLI
    file_access_retries:   Number of times to retry on file access error (default: 3)
    buffersize:         Size of download buffer in bytes.
    noresizebuffer:     Do not automatically resize the download buffer.
    continuedl:         Try to continue downloads if possible.
@@ -138,17 +138,21 @@ class FileDownloader:
    def format_percent(percent):
        return ' N/A%' if percent is None else f'{percent:>5.1f}%'

    @staticmethod
    def calc_eta(start, now, total, current):
    @classmethod
    def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT):
        if total is NO_DEFAULT:
            rate, remaining = start_or_rate, now_or_remaining
            if None in (rate, remaining):
                return None
            return int(float(remaining) / rate)

        start, now = start_or_rate, now_or_remaining
        if total is None:
            return None
        if now is None:
            now = time.time()
        dif = now - start
        if current == 0 or dif < 0.001:  # One millisecond
            return None
        rate = float(current) / dif
        return int((float(total) - float(current)) / rate)
        rate = cls.calc_speed(start, now, current)
        return rate and int((float(total) - float(current)) / rate)
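Note: calc_eta now supports two call shapes; a quick check of both (arbitrary values; assumes calc_speed still returns bytes/sec as the removed inline code did):

    # classic 4-argument form: start, now, total_bytes, downloaded_bytes
    assert FileDownloader.calc_eta(0.0, 10.0, 2048, 1024) == 10  # 102.4 B/s, 1024 B left
    # new 2-argument form: rate, remaining_bytes
    assert FileDownloader.calc_eta(100, 500) == 5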
|
||||
@staticmethod
|
||||
def calc_speed(start, now, bytes):
|
||||
@ -165,6 +169,12 @@ class FileDownloader:
|
||||
def format_retries(retries):
|
||||
return 'inf' if retries == float('inf') else int(retries)
|
||||
|
||||
@staticmethod
|
||||
def filesize_or_none(unencoded_filename):
|
||||
if os.path.isfile(unencoded_filename):
|
||||
return os.path.getsize(unencoded_filename)
|
||||
return 0
|
||||
|
||||
@staticmethod
|
||||
def best_block_size(elapsed_time, bytes):
|
||||
new_min = max(bytes / 2.0, 1.0)
|
||||
@ -225,7 +235,7 @@ class FileDownloader:
|
||||
sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))
|
||||
|
||||
def wrapper(self, func, *args, **kwargs):
|
||||
for retry in RetryManager(self.params.get('file_access_retries'), error_callback, fd=self):
|
||||
for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self):
|
||||
try:
|
||||
return func(self, *args, **kwargs)
|
||||
except OSError as err:
|
||||
@ -245,7 +255,8 @@ class FileDownloader:
|
||||
|
||||
@wrap_file_access('remove')
|
||||
def try_remove(self, filename):
|
||||
os.remove(filename)
|
||||
if os.path.isfile(filename):
|
||||
os.remove(filename)
|
||||
|
||||
@wrap_file_access('rename')
|
||||
def try_rename(self, old_filename, new_filename):
|
||||
@ -285,7 +296,8 @@ class FileDownloader:
|
||||
self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
|
||||
else:
|
||||
self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
|
||||
self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')
|
||||
self._multiline.allow_colors = self.ydl._allow_colors.out and self.ydl._allow_colors.out != 'no_color'
|
||||
self._multiline._HAVE_FULLCAP = self.ydl._allow_colors.out
|
||||
|
||||
def _finish_multiline_status(self):
|
||||
self._multiline.end()
|
||||
@ -407,7 +419,6 @@ class FileDownloader:
|
||||
"""Download to a filename using the info from info_dict
|
||||
Return True on success and False otherwise
|
||||
"""
|
||||
|
||||
nooverwrites_and_exists = (
|
||||
not self.params.get('overwrites', True)
|
||||
and os.path.exists(encodeFilename(filename))
|
||||
|
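The reworked calc_eta above is overloaded: the legacy four-argument form (start, now, total, current) still works, while the new two-argument form takes a rate and the remaining byte count. A standalone sketch of both call styles, with invented numbers:

import time
from yt_dlp.downloader.common import FileDownloader

start = time.time() - 10  # pretend the download began 10 seconds ago
# Legacy form: start time, current time, total bytes, bytes downloaded
eta_legacy = FileDownloader.calc_eta(start, time.time(), total=1000, current=250)
# New form: rate in bytes/sec, remaining bytes
eta_new = FileDownloader.calc_eta(25.0, 750)
# Both estimate roughly 30 seconds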
@@ -1,14 +1,16 @@
import enum
import json
import os.path
import os
import re
import subprocess
import sys
import tempfile
import time
import uuid

from .fragment import FragmentFD
from ..compat import functools
from ..networking import Request
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
from ..utils import (
    Popen,
@@ -23,9 +25,7 @@ from ..utils import (
    encodeArgument,
    encodeFilename,
    find_available_port,
    handle_youtubedl_headers,
    remove_end,
    sanitized_Request,
    traverse_obj,
)
@@ -43,6 +43,7 @@ class ExternalFD(FragmentFD):
    def real_download(self, filename, info_dict):
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)
        self._cookies_tempfile = None

        try:
            started = time.time()
@@ -55,6 +56,9 @@ class ExternalFD(FragmentFD):
            # should take place
            retval = 0
            self.to_screen('[%s] Interrupted by user' % self.get_basename())
        finally:
            if self._cookies_tempfile:
                self.try_remove(self._cookies_tempfile)

        if retval == 0:
            status = {
@@ -126,6 +130,16 @@ class ExternalFD(FragmentFD):
            self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
            keys, *args, **kwargs)

    def _write_cookies(self):
        if not self.ydl.cookiejar.filename:
            tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
            tmp_cookies.close()
            self._cookies_tempfile = tmp_cookies.name
            self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"')
        # real_download resets _cookies_tempfile; if it's None then save() will write to cookiejar.filename
        self.ydl.cookiejar.save(self._cookies_tempfile)
        return self.ydl.cookiejar.filename or self._cookies_tempfile

    def _call_downloader(self, tmpfilename, info_dict):
        """ Either overwrite this or implement _make_cmd """
        cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
@@ -176,7 +190,7 @@ class ExternalFD(FragmentFD):
        return 0

    def _call_process(self, cmd, info_dict):
        return Popen.run(cmd, text=True, stderr=subprocess.PIPE)
        return Popen.run(cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None)


class CurlFD(ExternalFD):
@@ -185,6 +199,9 @@ class CurlFD(ExternalFD):

    def _make_cmd(self, tmpfilename, info_dict):
        cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
        if cookie_header:
            cmd += ['--cookie', cookie_header]
        if info_dict.get('http_headers') is not None:
            for key, val in info_dict['http_headers'].items():
                cmd += ['--header', f'{key}: {val}']
@@ -215,6 +232,9 @@ class AxelFD(ExternalFD):
        if info_dict.get('http_headers') is not None:
            for key, val in info_dict['http_headers'].items():
                cmd += ['-H', f'{key}: {val}']
        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
        if cookie_header:
            cmd += ['-H', f'Cookie: {cookie_header}', '--max-redirect=0']
        cmd += self._configuration_args()
        cmd += ['--', info_dict['url']]
        return cmd
@@ -224,7 +244,9 @@ class WgetFD(ExternalFD):
    AVAILABLE_OPT = '--version'

    def _make_cmd(self, tmpfilename, info_dict):
        cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto']
        cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
        if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
            cmd += ['--load-cookies', self._write_cookies()]
        if info_dict.get('http_headers') is not None:
            for key, val in info_dict['http_headers'].items():
                cmd += ['--header', f'{key}: {val}']
@@ -272,7 +294,7 @@ class Aria2cFD(ExternalFD):
            return super()._call_downloader(tmpfilename, info_dict)

    def _make_cmd(self, tmpfilename, info_dict):
        cmd = [self.exe, '-c',
        cmd = [self.exe, '-c', '--no-conf',
               '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
               '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
        if 'fragments' in info_dict:
@@ -280,6 +302,8 @@ class Aria2cFD(ExternalFD):
        else:
            cmd += ['--min-split-size', '1M']

        if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
            cmd += [f'--load-cookies={self._write_cookies()}']
        if info_dict.get('http_headers') is not None:
            for key, val in info_dict['http_headers'].items():
                cmd += ['--header', f'{key}: {val}']
@@ -334,13 +358,12 @@ class Aria2cFD(ExternalFD):
            'method': method,
            'params': [f'token:{rpc_secret}', *params],
        }).encode('utf-8')
        request = sanitized_Request(
        request = Request(
            f'http://localhost:{rpc_port}/jsonrpc',
            data=d, headers={
                'Content-Type': 'application/json',
                'Content-Length': f'{len(d)}',
                'Ytdl-request-proxy': '__noproxy__',
            })
            }, proxies={'all': None})
        with self.ydl.urlopen(request) as r:
            resp = json.load(r)
        assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server'
@@ -418,6 +441,14 @@ class HttpieFD(ExternalFD):
        if info_dict.get('http_headers') is not None:
            for key, val in info_dict['http_headers'].items():
                cmd += [f'{key}:{val}']

        # httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
        # If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
        # 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
        # 2: https://httpie.io/docs/cli/sessions
        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
        if cookie_header:
            cmd += [f'Cookie:{cookie_header}']
        return cmd


@@ -528,11 +559,16 @@ class FFmpegFD(ExternalFD):

        selected_formats = info_dict.get('requested_formats') or [info_dict]
        for i, fmt in enumerate(selected_formats):
            if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']):
                headers_dict = handle_youtubedl_headers(fmt['http_headers'])
            is_http = re.match(r'^https?://', fmt['url'])
            cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
            if cookies:
                args.extend(['-cookies', ''.join(
                    f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n'
                    for cookie in cookies)])
            if fmt.get('http_headers') and is_http:
                # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
                # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
                args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in headers_dict.items())])
                args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())])

            if start_time:
                args += ['-ss', str(start_time)]
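The cookie handling above lets every external downloader draw from one shared cookiejar: a header for curl/axel, a Netscape cookie file for wget/aria2c. A rough sketch of the same decision _write_cookies makes, with the stdlib MozillaCookieJar standing in for yt-dlp's own jar:

import tempfile
from http.cookiejar import MozillaCookieJar

def cookies_file_for(cookiejar):
    # Sketch of the idea behind ExternalFD._write_cookies (simplified):
    # reuse the jar's own backing file when it has one, else dump to a temp file
    if cookiejar.filename:
        cookiejar.save()
        return cookiejar.filename
    tmp = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
    tmp.close()
    cookiejar.save(tmp.name)
    return tmp.name

jar = MozillaCookieJar()
print(cookies_file_for(jar))  # e.g. /tmp/xxxx.cookies, passed to wget --load-cookies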
@@ -3,11 +3,11 @@ import io
import itertools
import struct
import time
import urllib.error
import urllib.parse

from .fragment import FragmentFD
from ..compat import compat_etree_fromstring
from ..networking.exceptions import HTTPError
from ..utils import fix_xml_ampersands, xpath_text


@@ -312,7 +312,7 @@ class F4mFD(FragmentFD):
        self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        man_url = urlh.geturl()
        man_url = urlh.url
        # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
        # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
        # and https://github.com/ytdl-org/youtube-dl/issues/7823)
@@ -407,8 +407,8 @@ class F4mFD(FragmentFD):
                if box_type == b'mdat':
                    self._append_fragment(ctx, box_data)
                    break
            except urllib.error.HTTPError as err:
                if live and (err.code == 404 or err.code == 410):
            except HTTPError as err:
                if live and (err.status == 404 or err.status == 410):
                    # We didn't keep up with the live window. Continue
                    # with the next available fragment.
                    msg = 'Fragment %d unavailable' % frag_i
@@ -1,24 +1,19 @@
import concurrent.futures
import contextlib
import http.client
import json
import math
import os
import struct
import time
import urllib.error

from .common import FileDownloader
from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_os_name
from ..utils import (
    DownloadError,
    RetryManager,
    encodeFilename,
    sanitized_Request,
    traverse_obj,
)
from ..networking import Request
from ..networking.exceptions import HTTPError, IncompleteRead
from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj
from ..utils.networking import HTTPHeaderDict


class HttpQuietDownloader(HttpFD):
@@ -34,8 +29,8 @@ class FragmentFD(FileDownloader):

    Available options:

    fragment_retries:   Number of times to retry a fragment for HTTP error (DASH
                        and hlsnative only)
    fragment_retries:   Number of times to retry a fragment for HTTP error
                        (DASH and hlsnative only). Default is 0 for API, but 10 for CLI
    skip_unavailable_fragments:
                        Skip unavailable fragments (DASH and hlsnative only)
    keep_fragments:     Keep downloaded fragments on disk after downloading is
@@ -75,7 +70,7 @@ class FragmentFD(FileDownloader):

    def _prepare_url(self, info_dict, url):
        headers = info_dict.get('http_headers')
        return sanitized_Request(url, None, headers) if headers else url
        return Request(url, None, headers) if headers else url

    def _prepare_and_start_frag_download(self, ctx, info_dict):
        self._prepare_frag_download(ctx)
@@ -121,6 +116,11 @@ class FragmentFD(FileDownloader):
            'request_data': request_data,
            'ctx_id': ctx.get('ctx_id'),
        }
        frag_resume_len = 0
        if ctx['dl'].params.get('continuedl', True):
            frag_resume_len = self.filesize_or_none(self.temp_name(fragment_filename))
        fragment_info_dict['frag_resume_len'] = ctx['frag_resume_len'] = frag_resume_len

        success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict)
        if not success:
            return False
@@ -155,9 +155,7 @@ class FragmentFD(FileDownloader):
        del ctx['fragment_filename_sanitized']

    def _prepare_frag_download(self, ctx):
        if 'live' not in ctx:
            ctx['live'] = False
        if not ctx['live']:
        if not ctx.setdefault('live', False):
            total_frags_str = '%d' % ctx['total_frags']
            ad_frags = ctx.get('ad_frags', 0)
            if ad_frags:
@@ -170,15 +168,17 @@ class FragmentFD(FileDownloader):
            **self.params,
            'noprogress': True,
            'test': False,
            'sleep_interval': 0,
            'max_sleep_interval': 0,
            'sleep_interval_subtitles': 0,
        })
        tmpfilename = self.temp_name(ctx['filename'])
        open_mode = 'wb'
        resume_len = 0

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
        resume_len = self.filesize_or_none(tmpfilename)
        if resume_len > 0:
            open_mode = 'ab'
            resume_len = os.path.getsize(encodeFilename(tmpfilename))

        # Should be initialized before ytdl file check
        ctx.update({
@@ -187,7 +187,9 @@ class FragmentFD(FileDownloader):
        })

        if self.__do_ytdl_file(ctx):
            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
            ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
            continuedl = self.params.get('continuedl', True)
            if continuedl and ytdl_file_exists:
                self._read_ytdl_file(ctx)
                is_corrupt = ctx.get('ytdl_corrupt') is True
                is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
@@ -201,7 +203,12 @@ class FragmentFD(FileDownloader):
                    if 'ytdl_corrupt' in ctx:
                        del ctx['ytdl_corrupt']
                    self._write_ytdl_file(ctx)

            else:
                if not continuedl:
                    if ytdl_file_exists:
                        self._read_ytdl_file(ctx)
                    ctx['fragment_index'] = resume_len = 0
                self._write_ytdl_file(ctx)
                assert ctx['fragment_index'] == 0
@@ -274,12 +281,10 @@ class FragmentFD(FileDownloader):
            else:
                frag_downloaded_bytes = s['downloaded_bytes']
                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
                if not ctx['live']:
                    state['eta'] = self.calc_eta(
                        start, time_now, estimated_size - resume_len,
                        state['downloaded_bytes'] - resume_len)
                ctx['speed'] = state['speed'] = self.calc_speed(
                    ctx['fragment_started'], time_now, frag_downloaded_bytes)
                    ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx.get('frag_resume_len', 0))
                if not ctx['live']:
                    state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
            self._hook_progress(state, info_dict)
@@ -290,14 +295,12 @@ class FragmentFD(FileDownloader):
    def _finish_frag_download(self, ctx, info_dict):
        ctx['dest_stream'].close()
        if self.__do_ytdl_file(ctx):
            ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
            if os.path.isfile(ytdl_filename):
                self.try_remove(ytdl_filename)
            self.try_remove(self.ytdl_filename(ctx['filename']))
        elapsed = time.time() - ctx['started']

        to_file = ctx['tmpfilename'] != '-'
        if to_file:
            downloaded_bytes = os.path.getsize(encodeFilename(ctx['tmpfilename']))
            downloaded_bytes = self.filesize_or_none(ctx['tmpfilename'])
        else:
            downloaded_bytes = ctx['complete_frags_downloaded_bytes']
@@ -449,7 +452,7 @@ class FragmentFD(FileDownloader):

            frag_index = ctx['fragment_index'] = fragment['frag_index']
            ctx['last_error'] = None
            headers = info_dict.get('http_headers', {}).copy()
            headers = HTTPHeaderDict(info_dict.get('http_headers'))
            byte_range = fragment.get('byte_range')
            if byte_range:
                headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
@@ -466,9 +469,10 @@ class FragmentFD(FileDownloader):
            for retry in RetryManager(self.params.get('fragment_retries'), error_callback):
                try:
                    ctx['fragment_count'] = fragment.get('fragment_count')
                    if not self._download_fragment(ctx, fragment['url'], info_dict, headers):
                    if not self._download_fragment(
                            ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')):
                        return
                except (urllib.error.HTTPError, http.client.IncompleteRead) as err:
                except (HTTPError, IncompleteRead) as err:
                    retry.error = err
                    continue
                except DownloadError:  # has own retry settings
@@ -496,7 +500,7 @@ class FragmentFD(FileDownloader):
                download_fragment(fragment, ctx_copy)
                return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')

        self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
        self.report_warning('The download speed shown is only of one thread. This is a known issue')
        with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
            try:
                for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
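Per-fragment headers are now built with the new HTTPHeaderDict, which merges its source dicts case-insensitively and simply skips None sources, so the old copy-and-update dance is gone. A minimal sketch with invented header values:

from yt_dlp.utils.networking import HTTPHeaderDict

base = {'user-agent': 'Mozilla/5.0', 'Referer': 'https://example.com'}
headers = HTTPHeaderDict(base, None)           # a None source is silently ignored
headers['Range'] = 'bytes=0-1048575'           # per-fragment byte range
assert headers['USER-AGENT'] == 'Mozilla/5.0'  # lookups are case-insensitive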
@@ -28,7 +28,16 @@ class HlsFD(FragmentFD):
    FD_NAME = 'hlsnative'

    @staticmethod
    def can_download(manifest, info_dict, allow_unplayable_formats=False):
    def _has_drm(manifest):  # TODO: https://github.com/yt-dlp/yt-dlp/pull/5039
        return bool(re.search('|'.join((
            r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://',  # Apple FairPlay
            r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.apple\.streamingkeydelivery"',  # Apple FairPlay
            r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.microsoft\.playready"',  # Microsoft PlayReady
            r'#EXT-X-FAXS-CM:',  # Adobe Flash Access
        )), manifest))

    @classmethod
    def can_download(cls, manifest, info_dict, allow_unplayable_formats=False):
        UNSUPPORTED_FEATURES = [
            # r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]
@@ -50,13 +59,15 @@ class HlsFD(FragmentFD):
        ]
        if not allow_unplayable_formats:
            UNSUPPORTED_FEATURES += [
                r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
                r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1], but not necessarily DRM
            ]

        def check_results():
            yield not info_dict.get('is_live')
            for feature in UNSUPPORTED_FEATURES:
                yield not re.search(feature, manifest)
            if not allow_unplayable_formats:
                yield not cls._has_drm(manifest)
        return all(check_results())

    def real_download(self, filename, info_dict):
@@ -64,13 +75,13 @@ class HlsFD(FragmentFD):
        self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        man_url = urlh.geturl()
        man_url = urlh.url
        s = urlh.read().decode('utf-8', 'ignore')

        can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
        if can_download:
            has_ffmpeg = FFmpegFD.available()
            no_crypto = not Cryptodome and '#EXT-X-KEY:METHOD=AES-128' in s
            no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s
            if no_crypto and has_ffmpeg:
                can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available'
            elif no_crypto:
@@ -81,14 +92,13 @@ class HlsFD(FragmentFD):
                message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
                           f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
        if not can_download:
            has_drm = re.search('|'.join([
                r'#EXT-X-FAXS-CM:',  # Adobe Flash Access
                r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://',  # Apple FairPlay
            ]), s)
            if has_drm and not self.params.get('allow_unplayable_formats'):
                self.report_error(
                    'This video is DRM protected; Try selecting another format with --format or '
                    'add --check-formats to automatically fallback to the next best format')
            if self._has_drm(s) and not self.params.get('allow_unplayable_formats'):
                if info_dict.get('has_drm') and self.params.get('test'):
                    self.to_screen(f'[{self.FD_NAME}] This format is DRM protected', skip_eol=True)
                else:
                    self.report_error(
                        'This format is DRM protected; Try selecting another format with --format or '
                        'add --check-formats to automatically fallback to the next best format', tb=False)
                return False
            message = message or 'Unsupported features have been detected'
            fd = FFmpegFD(self.ydl, self.params)
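The consolidated _has_drm above can be exercised directly; the manifest lines below are invented but follow the EXT-X-KEY patterns it matches:

from yt_dlp.downloader.hls import HlsFD

fairplay = '#EXT-X-SESSION-KEY:METHOD=SAMPLE-AES,URI="skd://example-asset-id"'
playready = '#EXT-X-KEY:METHOD=SAMPLE-AES,KEYFORMAT="com.microsoft.playready",URI="data:text/plain;base64,AAAA"'
aes128 = '#EXT-X-KEY:METHOD=AES-128,URI="https://example.com/key"'

assert HlsFD._has_drm(fairplay)
assert HlsFD._has_drm(playready)
assert not HlsFD._has_drm(aes128)  # AES-128 is encrypted, but not necessarily DRM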
@@ -1,12 +1,14 @@
import http.client
import os
import random
import socket
import ssl
import time
import urllib.error

from .common import FileDownloader
from ..networking import Request
from ..networking.exceptions import (
    CertificateVerifyError,
    HTTPError,
    TransportError,
)
from ..utils import (
    ContentTooShortError,
    RetryManager,
@@ -16,18 +18,10 @@ from ..utils import (
    encodeFilename,
    int_or_none,
    parse_http_range,
    sanitized_Request,
    try_call,
    write_xattr,
)

RESPONSE_READ_EXCEPTIONS = (
    TimeoutError,
    socket.timeout,  # compat: py < 3.10
    ConnectionError,
    ssl.SSLError,
    http.client.HTTPException
)
from ..utils.networking import HTTPHeaderDict


class HttpFD(FileDownloader):
@@ -45,11 +39,8 @@ class HttpFD(FileDownloader):
        ctx.tmpfilename = self.temp_name(filename)
        ctx.stream = None

        # Do not include the Accept-Encoding header
        headers = {'Youtubedl-no-compression': 'True'}
        add_headers = info_dict.get('http_headers')
        if add_headers:
            headers.update(add_headers)
        # Disable compression
        headers = HTTPHeaderDict({'Accept-Encoding': 'identity'}, info_dict.get('http_headers'))

        is_test = self.params.get('test', False)
        chunk_size = self._TEST_FILE_SIZE if is_test else (
@@ -120,10 +111,10 @@ class HttpFD(FileDownloader):
                if try_call(lambda: range_end >= ctx.content_len):
                    range_end = ctx.content_len - 1

            request = sanitized_Request(url, request_data, headers)
            request = Request(url, request_data, headers)
            has_range = range_start is not None
            if has_range:
                request.add_header('Range', f'bytes={int(range_start)}-{int_or_none(range_end) or ""}')
                request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}'
            # Establish connection
            try:
                ctx.data = self.ydl.urlopen(request)
@@ -150,20 +141,21 @@ class HttpFD(FileDownloader):
                    # Content-Range is either not present or invalid. Assuming remote webserver is
                    # trying to send the whole file, resume is not possible, so wiping the local file
                    # and performing entire redownload
                    self.report_unable_to_resume()
                elif range_start > 0:
                    self.report_unable_to_resume()
                    ctx.resume_len = 0
                    ctx.open_mode = 'wb'
                ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
            except urllib.error.HTTPError as err:
                if err.code == 416:
                ctx.data_len = ctx.content_len = int_or_none(ctx.data.headers.get('Content-length', None))
            except HTTPError as err:
                if err.status == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        ctx.data = self.ydl.urlopen(
                            sanitized_Request(url, request_data, headers))
                        content_length = ctx.data.info()['Content-Length']
                    except urllib.error.HTTPError as err:
                        if err.code < 500 or err.code >= 600:
                            Request(url, request_data, headers))
                        content_length = ctx.data.headers['Content-Length']
                    except HTTPError as err:
                        if err.status < 500 or err.status >= 600:
                            raise
                    else:
                        # Examine the reported length
@@ -191,17 +183,13 @@ class HttpFD(FileDownloader):
                        ctx.resume_len = 0
                        ctx.open_mode = 'wb'
                        return
                elif err.code < 500 or err.code >= 600:
                elif err.status < 500 or err.status >= 600:
                    # Unexpected HTTP error
                    raise
                raise RetryDownload(err)
            except urllib.error.URLError as err:
                if isinstance(err.reason, ssl.CertificateError):
                    raise
                raise RetryDownload(err)
            # In urllib.request.AbstractHTTPHandler, the response is partially read on request.
            # Any errors that occur during this will not be wrapped by URLError
            except RESPONSE_READ_EXCEPTIONS as err:
            except CertificateVerifyError:
                raise
            except TransportError as err:
                raise RetryDownload(err)

        def close_stream():
@@ -211,7 +199,12 @@ class HttpFD(FileDownloader):
                ctx.stream = None

        def download():
            data_len = ctx.data.info().get('Content-length', None)
            data_len = ctx.data.headers.get('Content-length')

            if ctx.data.headers.get('Content-encoding'):
                # Content-encoding is present, Content-length is not reliable anymore as we are
                # doing auto decompression. (See: https://github.com/yt-dlp/yt-dlp/pull/6176)
                data_len = None

            # Range HTTP header may be ignored/unsupported by a webserver
            # (e.g. extractor/scivee.py, extractor/bambuser.py).
@@ -252,7 +245,7 @@ class HttpFD(FileDownloader):
                try:
                    # Download and write
                    data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
                except RESPONSE_READ_EXCEPTIONS as err:
                except TransportError as err:
                    retry(err)

                byte_counter += len(data_block)
@@ -333,15 +326,15 @@ class HttpFD(FileDownloader):
                elif speed:
                    ctx.throttle_start = None

            if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
                ctx.resume_len = byte_counter
                # ctx.block_size = block_size
                raise NextFragment()

            if ctx.stream is None:
                self.to_stderr('\n')
                self.report_error('Did not get any data blocks')
                return False

            if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
                ctx.resume_len = byte_counter
                raise NextFragment()

            if ctx.tmpfilename != '-':
                ctx.stream.close()
@@ -353,7 +346,7 @@ class HttpFD(FileDownloader):

            # Update file modification time
            if self.params.get('updatetime', True):
                info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
                info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None))

            self._hook_progress({
                'downloaded_bytes': byte_counter,
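The recurring migration across these downloaders is urllib.error.HTTPError with .code giving way to the new networking HTTPError with .status, and read/transport failures collapsing into TransportError. A sketch of the new catch pattern, assuming this commit's urlopen accepts the new Request (the URL is invented):

from yt_dlp import YoutubeDL
from yt_dlp.networking import Request
from yt_dlp.networking.exceptions import HTTPError, TransportError

with YoutubeDL() as ydl:
    try:
        ydl.urlopen(Request('https://example.com/missing'))
    except HTTPError as err:
        print(err.status, err.reason)  # .status replaces the old urllib .code
    except TransportError as err:
        print('retryable network error:', err)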
@@ -2,9 +2,9 @@ import binascii
import io
import struct
import time
import urllib.error

from .fragment import FragmentFD
from ..networking.exceptions import HTTPError
from ..utils import RetryManager

u8 = struct.Struct('>B')
@@ -271,7 +271,7 @@ class IsmFD(FragmentFD):
                        write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
                        extra_state['ism_track_written'] = True
                    self._append_fragment(ctx, frag_content)
                except urllib.error.HTTPError as err:
                except HTTPError as err:
                    retry.error = err
                    continue
@@ -1,8 +1,12 @@
import json
import threading
import time

from . import get_suitable_downloader
from .common import FileDownloader
from ..utils import sanitized_Request
from .external import FFmpegFD
from ..networking import Request
from ..utils import DownloadError, WebSocketsWrapper, str_or_none, try_get


class NiconicoDmcFD(FileDownloader):
@@ -24,7 +28,7 @@ class NiconicoDmcFD(FileDownloader):
        heartbeat_data = heartbeat_info_dict['data'].encode()
        heartbeat_interval = heartbeat_info_dict.get('interval', 30)

        request = sanitized_Request(heartbeat_url, heartbeat_data)
        request = Request(heartbeat_url, heartbeat_data)

        def heartbeat():
            try:
@@ -50,3 +54,93 @@ class NiconicoDmcFD(FileDownloader):
            timer[0].cancel()
            download_complete = True
        return success


class NiconicoLiveFD(FileDownloader):
    """ Downloads niconico live without being stopped """

    def real_download(self, filename, info_dict):
        video_id = info_dict['video_id']
        ws_url = info_dict['url']
        ws_extractor = info_dict['ws']
        ws_origin_host = info_dict['origin']
        cookies = info_dict.get('cookies')
        live_quality = info_dict.get('live_quality', 'high')
        live_latency = info_dict.get('live_latency', 'high')
        dl = FFmpegFD(self.ydl, self.params or {})

        new_info_dict = info_dict.copy()
        new_info_dict.update({
            'protocol': 'm3u8',
        })

        def communicate_ws(reconnect):
            if reconnect:
                ws = WebSocketsWrapper(ws_url, {
                    'Cookies': str_or_none(cookies) or '',
                    'Origin': f'https://{ws_origin_host}',
                    'Accept': '*/*',
                    'User-Agent': self.params['http_headers']['User-Agent'],
                })
                if self.ydl.params.get('verbose', False):
                    self.to_screen('[debug] Sending startWatching request')
                ws.send(json.dumps({
                    'type': 'startWatching',
                    'data': {
                        'stream': {
                            'quality': live_quality,
                            'protocol': 'hls+fmp4',
                            'latency': live_latency,
                            'chasePlay': False
                        },
                        'room': {
                            'protocol': 'webSocket',
                            'commentable': True
                        },
                        'reconnect': True,
                    }
                }))
            else:
                ws = ws_extractor
            with ws:
                while True:
                    recv = ws.recv()
                    if not recv:
                        continue
                    data = json.loads(recv)
                    if not data or not isinstance(data, dict):
                        continue
                    if data.get('type') == 'ping':
                        # pong back
                        ws.send(r'{"type":"pong"}')
                        ws.send(r'{"type":"keepSeat"}')
                    elif data.get('type') == 'disconnect':
                        self.write_debug(data)
                        return True
                    elif data.get('type') == 'error':
                        self.write_debug(data)
                        message = try_get(data, lambda x: x['body']['code'], str) or recv
                        return DownloadError(message)
                    elif self.ydl.params.get('verbose', False):
                        if len(recv) > 100:
                            recv = recv[:100] + '...'
                        self.to_screen('[debug] Server said: %s' % recv)

        def ws_main():
            reconnect = False
            while True:
                try:
                    ret = communicate_ws(reconnect)
                    if ret is True:
                        return
                except BaseException as e:
                    self.to_screen('[%s] %s: Connection error occurred, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e)))
                    time.sleep(10)
                    continue
                finally:
                    reconnect = True

        thread = threading.Thread(target=ws_main, daemon=True)
        thread.start()

        return dl.download(filename, new_info_dict)
@@ -1,8 +1,8 @@
import json
import time
import urllib.error

from .fragment import FragmentFD
from ..networking.exceptions import HTTPError
from ..utils import (
    RegexNotFoundError,
    RetryManager,
@@ -10,6 +10,7 @@ from ..utils import (
    int_or_none,
    try_get,
)
from ..utils.networking import HTTPHeaderDict


class YoutubeLiveChatFD(FragmentFD):
@@ -37,10 +38,7 @@ class YoutubeLiveChatFD(FragmentFD):
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
            http_headers = info_dict.get('http_headers', {})
            if headers:
                http_headers = http_headers.copy()
                http_headers.update(headers)
            http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
@@ -129,7 +127,7 @@ class YoutubeLiveChatFD(FragmentFD):
                        or frag_index == 1 and try_refresh_replay_beginning
                        or parse_actions_replay)
                return (True, *func(live_chat_continuation))
            except urllib.error.HTTPError as err:
            except HTTPError as err:
                retry.error = err
                continue
        return False, None, None, None
@@ -15,7 +15,6 @@ from .youtube import (  # Youtube is moved to the top to improve performance
    YoutubeSearchURLIE,
    YoutubeMusicSearchURLIE,
    YoutubeSubscriptionsIE,
    YoutubeStoriesIE,
    YoutubeTruncatedIDIE,
    YoutubeTruncatedURLIE,
    YoutubeYtBeIE,
@@ -102,6 +101,7 @@ from .americastestkitchen import (
    AmericasTestKitchenIE,
    AmericasTestKitchenSeasonIE,
)
from .anchorfm import AnchorFMEpisodeIE
from .angel import AngelIE
from .anvato import AnvatoIE
from .aol import AolIE
@@ -203,13 +203,18 @@ from .bfmtv import (
    BFMTVLiveIE,
    BFMTVArticleIE,
)
from .bibeltv import BibelTVIE
from .bibeltv import (
    BibelTVLiveIE,
    BibelTVSeriesIE,
    BibelTVVideoIE,
)
from .bigflix import BigflixIE
from .bigo import BigoIE
from .bild import BildIE
from .bilibili import (
    BiliBiliIE,
    BiliBiliBangumiIE,
    BiliBiliBangumiSeasonIE,
    BiliBiliBangumiMediaIE,
    BiliBiliSearchIE,
    BilibiliCategoryIE,
@@ -238,19 +243,28 @@ from .bleacherreport import (
    BleacherReportIE,
    BleacherReportCMSIE,
)
from .blerp import BlerpIE
from .blogger import BloggerIE
from .bloomberg import BloombergIE
from .bokecc import BokeCCIE
from .bongacams import BongaCamsIE
from .bostonglobe import BostonGlobeIE
from .box import BoxIE
from .booyah import BooyahClipsIE
from .boxcast import BoxCastVideoIE
from .bpb import BpbIE
from .br import (
    BRIE,
    BRMediathekIE,
)
from .bravotv import BravoTVIE
from .brainpop import (
    BrainPOPIE,
    BrainPOPJrIE,
    BrainPOPELLIE,
    BrainPOPEspIE,
    BrainPOPFrIE,
    BrainPOPIlIE,
)
from .breakcom import BreakIE
from .breitbart import BreitBartIE
from .brightcove import (
@@ -270,6 +284,10 @@ from .camdemy import (
    CamdemyIE,
    CamdemyFolderIE
)
from .camfm import (
    CamFMEpisodeIE,
    CamFMShowIE
)
from .cammodels import CamModelsIE
from .camsoda import CamsodaIE
from .camtasia import CamtasiaEmbedIE
@@ -277,12 +295,6 @@ from .camwithher import CamWithHerIE
from .canalalpha import CanalAlphaIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .canvas import (
    CanvasIE,
    CanvasEenIE,
    VrtNUIE,
    DagelijkseKostIE,
)
from .carambatv import (
    CarambaTVIE,
    CarambaTVPageIE,
@@ -295,15 +307,18 @@ from .cbc import (
    CBCGemPlaylistIE,
    CBCGemLiveIE,
)
from .cbs import CBSIE
from .cbslocal import (
    CBSLocalIE,
    CBSLocalArticleIE,
from .cbs import (
    CBSIE,
    ParamountPressExpressIE,
)
from .cbsinteractive import CBSInteractiveIE
from .cbsnews import (
    CBSNewsEmbedIE,
    CBSNewsIE,
    CBSLocalIE,
    CBSLocalArticleIE,
    CBSLocalLiveIE,
    CBSNewsLiveIE,
    CBSNewsLiveVideoIE,
)
from .cbssports import (
@@ -342,6 +357,7 @@ from .ciscolive import (
)
from .ciscowebex import CiscoWebexIE
from .cjsw import CJSWIE
from .clipchamp import ClipchampIE
from .cliphunter import CliphunterIE
from .clippit import ClippitIE
from .cliprs import ClipRsIE
@@ -389,9 +405,12 @@ from .crowdbunker import (
    CrowdBunkerIE,
    CrowdBunkerChannelIE,
)
from .crtvg import CrtvgIE
from .crunchyroll import (
    CrunchyrollBetaIE,
    CrunchyrollBetaShowIE,
    CrunchyrollMusicIE,
    CrunchyrollArtistIE,
)
from .cspan import CSpanIE, CSpanCongressIE
from .ctsnews import CtsNewsIE
@@ -408,6 +427,10 @@ from .cybrary import (
    CybraryIE,
    CybraryCourseIE
)
from .dacast import (
    DacastVODIE,
    DacastPlaylistIE,
)
from .daftsex import DaftsexIE
from .dailymail import DailyMailIE
from .dailymotion import (
@@ -438,6 +461,10 @@ from .deezer import (
)
from .democracynow import DemocracynowIE
from .detik import DetikEmbedIE
from .dlf import (
    DLFIE,
    DLFCorpusIE,
)
from .dfb import DFBIE
from .dhm import DHMIE
from .digg import DiggIE
@@ -470,6 +497,7 @@ from .dplay import (
    DiscoveryPlusItalyIE,
    DiscoveryPlusItalyShowIE,
    DiscoveryPlusIndiaShowIE,
    GlobalCyclingNetworkPlusIE,
)
from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
@@ -493,6 +521,7 @@ from .deuxm import (
    DeuxMNewsIE
)
from .digitalconcerthall import DigitalConcertHallIE
from .discogs import DiscogsReleasePlaylistIE
from .discovery import DiscoveryIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
@@ -507,6 +536,7 @@ from .dw import (
)
from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE
from .ebaumsworld import EbaumsWorldIE
from .ebay import EbayIE
from .echomsk import EchoMskIE
from .egghead import (
    EggheadCourseIE,
@@ -516,6 +546,7 @@ from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .einthusan import EinthusanIE
from .eitb import EitbIE
from .elevensports import ElevenSportsIE
from .ellentube import (
    EllenTubeIE,
    EllenTubeVideoIE,
@@ -549,6 +580,7 @@ from .espn import (
    ESPNCricInfoIE,
)
from .esri import EsriVideoIE
from .ettutv import EttuTvIE
from .europa import EuropaIE, EuroParlWebstreamIE
from .europeantour import EuropeanTourIE
from .eurosport import EurosportIE
@@ -635,6 +667,7 @@ from .funimation import (
    FunimationShowIE,
)
from .funk import FunkIE
from .funker530 import Funker530IE
from .fusion import FusionIE
from .fuyintv import FuyinTVIE
from .gab import (
@@ -670,10 +703,18 @@ from .gfycat import GfycatIE
from .giantbomb import GiantBombIE
from .giga import GigaIE
from .glide import GlideIE
from .globalplayer import (
    GlobalPlayerLiveIE,
    GlobalPlayerLivePlaylistIE,
    GlobalPlayerAudioIE,
    GlobalPlayerAudioEpisodeIE,
    GlobalPlayerVideoIE
)
from .globo import (
    GloboIE,
    GloboArticleIE,
)
from .gmanetwork import GMANetworkVideoIE
from .go import GoIE
from .godtube import GodTubeIE
from .gofile import GofileIE
@@ -705,13 +746,16 @@ from .hearthisat import HearThisAtIE
from .heise import HeiseIE
from .hellporno import HellPornoIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .hgtv import HGTVComShowIE
from .hketv import HKETVIE
from .hidive import HiDiveIE
from .historicfilms import HistoricFilmsIE
from .hitbox import HitboxIE, HitboxLiveIE
from .hitrecord import HitRecordIE
from .hollywoodreporter import (
    HollywoodReporterIE,
    HollywoodReporterPlaylistIE,
)
from .holodex import HolodexIE
from .hotnewhiphop import HotNewHipHopIE
from .hotstar import (
@@ -723,6 +767,7 @@ from .hotstar import (
)
from .howcast import HowcastIE
from .howstuffworks import HowStuffWorksIE
from .hrefli import HrefLiRedirectIE
from .hrfensehen import HRFernsehenIE
from .hrti import (
    HRTiIE,
@@ -745,12 +790,14 @@ from .hungama import (
    HungamaAlbumPlaylistIE,
)
from .hypem import HypemIE
from .hypergryph import MonsterSirenHypergryphMusicIE
from .hytale import HytaleIE
from .icareus import IcareusIE
from .ichinanalive import (
    IchinanaLiveIE,
    IchinanaLiveClipIE,
)
from .idolplus import IdolPlusIE
from .ign import (
    IGNIE,
    IGNVideoIE,
@@ -835,6 +882,7 @@ from .japandiet import (
from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .joj import JojIE
from .jstream import JStreamIE
from .jwplatform import JWPlatformIE
from .kakao import KakaoIE
from .kaltura import KalturaIE
@@ -844,7 +892,6 @@ from .karaoketv import KaraoketvIE
from .karrierevideos import KarriereVideosIE
from .keezmovies import KeezMoviesIE
from .kelbyone import KelbyOneIE
from .ketnet import KetnetIE
from .khanacademy import (
    KhanAcademyIE,
    KhanAcademyUnitIE,
@@ -857,6 +904,7 @@ from .kicker import KickerIE
from .kickstarter import KickStarterIE
from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE
from .kommunetv import KommunetvIE
from .kompas import KompasVideoIE
from .konserthusetplay import KonserthusetPlayIE
from .koo import KooIE
@@ -908,6 +956,10 @@ from .leeco import (
    LePlaylistIE,
    LetvCloudIE,
)
from .lefigaro import (
    LeFigaroVideoEmbedIE,
    LeFigaroVideoSectionIE,
)
from .lego import LEGOIE
from .lemonde import LemondeIE
from .lenta import LentaIE
@@ -926,10 +978,6 @@ from .limelight import (
    LimelightChannelIE,
    LimelightChannelListIE,
)
from .line import (
    LineLiveIE,
    LineLiveChannelIE,
)
from .linkedin import (
    LinkedInIE,
    LinkedInLearningIE,
@@ -956,11 +1004,15 @@ from .lrt import (
    LRTVODIE,
    LRTStreamIE
)
from .lumni import (
    LumniIE
)
from .lynda import (
    LyndaIE,
    LyndaCourseIE
)
from .m6 import M6IE
from .magellantv import MagellanTVIE
from .magentamusik360 import MagentaMusik360IE
from .mailru import (
    MailRuIE,
@@ -1069,7 +1121,8 @@ from .mojvideo import MojvideoIE
from .morningstar import MorningstarIE
from .motherless import (
    MotherlessIE,
    MotherlessGroupIE
    MotherlessGroupIE,
    MotherlessGalleryIE,
)
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
@@ -1089,6 +1142,7 @@ from .mtv import (
)
from .muenchentv import MuenchenTVIE
from .murrtube import MurrtubeIE, MurrtubeUserIE
from .museai import MuseAIIE
from .musescore import MuseScoreIE
from .musicdex import (
    MusicdexSongIE,
@@ -1110,6 +1164,7 @@ from .myvi import (
)
from .myvideoge import MyVideoGeIE
from .myvidster import MyVidsterIE
from .mzaalo import MzaaloIE
from .n1 import (
    N1InfoAssetIE,
    N1InfoIIE,
@@ -1158,6 +1213,7 @@ from .nebula import (
    NebulaSubscriptionsIE,
    NebulaChannelIE,
)
from .nekohacker import NekoHackerIE
from .nerdcubed import NerdCubedFeedIE
from .netzkino import NetzkinoIE
from .neteasemusic import (
@@ -1206,6 +1262,9 @@ from .nhk import (
    NhkForSchoolBangumiIE,
    NhkForSchoolSubjectIE,
    NhkForSchoolProgramListIE,
    NhkRadioNewsPageIE,
    NhkRadiruIE,
    NhkRadiruLiveIE,
)
from .nhl import NHLIE
from .nick import (
@@ -1225,6 +1284,7 @@ from .niconico import (
    NicovideoSearchIE,
    NicovideoSearchURLIE,
    NicovideoTagURLIE,
    NiconicoLiveIE,
)
from .ninecninemedia import (
    NineCNineMediaIE,
@@ -1282,6 +1342,7 @@ from .nrl import NRLTVIE
from .ntvcojp import NTVCoJpCUIE
from .ntvde import NTVDeIE
from .ntvru import NTVRuIE
from .nubilesporn import NubilesPornIE
from .nytimes import (
    NYTimesIE,
    NYTimesArticleIE,
@@ -1292,6 +1353,7 @@ from .nzherald import NZHeraldIE
from .nzonscreen import NZOnScreenIE
from .nzz import NZZIE
from .odatv import OdaTVIE
from .odkmedia import OnDemandChinaEpisodeIE
from .odnoklassniki import OdnoklassnikiIE
from .oftv import (
    OfTVIE,
@@ -1332,6 +1394,7 @@ from .orf import (
    ORFIPTVIE,
)
from .outsidetv import OutsideTVIE
from .owncloud import OwnCloudIE
from .packtpub import (
    PacktPubIE,
    PacktPubCourseIE,
@@ -1357,7 +1420,7 @@ from .patreon import (
    PatreonIE,
    PatreonCampaignIE
)
from .pbs import PBSIE
from .pbs import PBSIE, PBSKidsIE
from .pearvideo import PearVideoIE
from .peekvids import PeekVidsIE, PlayVidsIE
from .peertube import (
@@ -1375,6 +1438,7 @@ from .periscope import (
    PeriscopeIE,
    PeriscopeUserIE,
)
from .pgatour import PGATourIE
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
@@ -1432,7 +1496,6 @@ from .polskieradio import (
    PolskieRadioPlayerIE,
    PolskieRadioPodcastIE,
    PolskieRadioPodcastListIE,
    PolskieRadioRadioKierowcowIE,
)
from .popcorntimes import PopcorntimesIE
from .popcorntv import PopcornTVIE
@@ -1455,6 +1518,7 @@ from .puhutv import (
    PuhuTVIE,
    PuhuTVSerieIE,
)
from .pr0gramm import Pr0grammStaticIE, Pr0grammIE
from .prankcast import PrankCastIE
from .premiershiprugby import PremiershipRugbyIE
from .presstv import PressTVIE
@@ -1469,6 +1533,7 @@ from .prx import (
)
from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
from .qdance import QDanceIE
from .qingting import QingTingIE
from .qqmusic import (
    QQMusicIE,
@@ -1501,6 +1566,8 @@ from .radlive import (
    RadLiveSeasonIE,
)
from .rai import (
    RaiIE,
    RaiCulturaIE,
    RaiPlayIE,
    RaiPlayLiveIE,
    RaiPlayPlaylistIE,
@@ -1509,13 +1576,16 @@ from .rai import (
    RaiPlaySoundPlaylistIE,
    RaiNewsIE,
    RaiSudtirolIE,
    RaiIE,
)
from .raywenderlich import (
    RayWenderlichIE,
    RayWenderlichCourseIE,
)
from .rbmaradio import RBMARadioIE
from .rbgtum import (
    RbgTumIE,
    RbgTumCourseIE,
)
from .rcs import (
    RCSIE,
    RCSEmbedsIE,
@@ -1527,6 +1597,7 @@ from .rcti import (
    RCTIPlusTVIE,
)
from .rds import RDSIE
from .recurbate import RecurbateIE
from .redbee import ParliamentLiveUKIE, RTBFIE
from .redbulltv import (
    RedBullTVIE,
@@ -1549,6 +1620,7 @@ from .rentv import (
from .restudy import RestudyIE
from .reuters import ReutersIE
from .reverbnation import ReverbNationIE
from .rheinmaintv import RheinMainTVIE
from .rice import RICEIE
from .rmcdecouverte import RMCDecouverteIE
from .rockstargames import RockstarGamesIE
@@ -1563,6 +1635,7 @@ from .rottentomatoes import RottenTomatoesIE
from .rozhlas import (
    RozhlasIE,
    RozhlasVltavaIE,
    MujRozhlasIE,
)
from .rte import RteIE, RteRadioIE
from .rtlnl import (
@@ -1586,6 +1659,11 @@ from .rtnews import (
from .rtp import RTPIE
from .rtrfm import RTRFMIE
from .rts import RTSIE
from .rtvcplay import (
    RTVCPlayIE,
    RTVCPlayEmbedIE,
    RTVCKalturaIE,
)
from .rtve import (
    RTVEALaCartaIE,
    RTVEAudioIE,
@@ -1631,6 +1709,7 @@ from .ruv import (
    RuvIE,
    RuvSpilaIE
)
from .s4c import S4CIE
from .safari import (
    SafariIE,
    SafariApiIE,
@@ -1655,6 +1734,7 @@ from .scte import (
)
from .scrolller import ScrolllerIE
from .seeker import SeekerIE
from .senalcolombia import SenalColombiaLiveIE
from .senategov import SenateISVPIE, SenateGovIE
from .sendtonews import SendtoNewsIE
from .servus import ServusIE
@@ -1752,6 +1832,7 @@ from .spike import (
    BellatorIE,
    ParamountNetworkIE,
)
from .stageplus import StagePlusVODConcertIE
from .startrek import StarTrekIE
from .stitcher import (
    StitcherIE,
@@ -1777,6 +1858,10 @@ from .srgssr import (
    SRGSSRPlayIE,
)
from .srmediathek import SRMediathekIE
from .stacommu import (
    StacommuLiveIE,
    StacommuVODIE,
)
from .stanfordoc import StanfordOpenClassroomIE
from .startv import StarTVIE
from .steam import (
@@ -1789,7 +1874,6 @@ from .storyfire import (
    StoryFireSeriesIE,
)
from .streamable import StreamableIE
from .streamanity import StreamanityIE
from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE
from .streamff import StreamFFIE
@@ -1827,7 +1911,10 @@ from .teachertube import (
    TeacherTubeUserIE,
)
from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .teamcoco import (
    TeamcocoIE,
    ConanClassicIE,
)
from .teamtreehouse import TeamTreeHouseIE
from .techtalks import TechTalksIE
from .ted import (
@@ -1839,6 +1926,7 @@ from .ted import (
from .tele5 import Tele5IE
from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
from .telecaribe import TelecaribePlayIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
from .telegram import TelegramEmbedIE
@@ -1853,7 +1941,7 @@ from .telequebec import (
)
from .teletask import TeleTaskIE
from .telewebion import TelewebionIE
from .tempo import TempoIE
from .tempo import TempoIE, IVXPlayerIE
from .tencent import (
    IflixEpisodeIE,
    IflixSeriesIE,
@@ -1930,6 +2018,7 @@ from .traileraddict import TrailerAddictIE
from .triller import (
    TrillerIE,
    TrillerUserIE,
    TrillerShortIE,
)
from .trilulilu import TriluliluIE
from .trovo import (
@@ -1951,10 +2040,9 @@ from .tubitv import (
)
from .tumblr import TumblrIE
from .tunein import (
    TuneInClipIE,
    TuneInStationIE,
    TuneInProgramIE,
    TuneInTopicIE,
    TuneInPodcastIE,
    TuneInPodcastEpisodeIE,
    TuneInShortenerIE,
)
from .tunepk import TunePkIE
@@ -2022,7 +2110,6 @@ from .tvp import (
)
from .tvplay import (
    TVPlayIE,
    ViafreeIE,
    TVPlayHomeIE,
)
from .tvplayer import TVPlayerIE
@@ -2181,12 +2268,16 @@ from .viu import (
    ViuIE,
    ViuPlaylistIE,
    ViuOTTIE,
    ViuOTTIndonesiaIE,
)
from .vk import (
    VKIE,
    VKUserVideosIE,
    VKWallPostIE,
    VKPlayIE,
    VKPlayLiveIE,
)
from .vocaroo import VocarooIE
from .vodlocker import VodlockerIE
from .vodpl import VODPlIE
from .vodplatform import VODPlatformIE
@@ -2204,7 +2295,12 @@ from .voxmedia import (
    VoxMediaVolumeIE,
    VoxMediaIE,
)
from .vrt import VRTIE
from .vrt import (
    VRTIE,
    VrtNUIE,
    KetnetIE,
    DagelijkseKostIE,
)
from .vrak import VrakIE
from .vrv import (
    VRVIE,
@@ -2255,8 +2351,20 @@ from .weibo import (
    WeiboMobileIE
)
from .weiqitv import WeiqiTVIE
from .weverse import (
    WeverseIE,
    WeverseMediaIE,
    WeverseMomentIE,
    WeverseLiveTabIE,
    WeverseMediaTabIE,
    WeverseLiveIE,
)
from .wevidi import WeVidiIE
from .weyyak import WeyyakIE
from .whyp import WhypIE
from .wikimedia import WikimediaIE
from .willow import WillowIE
from .wimbledon import WimbledonIE
from .wimtv import WimTVIE
from .whowatch import WhoWatchIE
from .wistia import (
@@ -2282,6 +2390,12 @@ from .wsj import (
    WSJArticleIE,
)
from .wwe import WWEIE
from .wykop import (
    WykopDigIE,
    WykopDigCommentIE,
    WykopPostIE,
    WykopPostCommentIE,
)
from .xanimu import XanimuIE
from .xbef import XBefIE
from .xboxclips import XboxClipsIE
@@ -2301,13 +2415,14 @@ from .xnxx import XNXXIE
from .xstream import XstreamIE
from .xtube import XTubeUserIE, XTubeIE
from .xuite import XuiteIE
from .xvideos import XVideosIE
from .xvideos import (
    XVideosIE,
    XVideosQuickiesIE
)
from .xxxymovies import XXXYMoviesIE
from .yahoo import (
    YahooIE,
    YahooSearchIE,
    YahooGyaOPlayerIE,
    YahooGyaOIE,
    YahooJapanNewsIE,
)
from .yandexdisk import YandexDiskIE
@@ -2325,6 +2440,10 @@ from .yandexvideo import (
    ZenYandexChannelIE,
)
from .yapfiles import YapFilesIE
from .yappy import (
    YappyIE,
    YappyProfileIE,
)
from .yesjapan import YesJapanIE
from .yinyuetai import YinYueTaiIE
from .yle_areena import YleAreenaIE
@@ -2342,6 +2461,10 @@ from .younow import (
from .youporn import YouPornIE
from .yourporn import YourPornIE
from .yourupload import YourUploadIE
from .zaiko import (
    ZaikoIE,
    ZaikoETicketIE,
)
from .zapiks import ZapiksIE
from .zattoo import (
    BBVTVIE,
@@ -2399,6 +2522,7 @@ from .zingmp3 import (
    ZingMp3WeekChartIE,
    ZingMp3ChartMusicVideoIE,
    ZingMp3UserIE,
    ZingMp3HubIE,
)
from .zoom import ZoomIE
from .zype import ZypeIE
@@ -12,6 +12,7 @@ from ..utils import (
    int_or_none,
    parse_iso8601,
    str_or_none,
    traverse_obj,
    try_get,
    unescapeHTML,
    update_url_query,
@@ -85,6 +86,15 @@ class ABCIE(InfoExtractor):
            'uploader': 'Behind the News',
            'uploader_id': 'behindthenews',
        }
    }, {
        'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
        'info_dict': {
            'id': '102520540',
            'title': 'Wagner Group retreating from Russia, leader Prigozhin to move to Belarus',
            'ext': 'mp4',
            'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
            'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
        }
    }]

    def _real_extract(self, url):
@@ -107,7 +117,7 @@ class ABCIE(InfoExtractor):
            video = True

        if mobj is None:
            mobj = re.search(r'(?P<type>)"sources": (?P<json_data>\[[^\]]+\]),', webpage)
            mobj = re.search(r'(?P<type>)"(?:sources|files|renditions)":\s*(?P<json_data>\[[^\]]+\])', webpage)
            if mobj is None:
                mobj = re.search(
                    r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
@@ -121,7 +131,8 @@ class ABCIE(InfoExtractor):
        urls_info = self._parse_json(
            mobj.group('json_data'), video_id, transform_source=js_to_json)
        youtube = mobj.group('type') == 'YouTube'
        video = mobj.group('type') == 'Video' or urls_info[0]['contentType'] == 'video/mp4'
        video = mobj.group('type') == 'Video' or traverse_obj(
            urls_info, (0, ('contentType', 'MIMEType')), get_all=False) == 'video/mp4'

        if not isinstance(urls_info, list):
            urls_info = [urls_info]
@ -22,80 +22,23 @@ from ..utils import (
    int_or_none,
    intlist_to_bytes,
    OnDemandPagedList,
    request_to_url,
    time_seconds,
    traverse_obj,
    update_url_query,
)

# NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862)


def add_opener(ydl, handler):
    ''' Add a handler for opening URLs, like _download_webpage '''
def add_opener(ydl, handler):  # FIXME: Create proper API in .networking
    """Add a handler for opening URLs, like _download_webpage"""
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    assert isinstance(ydl._opener, urllib.request.OpenerDirector)
    ydl._opener.add_handler(handler)


def remove_opener(ydl, handler):
    '''
    Remove handler(s) for opening URLs
    @param handler Either handler object itself or handler type.
    Specifying handler type will remove all handler which isinstance returns True.
    '''
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    opener = ydl._opener
    assert isinstance(ydl._opener, urllib.request.OpenerDirector)
    if isinstance(handler, (type, tuple)):
        find_cp = lambda x: isinstance(x, handler)
    else:
        find_cp = lambda x: x is handler

    removed = []
    for meth in dir(handler):
        if meth in ["redirect_request", "do_open", "proxy_open"]:
            # oops, coincidental match
            continue

        i = meth.find("_")
        protocol = meth[:i]
        condition = meth[i + 1:]

        if condition.startswith("error"):
            j = condition.find("_") + i + 1
            kind = meth[j + 1:]
            try:
                kind = int(kind)
            except ValueError:
                pass
            lookup = opener.handle_error.get(protocol, {})
            opener.handle_error[protocol] = lookup
        elif condition == "open":
            kind = protocol
            lookup = opener.handle_open
        elif condition == "response":
            kind = protocol
            lookup = opener.process_response
        elif condition == "request":
            kind = protocol
            lookup = opener.process_request
        else:
            continue

        handlers = lookup.setdefault(kind, [])
        if handlers:
            handlers[:] = [x for x in handlers if not find_cp(x)]

        removed.append(x for x in handlers if find_cp(x))

    if removed:
        for x in opener.handlers:
            if find_cp(x):
                x.add_parent(None)
        opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
    rh = ydl._request_director.handlers['Urllib']
    if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
        return
    opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies)
    assert isinstance(opener, urllib.request.OpenerDirector)
    opener.add_handler(handler)
    rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')


class AbemaLicenseHandler(urllib.request.BaseHandler):
@ -137,11 +80,11 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
        return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))

    def abematv_license_open(self, url):
        url = request_to_url(url)
        url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
        ticket = urllib.parse.urlparse(url).netloc
        response_data = self._get_videokey_from_ticket(ticket)
        return urllib.response.addinfourl(io.BytesIO(response_data), headers={
            'Content-Length': len(response_data),
            'Content-Length': str(len(response_data)),
        }, url=url, code=200)


@ -213,10 +156,7 @@ class AbemaTVBaseIE(InfoExtractor):
            })
        AbemaTVBaseIE._USERTOKEN = user_data['token']

        # don't allow adding it 2 times or more, though it's guarded
        remove_opener(self._downloader, AbemaLicenseHandler)
        add_opener(self._downloader, AbemaLicenseHandler(self))

        return self._USERTOKEN

    def _get_media_token(self, invalidate=False, to_show=True):
@ -436,6 +376,16 @@ class AbemaTVIE(AbemaTVBaseIE):
            if 3 not in ondemand_types:
                # cannot acquire decryption key for these streams
                self.report_warning('This is a premium-only stream')
            info.update(traverse_obj(api_response, {
                'series': ('series', 'title'),
                'season': ('season', 'title'),
                'season_number': ('season', 'sequence'),
                'episode_number': ('episode', 'number'),
            }))
            if not title:
                title = traverse_obj(api_response, ('episode', 'title'))
            if not description:
                description = traverse_obj(api_response, ('episode', 'content'))

            m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
        elif video_type == 'slots':
@ -40,28 +40,33 @@ class ACastBaseIE(InfoExtractor):

class ACastIE(ACastBaseIE):
    IE_NAME = 'acast'
    _VALID_URL = r'''(?x)
    _VALID_URL = r'''(?x:
                    https?://
                        (?:
                            (?:(?:embed|www)\.)?acast\.com/|
                            play\.acast\.com/s/
                        )
                        (?P<channel>[^/]+)/(?P<id>[^/#?]+)
                    '''
                        (?P<channel>[^/]+)/(?P<id>[^/#?"]+)
                    )'''
    _EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
        'md5': 'f5598f3ad1e4776fed12ec1407153e4b',
        'info_dict': {
            'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
            'ext': 'mp3',
            'title': '2. Raggarmordet - Röster ur det förflutna',
            'description': 'md5:a992ae67f4d98f1c0141598f7bebbf67',
            'description': 'md5:013959207e05011ad14a222cf22278cc',
            'timestamp': 1477346700,
            'upload_date': '20161024',
            'duration': 2766,
            'creator': 'Anton Berg & Martin Johnson',
            'creator': 'Third Ear Studio',
            'series': 'Spår',
            'episode': '2. Raggarmordet - Röster ur det förflutna',
            'thumbnail': 'https://assets.pippa.io/shows/616ebe1886d7b1398620b943/616ebe33c7e6e70013cae7da.jpg',
            'episode_number': 2,
            'display_id': '2.raggarmordet-rosterurdetforflutna',
            'season_number': 4,
            'season': 'Season 4',
        }
    }, {
        'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
@ -73,6 +78,23 @@ class ACastIE(ACastBaseIE):
        'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://ausi.anu.edu.au/news/democracy-sausage-episode-can-labor-be-long-form-government',
        'info_dict': {
            'id': '646c68fb21fbf20011e9c651',
            'ext': 'mp3',
            'creator': 'The Australian National University',
            'display_id': 'can-labor-be-a-long-form-government',
            'duration': 2618,
            'thumbnail': 'https://assets.pippa.io/shows/6113e8578b4903809f16f7e5/1684821529295-515b9520db9ce53275b995eb302f941c.jpeg',
            'title': 'Can Labor be a long-form government?',
            'episode': 'Can Labor be a long-form government?',
            'upload_date': '20230523',
            'series': 'Democracy Sausage with Mark Kenny',
            'timestamp': 1684826362,
            'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16',
        }
    }]

    def _real_extract(self, url):
        channel, display_id = self._match_valid_url(url).groups()
@ -6,10 +6,8 @@ import random

from .common import InfoExtractor
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import (
    compat_HTTPError,
    compat_b64decode,
)
from ..compat import compat_b64decode
from ..networking.exceptions import HTTPError
from ..utils import (
    ass_subtitles_timecode,
    bytes_to_intlist,
@ -142,9 +140,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
            self._HEADERS = {'authorization': 'Bearer ' + access_token}
        except ExtractorError as e:
            message = None
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                resp = self._parse_json(
                    e.cause.read().decode(), None, fatal=False) or {}
                    e.cause.response.read().decode(), None, fatal=False) or {}
                message = resp.get('message') or resp.get('code')
            self.report_warning(message or self._LOGIN_ERR_MESSAGE)

@ -195,14 +193,14 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
                })
                break
            except ExtractorError as e:
                if not isinstance(e.cause, compat_HTTPError):
                if not isinstance(e.cause, HTTPError):
                    raise e

                if e.cause.code == 401:
                if e.cause.status == 401:
                    # This usually goes away with a different random pkcs1pad, so retry
                    continue

                error = self._parse_json(e.cause.read(), video_id)
                error = self._parse_json(e.cause.response.read(), video_id)
                message = error.get('message')
                if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
                    self.raise_geo_restricted(msg=message)
@ -2,11 +2,11 @@ import getpass
import json
import re
import time
import urllib.error
import xml.etree.ElementTree as etree

from .common import InfoExtractor
from ..compat import compat_urlparse
from ..networking.exceptions import HTTPError
from ..utils import (
    NO_DEFAULT,
    ExtractorError,
@ -1394,7 +1394,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should en
        form_page, urlh = form_page_res
        post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
        if not re.match(r'https?://', post_url):
            post_url = compat_urlparse.urljoin(urlh.geturl(), post_url)
            post_url = compat_urlparse.urljoin(urlh.url, post_url)
        form_data = self._hidden_inputs(form_page)
        form_data.update(data)
        return self._download_webpage_handle(
@ -1473,7 +1473,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should en
            elif 'automatically signed in with' in provider_redirect_page:
                # Seems like comcast is rolling up new way of automatically signing customers
                oauth_redirect_url = self._html_search_regex(
                    r'continue:\s*"(https://oauth.xfinity.com/oauth/authorize\?.+)"', provider_redirect_page,
                    r'continue:\s*"(https://oauth\.xfinity\.com/oauth/authorize\?.+)"', provider_redirect_page,
                    'oauth redirect (signed)')
                # Just need to process the request. No useful data comes back
                self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login')
@ -1573,7 +1573,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should en
                    }), headers={
                        'Content-Type': 'application/x-www-form-urlencoded'
                    })
            elif mso_id == 'Spectrum':
            elif mso_id in ('Spectrum', 'Charter_Direct'):
                # Spectrum's login form is dynamically loaded via JS so we need to hardcode the flow
                # as a one-off implementation.
                provider_redirect_page, urlh = provider_redirect_page_res
@ -1619,7 +1619,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should en
                hidden_data['history'] = 1

                provider_login_page_res = self._download_webpage_handle(
                    urlh.geturl(), video_id, 'Sending first bookend',
                    urlh.url, video_id, 'Sending first bookend',
                    query=hidden_data)

                provider_association_redirect, urlh = post_form(
@ -1629,7 +1629,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should en
                    })

                provider_refresh_redirect_url = extract_redirect_url(
                    provider_association_redirect, url=urlh.geturl())
                    provider_association_redirect, url=urlh.url)

                last_bookend_page, urlh = self._download_webpage_handle(
                    provider_refresh_redirect_url, video_id,
@ -1638,7 +1638,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should en
                hidden_data['history'] = 3

                mvpd_confirm_page_res = self._download_webpage_handle(
                    urlh.geturl(), video_id, 'Sending final bookend',
                    urlh.url, video_id, 'Sending final bookend',
                    query=hidden_data)

                post_form(mvpd_confirm_page_res, 'Confirming Login')
@ -1652,7 +1652,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should en
                hidden_data['history_val'] = 1

                provider_login_redirect_page_res = self._download_webpage_handle(
                    urlh.geturl(), video_id, 'Sending First Bookend',
                    urlh.url, video_id, 'Sending First Bookend',
                    query=hidden_data)

                provider_login_redirect_page, urlh = provider_login_redirect_page_res
@ -1680,7 +1680,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should en
                    })

                provider_refresh_redirect_url = extract_redirect_url(
                    provider_association_redirect, url=urlh.geturl())
                    provider_association_redirect, url=urlh.url)

                last_bookend_page, urlh = self._download_webpage_handle(
                    provider_refresh_redirect_url, video_id,
@ -1690,7 +1690,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should en
                hidden_data['history_val'] = 3

                mvpd_confirm_page_res = self._download_webpage_handle(
                    urlh.geturl(), video_id, 'Sending Final Bookend',
                    urlh.url, video_id, 'Sending Final Bookend',
                    query=hidden_data)

                post_form(mvpd_confirm_page_res, 'Confirming Login')
@ -1699,7 +1699,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should en
            # based redirect that should be followed.
            provider_redirect_page, urlh = provider_redirect_page_res
            provider_refresh_redirect_url = extract_redirect_url(
                provider_redirect_page, url=urlh.geturl())
                provider_redirect_page, url=urlh.url)
            if provider_refresh_redirect_url:
                provider_redirect_page_res = self._download_webpage_handle(
                    provider_refresh_redirect_url, video_id,
@ -1724,7 +1724,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should en
                'requestor_id': requestor_id,
            }), headers=mvpd_headers)
        except ExtractorError as e:
            if not mso_id and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
            if not mso_id and isinstance(e.cause, HTTPError) and e.cause.status == 401:
                raise_mvpd_required()
            raise
        if '<pendingLogout' in session:
@ -170,8 +170,10 @@ class AdultSwimIE(TurnerBaseIE):
                    continue
                ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
                if ext == 'm3u8':
                    info['formats'].extend(self._extract_m3u8_formats(
                        asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
                        asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
                    info['formats'].extend(fmts)
                    self._merge_subtitles(subs, target=info['subtitles'])
                elif ext == 'f4m':
                    continue
                    # info['formats'].extend(self._extract_f4m_formats(
@ -3,6 +3,8 @@ from ..utils import (
    ExtractorError,
    GeoRestrictedError,
    int_or_none,
    remove_start,
    traverse_obj,
    update_url_query,
    urlencode_postdata,
)
@ -72,7 +74,14 @@ class AENetworksBaseIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
        requestor_id, brand = self._DOMAIN_MAP[domain]
        result = self._download_json(
            'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
            filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
            filter_value, query={'filter[%s]' % filter_key: filter_value})
        result = traverse_obj(
            result, ('results',
                     lambda k, v: k == 0 and v[filter_key] == filter_value),
            get_all=False)
        if not result:
            raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
                                 video_id=remove_start(filter_value, '/'))
        title = result['title']
        video_id = result['id']
        media_url = result['publicUrl']
@ -123,7 +132,7 @@ class AENetworksIE(AENetworksBaseIE):
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
        'skip': 'This video is only available for users of participating TV providers.',
        'skip': 'Geo-restricted - This content is not available in your location.'
    }, {
        'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
        'info_dict': {
@ -140,6 +149,7 @@ class AENetworksIE(AENetworksBaseIE):
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
        'skip': 'This video is only available for users of participating TV providers.',
    }, {
        'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
        'only_matching': True
@ -303,6 +313,7 @@ class HistoryTopicIE(AENetworksBaseIE):
class HistoryPlayerIE(AENetworksBaseIE):
    IE_NAME = 'history:player'
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
    _TESTS = []

    def _real_extract(self, url):
        domain, video_id = self._match_valid_url(url).groups()
@ -1,5 +1,6 @@
from .common import InfoExtractor
from .vimeo import VimeoIE
from ..utils import ExtractorError, traverse_obj, url_or_none


class AeonCoIE(InfoExtractor):
@ -19,22 +20,55 @@ class AeonCoIE(InfoExtractor):
        }
    }, {
        'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
        'md5': '4e5f3dad9dbda0dbfa2da41a851e631e',
        'md5': '03582d795382e49f2fd0b427b55de409',
        'info_dict': {
            'id': '728595228',
            'id': '759576926',
            'ext': 'mp4',
            'title': 'Wrought',
            'thumbnail': 'https://i.vimeocdn.com/video/1484618528-c91452611f9a4e4497735a533da60d45b2fe472deb0c880f0afaab0cd2efb22a-d_1280',
            'uploader': 'Biofilm Productions',
            'uploader_id': 'user140352216',
            'uploader_url': 'https://vimeo.com/user140352216',
            'thumbnail': 'https://i.vimeocdn.com/video/1525599692-84614af88e446612f49ca966cf8f80eab2c73376bedd80555741c521c26f9a3e-d_1280',
            'uploader': 'Aeon Video',
            'uploader_id': 'aeonvideo',
            'uploader_url': 'https://vimeo.com/aeonvideo',
            'duration': 1344
        }
    }, {
        'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out',
        'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b',
        'info_dict': {
            'id': 'emyi4z-O0ls',
            'ext': 'mp4',
            'title': 'How to outsmart the Prisoner’s Dilemma - Lucas Husted',
            'thumbnail': 'https://i.ytimg.com/vi_webp/emyi4z-O0ls/maxresdefault.webp',
            'uploader': 'TED-Ed',
            'uploader_id': '@TEDEd',
            'uploader_url': 'https://www.youtube.com/@TEDEd',
            'duration': 344,
            'upload_date': '20200827',
            'channel_id': 'UCsooa4yRKGN_zEE8iknghZA',
            'playable_in_embed': True,
            'description': 'md5:c0959524f08cb60f96fd010f3dfb17f3',
            'categories': ['Education'],
            'like_count': int,
            'channel': 'TED-Ed',
            'chapters': 'count:7',
            'channel_url': 'https://www.youtube.com/channel/UCsooa4yRKGN_zEE8iknghZA',
            'tags': 'count:26',
            'availability': 'public',
            'channel_follower_count': int,
            'view_count': int,
            'age_limit': 0,
            'live_status': 'not_live',
            'comment_count': int,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        vimeo_id = self._search_regex(r'hosterId":\s*"(?P<id>[0-9]+)', webpage, 'vimeo id')
        vimeo_url = VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{vimeo_id}', 'https://aeon.co')
        return self.url_result(vimeo_url, VimeoIE)
        embed_url = traverse_obj(self._yield_json_ld(webpage, video_id), (
            lambda _, v: v['@type'] == 'VideoObject', 'embedUrl', {url_or_none}), get_all=False)
        if not embed_url:
            raise ExtractorError('No embed URL found in webpage')
        if 'player.vimeo.com' in embed_url:
            embed_url = VimeoIE._smuggle_referrer(embed_url, 'https://aeon.co/')
        return self.url_result(embed_url)
@ -76,59 +76,6 @@ class AfreecaTVIE(InfoExtractor):
            },
        }],
        'skip': 'Video is gone',
    }, {
        'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
        'info_dict': {
            'id': '18650793',
            'ext': 'mp4',
            'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': '윈아디',
            'uploader_id': 'badkids',
            'duration': 107,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
        'info_dict': {
            'id': '10481652',
            'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
            'uploader': 'dailyapril',
            'uploader_id': 'dailyapril',
            'duration': 6492,
        },
        'playlist_count': 2,
        'playlist': [{
            'md5': 'd8b7c174568da61d774ef0203159bf97',
            'info_dict': {
                'id': '20160502_c4c62b9d_174361386_1',
                'ext': 'mp4',
                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
                'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
                'uploader': 'dailyapril',
                'uploader_id': 'dailyapril',
                'upload_date': '20160502',
                'duration': 3601,
            },
        }, {
            'md5': '58f2ce7f6044e34439ab2d50612ab02b',
            'info_dict': {
                'id': '20160502_39e739bb_174361386_2',
                'ext': 'mp4',
                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
                'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
                'uploader': 'dailyapril',
                'uploader_id': 'dailyapril',
                'upload_date': '20160502',
                'duration': 2891,
            },
        }],
        'params': {
            'skip_download': True,
        },
    }, {
        # non standard key
        'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
@ -146,8 +93,8 @@ class AfreecaTVIE(InfoExtractor):
            'skip_download': True,
        },
    }, {
        # PARTIAL_ADULT
        'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439',
        # adult content
        'url': 'https://vod.afreecatv.com/player/97267690',
        'info_dict': {
            'id': '20180327_27901457_202289533_1',
            'ext': 'mp4',
@ -161,16 +108,25 @@ class AfreecaTVIE(InfoExtractor):
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['adult content'],
        'skip': 'The VOD does not exist',
    }, {
        'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
        'only_matching': True,
    }, {
        'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
        'only_matching': True,
    }, {
        'url': 'http://vod.afreecatv.com/player/15055030',
        'only_matching': True,
        'url': 'https://vod.afreecatv.com/player/96753363',
        'info_dict': {
            'id': '20230108_9FF5BEE1_244432674_1',
            'ext': 'mp4',
            'uploader_id': 'rlantnghks',
            'uploader': '페이즈으',
            'duration': 10840,
            'thumbnail': 'http://videoimg.afreecatv.com/php/SnapshotLoad.php?rowKey=20230108_9FF5BEE1_244432674_1_r',
            'upload_date': '20230108',
            'title': '젠지 페이즈',
        },
        'params': {
            'skip_download': True,
        },
    }]

    @staticmethod
@ -223,26 +179,21 @@ class AfreecaTVIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        if re.search(r'alert\(["\']This video has been deleted', webpage):
            raise ExtractorError(
                'Video %s has been deleted' % video_id, expected=True)

        station_id = self._search_regex(
            r'nStationNo\s*=\s*(\d+)', webpage, 'station')
        bbs_id = self._search_regex(
            r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
        video_id = self._search_regex(
            r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)

        partial_view = False
        adult_view = False
        for _ in range(2):
            data = self._download_json(
                'https://api.m.afreecatv.com/station/video/a/view',
                video_id, headers={'Referer': url}, data=urlencode_postdata({
                    'nTitleNo': video_id,
                    'nApiLevel': 10,
                }))['data']
            if traverse_obj(data, ('code', {int})) == -6221:
                raise ExtractorError('The VOD does not exist', expected=True)
            query = {
                'nTitleNo': video_id,
                'nStationNo': station_id,
                'nBbsNo': bbs_id,
                'nStationNo': data['station_no'],
                'nBbsNo': data['bbs_no'],
            }
            if partial_view:
                query['partialView'] = 'SKIP_ADULT'
@ -191,7 +191,7 @@ query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!,
class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE):
    IE_NAME = 'amazonminitv:season'
    _VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
    IE_DESC = 'Amazon MiniTV Series, "minitv:season:" prefix'
    IE_DESC = 'Amazon MiniTV Season, "minitv:season:" prefix'
    _TESTS = [{
        'url': 'amazonminitv:season:amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
        'playlist_mincount': 6,
@ -250,6 +250,7 @@ query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonI
class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE):
    IE_NAME = 'amazonminitv:series'
    _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
    IE_DESC = 'Amazon MiniTV Series, "minitv:series:" prefix'
    _TESTS = [{
        'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
        'playlist_mincount': 3,
@ -11,7 +11,7 @@ from ..utils import (


class AmericasTestKitchenIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
    _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
        'md5': 'b861c3e365ac38ad319cfd509c30577f',
@ -72,6 +72,12 @@ class AmericasTestKitchenIE(InfoExtractor):
    }, {
        'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
        'only_matching': True,
    }, {
        'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
        'only_matching': True,
    }, {
        'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
        'only_matching': True,
    }]

    def _real_extract(self, url):
@ -100,7 +106,7 @@ class AmericasTestKitchenIE(InfoExtractor):


class AmericasTestKitchenSeasonIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com(?P<show>/cookscountry)?/episodes/browse/season_(?P<id>\d+)'
    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|(?P<cooks>cooks(?:country|illustrated)))\.com(?:(?:/(?P<show2>cooks(?:country|illustrated)))?(?:/?$|(?<!ated)(?<!ated\.com)/episodes/browse/season_(?P<season>\d+)))'
    _TESTS = [{
        # ATK Season
        'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
@ -117,29 +123,73 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
            'title': 'Season 12',
        },
        'playlist_count': 13,
    }, {
        # America's Test Kitchen Series
        'url': 'https://www.americastestkitchen.com/',
        'info_dict': {
            'id': 'americastestkitchen',
            'title': 'America\'s Test Kitchen',
        },
        'playlist_count': 558,
    }, {
        # Cooks Country Series
        'url': 'https://www.americastestkitchen.com/cookscountry',
        'info_dict': {
            'id': 'cookscountry',
            'title': 'Cook\'s Country',
        },
        'playlist_count': 199,
    }, {
        'url': 'https://www.americastestkitchen.com/cookscountry/',
        'only_matching': True,
    }, {
        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
        'only_matching': True,
    }, {
        'url': 'https://www.cookscountry.com',
        'only_matching': True,
    }, {
        'url': 'https://www.americastestkitchen.com/cooksillustrated/',
        'only_matching': True,
    }, {
        'url': 'https://www.cooksillustrated.com',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        show_path, season_number = self._match_valid_url(url).group('show', 'id')
        season_number = int(season_number)
        season_number, show1, show = self._match_valid_url(url).group('season', 'show', 'show2')
        show_path = ('/' + show) if show else ''
        show = show or show1
        season_number = int_or_none(season_number)

        slug = 'cco' if show_path == '/cookscountry' else 'atk'
        slug, title = {
            'americastestkitchen': ('atk', 'America\'s Test Kitchen'),
            'cookscountry': ('cco', 'Cook\'s Country'),
            'cooksillustrated': ('cio', 'Cook\'s Illustrated'),
        }[show]

        season = 'Season %d' % season_number
        facet_filters = [
            'search_document_klass:episode',
            'search_show_slug:' + slug,
        ]

        if season_number:
            playlist_id = 'season_%d' % season_number
            playlist_title = 'Season %d' % season_number
            facet_filters.append('search_season_list:' + playlist_title)
        else:
            playlist_id = show
            playlist_title = title

        season_search = self._download_json(
            'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
            season, headers={
            playlist_id, headers={
                'Origin': 'https://www.americastestkitchen.com',
                'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
                'X-Algolia-Application-Id': 'Y1FNZXUI30',
            }, query={
                'facetFilters': json.dumps([
                    'search_season_list:' + season,
                    'search_document_klass:episode',
                    'search_show_slug:' + slug,
                ]),
                'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
                'facetFilters': json.dumps(facet_filters),
                'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
                'attributesToHighlight': '',
                'hitsPerPage': 1000,
            })
@ -162,4 +212,4 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
        }

        return self.playlist_result(
            entries(), 'season_%d' % season_number, season)
            entries(), playlist_id, playlist_title)
@ -5,6 +5,7 @@ from ..utils import (
    int_or_none,
    mimetype2ext,
    parse_iso8601,
    strip_jsonp,
    unified_timestamp,
    url_or_none,
)
@ -15,7 +16,7 @@ class AMPIE(InfoExtractor):  # XXX: Conventionally, base classes should end with
    def _extract_feed_info(self, url):
        feed = self._download_json(
            url, None, 'Downloading Akamai AMP feed',
            'Unable to download Akamai AMP feed')
            'Unable to download Akamai AMP feed', transform_source=strip_jsonp)
        item = feed.get('channel', {}).get('item')
        if not item:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
@ -73,8 +74,10 @@ class AMPIE(InfoExtractor):  # XXX: Conventionally, base classes should end with
                    media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
                    video_id, f4m_id='hds', fatal=False))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                formats.append({
                    'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
98	plugins/youtube_download/yt_dlp/extractor/anchorfm.py	Normal file
@ -0,0 +1,98 @@
from .common import InfoExtractor
from ..utils import (
    clean_html,
    float_or_none,
    int_or_none,
    str_or_none,
    traverse_obj,
    unified_timestamp
)


class AnchorFMEpisodeIE(InfoExtractor):
    _VALID_URL = r'https?://anchor\.fm/(?P<channel_name>\w+)/(?:embed/)?episodes/[\w-]+-(?P<episode_id>\w+)'
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://anchor.fm/lovelyti/episodes/Chrisean-Rock-takes-to-twitter-to-announce-shes-pregnant--Blueface-denies-he-is-the-father-e1tpt3d',
        'info_dict': {
            'id': 'e1tpt3d',
            'ext': 'mp3',
            'title': ' Chrisean Rock takes to twitter to announce she\'s pregnant, Blueface denies he is the father!',
            'description': 'md5:207d167de3e28ceb4ddc1ebf5a30044c',
            'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_nologo/1034827/1034827-1658438968460-5f3bfdf3601e8.jpg',
            'duration': 624.718,
            'uploader': 'Lovelyti ',
            'uploader_id': '991541',
            'channel': 'lovelyti',
            'modified_date': '20230121',
            'modified_timestamp': 1674285178,
            'release_date': '20230121',
            'release_timestamp': 1674285179,
            'episode_id': 'e1tpt3d',
        }
    }, {
        # embed url
        'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd',
        'info_dict': {
            'id': 'e1shjqd',
            'ext': 'mp3',
            'title': 'S2E75 Perang Bintang di Balik Kasus Ferdy Sambo dan Ismail Bolong',
            'description': 'md5:9e95ad9293bf00178bf8d33e9cb92c41',
            'duration': 1042.008,
            'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
            'release_date': '20221221',
            'release_timestamp': 1671595916,
            'modified_date': '20221221',
            'modified_timestamp': 1671590834,
            'channel': 'apakatatempo',
            'uploader': 'Podcast Tempo',
            'uploader_id': '2585461',
            'season': 'Season 2',
            'season_number': 2,
            'episode_id': 'e1shjqd',
        }
    }]

    _WEBPAGE_TESTS = [{
        'url': 'https://podcast.tempo.co/podcast/192/perang-bintang-di-balik-kasus-ferdy-sambo-dan-ismail-bolong',
        'info_dict': {
            'id': 'e1shjqd',
            'ext': 'mp3',
            'release_date': '20221221',
            'duration': 1042.008,
            'season': 'Season 2',
            'modified_timestamp': 1671590834,
            'uploader_id': '2585461',
            'modified_date': '20221221',
            'description': 'md5:9e95ad9293bf00178bf8d33e9cb92c41',
            'season_number': 2,
            'title': 'S2E75 Perang Bintang di Balik Kasus Ferdy Sambo dan Ismail Bolong',
            'release_timestamp': 1671595916,
            'episode_id': 'e1shjqd',
            'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
            'uploader': 'Podcast Tempo',
            'channel': 'apakatatempo',
        }
    }]

    def _real_extract(self, url):
        channel_name, episode_id = self._match_valid_url(url).group('channel_name', 'episode_id')
        api_data = self._download_json(f'https://anchor.fm/api/v3/episodes/{episode_id}', episode_id)

        return {
            'id': episode_id,
            'title': traverse_obj(api_data, ('episode', 'title')),
            'url': traverse_obj(api_data, ('episode', 'episodeEnclosureUrl'), ('episodeAudios', 0, 'url')),
            'ext': 'mp3',
            'vcodec': 'none',
            'thumbnail': traverse_obj(api_data, ('episode', 'episodeImage')),
            'description': clean_html(traverse_obj(api_data, ('episode', ('description', 'descriptionPreview')), get_all=False)),
            'duration': float_or_none(traverse_obj(api_data, ('episode', 'duration')), 1000),
            'modified_timestamp': unified_timestamp(traverse_obj(api_data, ('episode', 'modified'))),
            'release_timestamp': int_or_none(traverse_obj(api_data, ('episode', 'publishOnUnixTimestamp'))),
            'episode_id': episode_id,
            'uploader': traverse_obj(api_data, ('creator', 'name')),
            'uploader_id': str_or_none(traverse_obj(api_data, ('creator', 'userId'))),
            'season_number': int_or_none(traverse_obj(api_data, ('episode', 'podcastSeasonNumber'))),
            'channel': channel_name or traverse_obj(api_data, ('creator', 'vanitySlug')),
        }
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
scale_thumbnails_to_max_format_width,
|
||||
@ -121,7 +121,7 @@ class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
|
||||
canonical_url = self._request_webpage(
|
||||
HEADRequest(url), video_id,
|
||||
note='Resolve canonical player URL',
|
||||
errnote='Could not resolve canonical player URL').geturl()
|
||||
errnote='Could not resolve canonical player URL').url
|
||||
_, netloc, _, _, query, _ = urllib.parse.urlparse(canonical_url)
|
||||
cid = urllib.parse.parse_qs(query)['cid'][0]
|
||||
|
||||
|
@ -336,7 +336,7 @@ class AnvatoIE(InfoExtractor):
        elif media_format == 'm3u8-variant' or ext == 'm3u8':
            # For some videos the initial m3u8 URL returns JSON instead
            manifest_json = self._download_json(
                video_url, video_id, note='Downloading manifest JSON', errnote=False)
                video_url, video_id, note='Downloading manifest JSON', fatal=False)
            if manifest_json:
                video_url = manifest_json.get('master_m3u8')
                if not video_url:
@ -392,14 +392,6 @@ class AnvatoIE(InfoExtractor):
            url = smuggle_url(url, {'token': anvplayer_data['token']})
            yield cls.url_result(url, AnvatoIE, video_id)

    def _extract_anvato_videos(self, webpage, video_id):
        anvplayer_data = self._parse_json(
            self._html_search_regex(
                self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
            video_id)
        return self._get_anvato_videos(
            anvplayer_data['accessKey'], anvplayer_data['video'], 'default')  # cbslocal token = 'default'

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        self._initialize_geo_bypass({
@ -1,16 +1,16 @@
import json
import re
import urllib.error
import urllib.parse

from .common import InfoExtractor
from .naver import NaverBaseIE
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
from ..compat import compat_HTTPError, compat_urllib_parse_unquote
from ..compat import compat_urllib_parse_unquote
from ..networking import HEADRequest
from ..networking.exceptions import HTTPError
from ..utils import (
    KNOWN_EXTENSIONS,
    ExtractorError,
    HEADRequest,
    bug_reports_message,
    clean_html,
    dict_get,
@ -899,7 +899,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
                video_id, note='Fetching archived video file url', expected_status=True)
        except ExtractorError as e:
            # HTTP Error 404 is expected if the video is not saved.
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
            if isinstance(e.cause, HTTPError) and e.cause.status == 404:
                self.raise_no_formats(
                    'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True)
            else:
@ -926,7 +926,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
            info['thumbnails'] = self._extract_thumbnails(video_id)

        if urlh:
            url = compat_urllib_parse_unquote(urlh.geturl())
            url = compat_urllib_parse_unquote(urlh.url)
            video_file_url_qs = parse_qs(url)
            # Attempt to recover any ext & format info from playback url & response headers
            format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
@ -1052,7 +1052,7 @@ class VLiveWebArchiveIE(InfoExtractor):
            try:
                return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs)
            except ExtractorError as e:
                if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
                if isinstance(e.cause, HTTPError) and e.cause.status == 404:
                    raise ExtractorError('Page was not archived', expected=True)
                retry.error = e
                continue
@ -13,6 +13,7 @@ from ..utils import (
    try_get,
    unified_strdate,
    unified_timestamp,
    update_url,
    update_url_query,
    url_or_none,
    xpath_text,
@ -408,6 +409,23 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
                    (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''

    _TESTS = [{
        'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
        'md5': '3fd5fead7a370a819341129c8d713136',
        'info_dict': {
            'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
            'id': '12172961',
            'title': 'Wolfsland - Die traurigen Schwestern',
            'description': r're:^Als der Polizeiobermeister Raaben',
            'duration': 5241,
            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
            'timestamp': 1670710500,
            'upload_date': '20221210',
            'ext': 'mp4',
            'age_limit': 12,
            'episode': 'Wolfsland - Die traurigen Schwestern',
            'series': 'Filme im MDR'
        },
    }, {
        'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
        'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
        'info_dict': {
@ -424,7 +442,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
        'skip': 'Error',
    }, {
        'url': 'https://www.ardmediathek.de/video/tagesschau-oder-tagesschau-20-00-uhr/das-erste/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
        'md5': 'f1837e563323b8a642a8ddeff0131f51',
        'md5': '1e73ded21cb79bac065117e80c81dc88',
        'info_dict': {
            'id': '10049223',
            'ext': 'mp4',
@ -432,13 +450,11 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
            'timestamp': 1636398000,
            'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
            'upload_date': '20211108',
        },
    }, {
        'url': 'https://www.ardmediathek.de/sendung/beforeigners/beforeigners/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw/1',
        'playlist_count': 6,
        'info_dict': {
            'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw',
            'title': 'beforeigners/beforeigners/staffel-1',
            'display_id': 'tagesschau-oder-tagesschau-20-00-uhr/das-erste',
            'duration': 915,
            'episode': 'tagesschau, 20:00 Uhr',
            'series': 'tagesschau',
            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
        },
    }, {
        'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
@ -602,6 +618,9 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
            show {
                title
            }
            image {
                src
            }
            synopsis
            title
            tracking {
@ -640,6 +659,15 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
            'description': description,
            'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
            'series': try_get(player_page, lambda x: x['show']['title']),
            'thumbnail': (media_collection.get('_previewImage')
                          or try_get(player_page, lambda x: update_url(x['image']['src'], query=None, fragment=None))
                          or self.get_thumbnail_from_html(display_id, url)),
        })
        info.update(self._ARD_extract_episode_info(info['title']))
        return info

    def get_thumbnail_from_html(self, display_id, url):
        webpage = self._download_webpage(url, display_id, fatal=False) or ''
        return (
            self._og_search_thumbnail(webpage, default=None)
            or self._html_search_meta('thumbnailUrl', webpage, default=None))
@ -1,5 +1,5 @@
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    int_or_none,
@ -34,8 +34,8 @@ class AtresPlayerIE(InfoExtractor):
    _API_BASE = 'https://api.atresplayer.com/'

    def _handle_error(self, e, code):
        if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
            error = self._parse_json(e.cause.read(), None)
        if isinstance(e.cause, HTTPError) and e.cause.status == code:
            error = self._parse_json(e.cause.response.read(), None)
            if error.get('error') == 'required_registered':
                self.raise_login_required()
            raise ExtractorError(error['error_description'], expected=True)
@ -2,11 +2,11 @@ import functools
import itertools
import json
import re
import urllib.error
import xml.etree.ElementTree

from .common import InfoExtractor
from ..compat import compat_HTTPError, compat_str, compat_urlparse
from ..compat import compat_str, compat_urlparse
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    OnDemandPagedList,
@ -277,7 +277,7 @@ class BBCCoUkIE(InfoExtractor):
            post_url, None, 'Logging in', data=urlencode_postdata(login_form),
            headers={'Referer': self._LOGIN_URL})

        if self._LOGIN_URL in urlh.geturl():
        if self._LOGIN_URL in urlh.url:
            error = clean_html(get_element_by_class('form-message', response))
            if error:
                raise ExtractorError(
@ -388,8 +388,8 @@ class BBCCoUkIE(InfoExtractor):
                        href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                        m3u8_id=format_id, fatal=False)
                except ExtractorError as e:
                    if not (isinstance(e.exc_info[1], urllib.error.HTTPError)
                            and e.exc_info[1].code in (403, 404)):
                    if not (isinstance(e.exc_info[1], HTTPError)
                            and e.exc_info[1].status in (403, 404)):
                        raise
                    fmts = []
                formats.extend(fmts)
@ -472,7 +472,7 @@ class BBCCoUkIE(InfoExtractor):

            return programme_id, title, description, duration, formats, subtitles
        except ExtractorError as ee:
            if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
            if not (isinstance(ee.cause, HTTPError) and ee.cause.status == 404):
                raise

        # fallback to legacy playlist
@ -983,7 +983,7 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
                    # Some playlist URL may fail with 500, at the same time
                    # the other one may work fine (e.g.
                    # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
                    if isinstance(e.cause, compat_HTTPError) and e.cause.status == 500:
                    if isinstance(e.cause, HTTPError) and e.cause.status == 500:
                        continue
                    raise
                if entry:
@ -1,27 +1,197 @@
from functools import partial

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    clean_html,
    determine_ext,
    format_field,
    int_or_none,
    js_to_json,
    orderedSet,
    parse_iso8601,
    traverse_obj,
    url_or_none,
)


class BibelTVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?:crn/)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bibeltv.de/mediathek/videos/329703-sprachkurs-in-malaiisch',
        'md5': '252f908192d611de038b8504b08bf97f',
        'info_dict': {
            'id': 'ref:329703',
            'ext': 'mp4',
            'title': 'Sprachkurs in Malaiisch',
            'description': 'md5:3e9f197d29ee164714e67351cf737dfe',
            'timestamp': 1608316701,
            'uploader_id': '5840105145001',
            'upload_date': '20201218',
class BibelTVBaseIE(InfoExtractor):
    _GEO_COUNTRIES = ['AT', 'CH', 'DE']
    _GEO_BYPASS = False

    API_URL = 'https://www.bibeltv.de/mediathek/api'
    AUTH_TOKEN = 'j88bRXY8DsEqJ9xmTdWhrByVi5Hm'

    def _extract_formats_and_subtitles(self, data, crn_id, *, is_live=False):
        formats = []
        subtitles = {}
        for media_url in traverse_obj(data, (..., 'src', {url_or_none})):
            media_ext = determine_ext(media_url)
            if media_ext == 'm3u8':
                m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                    media_url, crn_id, live=is_live)
                formats.extend(m3u8_formats)
                subtitles.update(m3u8_subs)
            elif media_ext == 'mpd':
                mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(media_url, crn_id)
                formats.extend(mpd_formats)
                subtitles.update(mpd_subs)
            elif media_ext == 'mp4':
                formats.append({'url': media_url})
            else:
                self.report_warning(f'Unknown format {media_ext!r}')

        return formats, subtitles

    @staticmethod
    def _extract_base_info(data):
        return {
            'id': data['crn'],
            **traverse_obj(data, {
                'title': 'title',
                'description': 'description',
                'duration': ('duration', {partial(int_or_none, scale=1000)}),
                'timestamp': ('schedulingStart', {parse_iso8601}),
                'season_number': 'seasonNumber',
                'episode_number': 'episodeNumber',
                'view_count': 'viewCount',
                'like_count': 'likeCount',
            }),
            'thumbnails': orderedSet(traverse_obj(data, ('images', ..., {
                'url': ('url', {url_or_none}),
            }))),
        }
    }, {
        'url': 'https://www.bibeltv.de/mediathek/videos/crn/326374',
        'only_matching': True,

    def _extract_url_info(self, data):
        return {
            '_type': 'url',
            'url': format_field(data, 'slug', 'https://www.bibeltv.de/mediathek/videos/%s'),
            **self._extract_base_info(data),
        }

    def _extract_video_info(self, data):
        crn_id = data['crn']

        if data.get('drm'):
            self.report_drm(crn_id)

        json_data = self._download_json(
            format_field(data, 'id', f'{self.API_URL}/video/%s'), crn_id,
            headers={'Authorization': self.AUTH_TOKEN}, fatal=False,
            errnote='No formats available') or {}

        formats, subtitles = self._extract_formats_and_subtitles(
            traverse_obj(json_data, ('video', 'videoUrls', ...)), crn_id)

        return {
            '_type': 'video',
            **self._extract_base_info(data),
            'formats': formats,
            'subtitles': subtitles,
        }


class BibelTVVideoIE(BibelTVBaseIE):
    IE_DESC = 'BibelTV single video'
    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?P<id>\d+)[\w-]+'
    IE_NAME = 'bibeltv:video'

    _TESTS = [{
        'url': 'https://www.bibeltv.de/mediathek/videos/344436-alte-wege',
        'md5': 'ec1c07efe54353780512e8a4103b612e',
        'info_dict': {
            'id': '344436',
            'ext': 'mp4',
            'title': 'Alte Wege',
            'description': 'md5:2f4eb7294c9797a47b8fd13cccca22e9',
            'timestamp': 1677877071,
            'duration': 150.0,
            'upload_date': '20230303',
            'thumbnail': r're:https://bibeltv\.imgix\.net/[\w-]+\.jpg',
            'episode': 'Episode 1',
            'episode_number': 1,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'format': '6',
        },
    }]
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5840105145001/default_default/index.html?videoId=ref:%s'

    def _real_extract(self, url):
        crn_id = self._match_id(url)
        return self.url_result(
            self.BRIGHTCOVE_URL_TEMPLATE % crn_id, 'BrightcoveNew')
        video_data = traverse_obj(
            self._search_nextjs_data(self._download_webpage(url, crn_id), crn_id),
            ('props', 'pageProps', 'videoPageData', 'videos', 0, {dict}))
        if not video_data:
            raise ExtractorError('Missing video data.')

        return self._extract_video_info(video_data)


class BibelTVSeriesIE(BibelTVBaseIE):
    IE_DESC = 'BibelTV series playlist'
    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/serien/(?P<id>\d+)[\w-]+'
    IE_NAME = 'bibeltv:series'

    _TESTS = [{
        'url': 'https://www.bibeltv.de/mediathek/serien/333485-ein-wunder-fuer-jeden-tag',
        'playlist_mincount': 400,
        'info_dict': {
            'id': '333485',
            'title': 'Ein Wunder für jeden Tag',
            'description': 'Tägliche Kurzandacht mit Déborah Rosenkranz.',
        },
    }]

    def _real_extract(self, url):
        crn_id = self._match_id(url)
        webpage = self._download_webpage(url, crn_id)
        nextjs_data = self._search_nextjs_data(webpage, crn_id)
        series_data = traverse_obj(nextjs_data, ('props', 'pageProps', 'seriePageData', {dict}))
        if not series_data:
            raise ExtractorError('Missing series data.')

        return self.playlist_result(
            traverse_obj(series_data, ('videos', ..., {dict}, {self._extract_url_info})),
            crn_id, series_data.get('title'), clean_html(series_data.get('description')))


class BibelTVLiveIE(BibelTVBaseIE):
    IE_DESC = 'BibelTV live program'
    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/livestreams/(?P<id>[\w-]+)'
    IE_NAME = 'bibeltv:live'

    _TESTS = [{
        'url': 'https://www.bibeltv.de/livestreams/bibeltv/',
        'info_dict': {
            'id': 'bibeltv',
            'ext': 'mp4',
            'title': 're:Bibel TV',
            'live_status': 'is_live',
            'thumbnail': 'https://streampreview.bibeltv.de/bibeltv.webp',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.bibeltv.de/livestreams/impuls/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        stream_id = self._match_id(url)
        webpage = self._download_webpage(url, stream_id)
        stream_data = self._search_json(
            r'\\"video\\":', webpage, 'bibeltvData', stream_id,
            transform_source=lambda jstring: js_to_json(jstring.replace('\\"', '"')))

        formats, subtitles = self._extract_formats_and_subtitles(
            traverse_obj(stream_data, ('src', ...)), stream_id, is_live=True)

        return {
            'id': stream_id,
            'title': stream_data.get('title'),
            'thumbnail': stream_data.get('poster'),
            'is_live': True,
            'formats': formats,
            'subtitles': subtitles,
        }
@@ -1,12 +1,14 @@
import base64
import functools
import hashlib
import itertools
import math
import urllib.error
import time
import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor
from ..dependencies import Cryptodome
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    GeoRestrictedError,
@@ -16,6 +18,7 @@ from ..utils import (
    float_or_none,
    format_field,
    int_or_none,
    join_nonempty,
    make_archive_id,
    merge_dicts,
    mimetype2ext,
@@ -26,6 +29,8 @@ from ..utils import (
    srt_subtitles_timecode,
    str_or_none,
    traverse_obj,
    try_call,
    unified_timestamp,
    unsmuggle_url,
    url_or_none,
    urlencode_postdata,
@@ -81,7 +86,7 @@ class BilibiliBaseIE(InfoExtractor):
                f'{line["content"]}\n\n')
        return srt_data

    def _get_subtitles(self, video_id, initial_state, cid):
    def _get_subtitles(self, video_id, aid, cid):
        subtitles = {
            'danmaku': [{
                'ext': 'xml',
@@ -89,7 +94,8 @@ class BilibiliBaseIE(InfoExtractor):
            }]
        }

        for s in traverse_obj(initial_state, ('videoData', 'subtitle', 'list')) or []:
        video_info_json = self._download_json(f'https://api.bilibili.com/x/player/v2?aid={aid}&cid={cid}', video_id)
        for s in traverse_obj(video_info_json, ('data', 'subtitle', 'subtitles', ...)):
            subtitles.setdefault(s['lan'], []).append({
                'ext': 'srt',
                'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
@@ -130,9 +136,20 @@ class BilibiliBaseIE(InfoExtractor):
            for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
                yield from children

    def _get_episodes_from_season(self, ss_id, url):
        season_info = self._download_json(
            'https://api.bilibili.com/pgc/web/season/section', ss_id,
            note='Downloading season info', query={'season_id': ss_id},
            headers={'Referer': url, **self.geo_verification_headers()})

        for entry in traverse_obj(season_info, (
                'result', 'main_section', 'episodes',
                lambda _, v: url_or_none(v['share_url']) and v['id'])):
            yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}')


class BiliBiliIE(BilibiliBaseIE):
    _VALID_URL = r'https?://www\.bilibili\.com/video/[aAbB][vV](?P<id>[^/?#&]+)'
    _VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/video/BV13x41117TL',
@@ -280,19 +297,60 @@ class BiliBiliIE(BilibiliBaseIE):
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'video redirects to festival page',
        'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
        'info_dict': {
            'id': 'BV1wP4y1P72h',
            'ext': 'mp4',
            'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
            'timestamp': 1643947497,
            'upload_date': '20220204',
            'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
            'uploader': '叨叨冯聊音乐',
            'duration': 246.719,
            'uploader_id': '528182630',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'newer festival video',
        'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
        'info_dict': {
            'id': 'BV1ay4y1d77f',
            'ext': 'mp4',
            'title': '【崩坏3新春剧场】为特别的你送上祝福!',
            'timestamp': 1674273600,
            'upload_date': '20230121',
            'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
            'uploader': '果蝇轰',
            'duration': 1111.722,
            'uploader_id': '8469526',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
        play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']

        video_data = initial_state['videoData']
        is_festival = 'videoData' not in initial_state
        if is_festival:
            video_data = initial_state['videoInfo']
        else:
            play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
            video_data = initial_state['videoData']

        video_id, title = video_data['bvid'], video_data.get('title')

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        page_list_json = traverse_obj(
        page_list_json = not is_festival and traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/pagelist', video_id,
                fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
@@ -315,99 +373,135 @@ class BiliBiliIE(BilibiliBaseIE):

        cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')

        festival_info = {}
        if is_festival:
            play_info = self._download_json(
                'https://api.bilibili.com/x/player/playurl', video_id,
                query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
                note='Extracting festival video formats')['data']

            festival_info = traverse_obj(initial_state, {
                'uploader': ('videoInfo', 'upName'),
                'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
                'like_count': ('videoStatus', 'like', {int_or_none}),
                'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
            }, get_all=False)

        return {
            **traverse_obj(initial_state, {
                'uploader': ('upData', 'name'),
                'uploader_id': ('upData', 'mid', {str_or_none}),
                'like_count': ('videoData', 'stat', 'like', {int_or_none}),
                'tags': ('tags', ..., 'tag_name'),
                'thumbnail': ('videoData', 'pic', {url_or_none}),
            }),
            **festival_info,
            **traverse_obj(video_data, {
                'description': 'desc',
                'timestamp': ('pubdate', {int_or_none}),
                'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
                'comment_count': ('stat', 'reply', {int_or_none}),
            }, get_all=False),
            'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
            'formats': self.extract_formats(play_info),
            '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
            'title': title,
            'description': traverse_obj(initial_state, ('videoData', 'desc')),
            'view_count': traverse_obj(initial_state, ('videoData', 'stat', 'view')),
            'uploader': traverse_obj(initial_state, ('upData', 'name')),
            'uploader_id': traverse_obj(initial_state, ('upData', 'mid')),
            'like_count': traverse_obj(initial_state, ('videoData', 'stat', 'like')),
            'comment_count': traverse_obj(initial_state, ('videoData', 'stat', 'reply')),
            'tags': traverse_obj(initial_state, ('tags', ..., 'tag_name')),
            'thumbnail': traverse_obj(initial_state, ('videoData', 'pic')),
            'timestamp': traverse_obj(initial_state, ('videoData', 'pubdate')),
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'chapters': self._get_chapters(aid, cid),
            'subtitles': self.extract_subtitles(video_id, initial_state, cid),
            'subtitles': self.extract_subtitles(video_id, aid, cid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': {'Referer': url},
        }


class BiliBiliBangumiIE(BilibiliBaseIE):
    _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P<id>(?:ss|ep)\d+)'
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss897',
        'url': 'https://www.bilibili.com/bangumi/play/ep267851',
        'info_dict': {
            'id': 'ss897',
            'id': '267851',
            'ext': 'mp4',
            'series': '神的记事本',
            'season': '神的记事本',
            'season_id': 897,
            'series': '鬼灭之刃',
            'series_id': '4358',
            'season': '鬼灭之刃',
            'season_id': '26801',
            'season_number': 1,
            'episode': '你与旅行包',
            'episode_number': 2,
            'title': '神的记事本:第2话 你与旅行包',
            'duration': 1428.487,
            'timestamp': 1310809380,
            'upload_date': '20110716',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'episode': '残酷',
            'episode_id': '267851',
            'episode_number': 1,
            'title': '1 残酷',
            'duration': 1425.256,
            'timestamp': 1554566400,
            'upload_date': '20190406',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
        },
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ep508406',
        'only_matching': True,
        'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        episode_id = video_id[2:]
        webpage = self._download_webpage(url, video_id)

        if '您所在的地区无法观看本片' in webpage:
            raise GeoRestrictedError('This video is restricted')
        elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage
                or '正在观看预览,大会员免费看全片' in webpage):
        elif '正在观看预览,大会员免费看全片' in webpage:
            self.raise_login_required('This video is for premium members only')

        play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
        headers = {'Referer': url, **self.geo_verification_headers()}
        play_info = self._download_json(
            'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id,
            'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
            headers=headers)
        premium_only = play_info.get('code') == -10403
        play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}

        formats = self.extract_formats(play_info)
        if (not formats and '成为大会员抢先看' in webpage
                and play_info.get('durl') and not play_info.get('dash')):
        if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
            self.raise_login_required('This video is for premium members only')

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
        bangumi_info = self._download_json(
            'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details',
            query={'ep_id': episode_id}, headers=headers)['result']

        season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id'))
        episode_number, episode_info = next((
            (idx, ep) for idx, ep in enumerate(traverse_obj(
                bangumi_info, ('episodes', ..., {dict})), 1)
            if str_or_none(ep.get('id')) == episode_id), (1, {}))

        season_id = bangumi_info.get('season_id')
        season_number = season_id and next((
            idx + 1 for idx, e in enumerate(
                traverse_obj(initial_state, ('mediaInfo', 'seasons', ...)))
                traverse_obj(bangumi_info, ('seasons', ...)))
            if e.get('season_id') == season_id
        ), None)

        aid = episode_info.get('aid')

        return {
            'id': video_id,
            'formats': formats,
            'title': traverse_obj(initial_state, 'h1Title'),
            'episode': traverse_obj(initial_state, ('epInfo', 'long_title')),
            'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))),
            'series': traverse_obj(initial_state, ('mediaInfo', 'series')),
            'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')),
            'season_id': season_id,
            **traverse_obj(bangumi_info, {
                'series': ('series', 'series_title', {str}),
                'series_id': ('series', 'series_id', {str_or_none}),
                'thumbnail': ('square_cover', {url_or_none}),
            }),
            'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info),
            'episode': episode_info.get('long_title'),
            'episode_id': episode_id,
            'episode_number': int_or_none(episode_info.get('title')) or episode_number,
            'season_id': str_or_none(season_id),
            'season_number': season_number,
            'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')),
            'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')),
            'timestamp': int_or_none(episode_info.get('pub_time')),
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(
                video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))),
            '__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))),
            'http_headers': {'Referer': url, **self.geo_verification_headers()},
            'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': headers,
        }


class BiliBiliBangumiMediaIE(InfoExtractor):
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
    _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
@@ -420,16 +514,26 @@ class BiliBiliBangumiMediaIE(InfoExtractor):
    def _real_extract(self, url):
        media_id = self._match_id(url)
        webpage = self._download_webpage(url, media_id)
        ss_id = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)['mediaInfo']['season_id']

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
        episode_list = self._download_json(
            'https://api.bilibili.com/pgc/web/season/section', media_id,
            query={'season_id': initial_state['mediaInfo']['season_id']},
            note='Downloading season info')['result']['main_section']['episodes']
        return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id)

        return self.playlist_result((
            self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid'])
            for entry in episode_list), media_id)

class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
    _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
        'info_dict': {
            'id': '26801'
        },
        'playlist_mincount': 26
    }]

    def _real_extract(self, url):
        ss_id = self._match_id(url)

        return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id)


class BilibiliSpaceBaseIE(InfoExtractor):
@@ -452,21 +556,65 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
            'id': '3985676',
        },
        'playlist_mincount': 178,
    }, {
        'url': 'https://space.bilibili.com/313580179/video',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 92,
    }]

    def _extract_signature(self, playlist_id):
        session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)

        key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
        img_key = traverse_obj(
            session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
        sub_key = traverse_obj(
            session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'

        session_key = img_key + sub_key

        signature_values = []
        for position in (
            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
            12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
            57, 62, 11, 36, 20, 34, 44, 52
        ):
            char_at_position = try_call(lambda: session_key[position])
            if char_at_position:
                signature_values.append(char_at_position)

        return ''.join(signature_values)[:32]
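
# Illustrative aside (not part of the diff): a minimal standalone sketch of the
# WBI signing that _extract_signature and fetch_page below perform together.
# `mixed_key` stands in for the 32-character value _extract_signature returns;
# the sorted() call is an assumption that matches the alphabetically ordered
# query dict built in fetch_page.
import hashlib
import time
import urllib.parse

def sign_wbi(params: dict, mixed_key: str) -> dict:
    params = {**params, 'wts': int(time.time())}
    # w_rid is the MD5 hex digest of the URL-encoded query followed by the key
    params['w_rid'] = hashlib.md5(
        f'{urllib.parse.urlencode(sorted(params.items()))}{mixed_key}'.encode()).hexdigest()
    return params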

    def _real_extract(self, url):
        playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
        if not is_video_url:
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

        signature = self._extract_signature(playlist_id)

        def fetch_page(page_idx):
            query = {
                'keyword': '',
                'mid': playlist_id,
                'order': 'pubdate',
                'order_avoided': 'true',
                'platform': 'web',
                'pn': page_idx + 1,
                'ps': 30,
                'tid': 0,
                'web_location': 1550101,
                'wts': int(time.time()),
            }
            query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()

            try:
                response = self._download_json('https://api.bilibili.com/x/space/arc/search',
                                               playlist_id, note=f'Downloading page {page_idx}',
                                               query={'mid': playlist_id, 'pn': page_idx + 1, 'jsonp': 'jsonp'})
                response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
                                               playlist_id, note=f'Downloading page {page_idx}', query=query)
            except ExtractorError as e:
                if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 412:
                if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                    raise ExtractorError(
                        'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
                raise
@@ -494,9 +642,9 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/audio',
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '3985676',
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]
@@ -894,15 +1042,15 @@ class BiliIntlBaseIE(InfoExtractor):
        }

    def _perform_login(self, username, password):
        if not Cryptodome:
        if not Cryptodome.RSA:
            raise ExtractorError('pycryptodomex not found. Please install', expected=True)

        key_data = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
            note='Downloading login key', errnote='Unable to download login key')['data']

        public_key = Cryptodome.PublicKey.RSA.importKey(key_data['key'])
        password_hash = Cryptodome.Cipher.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
        public_key = Cryptodome.RSA.importKey(key_data['key'])
        password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
        login_post = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
                'username': username,
@@ -995,6 +1143,53 @@ class BiliIntlIE(BiliIntlBaseIE):
            'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
            'upload_date': '20221212',
            'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
        },
    }, {
        # episode comment extraction
        'url': 'https://www.bilibili.tv/en/play/34580/340317',
        'info_dict': {
            'id': '340317',
            'ext': 'mp4',
            'timestamp': 1604057820,
            'upload_date': '20201030',
            'episode_number': 5,
            'title': 'E5 - My Own Steel',
            'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
            'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode': 'Episode 5',
            'comment_count': int,
            'chapters': [{
                'start_time': 0,
                'end_time': 61.0,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 61.0,
                'end_time': 134.0,
                'title': 'Intro'
            }, {
                'start_time': 1290.0,
                'end_time': 1379.0,
                'title': 'Outro'
            }],
        },
        'params': {
            'getcomments': True
        }
    }, {
        # user generated content comment extraction
        'url': 'https://www.bilibili.tv/en/video/2045730385',
        'info_dict': {
            'id': '2045730385',
            'ext': 'mp4',
            'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
            'timestamp': 1667891924,
            'upload_date': '20221108',
            'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
            'comment_count': int,
            'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
        },
        'params': {
            'getcomments': True
        }
    }, {
        # episode id without intro and outro
@@ -1054,11 +1249,69 @@ class BiliIntlIE(BiliIntlBaseIE):

        # XXX: webpage metadata may not be accurate; it is just used to not crash when video_data is not found
        return merge_dicts(
            self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id), {
            self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
                'title': self._html_search_meta('og:title', webpage),
                'description': self._html_search_meta('og:description', webpage)
            })

    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
        comment_api_raw_data = self._download_json(
            'https://api.bilibili.tv/reply/web/detail', display_id,
            note=f'Downloading reply comment of {root_id} - {next_id}',
            query={
                'platform': 'web',
                'ps': 20,  # comment's reply per page (default: 3)
                'root': root_id,
                'next': next_id,
            })

        for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
            yield {
                'author': traverse_obj(replies, ('member', 'name')),
                'author_id': traverse_obj(replies, ('member', 'mid')),
                'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                'text': traverse_obj(replies, ('content', 'message')),
                'id': replies.get('rpid'),
                'like_count': int_or_none(replies.get('like_count')),
                'parent': replies.get('parent'),
                'timestamp': unified_timestamp(replies.get('ctime_text'))
            }

        if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
            yield from self._get_comments_reply(
                root_id, comment_api_raw_data['data']['cursor']['next'], display_id)

    def _get_comments(self, video_id, ep_id):
        for i in itertools.count(0):
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/root', video_id,
                note=f'Downloading comment page {i + 1}',
                query={
                    'platform': 'web',
                    'pn': i,  # page number
                    'ps': 20,  # comment per page (default: 20)
                    'oid': video_id,
                    'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
                    'sort_type': 1,  # 1: best, 2: recent
                })

            for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
                yield {
                    'author': traverse_obj(replies, ('member', 'name')),
                    'author_id': traverse_obj(replies, ('member', 'mid')),
                    'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                    'text': traverse_obj(replies, ('content', 'message')),
                    'id': replies.get('rpid'),
                    'like_count': int_or_none(replies.get('like_count')),
                    'timestamp': unified_timestamp(replies.get('ctime_text')),
                    'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
                }
                if replies.get('count'):
                    yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)

            if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
                break
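
# Illustrative aside (not part of the diff): the cursor-pagination contract both
# loops above rely on. `fetch` is a hypothetical callable standing in for the
# self._download_json calls; the response shape mirrors the fields read above.
def walk_replies(fetch, next_id=0):
    while True:
        page = fetch(next_id)  # -> {'data': {'replies': [...], 'cursor': {'is_end': bool, 'next': int}}}
        yield from page['data'].get('replies') or []
        cursor = page['data']['cursor']
        if cursor.get('is_end'):
            break
        next_id = cursor['next']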

    def _real_extract(self, url):
        season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
        video_id = ep_id or aid
@@ -1086,7 +1339,8 @@ class BiliIntlIE(BiliIntlBaseIE):
            **self._extract_video_metadata(url, video_id, season_id),
            'formats': self._get_formats(ep_id=ep_id, aid=aid),
            'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
            'chapters': chapters
            'chapters': chapters,
            '__post_extractor': self.extract_comments(video_id, ep_id)
        }


@@ -2,9 +2,9 @@ import functools
import re

from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
    ExtractorError,
    HEADRequest,
    OnDemandPagedList,
    clean_html,
    get_element_by_class,
@@ -77,7 +77,10 @@ class BitChuteIE(InfoExtractor):
    def _check_format(self, video_url, video_id):
        urls = orderedSet(
            re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url)
            for host in (r'\g<2>', 'seed150', 'seed151', 'seed152', 'seed153'))
            for host in (r'\g<2>', 'seed122', 'seed125', 'seed126', 'seed128',
                         'seed132', 'seed150', 'seed151', 'seed152', 'seed153',
                         'seed167', 'seed171', 'seed177', 'seed305', 'seed307',
                         'seedp29xb', 'zb10-7gsop1v78'))
        for url in urls:
            try:
                response = self._request_webpage(
167 plugins/youtube_download/yt_dlp/extractor/blerp.py (Normal file)
@@ -0,0 +1,167 @@
import json

from .common import InfoExtractor
from ..utils import strip_or_none, traverse_obj


class BlerpIE(InfoExtractor):
    IE_NAME = 'blerp'
    _VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [{
        'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',
        'info_dict': {
            'id': '6320fe8745636cb4dd677a5a',
            'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',
            'uploader': 'luminousaj',
            'uploader_id': '5fb81e51aa66ae000c395478',
            'ext': 'mp3',
            'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
        }
    }, {
        'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
        'info_dict': {
            'id': '5bc94ef4796001000498429f',
            'title': 'Yee',
            'uploader': '179617322678353920',
            'uploader_id': '5ba99cf71386730004552c42',
            'ext': 'mp3',
            'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
        }
    }]

    _GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
    _GRAPHQL_QUERY = (
        '''query webBitePageGetBite($_id: MongoID!) {
            web {
                biteById(_id: $_id) {
                    ...bitePageFrag
                    __typename
                }
                __typename
            }
        }

        fragment bitePageFrag on Bite {
            _id
            title
            userKeywords
            keywords
            color
            visibility
            isPremium
            owned
            price
            extraReview
            isAudioExists
            image {
                filename
                original {
                    url
                    __typename
                }
                __typename
            }
            userReactions {
                _id
                reactions
                createdAt
                __typename
            }
            topReactions
            totalSaveCount
            saved
            blerpLibraryType
            license
            licenseMetaData
            playCount
            totalShareCount
            totalFavoriteCount
            totalAddedToBoardCount
            userCategory
            userAudioQuality
            audioCreationState
            transcription
            userTranscription
            description
            createdAt
            updatedAt
            author
            listingType
            ownerObject {
                _id
                username
                profileImage {
                    filename
                    original {
                        url
                        __typename
                    }
                    __typename
                }
                __typename
            }
            transcription
            favorited
            visibility
            isCurated
            sourceUrl
            audienceRating
            strictAudienceRating
            ownerId
            reportObject {
                reportedContentStatus
                __typename
            }
            giphy {
                mp4
                gif
                __typename
            }
            audio {
                filename
                original {
                    url
                    __typename
                }
                mp3 {
                    url
                    __typename
                }
                __typename
            }
            __typename
        }

        ''')

    def _real_extract(self, url):
        audio_id = self._match_id(url)

        data = {
            'operationName': self._GRAPHQL_OPERATIONNAME,
            'query': self._GRAPHQL_QUERY,
            'variables': {
                '_id': audio_id
            }
        }

        headers = {
            'Content-Type': 'application/json'
        }

        json_result = self._download_json('https://api.blerp.com/graphql',
                                          audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)

        bite_json = json_result['data']['web']['biteById']

        info_dict = {
            'id': bite_json['_id'],
            'url': bite_json['audio']['mp3']['url'],
            'title': bite_json['title'],
            'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
            'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
            'ext': 'mp3',
            'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)
        }

        return info_dict
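
# Illustrative aside (not part of the diff): one way to exercise this extractor,
# assuming yt-dlp is importable; extract_info(download=False) returns the
# info_dict assembled in _real_extract above.
import yt_dlp

with yt_dlp.YoutubeDL() as ydl:
    info = ydl.extract_info(
        'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a', download=False)
    print(info['id'], info['title'], info['url'])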
@@ -1,86 +0,0 @@
from .common import InfoExtractor
from ..utils import int_or_none, str_or_none, traverse_obj


class BooyahBaseIE(InfoExtractor):
    _BOOYAH_SESSION_KEY = None

    def _real_initialize(self):
        BooyahBaseIE._BOOYAH_SESSION_KEY = self._request_webpage(
            'https://booyah.live/api/v3/auths/sessions', None, data=b'').getheader('booyah-session-key')

    def _get_comments(self, video_id):
        comment_json = self._download_json(
            f'https://booyah.live/api/v3/playbacks/{video_id}/comments/tops', video_id,
            headers={'Booyah-Session-Key': self._BOOYAH_SESSION_KEY}, fatal=False) or {}

        return [{
            'id': comment.get('comment_id'),
            'author': comment.get('from_nickname'),
            'author_id': comment.get('from_uid'),
            'author_thumbnail': comment.get('from_thumbnail'),
            'text': comment.get('content'),
            'timestamp': comment.get('create_time'),
            'like_count': comment.get('like_cnt'),
        } for comment in comment_json.get('comment_list') or ()]


class BooyahClipsIE(BooyahBaseIE):
    _VALID_URL = r'https?://booyah.live/clips/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://booyah.live/clips/13887261322952306617',
        'info_dict': {
            'id': '13887261322952306617',
            'ext': 'mp4',
            'view_count': int,
            'duration': 30,
            'channel_id': 90565760,
            'like_count': int,
            'title': 'Cayendo con estilo 😎',
            'uploader': '♡LɪꜱGΛMER',
            'comment_count': int,
            'uploader_id': '90565760',
            'thumbnail': 'https://resmambet-a.akamaihd.net/mambet-storage/Clip/90565760/90565760-27204374-fba0-409d-9d7b-63a48b5c0e75.jpg',
            'upload_date': '20220617',
            'timestamp': 1655490556,
            'modified_timestamp': 1655490556,
            'modified_date': '20220617',
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        json_data = self._download_json(
            f'https://booyah.live/api/v3/playbacks/{video_id}', video_id,
            headers={'Booyah-Session-key': self._BOOYAH_SESSION_KEY})

        formats = []
        for video_data in json_data['playback']['endpoint_list']:
            formats.extend(({
                'url': video_data.get('stream_url'),
                'ext': 'mp4',
                'height': video_data.get('resolution'),
            }, {
                'url': video_data.get('download_url'),
                'ext': 'mp4',
                'format_note': 'Watermarked',
                'height': video_data.get('resolution'),
                'preference': -10,
            }))

        return {
            'id': video_id,
            'title': traverse_obj(json_data, ('playback', 'name')),
            'thumbnail': traverse_obj(json_data, ('playback', 'thumbnail_url')),
            'formats': formats,
            'view_count': traverse_obj(json_data, ('playback', 'views')),
            'like_count': traverse_obj(json_data, ('playback', 'likes')),
            'duration': traverse_obj(json_data, ('playback', 'duration')),
            'comment_count': traverse_obj(json_data, ('playback', 'comment_cnt')),
            'channel_id': traverse_obj(json_data, ('playback', 'channel_id')),
            'uploader': traverse_obj(json_data, ('user', 'nickname')),
            'uploader_id': str_or_none(traverse_obj(json_data, ('user', 'uid'))),
            'modified_timestamp': int_or_none(traverse_obj(json_data, ('playback', 'update_time_ms')), 1000),
            'timestamp': int_or_none(traverse_obj(json_data, ('playback', 'create_time_ms')), 1000),
            '__post_extractor': self.extract_comments(video_id, self._get_comments(video_id)),
        }
102 plugins/youtube_download/yt_dlp/extractor/boxcast.py (Normal file)
@@ -0,0 +1,102 @@
from .common import InfoExtractor
from ..utils import (
    js_to_json,
    traverse_obj,
    unified_timestamp
)


class BoxCastVideoIE(InfoExtractor):
    _VALID_URL = r'''(?x)
        https?://boxcast\.tv/(?:
            view-embed/|
            channel/\w+\?(?:[^#]+&)?b=|
            video-portal/(?:\w+/){2}
        )(?P<id>[\w-]+)'''
    _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://boxcast\.tv/view-embed/[\w-]+)']
    _TESTS = [{
        'url': 'https://boxcast.tv/view-embed/in-the-midst-of-darkness-light-prevails-an-interdisciplinary-symposium-ozmq5eclj50ujl4bmpwx',
        'info_dict': {
            'id': 'da1eqqgkacngd5djlqld',
            'ext': 'mp4',
            'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w+-]+/){3}.+\.png$',
            'title': 'In the Midst of Darkness Light Prevails: An Interdisciplinary Symposium',
            'release_timestamp': 1670686812,
            'release_date': '20221210',
            'uploader_id': 're8w0v8hohhvpqtbskpe',
            'uploader': 'Children\'s Health Defense',
        }
    }, {
        'url': 'https://boxcast.tv/video-portal/vctwevwntun3o0ikq7af/rvyblnn0fxbfjx5nwxhl/otbpltj2kzkveo2qz3ad',
        'info_dict': {
            'id': 'otbpltj2kzkveo2qz3ad',
            'ext': 'mp4',
            'uploader_id': 'vctwevwntun3o0ikq7af',
            'uploader': 'Legacy Christian Church',
            'title': 'The Quest | 1: Beginner\'s Bay | Jamie Schools',
            'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg'
        }
    }, {
        'url': 'https://boxcast.tv/channel/z03fqwaeaby5lnaawox2?b=ssihlw5gvfij2by8tkev',
        'info_dict': {
            'id': 'ssihlw5gvfij2by8tkev',
            'ext': 'mp4',
            'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg$',
            'release_date': '20230101',
            'uploader_id': 'ds25vaazhlu4ygcvffid',
            'release_timestamp': 1672543201,
            'uploader': 'Lighthouse Ministries International  - Beltsville, Maryland',
            'description': 'md5:ac23e3d01b0b0be592e8f7fe0ec3a340',
            'title': 'New Year\'s Eve CROSSOVER Service at LHMI | December 31, 2022',
        }
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://childrenshealthdefense.eu/live-stream/',
        'info_dict': {
            'id': 'da1eqqgkacngd5djlqld',
            'ext': 'mp4',
            'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w+-]+/){3}.+\.png$',
            'title': 'In the Midst of Darkness Light Prevails: An Interdisciplinary Symposium',
            'release_timestamp': 1670686812,
            'release_date': '20221210',
            'uploader_id': 're8w0v8hohhvpqtbskpe',
            'uploader': 'Children\'s Health Defense',
        }
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        webpage_json_data = self._search_json(
            r'var\s*BOXCAST_PRELOAD\s*=', webpage, 'broadcast data', display_id,
            transform_source=js_to_json, default={})

        # Ref: https://support.boxcast.com/en/articles/4235158-build-a-custom-viewer-experience-with-boxcast-api
        broadcast_json_data = (
            traverse_obj(webpage_json_data, ('broadcast', 'data'))
            or self._download_json(f'https://api.boxcast.com/broadcasts/{display_id}', display_id))
        view_json_data = (
            traverse_obj(webpage_json_data, ('view', 'data'))
            or self._download_json(f'https://api.boxcast.com/broadcasts/{display_id}/view',
                                   display_id, fatal=False) or {})

        formats, subtitles = [], {}
        if view_json_data.get('status') == 'recorded':
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                view_json_data['playlist'], display_id)

        return {
            'id': str(broadcast_json_data['id']),
            'title': (broadcast_json_data.get('name')
                      or self._html_search_meta(['og:title', 'twitter:title'], webpage)),
            'description': (broadcast_json_data.get('description')
                            or self._html_search_meta(['og:description', 'twitter:description'], webpage)
                            or None),
            'thumbnail': (broadcast_json_data.get('preview')
                          or self._html_search_meta(['og:image', 'twitter:image'], webpage)),
            'formats': formats,
            'subtitles': subtitles,
            'release_timestamp': unified_timestamp(broadcast_json_data.get('streamed_at')),
            'uploader': broadcast_json_data.get('account_name'),
            'uploader_id': broadcast_json_data.get('account_id'),
        }
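
# Illustrative aside (not part of the diff): the two REST calls the extractor
# falls back to when BOXCAST_PRELOAD is missing, sketched with the stdlib. The
# field names ('status', 'playlist') mirror what the code above reads; anything
# further about the API is an assumption.
import json
import urllib.request

def fetch_boxcast_broadcast(broadcast_id):
    base = f'https://api.boxcast.com/broadcasts/{broadcast_id}'
    with urllib.request.urlopen(base) as f:
        broadcast = json.load(f)
    with urllib.request.urlopen(f'{base}/view') as f:
        view = json.load(f)  # view['playlist'] holds the m3u8 URL once view['status'] == 'recorded'
    return broadcast, view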
318 plugins/youtube_download/yt_dlp/extractor/brainpop.py (Normal file)
@@ -0,0 +1,318 @@
import json
import re

from .common import InfoExtractor
from ..utils import (
    classproperty,
    int_or_none,
    traverse_obj,
    urljoin
)


class BrainPOPBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'brainpop'
    _ORIGIN = ''  # So that _VALID_URL doesn't crash
    _LOGIN_ERRORS = {
        1502: 'The username and password you entered did not match.',  # LOGIN_FAILED
        1503: 'Payment method is expired.',  # LOGIN_FAILED_ACCOUNT_NOT_ACTIVE
        1506: 'Your BrainPOP plan has expired.',  # LOGIN_FAILED_ACCOUNT_EXPIRED
        1507: 'Terms not accepted.',  # LOGIN_FAILED_TERMS_NOT_ACCEPTED
        1508: 'Account not activated.',  # LOGIN_FAILED_SUBSCRIPTION_NOT_ACTIVE
        1512: 'The maximum number of devices permitted are logged in with your account right now.',  # LOGIN_FAILED_LOGIN_LIMIT_REACHED
        1513: 'You are trying to access your account from outside of its allowed IP range.',  # LOGIN_FAILED_INVALID_IP
        1514: 'Individual accounts are not included in your plan. Try again with your shared username and password.',  # LOGIN_FAILED_MBP_DISABLED
        1515: 'Account not activated.',  # LOGIN_FAILED_TEACHER_NOT_ACTIVE
        1523: 'That username and password won\'t work on this BrainPOP site.',  # LOGIN_FAILED_NO_ACCESS
        1524: 'You\'ll need to join a class before you can login.',  # LOGIN_FAILED_STUDENT_NO_PERIOD
        1526: 'Your account is locked. Reset your password, or ask a teacher or administrator for help.',  # LOGIN_FAILED_ACCOUNT_LOCKED
    }

    @classproperty
    def _VALID_URL(cls):
        root = re.escape(cls._ORIGIN).replace(r'https:', r'https?:').replace(r'www\.', r'(?:www\.)?')
        return rf'{root}/(?P<slug>[^/]+/[^/]+/(?P<id>[^/?#&]+))'

    def _assemble_formats(self, slug, format_id, display_id, token='', extra_fields={}):
        formats = []
        formats = self._extract_m3u8_formats(
            f'{urljoin(self._HLS_URL, slug)}.m3u8?{token}',
            display_id, 'mp4', m3u8_id=f'{format_id}-hls', fatal=False)
        formats.append({
            'format_id': format_id,
            'url': f'{urljoin(self._VIDEO_URL, slug)}?{token}',
        })
        for f in formats:
            f.update(extra_fields)
        return formats

    def _extract_adaptive_formats(self, data, token, display_id, key_format='%s', extra_fields={}):
        formats = []
        additional_key_formats = {
            '%s': {},
            'ad_%s': {
                'format_note': 'Audio description',
                'source_preference': -2
            }
        }
        for additional_key_format, additional_key_fields in additional_key_formats.items():
            for key_quality, key_index in enumerate(('high', 'low')):
                full_key_index = additional_key_format % (key_format % key_index)
                if data.get(full_key_index):
                    formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, {
                        'quality': -1 - key_quality,
                        **additional_key_fields,
                        **extra_fields
                    }))
        return formats
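
# Illustrative aside (not part of the diff): how the key templates above expand.
# With key_format='%s_v2' (as BrainPOPIE passes below), the fields probed in
# `data` are high_v2/low_v2 plus the audio-described ad_high_v2/ad_low_v2,
# with 'high' ranked first via quality = -1 - key_quality.
for template in ('%s', 'ad_%s'):
    for key_quality, key_index in enumerate(('high', 'low')):
        print(template % ('%s_v2' % key_index), 'quality =', -1 - key_quality)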

    def _perform_login(self, username, password):
        login_res = self._download_json(
            'https://api.brainpop.com/api/login', None,
            data=json.dumps({'username': username, 'password': password}).encode(),
            headers={
                'Content-Type': 'application/json',
                'Referer': self._ORIGIN
            }, note='Logging in', errnote='Unable to log in', expected_status=400)
        status_code = int_or_none(login_res['status_code'])
        if status_code != 1505:
            self.report_warning(
                f'Unable to login: {self._LOGIN_ERRORS.get(status_code) or login_res.get("message")}'
                or f'Got status code {status_code}')
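
# Illustrative aside (not part of the diff): the same login handshake issued
# directly with the stdlib. Endpoint, JSON body, Referer and the 1505 success
# code come from _perform_login above; note the real API may answer failures
# with HTTP 400, which urlopen raises as HTTPError.
import json
import urllib.request

def brainpop_login(username, password):
    req = urllib.request.Request(
        'https://api.brainpop.com/api/login',
        data=json.dumps({'username': username, 'password': password}).encode(),
        headers={'Content-Type': 'application/json', 'Referer': 'https://www.brainpop.com'})
    with urllib.request.urlopen(req) as f:
        res = json.load(f)
    if res.get('status_code') != 1505:
        raise RuntimeError(res.get('message') or f'login failed: {res.get("status_code")}')
    return res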


class BrainPOPIE(BrainPOPBaseIE):
    _ORIGIN = 'https://www.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com'
    _TESTS = [{
        'url': 'https://www.brainpop.com/health/conflictresolution/martinlutherkingjr/movie?ref=null',
        'md5': '3ead374233ae74c7f1b0029a01c972f0',
        'info_dict': {
            'id': '1f3259fa457292b4',
            'ext': 'mp4',
            'title': 'Martin Luther King, Jr.',
            'display_id': 'martinlutherkingjr',
            'description': 'md5:f403dbb2bf3ccc7cf4c59d9e43e3c349',
        },
    }, {
        'url': 'https://www.brainpop.com/science/space/bigbang/',
        'md5': '9a1ff0e77444dd9e437354eb669c87ec',
        'info_dict': {
            'id': 'acae52cd48c99acf',
            'ext': 'mp4',
            'title': 'Big Bang',
            'display_id': 'bigbang',
            'description': 'md5:3e53b766b0f116f631b13f4cae185d38',
        },
        'skip': 'Requires login',
    }]

    def _real_extract(self, url):
        slug, display_id = self._match_valid_url(url).group('slug', 'id')
        movie_data = self._download_json(
            f'https://api.brainpop.com/api/content/published/bp/en/{slug}/movie?full=1', display_id,
            'Downloading movie data JSON', 'Unable to download movie data')['data']
        topic_data = traverse_obj(self._download_json(
            f'https://api.brainpop.com/api/content/published/bp/en/{slug}?full=1', display_id,
            'Downloading topic data JSON', 'Unable to download topic data', fatal=False),
            ('data', 'topic'), expected_type=dict) or movie_data['topic']

        if not traverse_obj(movie_data, ('access', 'allow')):
            reason = traverse_obj(movie_data, ('access', 'reason'))
            if 'logged' in reason:
                self.raise_login_required(reason, metadata_available=True)
            else:
                self.raise_no_formats(reason, video_id=display_id)
        movie_feature = movie_data['feature']
        movie_feature_data = movie_feature['data']

        formats, subtitles = [], {}
        formats.extend(self._extract_adaptive_formats(movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', {
            'language': movie_feature.get('language') or 'en',
            'language_preference': 10
        }))
        for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items():
            formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', {
                'language': lang,
                'language_preference': -10
            }))

        # TODO: Do localization fields also have subtitles?
        for name, url in movie_feature_data.items():
            lang = self._search_regex(
                r'^subtitles_(?P<lang>\w+)$', name, 'subtitle metadata', default=None)
            if lang and url:
                subtitles.setdefault(lang, []).append({
                    'url': urljoin(self._CDN_URL, url)
                })

        return {
            'id': topic_data['topic_id'],
            'display_id': display_id,
            'title': topic_data.get('name'),
            'description': topic_data.get('synopsis'),
            'formats': formats,
            'subtitles': subtitles,
        }


class BrainPOPLegacyBaseIE(BrainPOPBaseIE):
    def _parse_js_topic_data(self, topic_data, display_id, token):
        movie_data = topic_data['movies']
        # TODO: Are there non-burned subtitles?
        formats = self._extract_adaptive_formats(movie_data, token, display_id)

        return {
            'id': topic_data['EntryID'],
            'display_id': display_id,
            'title': topic_data.get('name'),
            'alt_title': topic_data.get('title'),
            'description': topic_data.get('synopsis'),
            'formats': formats,
        }

    def _real_extract(self, url):
        slug, display_id = self._match_valid_url(url).group('slug', 'id')
        webpage = self._download_webpage(url, display_id)
        topic_data = self._search_json(
            r'var\s+content\s*=\s*', webpage, 'content data',
            display_id, end_pattern=';')['category']['unit']['topic']
        token = self._search_regex(r'ec_token\s*:\s*[\'"]([^\'"]+)', webpage, 'video token')
        return self._parse_js_topic_data(topic_data, display_id, token)


class BrainPOPJrIE(BrainPOPLegacyBaseIE):
    _ORIGIN = 'https://jr.brainpop.com'
    _VIDEO_URL = 'https://svideos-jr.brainpop.com'
    _HLS_URL = 'https://hls-jr.brainpop.com'
    _CDN_URL = 'https://cdn-jr.brainpop.com'
    _TESTS = [{
        'url': 'https://jr.brainpop.com/health/feelingsandsel/emotions/',
        'md5': '04e0561bb21770f305a0ce6cf0d869ab',
        'info_dict': {
            'id': '347',
            'ext': 'mp4',
            'title': 'Emotions',
            'display_id': 'emotions',
        },
    }, {
        'url': 'https://jr.brainpop.com/science/habitats/arctichabitats/',
        'md5': 'b0ed063bbd1910df00220ee29340f5d6',
        'info_dict': {
            'id': '29',
            'ext': 'mp4',
            'title': 'Arctic Habitats',
            'display_id': 'arctichabitats',
        },
        'skip': 'Requires login',
    }]


class BrainPOPELLIE(BrainPOPLegacyBaseIE):
    _ORIGIN = 'https://ell.brainpop.com'
    _VIDEO_URL = 'https://svideos-esl.brainpop.com'
    _HLS_URL = 'https://hls-esl.brainpop.com'
    _CDN_URL = 'https://cdn-esl.brainpop.com'
    _TESTS = [{
        'url': 'https://ell.brainpop.com/level1/unit1/lesson1/',
        'md5': 'a2012700cfb774acb7ad2e8834eed0d0',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': 'Lesson 1',
            'display_id': 'lesson1',
            'alt_title': 'Personal Pronouns',
        },
    }, {
        'url': 'https://ell.brainpop.com/level3/unit6/lesson5/',
        'md5': 'be19c8292c87b24aacfb5fda2f3f8363',
        'info_dict': {
            'id': '101',
            'ext': 'mp4',
            'title': 'Lesson 5',
            'display_id': 'lesson5',
            'alt_title': 'Review: Unit 6',
        },
        'skip': 'Requires login',
    }]


class BrainPOPEspIE(BrainPOPLegacyBaseIE):
    IE_DESC = 'BrainPOP Español'
    _ORIGIN = 'https://esp.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com/mx'
    _TESTS = [{
        'url': 'https://esp.brainpop.com/ciencia/la_diversidad_de_la_vida/ecosistemas/',
        'md5': 'cb3f062db2b3c5240ddfcfde7108f8c9',
        'info_dict': {
            'id': '3893',
            'ext': 'mp4',
            'title': 'Ecosistemas',
            'display_id': 'ecosistemas',
            'description': 'md5:80fc55b07e241f8c8f2aa8d74deaf3c3',
        },
    }, {
        'url': 'https://esp.brainpop.com/espanol/la_escritura/emily_dickinson/',
        'md5': '98c1b9559e0e33777209c425cda7dac4',
        'info_dict': {
            'id': '7146',
            'ext': 'mp4',
            'title': 'Emily Dickinson',
            'display_id': 'emily_dickinson',
            'description': 'md5:2795ad87b1d239c9711c1e92ab5a978b',
        },
        'skip': 'Requires login',
    }]


class BrainPOPFrIE(BrainPOPLegacyBaseIE):
    IE_DESC = 'BrainPOP Français'
    _ORIGIN = 'https://fr.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com/fr'
    _TESTS = [{
        'url': 'https://fr.brainpop.com/sciencesdelaterre/energie/sourcesdenergie/',
        'md5': '97e7f48af8af93f8a2be11709f239371',
        'info_dict': {
            'id': '1651',
            'ext': 'mp4',
            'title': 'Sources d\'énergie',
            'display_id': 'sourcesdenergie',
            'description': 'md5:7eece350f019a21ef9f64d4088b2d857',
        },
    }, {
        'url': 'https://fr.brainpop.com/francais/ecrire/plagiat/',
        'md5': '0cf2b4f89804d0dd4a360a51310d445a',
        'info_dict': {
            'id': '5803',
            'ext': 'mp4',
            'title': 'Plagiat',
            'display_id': 'plagiat',
            'description': 'md5:4496d87127ace28e8b1eda116e77cd2b',
        },
        'skip': 'Requires login',
    }]


class BrainPOPIlIE(BrainPOPLegacyBaseIE):
    IE_DESC = 'BrainPOP Hebrew'
    _ORIGIN = 'https://il.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com/he'
    _TESTS = [{
        'url': 'https://il.brainpop.com/category_9/subcategory_150/subjects_3782/',
        'md5': '9e4ea9dc60ecd385a6e5ca12ccf31641',
        'info_dict': {
            'id': '3782',
            'ext': 'mp4',
            'title': 'md5:e993632fcda0545d9205602ec314ad67',
            'display_id': 'subjects_3782',
            'description': 'md5:4cc084a8012beb01f037724423a4d4ed',
        },
    }]
@@ -1,117 +1,189 @@
import re

from .adobepass import AdobePassIE
from ..networking import HEADRequest
from ..utils import (
    smuggle_url,
    update_url_query,
    int_or_none,
    extract_attributes,
    float_or_none,
    try_get,
    dict_get,
    get_element_html_by_class,
    int_or_none,
    merge_dicts,
    parse_age_limit,
    remove_end,
    str_or_none,
    traverse_obj,
    unescapeHTML,
    unified_timestamp,
    update_url_query,
    url_or_none,
)


class BravoTVIE(AdobePassIE):
    _VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
    _VALID_URL = r'https?://(?:www\.)?(?P<site>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
        'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
        'info_dict': {
            'id': 'epL0pmK1kQlT',
            'id': '3923059',
            'ext': 'mp4',
            'title': 'The Top Chef Season 16 Winner Is...',
            'description': 'Find out who takes the title of Top Chef!',
            'uploader': 'NBCU-BRAV',
            'upload_date': '20190314',
            'timestamp': 1552591860,
            'season_number': 16,
            'episode_number': 15,
            'series': 'Top Chef',
            'episode': 'The Top Chef Season 16 Winner Is...',
            'duration': 190.0,
        }
            'duration': 190.357,
            'season': 'Season 16',
            'thumbnail': r're:^https://.+\.jpg',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
        'only_matching': True,
        'url': 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling',
        'info_dict': {
            'id': '9000234570',
            'ext': 'mp4',
            'title': 'London Calling',
            'description': 'md5:5af95a8cbac1856bd10e7562f86bb759',
            'upload_date': '20230310',
            'timestamp': 1678410000,
            'season_number': 20,
            'episode_number': 1,
            'series': 'Top Chef',
            'episode': 'London Calling',
            'duration': 3266.03,
            'season': 'Season 20',
            'chapters': 'count:7',
            'thumbnail': r're:^https://.+\.jpg',
            'age_limit': 14,
        },
        'params': {'skip_download': 'm3u8'},
        'skip': 'This video requires AdobePass MSO credentials',
    }, {
        'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night',
        'info_dict': {
            'id': '3692045',
            'ext': 'mp4',
            'title': 'Closing Night',
            'description': 'md5:3170065c5c2f19548d72a4cbc254af63',
            'upload_date': '20180401',
            'timestamp': 1522623600,
            'season_number': 1,
            'episode_number': 1,
            'series': 'In Ice Cold Blood',
            'episode': 'Closing Night',
            'duration': 2629.051,
            'season': 'Season 1',
            'chapters': 'count:6',
            'thumbnail': r're:^https://.+\.jpg',
            'age_limit': 14,
        },
        'params': {'skip_download': 'm3u8'},
        'skip': 'This video requires AdobePass MSO credentials',
    }, {
        'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
        'info_dict': {
            'id': '3974019',
            'ext': 'mp4',
            'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
            'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5',
            'upload_date': '20190617',
            'timestamp': 1560790800,
            'season_number': 2,
            'episode_number': 16,
            'series': 'In Ice Cold Blood',
            'episode': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
            'duration': 68.235,
            'season': 'Season 2',
            'thumbnail': r're:^https://.+\.jpg',
            'age_limit': 14,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        site, display_id = self._match_valid_url(url).groups()
        site, display_id = self._match_valid_url(url).group('site', 'id')
        webpage = self._download_webpage(url, display_id)
        settings = self._parse_json(self._search_regex(
            r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
            display_id)
        info = {}
        settings = self._search_json(
            r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>', webpage, 'settings', display_id)
        tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '')
        query = {
            'mbr': 'true',
            'manifest': 'm3u',
            'formats': 'm3u,mpeg4',
        }
        account_pid, release_pid = [None] * 2
        tve = settings.get('ls_tve')

        if tve:
            query['manifest'] = 'm3u'
            mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage)
            if mobj:
                account_pid, tp_path = mobj.groups()
                release_pid = tp_path.strip('/').split('/')[-1]
            else:
                account_pid = 'HNK2IC'
                tp_path = release_pid = tve['release_pid']
            if tve.get('entitlement') == 'auth':
                adobe_pass = settings.get('tve_adobe_auth', {})
                if site == 'bravotv':
                    site = 'bravo'
        account_pid = tve.get('data-mpx-media-account-pid') or 'HNK2IC'
        account_id = tve['data-mpx-media-account-id']
        metadata = self._parse_json(
            tve.get('data-normalized-video', ''), display_id, fatal=False, transform_source=unescapeHTML)
        video_id = tve.get('data-guid') or metadata['guid']
        if tve.get('data-entitlement') == 'auth':
            auth = traverse_obj(settings, ('tve_adobe_auth', {dict})) or {}
            site = remove_end(site, 'tv')
            release_pid = tve['data-release-pid']
                resource = self._get_mvpd_resource(
                    adobe_pass.get('adobePassResourceId') or site,
                    tve['title'], release_pid, tve.get('rating'))
                query['auth'] = self._extract_mvpd_auth(
                    url, release_pid,
                    adobe_pass.get('adobePassRequestorId') or site, resource)
                tve.get('data-adobe-pass-resource-id') or auth.get('adobePassResourceId') or site,
                tve['data-title'], release_pid, tve.get('data-rating'))
            query.update({
                'switch': 'HLSServiceSecure',
                'auth': self._extract_mvpd_auth(
                    url, release_pid, auth.get('adobePassRequestorId') or site, resource),
            })

        else:
            shared_playlist = settings['ls_playlist']
            account_pid = shared_playlist['account_pid']
            metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']]
            tp_path = release_pid = metadata.get('release_pid')
            if not release_pid:
                release_pid = metadata['guid']
                tp_path = 'media/guid/2140479951/' + release_pid
            info.update({
                'title': metadata['title'],
                'description': metadata.get('description'),
                'season_number': int_or_none(metadata.get('season_num')),
                'episode_number': int_or_none(metadata.get('episode_num')),
            })
            query['switch'] = 'progressive'

        tp_url = 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path)
        ls_playlist = traverse_obj(settings, ('ls_playlist', ..., {dict}), get_all=False) or {}
        account_pid = ls_playlist.get('mpxMediaAccountPid') or 'PHSl-B'
        account_id = ls_playlist['mpxMediaAccountId']
        video_id = ls_playlist['defaultGuid']
        metadata = traverse_obj(
            ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, {dict}), get_all=False)

        tp_url = f'https://link.theplatform.com/s/{account_pid}/media/guid/{account_id}/{video_id}'
        tp_metadata = self._download_json(
            update_url_query(tp_url, {'format': 'preview'}),
            display_id, fatal=False)
        if tp_metadata:
            info.update({
                'title': tp_metadata.get('title'),
                'description': tp_metadata.get('description'),
                'duration': float_or_none(tp_metadata.get('duration'), 1000),
|
||||
'season_number': int_or_none(
|
||||
dict_get(tp_metadata, ('pl1$seasonNumber', 'nbcu$seasonNumber'))),
|
||||
'episode_number': int_or_none(
|
||||
dict_get(tp_metadata, ('pl1$episodeNumber', 'nbcu$episodeNumber'))),
|
||||
# For some reason the series is sometimes wrapped into a single element array.
|
||||
'series': try_get(
|
||||
dict_get(tp_metadata, ('pl1$show', 'nbcu$show')),
|
||||
lambda x: x[0] if isinstance(x, list) else x,
|
||||
expected_type=str),
|
||||
'episode': dict_get(
|
||||
tp_metadata, ('pl1$episodeName', 'nbcu$episodeName', 'title')),
|
||||
})
|
||||
update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
|
||||
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'id': release_pid,
|
||||
'url': smuggle_url(update_url_query(tp_url, query), {'force_smil_url': True}),
|
||||
'ie_key': 'ThePlatform',
|
||||
})
|
||||
return info
|
||||
seconds_or_none = lambda x: float_or_none(x, 1000)
|
||||
chapters = traverse_obj(tp_metadata, ('chapters', ..., {
|
||||
'start_time': ('startTime', {seconds_or_none}),
|
||||
'end_time': ('endTime', {seconds_or_none}),
|
||||
}))
|
||||
# prune pointless single chapters that span the entire duration from short videos
|
||||
if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
|
||||
chapters = None
|
||||
|
||||
m3u8_url = self._request_webpage(HEADRequest(
|
||||
update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').url
|
||||
if 'mpeg_cenc' in m3u8_url:
|
||||
self.report_drm(video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': chapters,
|
||||
**merge_dicts(traverse_obj(tp_metadata, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('duration', {seconds_or_none}),
|
||||
'timestamp': ('pubDate', {seconds_or_none}),
|
||||
'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
|
||||
'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
|
||||
'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
|
||||
'episode': (('title', 'pl1$episodeNumber', 'nbcu$episodeNumber'), {str_or_none}),
|
||||
'age_limit': ('ratings', ..., 'rating', {parse_age_limit}),
|
||||
}, get_all=False), traverse_obj(metadata, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('durationInSeconds', {int_or_none}),
|
||||
'timestamp': ('airDate', {unified_timestamp}),
|
||||
'thumbnail': ('thumbnailUrl', {url_or_none}),
|
||||
'season_number': ('seasonNumber', {int_or_none}),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'episode': 'episodeTitle',
|
||||
'series': 'show',
|
||||
}))
|
||||
}
|
||||
|
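
The reworked extractor above leans heavily on yt_dlp.utils.traverse_obj dict templates with {callable} transforms. A minimal standalone sketch of that mapping style (the sample payload below is invented, not taken from the diff):

from yt_dlp.utils import float_or_none, int_or_none, traverse_obj

sample = {  # invented stand-in for ThePlatform's 'format=preview' JSON
    'title': 'London Calling',
    'duration': 3266030,  # milliseconds
    'pl1$seasonNumber': '20',
    'chapters': [{'startTime': 0, 'endTime': 480000}],
}

seconds_or_none = lambda x: float_or_none(x, 1000)
info = traverse_obj(sample, {
    'title': 'title',
    'duration': ('duration', {seconds_or_none}),  # 3266030 ms -> 3266.03 s
    'season_number': ('pl1$seasonNumber', {int_or_none}),
})
chapters = traverse_obj(sample, ('chapters', ..., {
    'start_time': ('startTime', {seconds_or_none}),
    'end_time': ('endTime', {seconds_or_none}),
}))
print(info)      # {'title': 'London Calling', 'duration': 3266.03, 'season_number': 20}
print(chapters)  # [{'start_time': 0.0, 'end_time': 480.0}]
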
@@ -7,10 +7,10 @@ from .adobepass import AdobePassIE
from .common import InfoExtractor
from ..compat import (
    compat_etree_fromstring,
    compat_HTTPError,
    compat_parse_qs,
    compat_urlparse,
)
from ..networking.exceptions import HTTPError
from ..utils import (
    clean_html,
    dict_get,
@@ -575,6 +575,7 @@ class BrightcoveNewBaseIE(AdobePassIE):
            self.raise_no_formats(
                error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)

        headers.pop('Authorization', None)  # or else http formats will give error 400
        for f in formats:
            f.setdefault('http_headers', {}).update(headers)

@@ -895,8 +896,9 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
            store_pk(policy_key)
            return policy_key

        api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
        headers = {}
        token = smuggled_data.get('token')
        api_url = f'https://{"edge-auth" if token else "edge"}.api.brightcove.com/playback/v1/accounts/{account_id}/{content_type}s/{video_id}'
        headers = {'Authorization': f'Bearer {token}'} if token else {}
        referrer = smuggled_data.get('referrer')  # XXX: notice the spelling/case of the key
        if referrer:
            headers.update({
@@ -913,8 +915,8 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
                json_data = self._download_json(api_url, video_id, headers=headers)
                break
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
                    json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
                if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
                    json_data = self._parse_json(e.cause.response.read().decode(), video_id)[0]
                    message = json_data.get('message') or json_data['error_code']
                    if json_data.get('error_subcode') == 'CLIENT_GEO':
                        self.raise_geo_restricted(msg=message)
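
The hunk above is part of the migration from urllib's compat_HTTPError to yt-dlp's own networking exceptions: the new HTTPError exposes .status and a readable .response in place of .code and .read(). A minimal sketch of the new pattern (illustrative helper, not part of the diff):

from yt_dlp.networking.exceptions import HTTPError
from yt_dlp.utils import ExtractorError

def api_error_payload(e: ExtractorError) -> str:
    # Mirrors the 401/403 handling in BrightcoveNewIE above: on an auth
    # failure, the error body is JSON describing the failure (e.g. CLIENT_GEO).
    if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
        return e.cause.response.read().decode()
    raise e
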
@@ -1,9 +1,5 @@
from .common import InfoExtractor
from ..utils import (
    traverse_obj,
    float_or_none,
    int_or_none
)
from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj


class CallinIE(InfoExtractor):
@@ -35,6 +31,54 @@ class CallinIE(InfoExtractor):
            'episode_number': 1,
            'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd'
        }
    }, {
        'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
        'md5': '14ede27ee2c957b7e4db93140fc0745c',
        'info_dict': {
            'id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
            'ext': 'ts',
            'title': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
            'description': 'Or, why the government doesn’t like SpaceX',
            'channel': 'The Pull Request',
            'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa',
            'duration': 3182.472,
            'series_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638',
            'uploader_url': 'http://thepullrequest.com',
            'upload_date': '20220902',
            'episode': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
            'display_id': 'fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
            'series': 'The Pull Request',
            'channel_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638',
            'view_count': int,
            'uploader': 'Antonio García Martínez',
            'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/1ade9142625344045dc17cf523469ced1d93610762f4c886d06aa190a2f979e8.png',
            'episode_id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
            'timestamp': 1662100688.005,
        }
    }, {
        'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
        'md5': '16f704ddbf82a27e3930533b12062f07',
        'info_dict': {
            'id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
            'ext': 'ts',
            'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
            'description': 'Let’s talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.',
            'channel': 'The DEBRIEF With Briahna Joy Gray',
            'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm',
            'duration': 10043.16,
            'series_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7',
            'uploader_url': 'http://patreon.com/badfaithpodcast',
            'upload_date': '20220826',
            'episode': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
            'display_id': 'episode-',
            'series': 'The DEBRIEF With Briahna Joy Gray',
            'channel_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7',
            'view_count': int,
            'uploader': 'Briahna Gray',
            'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/461ea0d86172cb6aff7d6c80fd49259cf5e64bdf737a4650f8bc24cf392ca218.png',
            'episode_id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
            'timestamp': 1661476708.282,
        }
    }]

    def try_get_user_name(self, d):
@@ -86,6 +130,7 @@ class CallinIE(InfoExtractor):

        return {
            'id': id,
            '_old_archive_ids': [make_archive_id(self, display_id.rsplit('-', 1)[-1])],
            'display_id': display_id,
            'title': title,
            'formats': formats,
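
The _old_archive_ids entry added above keeps existing --download-archive entries valid after Callin moved to long hex video IDs. A small sketch of how make_archive_id rebuilds the "<extractor> <id>" string stored in the archive file (example values taken from the test above):

from yt_dlp.utils import make_archive_id

display_id = 'fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW'
legacy_id = display_id.rsplit('-', 1)[-1]     # old short ID: 'PrumRdSQJW'
print(make_archive_id('Callin', legacy_id))   # -> 'callin PrumRdSQJW'
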
85 plugins/youtube_download/yt_dlp/extractor/camfm.py Normal file
@@ -0,0 +1,85 @@
import re

from .common import InfoExtractor
from ..utils import (
    clean_html,
    get_element_by_class,
    get_elements_by_class,
    join_nonempty,
    traverse_obj,
    unified_timestamp,
    urljoin,
)


class CamFMShowIE(InfoExtractor):
    _VALID_URL = r'https://(?:www\.)?camfm\.co\.uk/shows/(?P<id>[^/]+)'
    _TESTS = [{
        'playlist_mincount': 5,
        'url': 'https://camfm.co.uk/shows/soul-mining/',
        'info_dict': {
            'id': 'soul-mining',
            'thumbnail': 'md5:6a873091f92c936f23bdcce80f75e66a',
            'title': 'Soul Mining',
            'description': 'Telling the stories of jazz, funk and soul from all corners of the world.',
        },
    }]

    def _real_extract(self, url):
        show_id = self._match_id(url)
        page = self._download_webpage(url, show_id)

        return {
            '_type': 'playlist',
            'id': show_id,
            'entries': [self.url_result(urljoin('https://camfm.co.uk', i), CamFMEpisodeIE)
                        for i in re.findall(r"javascript:popup\('(/player/[^']+)', 'listen'", page)],
            'thumbnail': urljoin('https://camfm.co.uk', self._search_regex(
                r'<img[^>]+class="thumb-expand"[^>]+src="([^"]+)"', page, 'thumbnail', fatal=False)),
            'title': self._html_search_regex('<h1>([^<]+)</h1>', page, 'title', fatal=False),
            'description': clean_html(get_element_by_class('small-12 medium-8 cell', page))
        }


class CamFMEpisodeIE(InfoExtractor):
    _VALID_URL = r'https://(?:www\.)?camfm\.co\.uk/player/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://camfm.co.uk/player/43336',
        'skip': 'Episode will expire - don\'t actually know when, but it will go eventually',
        'info_dict': {
            'id': '43336',
            'title': 'AITAA: Am I the Agony Aunt? - 19:00 Tue 16/05/2023',
            'ext': 'mp3',
            'upload_date': '20230516',
            'description': 'md5:f165144f94927c0f1bfa2ee6e6ab7bbf',
            'timestamp': 1684263600,
            'series': 'AITAA: Am I the Agony Aunt?',
            'thumbnail': 'md5:5980a831360d0744c3764551be3d09c1',
            'categories': ['Entertainment'],
        }
    }]

    def _real_extract(self, url):
        episode_id = self._match_id(url)
        page = self._download_webpage(url, episode_id)
        audios = self._parse_html5_media_entries('https://audio.camfm.co.uk', page, episode_id)

        caption = get_element_by_class('caption', page)
        series = clean_html(re.sub(r'<span[^<]+<[^<]+>', '', caption))

        card_section = get_element_by_class('card-section', page)
        date = self._html_search_regex('>Aired at ([^<]+)<', card_section, 'air date', fatal=False)

        return {
            'id': episode_id,
            'title': join_nonempty(series, date, delim=' - '),
            'formats': traverse_obj(audios, (..., 'formats', ...)),
            'timestamp': unified_timestamp(date),  # XXX: Does not account for UK's daylight savings
            'series': series,
            'description': clean_html(re.sub(r'<b>[^<]+</b><br[^>]+/>', '', card_section)),
            'thumbnail': urljoin('https://camfm.co.uk', self._search_regex(
                r'<div[^>]+class="cover-art"[^>]+style="[^"]+url\(\'([^\']+)',
                page, 'thumbnail', fatal=False)),
            'categories': get_elements_by_class('label', caption),
            'was_live': True,
        }
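
A hedged usage sketch for the two CamFM extractors added above; output depends on the live site, and the /player/43336 episode from the test may already have expired:

import yt_dlp

# list a show's episode pages without fetching each one
with yt_dlp.YoutubeDL({'quiet': True, 'extract_flat': True}) as ydl:
    show = ydl.extract_info('https://camfm.co.uk/shows/soul-mining/', download=False)
    print(show['id'], [entry['url'] for entry in show['entries']][:3])

# fetch a single episode (an mp3) without downloading it
with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
    episode = ydl.extract_info('https://camfm.co.uk/player/43336', download=False)
    print(episode['title'], episode['ext'])
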
@@ -1,9 +1,5 @@
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    url_or_none,
)
from ..utils import int_or_none, url_or_none


class CamModelsIE(InfoExtractor):
@@ -17,32 +13,11 @@ class CamModelsIE(InfoExtractor):
    def _real_extract(self, url):
        user_id = self._match_id(url)

        webpage = self._download_webpage(
            url, user_id, headers=self.geo_verification_headers())

        manifest_root = self._html_search_regex(
            r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)

        if not manifest_root:
            ERRORS = (
                ("I'm offline, but let's stay connected", 'This user is currently offline'),
                ('in a private show', 'This user is in a private show'),
                ('is currently performing LIVE', 'This model is currently performing live'),
            )
            for pattern, message in ERRORS:
                if pattern in webpage:
                    error = message
                    expected = True
                    break
            else:
                error = 'Unable to find manifest URL root'
                expected = False
            raise ExtractorError(error, expected=expected)

        manifest = self._download_json(
            '%s%s.json' % (manifest_root, user_id), user_id)
            'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id)

        formats = []
        thumbnails = []
        for format_id, format_dict in manifest['formats'].items():
            if not isinstance(format_dict, dict):
                continue
@@ -82,12 +57,20 @@ class CamModelsIE(InfoExtractor):
                    'quality': -10,
                })
            else:
                if format_id == 'jpeg':
                    thumbnails.append({
                        'url': f['url'],
                        'width': f['width'],
                        'height': f['height'],
                        'format_id': f['format_id'],
                    })
                continue
            formats.append(f)

        return {
            'id': user_id,
            'title': user_id,
            'thumbnails': thumbnails,
            'is_live': True,
            'formats': formats,
            'age_limit': 18
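
A hedged sketch of the reworked format loop above, which now keeps 'jpeg' manifest entries as thumbnails instead of discarding them; the sample entries are invented:

entries = [
    {'format_id': 'mp4-hls', 'url': 'https://example.invalid/live.m3u8'},
    {'format_id': 'jpeg', 'url': 'https://example.invalid/snap.jpg', 'width': 640, 'height': 360},
]
formats, thumbnails = [], []
for f in entries:
    if f['format_id'] == 'jpeg':
        # snapshot entries carry image dimensions, not playable media
        thumbnails.append({k: f[k] for k in ('url', 'width', 'height', 'format_id')})
        continue
    formats.append(f)
print(len(formats), len(thumbnails))  # 1 1
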
@@ -64,7 +64,7 @@ class CanalplusIE(InfoExtractor):
            # response = self._request_webpage(
            #     HEADRequest(fmt_url), video_id,
            #     'Checking if the video is georestricted')
            # if '/blocage' in response.geturl():
            # if '/blocage' in response.url:
            #     raise ExtractorError(
            #         'The video is not available in your country',
            #         expected=True)
@@ -1,383 +0,0 @@
import json

from .common import InfoExtractor
from .gigya import GigyaBaseIE
from ..compat import compat_HTTPError
from ..utils import (
    ExtractorError,
    clean_html,
    extract_attributes,
    float_or_none,
    get_element_by_class,
    int_or_none,
    merge_dicts,
    str_or_none,
    strip_or_none,
    url_or_none,
    urlencode_postdata
)


class CanvasIE(InfoExtractor):
    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
        'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
        'info_dict': {
            'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
            'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
            'ext': 'mp4',
            'title': 'Nachtwacht: De Greystook',
            'description': 'Nachtwacht: De Greystook',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1468.02,
        },
        'expected_warnings': ['is not a supported codec'],
    }, {
        'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
        'only_matching': True,
    }]
    _GEO_BYPASS = False
    _HLS_ENTRY_PROTOCOLS_MAP = {
        'HLS': 'm3u8_native',
        'HLS_AES': 'm3u8_native',
    }
    _REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2'

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        site_id, video_id = mobj.group('site_id'), mobj.group('id')

        data = None
        if site_id != 'vrtvideo':
            # Old API endpoint, serves more formats but may fail for some videos
            data = self._download_json(
                'https://mediazone.vrt.be/api/v1/%s/assets/%s'
                % (site_id, video_id), video_id, 'Downloading asset JSON',
                'Unable to download asset JSON', fatal=False)

        # New API endpoint
        if not data:
            vrtnutoken = self._download_json('https://token.vrt.be/refreshtoken',
                                             video_id, note='refreshtoken: Retrieve vrtnutoken',
                                             errnote='refreshtoken failed')['vrtnutoken']
            headers = self.geo_verification_headers()
            headers.update({'Content-Type': 'application/json; charset=utf-8'})
            vrtPlayerToken = self._download_json(
                '%s/tokens' % self._REST_API_BASE, video_id,
                'Downloading token', headers=headers, data=json.dumps({
                    'identityToken': vrtnutoken
                }).encode('utf-8'))['vrtPlayerToken']
            data = self._download_json(
                '%s/videos/%s' % (self._REST_API_BASE, video_id),
                video_id, 'Downloading video JSON', query={
                    'vrtPlayerToken': vrtPlayerToken,
                    'client': 'null',
                }, expected_status=400)
        if 'title' not in data:
            code = data.get('code')
            if code == 'AUTHENTICATION_REQUIRED':
                self.raise_login_required()
            elif code == 'INVALID_LOCATION':
                self.