Updated ytdlp version
This commit is contained in:
parent
5264103f31
commit
ee3e042b1b
File diff suppressed because it is too large
Load Diff
@ -13,6 +13,7 @@ import optparse
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
from .compat import compat_shlex_quote
|
||||
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
|
||||
@ -56,11 +57,11 @@ from .utils import (
|
||||
read_stdin,
|
||||
render_table,
|
||||
setproctitle,
|
||||
std_headers,
|
||||
traverse_obj,
|
||||
variadic,
|
||||
write_string,
|
||||
)
|
||||
from .utils.networking import std_headers
|
||||
from .YoutubeDL import YoutubeDL
|
||||
|
||||
_IN_CLI = False
|
||||
@ -187,8 +188,8 @@ def validate_options(opts):
|
||||
raise ValueError(f'{max_name} "{max_val}" must be must be greater than or equal to {min_name} "{min_val}"')
|
||||
|
||||
# Usernames and passwords
|
||||
validate(not opts.usenetrc or (opts.username is None and opts.password is None),
|
||||
'.netrc', msg='using {name} conflicts with giving username/password')
|
||||
validate(sum(map(bool, (opts.usenetrc, opts.netrc_cmd, opts.username))) <= 1, '.netrc',
|
||||
msg='{name}, netrc command and username/password are mutually exclusive options')
|
||||
validate(opts.password is None or opts.username is not None, 'account username', msg='{name} missing')
|
||||
validate(opts.ap_password is None or opts.ap_username is not None,
|
||||
'TV Provider account username', msg='{name} missing')
|
||||
@ -318,31 +319,50 @@ def validate_options(opts):
|
||||
if outtmpl_default == '':
|
||||
opts.skip_download = None
|
||||
del opts.outtmpl['default']
|
||||
if outtmpl_default and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio:
|
||||
raise ValueError(
|
||||
'Cannot download a video and extract audio into the same file! '
|
||||
f'Use "{outtmpl_default}.%(ext)s" instead of "{outtmpl_default}" as the output template')
|
||||
|
||||
def parse_chapters(name, value):
|
||||
chapters, ranges = [], []
|
||||
def parse_chapters(name, value, advanced=False):
|
||||
parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x)
|
||||
TIMESTAMP_RE = r'''(?x)(?:
|
||||
(?P<start_sign>-?)(?P<start>[^-]+)
|
||||
)?\s*-\s*(?:
|
||||
(?P<end_sign>-?)(?P<end>[^-]+)
|
||||
)?'''
|
||||
|
||||
chapters, ranges, from_url = [], [], False
|
||||
for regex in value or []:
|
||||
if regex.startswith('*'):
|
||||
for range_ in map(str.strip, regex[1:].split(',')):
|
||||
mobj = range_ != '-' and re.fullmatch(r'([^-]+)?\s*-\s*([^-]+)?', range_)
|
||||
dur = mobj and (parse_timestamp(mobj.group(1) or '0'), parse_timestamp(mobj.group(2) or 'inf'))
|
||||
if None in (dur or [None]):
|
||||
raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "*start-end"')
|
||||
ranges.append(dur)
|
||||
if advanced and regex == '*from-url':
|
||||
from_url = True
|
||||
continue
|
||||
elif not regex.startswith('*'):
|
||||
try:
|
||||
chapters.append(re.compile(regex))
|
||||
except re.error as err:
|
||||
raise ValueError(f'invalid {name} regex "{regex}" - {err}')
|
||||
return chapters, ranges
|
||||
continue
|
||||
|
||||
opts.remove_chapters, opts.remove_ranges = parse_chapters('--remove-chapters', opts.remove_chapters)
|
||||
opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges))
|
||||
for range_ in map(str.strip, regex[1:].split(',')):
|
||||
mobj = range_ != '-' and re.fullmatch(TIMESTAMP_RE, range_)
|
||||
dur = mobj and [parse_timestamp(mobj.group('start') or '0'), parse_timestamp(mobj.group('end') or 'inf')]
|
||||
signs = mobj and (mobj.group('start_sign'), mobj.group('end_sign'))
|
||||
|
||||
err = None
|
||||
if None in (dur or [None]):
|
||||
err = 'Must be of the form "*start-end"'
|
||||
elif not advanced and any(signs):
|
||||
err = 'Negative timestamps are not allowed'
|
||||
else:
|
||||
dur[0] *= -1 if signs[0] else 1
|
||||
dur[1] *= -1 if signs[1] else 1
|
||||
if dur[1] == float('-inf'):
|
||||
err = '"-inf" is not a valid end'
|
||||
if err:
|
||||
raise ValueError(f'invalid {name} time range "{regex}". {err}')
|
||||
ranges.append(dur)
|
||||
|
||||
return chapters, ranges, from_url
|
||||
|
||||
opts.remove_chapters, opts.remove_ranges, _ = parse_chapters('--remove-chapters', opts.remove_chapters)
|
||||
opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges, True))
|
||||
|
||||
# Cookies from browser
|
||||
if opts.cookiesfrombrowser:
|
||||
@ -400,14 +420,19 @@ def validate_options(opts):
|
||||
except Exception as err:
|
||||
raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}')
|
||||
|
||||
geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country
|
||||
if geo_bypass_code is not None:
|
||||
opts.geo_bypass_country, opts.geo_bypass_ip_block = None, None
|
||||
if opts.geo_bypass.lower() not in ('default', 'never'):
|
||||
try:
|
||||
GeoUtils.random_ipv4(geo_bypass_code)
|
||||
GeoUtils.random_ipv4(opts.geo_bypass)
|
||||
except Exception:
|
||||
raise ValueError('unsupported geo-bypass country or ip-block')
|
||||
raise ValueError(f'Unsupported --xff "{opts.geo_bypass}"')
|
||||
if len(opts.geo_bypass) == 2:
|
||||
opts.geo_bypass_country = opts.geo_bypass
|
||||
else:
|
||||
opts.geo_bypass_ip_block = opts.geo_bypass
|
||||
opts.geo_bypass = opts.geo_bypass.lower() != 'never'
|
||||
|
||||
opts.match_filter = match_filter_func(opts.match_filter)
|
||||
opts.match_filter = match_filter_func(opts.match_filter, opts.breaking_match_filter)
|
||||
|
||||
if opts.download_archive is not None:
|
||||
opts.download_archive = expand_path(opts.download_archive)
|
||||
@ -434,6 +459,10 @@ def validate_options(opts):
|
||||
elif ed and proto == 'default':
|
||||
default_downloader = ed.get_basename()
|
||||
|
||||
for policy in opts.color.values():
|
||||
if policy not in ('always', 'auto', 'no_color', 'never'):
|
||||
raise ValueError(f'"{policy}" is not a valid color policy')
|
||||
|
||||
warnings, deprecation_warnings = [], []
|
||||
|
||||
# Common mistake: -f best
|
||||
@ -708,7 +737,8 @@ def parse_options(argv=None):
|
||||
'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename',
|
||||
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
|
||||
))
|
||||
opts.quiet = opts.quiet or any_getting or opts.print_json or bool(opts.forceprint)
|
||||
if opts.quiet is None:
|
||||
opts.quiet = any_getting or opts.print_json or bool(opts.forceprint)
|
||||
|
||||
playlist_pps = [pp for pp in postprocessors if pp.get('when') == 'playlist']
|
||||
write_playlist_infojson = (opts.writeinfojson and not opts.clean_infojson
|
||||
@ -734,6 +764,7 @@ def parse_options(argv=None):
|
||||
return ParsedOptions(parser, opts, urls, {
|
||||
'usenetrc': opts.usenetrc,
|
||||
'netrc_location': opts.netrc_location,
|
||||
'netrc_cmd': opts.netrc_cmd,
|
||||
'username': opts.username,
|
||||
'password': opts.password,
|
||||
'twofactor': opts.twofactor,
|
||||
@ -891,7 +922,7 @@ def parse_options(argv=None):
|
||||
'playlist_items': opts.playlist_items,
|
||||
'xattr_set_filesize': opts.xattr_set_filesize,
|
||||
'match_filter': opts.match_filter,
|
||||
'no_color': opts.no_color,
|
||||
'color': opts.color,
|
||||
'ffmpeg_location': opts.ffmpeg_location,
|
||||
'hls_prefer_native': opts.hls_prefer_native,
|
||||
'hls_use_mpegts': opts.hls_use_mpegts,
|
||||
@ -935,7 +966,8 @@ def _real_main(argv=None):
|
||||
if opts.rm_cachedir:
|
||||
ydl.cache.remove()
|
||||
|
||||
updater = Updater(ydl)
|
||||
try:
|
||||
updater = Updater(ydl, opts.update_self)
|
||||
if opts.update_self and updater.update() and actual_use:
|
||||
if updater.cmd:
|
||||
return updater.restart()
|
||||
@ -943,6 +975,9 @@ def _real_main(argv=None):
|
||||
# It makes sense to exit here, but the old behavior is to continue
|
||||
ydl.report_warning('Restart yt-dlp to use the updated version')
|
||||
# return 100, 'ERROR: The program must exit for the update to complete'
|
||||
except Exception:
|
||||
traceback.print_exc()
|
||||
ydl._download_retcode = 100
|
||||
|
||||
if not actual_use:
|
||||
if pre_process:
|
||||
@ -956,6 +991,8 @@ def _real_main(argv=None):
|
||||
parser.destroy()
|
||||
try:
|
||||
if opts.load_info_filename is not None:
|
||||
if all_urls:
|
||||
ydl.report_warning('URLs are ignored due to --load-info-json')
|
||||
return ydl.download_with_info_file(expand_path(opts.load_info_filename))
|
||||
else:
|
||||
return ydl.download(all_urls)
|
||||
|
@ -1,30 +1,8 @@
|
||||
import ast
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from PyInstaller.utils.hooks import collect_submodules
|
||||
|
||||
|
||||
def find_attribute_accesses(node, name, path=()):
|
||||
if isinstance(node, ast.Attribute):
|
||||
path = [*path, node.attr]
|
||||
if isinstance(node.value, ast.Name) and node.value.id == name:
|
||||
yield path[::-1]
|
||||
for child in ast.iter_child_nodes(node):
|
||||
yield from find_attribute_accesses(child, name, path)
|
||||
|
||||
|
||||
def collect_used_submodules(name, level):
|
||||
for dirpath, _, filenames in os.walk(Path(__file__).parent.parent):
|
||||
for filename in filenames:
|
||||
if not filename.endswith('.py'):
|
||||
continue
|
||||
with open(Path(dirpath) / filename, encoding='utf8') as f:
|
||||
for submodule in find_attribute_accesses(ast.parse(f.read()), name):
|
||||
yield '.'.join(submodule[:level])
|
||||
|
||||
|
||||
def pycryptodome_module():
|
||||
try:
|
||||
import Cryptodome # noqa: F401
|
||||
@ -40,13 +18,10 @@ def pycryptodome_module():
|
||||
|
||||
|
||||
def get_hidden_imports():
|
||||
yield 'yt_dlp.compat._legacy'
|
||||
yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated')
|
||||
yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated')
|
||||
yield pycryptodome_module()
|
||||
yield from collect_submodules('websockets')
|
||||
|
||||
crypto = pycryptodome_module()
|
||||
for sm in set(collect_used_submodules('Cryptodome', 2)):
|
||||
yield f'{crypto}.{sm}'
|
||||
|
||||
# These are auto-detected, but explicitly add them just in case
|
||||
yield from ('mutagen', 'brotli', 'certifi')
|
||||
|
||||
|
@ -5,14 +5,14 @@ from .compat import compat_ord
|
||||
from .dependencies import Cryptodome
|
||||
from .utils import bytes_to_intlist, intlist_to_bytes
|
||||
|
||||
if Cryptodome:
|
||||
if Cryptodome.AES:
|
||||
def aes_cbc_decrypt_bytes(data, key, iv):
|
||||
""" Decrypt bytes with AES-CBC using pycryptodome """
|
||||
return Cryptodome.Cipher.AES.new(key, Cryptodome.Cipher.AES.MODE_CBC, iv).decrypt(data)
|
||||
return Cryptodome.AES.new(key, Cryptodome.AES.MODE_CBC, iv).decrypt(data)
|
||||
|
||||
def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
|
||||
""" Decrypt bytes with AES-GCM using pycryptodome """
|
||||
return Cryptodome.Cipher.AES.new(key, Cryptodome.Cipher.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag)
|
||||
return Cryptodome.AES.new(key, Cryptodome.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag)
|
||||
|
||||
else:
|
||||
def aes_cbc_decrypt_bytes(data, key, iv):
|
||||
|
@ -1,5 +1,4 @@
|
||||
import contextlib
|
||||
import errno
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@ -39,11 +38,7 @@ class Cache:
|
||||
|
||||
fn = self._get_cache_fn(section, key, dtype)
|
||||
try:
|
||||
try:
|
||||
os.makedirs(os.path.dirname(fn))
|
||||
except OSError as ose:
|
||||
if ose.errno != errno.EEXIST:
|
||||
raise
|
||||
os.makedirs(os.path.dirname(fn), exist_ok=True)
|
||||
self._ydl.write_debug(f'Saving {section}.{key} to cache')
|
||||
write_json_file({'yt-dlp_version': __version__, 'data': data}, fn)
|
||||
except Exception:
|
||||
|
5
plugins/youtube_download/yt_dlp/casefold.py
Normal file
5
plugins/youtube_download/yt_dlp/casefold.py
Normal file
@ -0,0 +1,5 @@
|
||||
import warnings
|
||||
|
||||
warnings.warn(DeprecationWarning(f'{__name__} is deprecated'))
|
||||
|
||||
casefold = str.casefold
|
@ -1,14 +1,11 @@
|
||||
import os
|
||||
import sys
|
||||
import warnings
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from ._deprecated import * # noqa: F401, F403
|
||||
from .compat_utils import passthrough_module
|
||||
|
||||
# XXX: Implement this the same way as other DeprecationWarnings without circular import
|
||||
passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn(
|
||||
DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=5))
|
||||
passthrough_module(__name__, '._deprecated')
|
||||
del passthrough_module
|
||||
|
||||
|
||||
# HTMLParseError has been deprecated in Python 3.3 and removed in
|
||||
@ -70,3 +67,13 @@ if compat_os_name in ('nt', 'ce'):
|
||||
return userhome + path[i:]
|
||||
else:
|
||||
compat_expanduser = os.path.expanduser
|
||||
|
||||
|
||||
def urllib_req_to_req(urllib_request):
|
||||
"""Convert urllib Request to a networking Request"""
|
||||
from ..networking import Request
|
||||
from ..utils.networking import HTTPHeaderDict
|
||||
return Request(
|
||||
urllib_request.get_full_url(), data=urllib_request.data, method=urllib_request.get_method(),
|
||||
headers=HTTPHeaderDict(urllib_request.headers, urllib_request.unredirected_hdrs),
|
||||
extensions={'timeout': urllib_request.timeout} if hasattr(urllib_request, 'timeout') else None)
|
||||
|
@ -1,4 +1,12 @@
|
||||
"""Deprecated - New code should avoid these"""
|
||||
import warnings
|
||||
|
||||
from .compat_utils import passthrough_module
|
||||
|
||||
# XXX: Implement this the same way as other DeprecationWarnings without circular import
|
||||
passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn(
|
||||
DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
|
||||
del passthrough_module
|
||||
|
||||
import base64
|
||||
import urllib.error
|
||||
@ -8,7 +16,6 @@ compat_str = str
|
||||
|
||||
compat_b64decode = base64.b64decode
|
||||
|
||||
compat_HTTPError = urllib.error.HTTPError
|
||||
compat_urlparse = urllib.parse
|
||||
compat_parse_qs = urllib.parse.parse_qs
|
||||
compat_urllib_parse_unquote = urllib.parse.unquote
|
||||
|
@ -1,5 +1,6 @@
|
||||
""" Do not use! """
|
||||
|
||||
import base64
|
||||
import collections
|
||||
import ctypes
|
||||
import getpass
|
||||
@ -15,12 +16,12 @@ import shlex
|
||||
import shutil
|
||||
import socket
|
||||
import struct
|
||||
import subprocess
|
||||
import tokenize
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree as etree
|
||||
from subprocess import DEVNULL
|
||||
|
||||
# isort: split
|
||||
import asyncio # noqa: F401
|
||||
@ -29,10 +30,11 @@ from asyncio import run as compat_asyncio_run # noqa: F401
|
||||
from re import Pattern as compat_Pattern # noqa: F401
|
||||
from re import match as compat_Match # noqa: F401
|
||||
|
||||
from . import compat_expanduser, compat_HTMLParseError, compat_realpath
|
||||
from .compat_utils import passthrough_module
|
||||
from ..dependencies import Cryptodome_AES as compat_pycrypto_AES # noqa: F401
|
||||
from ..dependencies import brotli as compat_brotli # noqa: F401
|
||||
from ..dependencies import websockets as compat_websockets # noqa: F401
|
||||
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
|
||||
|
||||
passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
|
||||
|
||||
@ -47,41 +49,48 @@ def compat_setenv(key, value, env=os.environ):
|
||||
env[key] = value
|
||||
|
||||
|
||||
compat_base64_b64decode = base64.b64decode
|
||||
compat_basestring = str
|
||||
compat_casefold = str.casefold
|
||||
compat_chr = chr
|
||||
compat_collections_abc = collections.abc
|
||||
compat_cookiejar = http.cookiejar
|
||||
compat_cookiejar_Cookie = http.cookiejar.Cookie
|
||||
compat_cookies = http.cookies
|
||||
compat_cookies_SimpleCookie = http.cookies.SimpleCookie
|
||||
compat_etree_Element = etree.Element
|
||||
compat_etree_register_namespace = etree.register_namespace
|
||||
compat_cookiejar = compat_http_cookiejar = http.cookiejar
|
||||
compat_cookiejar_Cookie = compat_http_cookiejar_Cookie = http.cookiejar.Cookie
|
||||
compat_cookies = compat_http_cookies = http.cookies
|
||||
compat_cookies_SimpleCookie = compat_http_cookies_SimpleCookie = http.cookies.SimpleCookie
|
||||
compat_etree_Element = compat_xml_etree_ElementTree_Element = etree.Element
|
||||
compat_etree_register_namespace = compat_xml_etree_register_namespace = etree.register_namespace
|
||||
compat_filter = filter
|
||||
compat_get_terminal_size = shutil.get_terminal_size
|
||||
compat_getenv = os.getenv
|
||||
compat_getpass = getpass.getpass
|
||||
compat_getpass = compat_getpass_getpass = getpass.getpass
|
||||
compat_html_entities = html.entities
|
||||
compat_html_entities_html5 = html.entities.html5
|
||||
compat_HTMLParser = html.parser.HTMLParser
|
||||
compat_html_parser_HTMLParseError = compat_HTMLParseError
|
||||
compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
|
||||
compat_http_client = http.client
|
||||
compat_http_server = http.server
|
||||
compat_HTTPError = urllib.error.HTTPError
|
||||
compat_input = input
|
||||
compat_integer_types = (int, )
|
||||
compat_itertools_count = itertools.count
|
||||
compat_kwargs = lambda kwargs: kwargs
|
||||
compat_map = map
|
||||
compat_numeric_types = (int, float, complex)
|
||||
compat_os_path_expanduser = compat_expanduser
|
||||
compat_os_path_realpath = compat_realpath
|
||||
compat_print = print
|
||||
compat_shlex_split = shlex.split
|
||||
compat_socket_create_connection = socket.create_connection
|
||||
compat_Struct = struct.Struct
|
||||
compat_struct_pack = struct.pack
|
||||
compat_struct_unpack = struct.unpack
|
||||
compat_subprocess_get_DEVNULL = lambda: DEVNULL
|
||||
compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
|
||||
compat_tokenize_tokenize = tokenize.tokenize
|
||||
compat_urllib_error = urllib.error
|
||||
compat_urllib_HTTPError = urllib.error.HTTPError
|
||||
compat_urllib_parse = urllib.parse
|
||||
compat_urllib_parse_parse_qs = urllib.parse.parse_qs
|
||||
compat_urllib_parse_quote = urllib.parse.quote
|
||||
compat_urllib_parse_quote_plus = urllib.parse.quote_plus
|
||||
compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
|
||||
@ -90,8 +99,10 @@ compat_urllib_parse_urlunparse = urllib.parse.urlunparse
|
||||
compat_urllib_request = urllib.request
|
||||
compat_urllib_request_DataHandler = urllib.request.DataHandler
|
||||
compat_urllib_response = urllib.response
|
||||
compat_urlretrieve = urllib.request.urlretrieve
|
||||
compat_xml_parse_error = etree.ParseError
|
||||
compat_urlretrieve = compat_urllib_request_urlretrieve = urllib.request.urlretrieve
|
||||
compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseError
|
||||
compat_xpath = lambda xpath: xpath
|
||||
compat_zip = zip
|
||||
workaround_optparse_bug9161 = lambda: None
|
||||
|
||||
legacy = []
|
||||
|
@ -48,7 +48,7 @@ def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=la
|
||||
"""Passthrough parent module into a child module, creating the parent if necessary"""
|
||||
def __getattr__(attr):
|
||||
if _is_package(parent):
|
||||
with contextlib.suppress(ImportError):
|
||||
with contextlib.suppress(ModuleNotFoundError):
|
||||
return importlib.import_module(f'.{attr}', parent.__name__)
|
||||
|
||||
ret = from_child(attr)
|
||||
|
13
plugins/youtube_download/yt_dlp/compat/types.py
Normal file
13
plugins/youtube_download/yt_dlp/compat/types.py
Normal file
@ -0,0 +1,13 @@
|
||||
# flake8: noqa: F405
|
||||
from types import * # noqa: F403
|
||||
|
||||
from .compat_utils import passthrough_module
|
||||
|
||||
passthrough_module(__name__, 'types')
|
||||
del passthrough_module
|
||||
|
||||
try:
|
||||
# NB: pypy has builtin NoneType, so checking NameError won't work
|
||||
from types import NoneType # >= 3.10
|
||||
except ImportError:
|
||||
NoneType = type(None)
|
10
plugins/youtube_download/yt_dlp/compat/urllib/__init__.py
Normal file
10
plugins/youtube_download/yt_dlp/compat/urllib/__init__.py
Normal file
@ -0,0 +1,10 @@
|
||||
# flake8: noqa: F405
|
||||
from urllib import * # noqa: F403
|
||||
|
||||
del request
|
||||
from . import request # noqa: F401
|
||||
|
||||
from ..compat_utils import passthrough_module
|
||||
|
||||
passthrough_module(__name__, 'urllib')
|
||||
del passthrough_module
|
40
plugins/youtube_download/yt_dlp/compat/urllib/request.py
Normal file
40
plugins/youtube_download/yt_dlp/compat/urllib/request.py
Normal file
@ -0,0 +1,40 @@
|
||||
# flake8: noqa: F405
|
||||
from urllib.request import * # noqa: F403
|
||||
|
||||
from ..compat_utils import passthrough_module
|
||||
|
||||
passthrough_module(__name__, 'urllib.request')
|
||||
del passthrough_module
|
||||
|
||||
|
||||
from .. import compat_os_name
|
||||
|
||||
if compat_os_name == 'nt':
|
||||
# On older python versions, proxies are extracted from Windows registry erroneously. [1]
|
||||
# If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2]
|
||||
# It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade
|
||||
# it to http on these older python versions to avoid issues
|
||||
# This also applies for ftp proxy type, as ftp:// proxy scheme is not supported.
|
||||
# 1: https://github.com/python/cpython/issues/86793
|
||||
# 2: https://github.com/python/cpython/blob/51f1ae5ceb0673316c4e4b0175384e892e33cc6e/Lib/urllib/request.py#L2683-L2698
|
||||
import sys
|
||||
from urllib.request import getproxies_environment, getproxies_registry
|
||||
|
||||
def getproxies_registry_patched():
|
||||
proxies = getproxies_registry()
|
||||
if (
|
||||
sys.version_info >= (3, 10, 5) # https://docs.python.org/3.10/whatsnew/changelog.html#python-3-10-5-final
|
||||
or (3, 9, 13) <= sys.version_info < (3, 10) # https://docs.python.org/3.9/whatsnew/changelog.html#python-3-9-13-final
|
||||
):
|
||||
return proxies
|
||||
|
||||
for scheme in ('https', 'ftp'):
|
||||
if scheme in proxies and proxies[scheme].startswith(f'{scheme}://'):
|
||||
proxies[scheme] = 'http' + proxies[scheme][len(scheme):]
|
||||
|
||||
return proxies
|
||||
|
||||
def getproxies():
|
||||
return getproxies_environment() or getproxies_registry_patched()
|
||||
|
||||
del compat_os_name
|
@ -1,7 +1,9 @@
|
||||
import base64
|
||||
import collections
|
||||
import contextlib
|
||||
import http.cookiejar
|
||||
import http.cookies
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@ -11,6 +13,7 @@ import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import urllib.request
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from enum import Enum, auto
|
||||
from hashlib import pbkdf2_hmac
|
||||
@ -20,6 +23,7 @@ from .aes import (
|
||||
aes_gcm_decrypt_and_verify_bytes,
|
||||
unpad_pkcs7,
|
||||
)
|
||||
from .compat import functools
|
||||
from .dependencies import (
|
||||
_SECRETSTORAGE_UNAVAILABLE_REASON,
|
||||
secretstorage,
|
||||
@ -28,36 +32,24 @@ from .dependencies import (
|
||||
from .minicurses import MultilinePrinter, QuietMultilinePrinter
|
||||
from .utils import (
|
||||
Popen,
|
||||
YoutubeDLCookieJar,
|
||||
error_to_str,
|
||||
expand_path,
|
||||
is_path_like,
|
||||
sanitize_url,
|
||||
str_or_none,
|
||||
try_call,
|
||||
write_string,
|
||||
)
|
||||
from .utils._utils import _YDLLogger
|
||||
from .utils.networking import normalize_url
|
||||
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
||||
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
|
||||
|
||||
|
||||
class YDLLogger:
|
||||
def __init__(self, ydl=None):
|
||||
self._ydl = ydl
|
||||
|
||||
def debug(self, message):
|
||||
if self._ydl:
|
||||
self._ydl.write_debug(message)
|
||||
|
||||
def info(self, message):
|
||||
if self._ydl:
|
||||
self._ydl.to_screen(f'[Cookies] {message}')
|
||||
|
||||
def warning(self, message, only_once=False):
|
||||
if self._ydl:
|
||||
self._ydl.report_warning(message, only_once)
|
||||
|
||||
def error(self, message):
|
||||
if self._ydl:
|
||||
self._ydl.report_error(message)
|
||||
class YDLLogger(_YDLLogger):
|
||||
def warning(self, message, only_once=False): # compat
|
||||
return super().warning(message, once=only_once)
|
||||
|
||||
class ProgressBar(MultilinePrinter):
|
||||
_DELAY, _timer = 0.1, 0
|
||||
@ -105,7 +97,7 @@ def load_cookies(cookie_file, browser_specification, ydl):
|
||||
|
||||
jar = YoutubeDLCookieJar(cookie_file)
|
||||
if not is_filename or os.access(cookie_file, os.R_OK):
|
||||
jar.load(ignore_discard=True, ignore_expires=True)
|
||||
jar.load()
|
||||
cookie_jars.append(jar)
|
||||
|
||||
return _merge_cookie_jars(cookie_jars)
|
||||
@ -146,7 +138,7 @@ def _extract_firefox_cookies(profile, container, logger):
|
||||
containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
|
||||
if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
|
||||
raise FileNotFoundError(f'could not read containers.json in {search_root}')
|
||||
with open(containers_path) as containers:
|
||||
with open(containers_path, encoding='utf8') as containers:
|
||||
identities = json.load(containers).get('identities', [])
|
||||
container_id = next((context.get('userContextId') for context in identities if container in (
|
||||
context.get('name'),
|
||||
@ -346,7 +338,9 @@ class ChromeCookieDecryptor:
|
||||
Linux:
|
||||
- cookies are either v10 or v11
|
||||
- v10: AES-CBC encrypted with a fixed key
|
||||
- also attempts empty password if decryption fails
|
||||
- v11: AES-CBC encrypted with an OS protected key (keyring)
|
||||
- also attempts empty password if decryption fails
|
||||
- v11 keys can be stored in various places depending on the activate desktop environment [2]
|
||||
|
||||
Mac:
|
||||
@ -361,7 +355,7 @@ class ChromeCookieDecryptor:
|
||||
|
||||
Sources:
|
||||
- [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
|
||||
- [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
|
||||
- [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
|
||||
- KeyStorageLinux::CreateService
|
||||
"""
|
||||
|
||||
@ -383,32 +377,49 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
def __init__(self, browser_keyring_name, logger, *, keyring=None):
|
||||
self._logger = logger
|
||||
self._v10_key = self.derive_key(b'peanuts')
|
||||
password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
|
||||
self._v11_key = None if password is None else self.derive_key(password)
|
||||
self._empty_key = self.derive_key(b'')
|
||||
self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
|
||||
self._browser_keyring_name = browser_keyring_name
|
||||
self._keyring = keyring
|
||||
|
||||
@functools.cached_property
|
||||
def _v11_key(self):
|
||||
password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
|
||||
return None if password is None else self.derive_key(password)
|
||||
|
||||
@staticmethod
|
||||
def derive_key(password):
|
||||
# values from
|
||||
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
|
||||
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
|
||||
return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)
|
||||
|
||||
def decrypt(self, encrypted_value):
|
||||
"""
|
||||
|
||||
following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
|
||||
with an empty password. The failure detection is not the same as what chromium uses so the
|
||||
results won't be perfect
|
||||
|
||||
References:
|
||||
- [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
|
||||
- a bugfix to try an empty password as a fallback
|
||||
"""
|
||||
version = encrypted_value[:3]
|
||||
ciphertext = encrypted_value[3:]
|
||||
|
||||
if version == b'v10':
|
||||
self._cookie_counts['v10'] += 1
|
||||
return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
|
||||
return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)
|
||||
|
||||
elif version == b'v11':
|
||||
self._cookie_counts['v11'] += 1
|
||||
if self._v11_key is None:
|
||||
self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
|
||||
return None
|
||||
return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)
|
||||
return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger)
|
||||
|
||||
else:
|
||||
self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
|
||||
self._cookie_counts['other'] += 1
|
||||
return None
|
||||
|
||||
@ -423,7 +434,7 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
@staticmethod
|
||||
def derive_key(password):
|
||||
# values from
|
||||
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
|
||||
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
|
||||
return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)
|
||||
|
||||
def decrypt(self, encrypted_value):
|
||||
@ -436,12 +447,12 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
|
||||
return None
|
||||
|
||||
return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
|
||||
return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger)
|
||||
|
||||
else:
|
||||
self._cookie_counts['other'] += 1
|
||||
# other prefixes are considered 'old data' which were stored as plaintext
|
||||
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
|
||||
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
|
||||
return encrypted_value
|
||||
|
||||
|
||||
@ -461,7 +472,7 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
|
||||
return None
|
||||
|
||||
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
|
||||
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
|
||||
# kNonceLength
|
||||
nonce_length = 96 // 8
|
||||
# boringssl
|
||||
@ -478,16 +489,20 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
else:
|
||||
self._cookie_counts['other'] += 1
|
||||
# any other prefix means the data is DPAPI encrypted
|
||||
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
|
||||
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
|
||||
return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()
|
||||
|
||||
|
||||
def _extract_safari_cookies(profile, logger):
|
||||
if profile is not None:
|
||||
logger.error('safari does not support profiles')
|
||||
if sys.platform != 'darwin':
|
||||
raise ValueError(f'unsupported platform: {sys.platform}')
|
||||
|
||||
if profile:
|
||||
cookies_path = os.path.expanduser(profile)
|
||||
if not os.path.isfile(cookies_path):
|
||||
raise FileNotFoundError('custom safari cookies database not found')
|
||||
|
||||
else:
|
||||
cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
|
||||
|
||||
if not os.path.isfile(cookies_path):
|
||||
@ -657,19 +672,27 @@ class _LinuxDesktopEnvironment(Enum):
|
||||
"""
|
||||
OTHER = auto()
|
||||
CINNAMON = auto()
|
||||
DEEPIN = auto()
|
||||
GNOME = auto()
|
||||
KDE = auto()
|
||||
KDE3 = auto()
|
||||
KDE4 = auto()
|
||||
KDE5 = auto()
|
||||
KDE6 = auto()
|
||||
PANTHEON = auto()
|
||||
UKUI = auto()
|
||||
UNITY = auto()
|
||||
XFCE = auto()
|
||||
LXQT = auto()
|
||||
|
||||
|
||||
class _LinuxKeyring(Enum):
|
||||
"""
|
||||
https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
|
||||
https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
|
||||
SelectedLinuxBackend
|
||||
"""
|
||||
KWALLET = auto()
|
||||
KWALLET = auto() # KDE4
|
||||
KWALLET5 = auto()
|
||||
KWALLET6 = auto()
|
||||
GNOMEKEYRING = auto()
|
||||
BASICTEXT = auto()
|
||||
|
||||
@ -677,7 +700,7 @@ class _LinuxKeyring(Enum):
|
||||
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
|
||||
|
||||
|
||||
def _get_linux_desktop_environment(env):
|
||||
def _get_linux_desktop_environment(env, logger):
|
||||
"""
|
||||
https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
|
||||
GetDesktopEnvironment
|
||||
@ -692,51 +715,97 @@ def _get_linux_desktop_environment(env):
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.UNITY
|
||||
elif xdg_current_desktop == 'Deepin':
|
||||
return _LinuxDesktopEnvironment.DEEPIN
|
||||
elif xdg_current_desktop == 'GNOME':
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
elif xdg_current_desktop == 'X-Cinnamon':
|
||||
return _LinuxDesktopEnvironment.CINNAMON
|
||||
elif xdg_current_desktop == 'KDE':
|
||||
return _LinuxDesktopEnvironment.KDE
|
||||
kde_version = env.get('KDE_SESSION_VERSION', None)
|
||||
if kde_version == '5':
|
||||
return _LinuxDesktopEnvironment.KDE5
|
||||
elif kde_version == '6':
|
||||
return _LinuxDesktopEnvironment.KDE6
|
||||
elif kde_version == '4':
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
else:
|
||||
logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
elif xdg_current_desktop == 'Pantheon':
|
||||
return _LinuxDesktopEnvironment.PANTHEON
|
||||
elif xdg_current_desktop == 'XFCE':
|
||||
return _LinuxDesktopEnvironment.XFCE
|
||||
elif xdg_current_desktop == 'UKUI':
|
||||
return _LinuxDesktopEnvironment.UKUI
|
||||
elif xdg_current_desktop == 'LXQt':
|
||||
return _LinuxDesktopEnvironment.LXQT
|
||||
else:
|
||||
logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
|
||||
|
||||
elif desktop_session is not None:
|
||||
if desktop_session in ('mate', 'gnome'):
|
||||
if desktop_session == 'deepin':
|
||||
return _LinuxDesktopEnvironment.DEEPIN
|
||||
elif desktop_session in ('mate', 'gnome'):
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
elif 'kde' in desktop_session:
|
||||
return _LinuxDesktopEnvironment.KDE
|
||||
elif 'xfce' in desktop_session:
|
||||
elif desktop_session in ('kde4', 'kde-plasma'):
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
elif desktop_session == 'kde':
|
||||
if 'KDE_SESSION_VERSION' in env:
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.KDE3
|
||||
elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
|
||||
return _LinuxDesktopEnvironment.XFCE
|
||||
elif desktop_session == 'ukui':
|
||||
return _LinuxDesktopEnvironment.UKUI
|
||||
else:
|
||||
logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')
|
||||
|
||||
else:
|
||||
if 'GNOME_DESKTOP_SESSION_ID' in env:
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
elif 'KDE_FULL_SESSION' in env:
|
||||
return _LinuxDesktopEnvironment.KDE
|
||||
if 'KDE_SESSION_VERSION' in env:
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.KDE3
|
||||
return _LinuxDesktopEnvironment.OTHER
|
||||
|
||||
|
||||
def _choose_linux_keyring(logger):
|
||||
"""
|
||||
https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
|
||||
SelectBackend
|
||||
SelectBackend in [1]
|
||||
|
||||
There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
|
||||
`Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
|
||||
does not appear to be called anywhere other than in tests, so the user would have to create this file manually
|
||||
and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.
|
||||
|
||||
References:
|
||||
- [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
|
||||
"""
|
||||
desktop_environment = _get_linux_desktop_environment(os.environ)
|
||||
desktop_environment = _get_linux_desktop_environment(os.environ, logger)
|
||||
logger.debug(f'detected desktop environment: {desktop_environment.name}')
|
||||
if desktop_environment == _LinuxDesktopEnvironment.KDE:
|
||||
if desktop_environment == _LinuxDesktopEnvironment.KDE4:
|
||||
linux_keyring = _LinuxKeyring.KWALLET
|
||||
elif desktop_environment == _LinuxDesktopEnvironment.OTHER:
|
||||
elif desktop_environment == _LinuxDesktopEnvironment.KDE5:
|
||||
linux_keyring = _LinuxKeyring.KWALLET5
|
||||
elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
|
||||
linux_keyring = _LinuxKeyring.KWALLET6
|
||||
elif desktop_environment in (
|
||||
_LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
|
||||
):
|
||||
linux_keyring = _LinuxKeyring.BASICTEXT
|
||||
else:
|
||||
linux_keyring = _LinuxKeyring.GNOMEKEYRING
|
||||
return linux_keyring
|
||||
|
||||
|
||||
def _get_kwallet_network_wallet(logger):
|
||||
def _get_kwallet_network_wallet(keyring, logger):
|
||||
""" The name of the wallet used to store network passwords.
|
||||
|
||||
https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
|
||||
https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
|
||||
KWalletDBus::NetworkWallet
|
||||
which does a dbus call to the following function:
|
||||
https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
|
||||
@ -744,10 +813,22 @@ def _get_kwallet_network_wallet(logger):
|
||||
"""
|
||||
default_wallet = 'kdewallet'
|
||||
try:
|
||||
if keyring == _LinuxKeyring.KWALLET:
|
||||
service_name = 'org.kde.kwalletd'
|
||||
wallet_path = '/modules/kwalletd'
|
||||
elif keyring == _LinuxKeyring.KWALLET5:
|
||||
service_name = 'org.kde.kwalletd5'
|
||||
wallet_path = '/modules/kwalletd5'
|
||||
elif keyring == _LinuxKeyring.KWALLET6:
|
||||
service_name = 'org.kde.kwalletd6'
|
||||
wallet_path = '/modules/kwalletd6'
|
||||
else:
|
||||
raise ValueError(keyring)
|
||||
|
||||
stdout, _, returncode = Popen.run([
|
||||
'dbus-send', '--session', '--print-reply=literal',
|
||||
'--dest=org.kde.kwalletd5',
|
||||
'/modules/kwalletd5',
|
||||
f'--dest={service_name}',
|
||||
wallet_path,
|
||||
'org.kde.KWallet.networkWallet'
|
||||
], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
|
||||
|
||||
@ -762,8 +843,8 @@ def _get_kwallet_network_wallet(logger):
|
||||
return default_wallet
|
||||
|
||||
|
||||
def _get_kwallet_password(browser_keyring_name, logger):
|
||||
logger.debug('using kwallet-query to obtain password from kwallet')
|
||||
def _get_kwallet_password(browser_keyring_name, keyring, logger):
|
||||
logger.debug(f'using kwallet-query to obtain password from {keyring.name}')
|
||||
|
||||
if shutil.which('kwallet-query') is None:
|
||||
logger.error('kwallet-query command not found. KWallet and kwallet-query '
|
||||
@ -771,7 +852,7 @@ def _get_kwallet_password(browser_keyring_name, logger):
|
||||
'included in the kwallet package for your distribution')
|
||||
return b''
|
||||
|
||||
network_wallet = _get_kwallet_network_wallet(logger)
|
||||
network_wallet = _get_kwallet_network_wallet(keyring, logger)
|
||||
|
||||
try:
|
||||
stdout, _, returncode = Popen.run([
|
||||
@ -793,8 +874,9 @@ def _get_kwallet_password(browser_keyring_name, logger):
|
||||
# checks hasEntry. To verify this:
|
||||
# dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
|
||||
# while starting chrome.
|
||||
# this may be a bug as the intended behaviour is to generate a random password and store
|
||||
# it, but that doesn't matter here.
|
||||
# this was identified as a bug later and fixed in
|
||||
# https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
|
||||
# https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
|
||||
return b''
|
||||
else:
|
||||
logger.debug('password found')
|
||||
@ -832,8 +914,8 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
|
||||
keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
|
||||
logger.debug(f'Chosen keyring: {keyring.name}')
|
||||
|
||||
if keyring == _LinuxKeyring.KWALLET:
|
||||
return _get_kwallet_password(browser_keyring_name, logger)
|
||||
if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
|
||||
return _get_kwallet_password(browser_keyring_name, keyring, logger)
|
||||
elif keyring == _LinuxKeyring.GNOMEKEYRING:
|
||||
return _get_gnome_keyring_password(browser_keyring_name, logger)
|
||||
elif keyring == _LinuxKeyring.BASICTEXT:
|
||||
@ -861,6 +943,10 @@ def _get_mac_keyring_password(browser_keyring_name, logger):
|
||||
|
||||
|
||||
def _get_windows_v10_key(browser_root, logger):
|
||||
"""
|
||||
References:
|
||||
- [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
|
||||
"""
|
||||
path = _find_most_recently_used_file(browser_root, 'Local State', logger)
|
||||
if path is None:
|
||||
logger.error('could not find local state file')
|
||||
@ -869,11 +955,13 @@ def _get_windows_v10_key(browser_root, logger):
|
||||
with open(path, encoding='utf8') as f:
|
||||
data = json.load(f)
|
||||
try:
|
||||
# kOsCryptEncryptedKeyPrefName in [1]
|
||||
base64_key = data['os_crypt']['encrypted_key']
|
||||
except KeyError:
|
||||
logger.error('no encrypted key in Local State')
|
||||
return None
|
||||
encrypted_key = base64.b64decode(base64_key)
|
||||
# kDPAPIKeyPrefix in [1]
|
||||
prefix = b'DPAPI'
|
||||
if not encrypted_key.startswith(prefix):
|
||||
logger.error('invalid key')
|
||||
@ -885,11 +973,13 @@ def pbkdf2_sha1(password, salt, iterations, key_length):
|
||||
return pbkdf2_hmac('sha1', password, salt, iterations, key_length)
|
||||
|
||||
|
||||
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
|
||||
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
|
||||
for key in keys:
|
||||
plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
|
||||
try:
|
||||
return plaintext.decode()
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
|
||||
return None
|
||||
|
||||
@ -1085,3 +1175,150 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
|
||||
else:
|
||||
morsel = None
|
||||
|
||||
|
||||
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
|
||||
"""
|
||||
See [1] for cookie file format.
|
||||
|
||||
1. https://curl.haxx.se/docs/http-cookies.html
|
||||
"""
|
||||
_HTTPONLY_PREFIX = '#HttpOnly_'
|
||||
_ENTRY_LEN = 7
|
||||
_HEADER = '''# Netscape HTTP Cookie File
|
||||
# This file is generated by yt-dlp. Do not edit.
|
||||
|
||||
'''
|
||||
_CookieFileEntry = collections.namedtuple(
|
||||
'CookieFileEntry',
|
||||
('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
|
||||
|
||||
def __init__(self, filename=None, *args, **kwargs):
|
||||
super().__init__(None, *args, **kwargs)
|
||||
if is_path_like(filename):
|
||||
filename = os.fspath(filename)
|
||||
self.filename = filename
|
||||
|
||||
@staticmethod
|
||||
def _true_or_false(cndn):
|
||||
return 'TRUE' if cndn else 'FALSE'
|
||||
|
||||
@contextlib.contextmanager
|
||||
def open(self, file, *, write=False):
|
||||
if is_path_like(file):
|
||||
with open(file, 'w' if write else 'r', encoding='utf-8') as f:
|
||||
yield f
|
||||
else:
|
||||
if write:
|
||||
file.truncate(0)
|
||||
yield file
|
||||
|
||||
def _really_save(self, f, ignore_discard, ignore_expires):
|
||||
now = time.time()
|
||||
for cookie in self:
|
||||
if (not ignore_discard and cookie.discard
|
||||
or not ignore_expires and cookie.is_expired(now)):
|
||||
continue
|
||||
name, value = cookie.name, cookie.value
|
||||
if value is None:
|
||||
# cookies.txt regards 'Set-Cookie: foo' as a cookie
|
||||
# with no name, whereas http.cookiejar regards it as a
|
||||
# cookie with no value.
|
||||
name, value = '', name
|
||||
f.write('%s\n' % '\t'.join((
|
||||
cookie.domain,
|
||||
self._true_or_false(cookie.domain.startswith('.')),
|
||||
cookie.path,
|
||||
self._true_or_false(cookie.secure),
|
||||
str_or_none(cookie.expires, default=''),
|
||||
name, value
|
||||
)))
|
||||
|
||||
def save(self, filename=None, ignore_discard=True, ignore_expires=True):
|
||||
"""
|
||||
Save cookies to a file.
|
||||
Code is taken from CPython 3.6
|
||||
https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
|
||||
|
||||
if filename is None:
|
||||
if self.filename is not None:
|
||||
filename = self.filename
|
||||
else:
|
||||
raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
|
||||
|
||||
# Store session cookies with `expires` set to 0 instead of an empty string
|
||||
for cookie in self:
|
||||
if cookie.expires is None:
|
||||
cookie.expires = 0
|
||||
|
||||
with self.open(filename, write=True) as f:
|
||||
f.write(self._HEADER)
|
||||
self._really_save(f, ignore_discard, ignore_expires)
|
||||
|
||||
def load(self, filename=None, ignore_discard=True, ignore_expires=True):
|
||||
"""Load cookies from a file."""
|
||||
if filename is None:
|
||||
if self.filename is not None:
|
||||
filename = self.filename
|
||||
else:
|
||||
raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
|
||||
|
||||
def prepare_line(line):
|
||||
if line.startswith(self._HTTPONLY_PREFIX):
|
||||
line = line[len(self._HTTPONLY_PREFIX):]
|
||||
# comments and empty lines are fine
|
||||
if line.startswith('#') or not line.strip():
|
||||
return line
|
||||
cookie_list = line.split('\t')
|
||||
if len(cookie_list) != self._ENTRY_LEN:
|
||||
raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
|
||||
cookie = self._CookieFileEntry(*cookie_list)
|
||||
if cookie.expires_at and not cookie.expires_at.isdigit():
|
||||
raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
|
||||
return line
|
||||
|
||||
cf = io.StringIO()
|
||||
with self.open(filename) as f:
|
||||
for line in f:
|
||||
try:
|
||||
cf.write(prepare_line(line))
|
||||
except http.cookiejar.LoadError as e:
|
||||
if f'{line.strip()} '[0] in '[{"':
|
||||
raise http.cookiejar.LoadError(
|
||||
'Cookies file must be Netscape formatted, not JSON. See '
|
||||
'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
|
||||
write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
|
||||
continue
|
||||
cf.seek(0)
|
||||
self._really_load(cf, filename, ignore_discard, ignore_expires)
|
||||
# Session cookies are denoted by either `expires` field set to
|
||||
# an empty string or 0. MozillaCookieJar only recognizes the former
|
||||
# (see [1]). So we need force the latter to be recognized as session
|
||||
# cookies on our own.
|
||||
# Session cookies may be important for cookies-based authentication,
|
||||
# e.g. usually, when user does not check 'Remember me' check box while
|
||||
# logging in on a site, some important cookies are stored as session
|
||||
# cookies so that not recognizing them will result in failed login.
|
||||
# 1. https://bugs.python.org/issue17164
|
||||
for cookie in self:
|
||||
# Treat `expires=0` cookies as session cookies
|
||||
if cookie.expires == 0:
|
||||
cookie.expires = None
|
||||
cookie.discard = True
|
||||
|
||||
def get_cookie_header(self, url):
|
||||
"""Generate a Cookie HTTP header for a given url"""
|
||||
cookie_req = urllib.request.Request(normalize_url(sanitize_url(url)))
|
||||
self.add_cookie_header(cookie_req)
|
||||
return cookie_req.get_header('Cookie')
|
||||
|
||||
def get_cookies_for_url(self, url):
|
||||
"""Generate a list of Cookie objects for a given url"""
|
||||
# Policy `_now` attribute must be set before calling `_cookies_for_request`
|
||||
# Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
|
||||
self._policy._now = self._now = int(time.time())
|
||||
return self._cookies_for_request(urllib.request.Request(normalize_url(sanitize_url(url))))
|
||||
|
||||
def clear(self, *args, **kwargs):
|
||||
with contextlib.suppress(KeyError):
|
||||
return super().clear(*args, **kwargs)
|
||||
|
@ -1,6 +1,3 @@
|
||||
import types
|
||||
|
||||
from ..compat import functools
|
||||
from ..compat.compat_utils import passthrough_module
|
||||
|
||||
try:
|
||||
@ -9,22 +6,33 @@ except ImportError:
|
||||
try:
|
||||
import Crypto as _parent
|
||||
except (ImportError, SyntaxError): # Old Crypto gives SyntaxError in newer Python
|
||||
_parent = types.ModuleType('no_Cryptodome')
|
||||
_parent = passthrough_module(__name__, 'no_Cryptodome')
|
||||
__bool__ = lambda: False
|
||||
|
||||
passthrough_module(__name__, _parent, (..., '__version__'))
|
||||
del passthrough_module
|
||||
|
||||
__version__ = ''
|
||||
AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None
|
||||
try:
|
||||
if _parent.__name__ == 'Cryptodome':
|
||||
from Cryptodome import __version__
|
||||
from Cryptodome.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5
|
||||
from Cryptodome.Hash import CMAC, SHA1
|
||||
from Cryptodome.PublicKey import RSA
|
||||
elif _parent.__name__ == 'Crypto':
|
||||
from Crypto import __version__
|
||||
from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401
|
||||
from Crypto.Hash import CMAC, SHA1 # noqa: F401
|
||||
from Crypto.PublicKey import RSA # noqa: F401
|
||||
except ImportError:
|
||||
__version__ = f'broken {__version__}'.strip()
|
||||
|
||||
@property
|
||||
@functools.cache
|
||||
def _yt_dlp__identifier():
|
||||
if _parent.__name__ == 'Crypto':
|
||||
from Crypto.Cipher import AES
|
||||
|
||||
_yt_dlp__identifier = _parent.__name__
|
||||
if AES and _yt_dlp__identifier == 'Crypto':
|
||||
try:
|
||||
# In pycrypto, mode defaults to ECB. See:
|
||||
# https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode
|
||||
AES.new(b'abcdefghijklmnop')
|
||||
except TypeError:
|
||||
return 'pycrypto'
|
||||
return _parent.__name__
|
||||
_yt_dlp__identifier = 'pycrypto'
|
||||
|
@ -73,7 +73,7 @@ available_dependencies = {k: v for k, v in all_dependencies.items() if v}
|
||||
|
||||
|
||||
# Deprecated
|
||||
Cryptodome_AES = Cryptodome.Cipher.AES if Cryptodome else None
|
||||
Cryptodome_AES = Cryptodome.AES
|
||||
|
||||
|
||||
__all__ = [
|
||||
|
@ -30,7 +30,7 @@ from .hls import HlsFD
|
||||
from .http import HttpFD
|
||||
from .ism import IsmFD
|
||||
from .mhtml import MhtmlFD
|
||||
from .niconico import NiconicoDmcFD
|
||||
from .niconico import NiconicoDmcFD, NiconicoLiveFD
|
||||
from .rtmp import RtmpFD
|
||||
from .rtsp import RtspFD
|
||||
from .websocket import WebSocketFragmentFD
|
||||
@ -50,6 +50,7 @@ PROTOCOL_MAP = {
|
||||
'ism': IsmFD,
|
||||
'mhtml': MhtmlFD,
|
||||
'niconico_dmc': NiconicoDmcFD,
|
||||
'niconico_live': NiconicoLiveFD,
|
||||
'fc2_live': FC2LiveFD,
|
||||
'websocket_frag': WebSocketFragmentFD,
|
||||
'youtube_live_chat': YoutubeLiveChatFD,
|
||||
|
@ -49,10 +49,10 @@ class FileDownloader:
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
ratelimit: Download speed limit, in bytes/sec.
|
||||
continuedl: Attempt to continue downloads if possible
|
||||
throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
|
||||
retries: Number of times to retry for HTTP error 5xx
|
||||
file_access_retries: Number of times to retry on file access error
|
||||
retries: Number of times to retry for expected network errors.
|
||||
Default is 0 for API, but 10 for CLI
|
||||
file_access_retries: Number of times to retry on file access error (default: 3)
|
||||
buffersize: Size of download buffer in bytes.
|
||||
noresizebuffer: Do not automatically resize the download buffer.
|
||||
continuedl: Try to continue downloads if possible.
|
||||
@ -138,17 +138,21 @@ class FileDownloader:
|
||||
def format_percent(percent):
|
||||
return ' N/A%' if percent is None else f'{percent:>5.1f}%'
|
||||
|
||||
@staticmethod
|
||||
def calc_eta(start, now, total, current):
|
||||
@classmethod
|
||||
def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT):
|
||||
if total is NO_DEFAULT:
|
||||
rate, remaining = start_or_rate, now_or_remaining
|
||||
if None in (rate, remaining):
|
||||
return None
|
||||
return int(float(remaining) / rate)
|
||||
|
||||
start, now = start_or_rate, now_or_remaining
|
||||
if total is None:
|
||||
return None
|
||||
if now is None:
|
||||
now = time.time()
|
||||
dif = now - start
|
||||
if current == 0 or dif < 0.001: # One millisecond
|
||||
return None
|
||||
rate = float(current) / dif
|
||||
return int((float(total) - float(current)) / rate)
|
||||
rate = cls.calc_speed(start, now, current)
|
||||
return rate and int((float(total) - float(current)) / rate)
|
||||
|
||||
@staticmethod
|
||||
def calc_speed(start, now, bytes):
|
||||
@ -165,6 +169,12 @@ class FileDownloader:
|
||||
def format_retries(retries):
|
||||
return 'inf' if retries == float('inf') else int(retries)
|
||||
|
||||
@staticmethod
|
||||
def filesize_or_none(unencoded_filename):
|
||||
if os.path.isfile(unencoded_filename):
|
||||
return os.path.getsize(unencoded_filename)
|
||||
return 0
|
||||
|
||||
@staticmethod
|
||||
def best_block_size(elapsed_time, bytes):
|
||||
new_min = max(bytes / 2.0, 1.0)
|
||||
@ -225,7 +235,7 @@ class FileDownloader:
|
||||
sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))
|
||||
|
||||
def wrapper(self, func, *args, **kwargs):
|
||||
for retry in RetryManager(self.params.get('file_access_retries'), error_callback, fd=self):
|
||||
for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self):
|
||||
try:
|
||||
return func(self, *args, **kwargs)
|
||||
except OSError as err:
|
||||
@ -245,6 +255,7 @@ class FileDownloader:
|
||||
|
||||
@wrap_file_access('remove')
|
||||
def try_remove(self, filename):
|
||||
if os.path.isfile(filename):
|
||||
os.remove(filename)
|
||||
|
||||
@wrap_file_access('rename')
|
||||
@ -285,7 +296,8 @@ class FileDownloader:
|
||||
self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
|
||||
else:
|
||||
self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
|
||||
self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')
|
||||
self._multiline.allow_colors = self.ydl._allow_colors.out and self.ydl._allow_colors.out != 'no_color'
|
||||
self._multiline._HAVE_FULLCAP = self.ydl._allow_colors.out
|
||||
|
||||
def _finish_multiline_status(self):
|
||||
self._multiline.end()
|
||||
@ -407,7 +419,6 @@ class FileDownloader:
|
||||
"""Download to a filename using the info from info_dict
|
||||
Return True on success and False otherwise
|
||||
"""
|
||||
|
||||
nooverwrites_and_exists = (
|
||||
not self.params.get('overwrites', True)
|
||||
and os.path.exists(encodeFilename(filename))
|
||||
|
@ -1,14 +1,16 @@
|
||||
import enum
|
||||
import json
|
||||
import os.path
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import functools
|
||||
from ..networking import Request
|
||||
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
|
||||
from ..utils import (
|
||||
Popen,
|
||||
@ -23,9 +25,7 @@ from ..utils import (
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
find_available_port,
|
||||
handle_youtubedl_headers,
|
||||
remove_end,
|
||||
sanitized_Request,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
@ -43,6 +43,7 @@ class ExternalFD(FragmentFD):
|
||||
def real_download(self, filename, info_dict):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
self._cookies_tempfile = None
|
||||
|
||||
try:
|
||||
started = time.time()
|
||||
@ -55,6 +56,9 @@ class ExternalFD(FragmentFD):
|
||||
# should take place
|
||||
retval = 0
|
||||
self.to_screen('[%s] Interrupted by user' % self.get_basename())
|
||||
finally:
|
||||
if self._cookies_tempfile:
|
||||
self.try_remove(self._cookies_tempfile)
|
||||
|
||||
if retval == 0:
|
||||
status = {
|
||||
@ -126,6 +130,16 @@ class ExternalFD(FragmentFD):
|
||||
self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
|
||||
keys, *args, **kwargs)
|
||||
|
||||
def _write_cookies(self):
|
||||
if not self.ydl.cookiejar.filename:
|
||||
tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
|
||||
tmp_cookies.close()
|
||||
self._cookies_tempfile = tmp_cookies.name
|
||||
self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"')
|
||||
# real_download resets _cookies_tempfile; if it's None then save() will write to cookiejar.filename
|
||||
self.ydl.cookiejar.save(self._cookies_tempfile)
|
||||
return self.ydl.cookiejar.filename or self._cookies_tempfile
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
""" Either overwrite this or implement _make_cmd """
|
||||
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
|
||||
@ -176,7 +190,7 @@ class ExternalFD(FragmentFD):
|
||||
return 0
|
||||
|
||||
def _call_process(self, cmd, info_dict):
|
||||
return Popen.run(cmd, text=True, stderr=subprocess.PIPE)
|
||||
return Popen.run(cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None)
|
||||
|
||||
|
||||
class CurlFD(ExternalFD):
|
||||
@ -185,6 +199,9 @@ class CurlFD(ExternalFD):
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
|
||||
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
|
||||
if cookie_header:
|
||||
cmd += ['--cookie', cookie_header]
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', f'{key}: {val}']
|
||||
@ -215,6 +232,9 @@ class AxelFD(ExternalFD):
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['-H', f'{key}: {val}']
|
||||
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
|
||||
if cookie_header:
|
||||
cmd += ['-H', f'Cookie: {cookie_header}', '--max-redirect=0']
|
||||
cmd += self._configuration_args()
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
@ -224,7 +244,9 @@ class WgetFD(ExternalFD):
|
||||
AVAILABLE_OPT = '--version'
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto']
|
||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
|
||||
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
|
||||
cmd += ['--load-cookies', self._write_cookies()]
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', f'{key}: {val}']
|
||||
@ -272,7 +294,7 @@ class Aria2cFD(ExternalFD):
|
||||
return super()._call_downloader(tmpfilename, info_dict)
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-c',
|
||||
cmd = [self.exe, '-c', '--no-conf',
|
||||
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
|
||||
'--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
|
||||
if 'fragments' in info_dict:
|
||||
@ -280,6 +302,8 @@ class Aria2cFD(ExternalFD):
|
||||
else:
|
||||
cmd += ['--min-split-size', '1M']
|
||||
|
||||
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
|
||||
cmd += [f'--load-cookies={self._write_cookies()}']
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', f'{key}: {val}']
|
||||
@ -334,13 +358,12 @@ class Aria2cFD(ExternalFD):
|
||||
'method': method,
|
||||
'params': [f'token:{rpc_secret}', *params],
|
||||
}).encode('utf-8')
|
||||
request = sanitized_Request(
|
||||
request = Request(
|
||||
f'http://localhost:{rpc_port}/jsonrpc',
|
||||
data=d, headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Content-Length': f'{len(d)}',
|
||||
'Ytdl-request-proxy': '__noproxy__',
|
||||
})
|
||||
}, proxies={'all': None})
|
||||
with self.ydl.urlopen(request) as r:
|
||||
resp = json.load(r)
|
||||
assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server'
|
||||
@ -418,6 +441,14 @@ class HttpieFD(ExternalFD):
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += [f'{key}:{val}']
|
||||
|
||||
# httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
|
||||
# If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
|
||||
# 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
|
||||
# 2: https://httpie.io/docs/cli/sessions
|
||||
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
|
||||
if cookie_header:
|
||||
cmd += [f'Cookie:{cookie_header}']
|
||||
return cmd
|
||||
|
||||
|
||||
@ -528,11 +559,16 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
selected_formats = info_dict.get('requested_formats') or [info_dict]
|
||||
for i, fmt in enumerate(selected_formats):
|
||||
if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']):
|
||||
headers_dict = handle_youtubedl_headers(fmt['http_headers'])
|
||||
is_http = re.match(r'^https?://', fmt['url'])
|
||||
cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
|
||||
if cookies:
|
||||
args.extend(['-cookies', ''.join(
|
||||
f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n'
|
||||
for cookie in cookies)])
|
||||
if fmt.get('http_headers') and is_http:
|
||||
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
||||
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
||||
args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in headers_dict.items())])
|
||||
args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())])
|
||||
|
||||
if start_time:
|
||||
args += ['-ss', str(start_time)]
|
||||
|
@ -3,11 +3,11 @@ import io
|
||||
import itertools
|
||||
import struct
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import fix_xml_ampersands, xpath_text
|
||||
|
||||
|
||||
@ -312,7 +312,7 @@ class F4mFD(FragmentFD):
|
||||
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
|
||||
|
||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||
man_url = urlh.geturl()
|
||||
man_url = urlh.url
|
||||
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
|
||||
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
|
||||
# and https://github.com/ytdl-org/youtube-dl/issues/7823)
|
||||
@ -407,8 +407,8 @@ class F4mFD(FragmentFD):
|
||||
if box_type == b'mdat':
|
||||
self._append_fragment(ctx, box_data)
|
||||
break
|
||||
except urllib.error.HTTPError as err:
|
||||
if live and (err.code == 404 or err.code == 410):
|
||||
except HTTPError as err:
|
||||
if live and (err.status == 404 or err.status == 410):
|
||||
# We didn't keep up with the live window. Continue
|
||||
# with the next available fragment.
|
||||
msg = 'Fragment %d unavailable' % frag_i
|
||||
|
@ -1,24 +1,19 @@
|
||||
import concurrent.futures
|
||||
import contextlib
|
||||
import http.client
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import struct
|
||||
import time
|
||||
import urllib.error
|
||||
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..compat import compat_os_name
|
||||
from ..utils import (
|
||||
DownloadError,
|
||||
RetryManager,
|
||||
encodeFilename,
|
||||
sanitized_Request,
|
||||
traverse_obj,
|
||||
)
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import HTTPError, IncompleteRead
|
||||
from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj
|
||||
from ..utils.networking import HTTPHeaderDict
|
||||
|
||||
|
||||
class HttpQuietDownloader(HttpFD):
|
||||
@ -34,8 +29,8 @@ class FragmentFD(FileDownloader):
|
||||
|
||||
Available options:
|
||||
|
||||
fragment_retries: Number of times to retry a fragment for HTTP error (DASH
|
||||
and hlsnative only)
|
||||
fragment_retries: Number of times to retry a fragment for HTTP error
|
||||
(DASH and hlsnative only). Default is 0 for API, but 10 for CLI
|
||||
skip_unavailable_fragments:
|
||||
Skip unavailable fragments (DASH and hlsnative only)
|
||||
keep_fragments: Keep downloaded fragments on disk after downloading is
|
||||
@ -75,7 +70,7 @@ class FragmentFD(FileDownloader):
|
||||
|
||||
def _prepare_url(self, info_dict, url):
|
||||
headers = info_dict.get('http_headers')
|
||||
return sanitized_Request(url, None, headers) if headers else url
|
||||
return Request(url, None, headers) if headers else url
|
||||
|
||||
def _prepare_and_start_frag_download(self, ctx, info_dict):
|
||||
self._prepare_frag_download(ctx)
|
||||
@ -121,6 +116,11 @@ class FragmentFD(FileDownloader):
|
||||
'request_data': request_data,
|
||||
'ctx_id': ctx.get('ctx_id'),
|
||||
}
|
||||
frag_resume_len = 0
|
||||
if ctx['dl'].params.get('continuedl', True):
|
||||
frag_resume_len = self.filesize_or_none(self.temp_name(fragment_filename))
|
||||
fragment_info_dict['frag_resume_len'] = ctx['frag_resume_len'] = frag_resume_len
|
||||
|
||||
success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict)
|
||||
if not success:
|
||||
return False
|
||||
@ -155,9 +155,7 @@ class FragmentFD(FileDownloader):
|
||||
del ctx['fragment_filename_sanitized']
|
||||
|
||||
def _prepare_frag_download(self, ctx):
|
||||
if 'live' not in ctx:
|
||||
ctx['live'] = False
|
||||
if not ctx['live']:
|
||||
if not ctx.setdefault('live', False):
|
||||
total_frags_str = '%d' % ctx['total_frags']
|
||||
ad_frags = ctx.get('ad_frags', 0)
|
||||
if ad_frags:
|
||||
@ -170,15 +168,17 @@ class FragmentFD(FileDownloader):
|
||||
**self.params,
|
||||
'noprogress': True,
|
||||
'test': False,
|
||||
'sleep_interval': 0,
|
||||
'max_sleep_interval': 0,
|
||||
'sleep_interval_subtitles': 0,
|
||||
})
|
||||
tmpfilename = self.temp_name(ctx['filename'])
|
||||
open_mode = 'wb'
|
||||
resume_len = 0
|
||||
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(tmpfilename)):
|
||||
resume_len = self.filesize_or_none(tmpfilename)
|
||||
if resume_len > 0:
|
||||
open_mode = 'ab'
|
||||
resume_len = os.path.getsize(encodeFilename(tmpfilename))
|
||||
|
||||
# Should be initialized before ytdl file check
|
||||
ctx.update({
|
||||
@ -187,7 +187,9 @@ class FragmentFD(FileDownloader):
|
||||
})
|
||||
|
||||
if self.__do_ytdl_file(ctx):
|
||||
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
|
||||
ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
|
||||
continuedl = self.params.get('continuedl', True)
|
||||
if continuedl and ytdl_file_exists:
|
||||
self._read_ytdl_file(ctx)
|
||||
is_corrupt = ctx.get('ytdl_corrupt') is True
|
||||
is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
|
||||
@ -201,7 +203,12 @@ class FragmentFD(FileDownloader):
|
||||
if 'ytdl_corrupt' in ctx:
|
||||
del ctx['ytdl_corrupt']
|
||||
self._write_ytdl_file(ctx)
|
||||
|
||||
else:
|
||||
if not continuedl:
|
||||
if ytdl_file_exists:
|
||||
self._read_ytdl_file(ctx)
|
||||
ctx['fragment_index'] = resume_len = 0
|
||||
self._write_ytdl_file(ctx)
|
||||
assert ctx['fragment_index'] == 0
|
||||
|
||||
@ -274,12 +281,10 @@ class FragmentFD(FileDownloader):
|
||||
else:
|
||||
frag_downloaded_bytes = s['downloaded_bytes']
|
||||
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
||||
if not ctx['live']:
|
||||
state['eta'] = self.calc_eta(
|
||||
start, time_now, estimated_size - resume_len,
|
||||
state['downloaded_bytes'] - resume_len)
|
||||
ctx['speed'] = state['speed'] = self.calc_speed(
|
||||
ctx['fragment_started'], time_now, frag_downloaded_bytes)
|
||||
ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx.get('frag_resume_len', 0))
|
||||
if not ctx['live']:
|
||||
state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
|
||||
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
||||
self._hook_progress(state, info_dict)
|
||||
|
||||
@ -290,14 +295,12 @@ class FragmentFD(FileDownloader):
|
||||
def _finish_frag_download(self, ctx, info_dict):
|
||||
ctx['dest_stream'].close()
|
||||
if self.__do_ytdl_file(ctx):
|
||||
ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
|
||||
if os.path.isfile(ytdl_filename):
|
||||
self.try_remove(ytdl_filename)
|
||||
self.try_remove(self.ytdl_filename(ctx['filename']))
|
||||
elapsed = time.time() - ctx['started']
|
||||
|
||||
to_file = ctx['tmpfilename'] != '-'
|
||||
if to_file:
|
||||
downloaded_bytes = os.path.getsize(encodeFilename(ctx['tmpfilename']))
|
||||
downloaded_bytes = self.filesize_or_none(ctx['tmpfilename'])
|
||||
else:
|
||||
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
|
||||
|
||||
@ -449,7 +452,7 @@ class FragmentFD(FileDownloader):
|
||||
|
||||
frag_index = ctx['fragment_index'] = fragment['frag_index']
|
||||
ctx['last_error'] = None
|
||||
headers = info_dict.get('http_headers', {}).copy()
|
||||
headers = HTTPHeaderDict(info_dict.get('http_headers'))
|
||||
byte_range = fragment.get('byte_range')
|
||||
if byte_range:
|
||||
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
|
||||
@ -466,9 +469,10 @@ class FragmentFD(FileDownloader):
|
||||
for retry in RetryManager(self.params.get('fragment_retries'), error_callback):
|
||||
try:
|
||||
ctx['fragment_count'] = fragment.get('fragment_count')
|
||||
if not self._download_fragment(ctx, fragment['url'], info_dict, headers):
|
||||
if not self._download_fragment(
|
||||
ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')):
|
||||
return
|
||||
except (urllib.error.HTTPError, http.client.IncompleteRead) as err:
|
||||
except (HTTPError, IncompleteRead) as err:
|
||||
retry.error = err
|
||||
continue
|
||||
except DownloadError: # has own retry settings
|
||||
@ -496,7 +500,7 @@ class FragmentFD(FileDownloader):
|
||||
download_fragment(fragment, ctx_copy)
|
||||
return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')
|
||||
|
||||
self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
|
||||
self.report_warning('The download speed shown is only of one thread. This is a known issue')
|
||||
with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
|
||||
try:
|
||||
for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
|
||||
|
@ -28,7 +28,16 @@ class HlsFD(FragmentFD):
|
||||
FD_NAME = 'hlsnative'
|
||||
|
||||
@staticmethod
|
||||
def can_download(manifest, info_dict, allow_unplayable_formats=False):
|
||||
def _has_drm(manifest): # TODO: https://github.com/yt-dlp/yt-dlp/pull/5039
|
||||
return bool(re.search('|'.join((
|
||||
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
|
||||
r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.apple\.streamingkeydelivery"', # Apple FairPlay
|
||||
r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.microsoft\.playready"', # Microsoft PlayReady
|
||||
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
|
||||
)), manifest))
|
||||
|
||||
@classmethod
|
||||
def can_download(cls, manifest, info_dict, allow_unplayable_formats=False):
|
||||
UNSUPPORTED_FEATURES = [
|
||||
# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
||||
|
||||
@ -50,13 +59,15 @@ class HlsFD(FragmentFD):
|
||||
]
|
||||
if not allow_unplayable_formats:
|
||||
UNSUPPORTED_FEATURES += [
|
||||
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
|
||||
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1], but not necessarily DRM
|
||||
]
|
||||
|
||||
def check_results():
|
||||
yield not info_dict.get('is_live')
|
||||
for feature in UNSUPPORTED_FEATURES:
|
||||
yield not re.search(feature, manifest)
|
||||
if not allow_unplayable_formats:
|
||||
yield not cls._has_drm(manifest)
|
||||
return all(check_results())
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
@ -64,13 +75,13 @@ class HlsFD(FragmentFD):
|
||||
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
|
||||
|
||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||
man_url = urlh.geturl()
|
||||
man_url = urlh.url
|
||||
s = urlh.read().decode('utf-8', 'ignore')
|
||||
|
||||
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
|
||||
if can_download:
|
||||
has_ffmpeg = FFmpegFD.available()
|
||||
no_crypto = not Cryptodome and '#EXT-X-KEY:METHOD=AES-128' in s
|
||||
no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s
|
||||
if no_crypto and has_ffmpeg:
|
||||
can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available'
|
||||
elif no_crypto:
|
||||
@ -81,14 +92,13 @@ class HlsFD(FragmentFD):
|
||||
message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
|
||||
f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
|
||||
if not can_download:
|
||||
has_drm = re.search('|'.join([
|
||||
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
|
||||
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
|
||||
]), s)
|
||||
if has_drm and not self.params.get('allow_unplayable_formats'):
|
||||
if self._has_drm(s) and not self.params.get('allow_unplayable_formats'):
|
||||
if info_dict.get('has_drm') and self.params.get('test'):
|
||||
self.to_screen(f'[{self.FD_NAME}] This format is DRM protected', skip_eol=True)
|
||||
else:
|
||||
self.report_error(
|
||||
'This video is DRM protected; Try selecting another format with --format or '
|
||||
'add --check-formats to automatically fallback to the next best format')
|
||||
'This format is DRM protected; Try selecting another format with --format or '
|
||||
'add --check-formats to automatically fallback to the next best format', tb=False)
|
||||
return False
|
||||
message = message or 'Unsupported features have been detected'
|
||||
fd = FFmpegFD(self.ydl, self.params)
|
||||
|
@ -1,12 +1,14 @@
|
||||
import http.client
|
||||
import os
|
||||
import random
|
||||
import socket
|
||||
import ssl
|
||||
import time
|
||||
import urllib.error
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import (
|
||||
CertificateVerifyError,
|
||||
HTTPError,
|
||||
TransportError,
|
||||
)
|
||||
from ..utils import (
|
||||
ContentTooShortError,
|
||||
RetryManager,
|
||||
@ -16,18 +18,10 @@ from ..utils import (
|
||||
encodeFilename,
|
||||
int_or_none,
|
||||
parse_http_range,
|
||||
sanitized_Request,
|
||||
try_call,
|
||||
write_xattr,
|
||||
)
|
||||
|
||||
RESPONSE_READ_EXCEPTIONS = (
|
||||
TimeoutError,
|
||||
socket.timeout, # compat: py < 3.10
|
||||
ConnectionError,
|
||||
ssl.SSLError,
|
||||
http.client.HTTPException
|
||||
)
|
||||
from ..utils.networking import HTTPHeaderDict
|
||||
|
||||
|
||||
class HttpFD(FileDownloader):
|
||||
@ -45,11 +39,8 @@ class HttpFD(FileDownloader):
|
||||
ctx.tmpfilename = self.temp_name(filename)
|
||||
ctx.stream = None
|
||||
|
||||
# Do not include the Accept-Encoding header
|
||||
headers = {'Youtubedl-no-compression': 'True'}
|
||||
add_headers = info_dict.get('http_headers')
|
||||
if add_headers:
|
||||
headers.update(add_headers)
|
||||
# Disable compression
|
||||
headers = HTTPHeaderDict({'Accept-Encoding': 'identity'}, info_dict.get('http_headers'))
|
||||
|
||||
is_test = self.params.get('test', False)
|
||||
chunk_size = self._TEST_FILE_SIZE if is_test else (
|
||||
@ -120,10 +111,10 @@ class HttpFD(FileDownloader):
|
||||
if try_call(lambda: range_end >= ctx.content_len):
|
||||
range_end = ctx.content_len - 1
|
||||
|
||||
request = sanitized_Request(url, request_data, headers)
|
||||
request = Request(url, request_data, headers)
|
||||
has_range = range_start is not None
|
||||
if has_range:
|
||||
request.add_header('Range', f'bytes={int(range_start)}-{int_or_none(range_end) or ""}')
|
||||
request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}'
|
||||
# Establish connection
|
||||
try:
|
||||
ctx.data = self.ydl.urlopen(request)
|
||||
@ -150,20 +141,21 @@ class HttpFD(FileDownloader):
|
||||
# Content-Range is either not present or invalid. Assuming remote webserver is
|
||||
# trying to send the whole file, resume is not possible, so wiping the local file
|
||||
# and performing entire redownload
|
||||
elif range_start > 0:
|
||||
self.report_unable_to_resume()
|
||||
ctx.resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
|
||||
except urllib.error.HTTPError as err:
|
||||
if err.code == 416:
|
||||
ctx.data_len = ctx.content_len = int_or_none(ctx.data.headers.get('Content-length', None))
|
||||
except HTTPError as err:
|
||||
if err.status == 416:
|
||||
# Unable to resume (requested range not satisfiable)
|
||||
try:
|
||||
# Open the connection again without the range header
|
||||
ctx.data = self.ydl.urlopen(
|
||||
sanitized_Request(url, request_data, headers))
|
||||
content_length = ctx.data.info()['Content-Length']
|
||||
except urllib.error.HTTPError as err:
|
||||
if err.code < 500 or err.code >= 600:
|
||||
Request(url, request_data, headers))
|
||||
content_length = ctx.data.headers['Content-Length']
|
||||
except HTTPError as err:
|
||||
if err.status < 500 or err.status >= 600:
|
||||
raise
|
||||
else:
|
||||
# Examine the reported length
|
||||
@ -191,17 +183,13 @@ class HttpFD(FileDownloader):
|
||||
ctx.resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
return
|
||||
elif err.code < 500 or err.code >= 600:
|
||||
elif err.status < 500 or err.status >= 600:
|
||||
# Unexpected HTTP error
|
||||
raise
|
||||
raise RetryDownload(err)
|
||||
except urllib.error.URLError as err:
|
||||
if isinstance(err.reason, ssl.CertificateError):
|
||||
except CertificateVerifyError:
|
||||
raise
|
||||
raise RetryDownload(err)
|
||||
# In urllib.request.AbstractHTTPHandler, the response is partially read on request.
|
||||
# Any errors that occur during this will not be wrapped by URLError
|
||||
except RESPONSE_READ_EXCEPTIONS as err:
|
||||
except TransportError as err:
|
||||
raise RetryDownload(err)
|
||||
|
||||
def close_stream():
|
||||
@ -211,7 +199,12 @@ class HttpFD(FileDownloader):
|
||||
ctx.stream = None
|
||||
|
||||
def download():
|
||||
data_len = ctx.data.info().get('Content-length', None)
|
||||
data_len = ctx.data.headers.get('Content-length')
|
||||
|
||||
if ctx.data.headers.get('Content-encoding'):
|
||||
# Content-encoding is present, Content-length is not reliable anymore as we are
|
||||
# doing auto decompression. (See: https://github.com/yt-dlp/yt-dlp/pull/6176)
|
||||
data_len = None
|
||||
|
||||
# Range HTTP header may be ignored/unsupported by a webserver
|
||||
# (e.g. extractor/scivee.py, extractor/bambuser.py).
|
||||
@ -252,7 +245,7 @@ class HttpFD(FileDownloader):
|
||||
try:
|
||||
# Download and write
|
||||
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
||||
except RESPONSE_READ_EXCEPTIONS as err:
|
||||
except TransportError as err:
|
||||
retry(err)
|
||||
|
||||
byte_counter += len(data_block)
|
||||
@ -333,15 +326,15 @@ class HttpFD(FileDownloader):
|
||||
elif speed:
|
||||
ctx.throttle_start = None
|
||||
|
||||
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
|
||||
ctx.resume_len = byte_counter
|
||||
# ctx.block_size = block_size
|
||||
raise NextFragment()
|
||||
|
||||
if ctx.stream is None:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('Did not get any data blocks')
|
||||
return False
|
||||
|
||||
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
|
||||
ctx.resume_len = byte_counter
|
||||
raise NextFragment()
|
||||
|
||||
if ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
|
||||
@ -353,7 +346,7 @@ class HttpFD(FileDownloader):
|
||||
|
||||
# Update file modification time
|
||||
if self.params.get('updatetime', True):
|
||||
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
|
||||
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None))
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
|
@ -2,9 +2,9 @@ import binascii
|
||||
import io
|
||||
import struct
|
||||
import time
|
||||
import urllib.error
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import RetryManager
|
||||
|
||||
u8 = struct.Struct('>B')
|
||||
@ -271,7 +271,7 @@ class IsmFD(FragmentFD):
|
||||
write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
|
||||
extra_state['ism_track_written'] = True
|
||||
self._append_fragment(ctx, frag_content)
|
||||
except urllib.error.HTTPError as err:
|
||||
except HTTPError as err:
|
||||
retry.error = err
|
||||
continue
|
||||
|
||||
|
@ -1,8 +1,12 @@
|
||||
import json
|
||||
import threading
|
||||
import time
|
||||
|
||||
from . import get_suitable_downloader
|
||||
from .common import FileDownloader
|
||||
from ..utils import sanitized_Request
|
||||
from .external import FFmpegFD
|
||||
from ..networking import Request
|
||||
from ..utils import DownloadError, WebSocketsWrapper, str_or_none, try_get
|
||||
|
||||
|
||||
class NiconicoDmcFD(FileDownloader):
|
||||
@ -24,7 +28,7 @@ class NiconicoDmcFD(FileDownloader):
|
||||
heartbeat_data = heartbeat_info_dict['data'].encode()
|
||||
heartbeat_interval = heartbeat_info_dict.get('interval', 30)
|
||||
|
||||
request = sanitized_Request(heartbeat_url, heartbeat_data)
|
||||
request = Request(heartbeat_url, heartbeat_data)
|
||||
|
||||
def heartbeat():
|
||||
try:
|
||||
@ -50,3 +54,93 @@ class NiconicoDmcFD(FileDownloader):
|
||||
timer[0].cancel()
|
||||
download_complete = True
|
||||
return success
|
||||
|
||||
|
||||
class NiconicoLiveFD(FileDownloader):
|
||||
""" Downloads niconico live without being stopped """
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
video_id = info_dict['video_id']
|
||||
ws_url = info_dict['url']
|
||||
ws_extractor = info_dict['ws']
|
||||
ws_origin_host = info_dict['origin']
|
||||
cookies = info_dict.get('cookies')
|
||||
live_quality = info_dict.get('live_quality', 'high')
|
||||
live_latency = info_dict.get('live_latency', 'high')
|
||||
dl = FFmpegFD(self.ydl, self.params or {})
|
||||
|
||||
new_info_dict = info_dict.copy()
|
||||
new_info_dict.update({
|
||||
'protocol': 'm3u8',
|
||||
})
|
||||
|
||||
def communicate_ws(reconnect):
|
||||
if reconnect:
|
||||
ws = WebSocketsWrapper(ws_url, {
|
||||
'Cookies': str_or_none(cookies) or '',
|
||||
'Origin': f'https://{ws_origin_host}',
|
||||
'Accept': '*/*',
|
||||
'User-Agent': self.params['http_headers']['User-Agent'],
|
||||
})
|
||||
if self.ydl.params.get('verbose', False):
|
||||
self.to_screen('[debug] Sending startWatching request')
|
||||
ws.send(json.dumps({
|
||||
'type': 'startWatching',
|
||||
'data': {
|
||||
'stream': {
|
||||
'quality': live_quality,
|
||||
'protocol': 'hls+fmp4',
|
||||
'latency': live_latency,
|
||||
'chasePlay': False
|
||||
},
|
||||
'room': {
|
||||
'protocol': 'webSocket',
|
||||
'commentable': True
|
||||
},
|
||||
'reconnect': True,
|
||||
}
|
||||
}))
|
||||
else:
|
||||
ws = ws_extractor
|
||||
with ws:
|
||||
while True:
|
||||
recv = ws.recv()
|
||||
if not recv:
|
||||
continue
|
||||
data = json.loads(recv)
|
||||
if not data or not isinstance(data, dict):
|
||||
continue
|
||||
if data.get('type') == 'ping':
|
||||
# pong back
|
||||
ws.send(r'{"type":"pong"}')
|
||||
ws.send(r'{"type":"keepSeat"}')
|
||||
elif data.get('type') == 'disconnect':
|
||||
self.write_debug(data)
|
||||
return True
|
||||
elif data.get('type') == 'error':
|
||||
self.write_debug(data)
|
||||
message = try_get(data, lambda x: x['body']['code'], str) or recv
|
||||
return DownloadError(message)
|
||||
elif self.ydl.params.get('verbose', False):
|
||||
if len(recv) > 100:
|
||||
recv = recv[:100] + '...'
|
||||
self.to_screen('[debug] Server said: %s' % recv)
|
||||
|
||||
def ws_main():
|
||||
reconnect = False
|
||||
while True:
|
||||
try:
|
||||
ret = communicate_ws(reconnect)
|
||||
if ret is True:
|
||||
return
|
||||
except BaseException as e:
|
||||
self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e)))
|
||||
time.sleep(10)
|
||||
continue
|
||||
finally:
|
||||
reconnect = True
|
||||
|
||||
thread = threading.Thread(target=ws_main, daemon=True)
|
||||
thread.start()
|
||||
|
||||
return dl.download(filename, new_info_dict)
|
||||
|
@ -1,8 +1,8 @@
|
||||
import json
|
||||
import time
|
||||
import urllib.error
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
RegexNotFoundError,
|
||||
RetryManager,
|
||||
@ -10,6 +10,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
from ..utils.networking import HTTPHeaderDict
|
||||
|
||||
|
||||
class YoutubeLiveChatFD(FragmentFD):
|
||||
@ -37,10 +38,7 @@ class YoutubeLiveChatFD(FragmentFD):
|
||||
start_time = int(time.time() * 1000)
|
||||
|
||||
def dl_fragment(url, data=None, headers=None):
|
||||
http_headers = info_dict.get('http_headers', {})
|
||||
if headers:
|
||||
http_headers = http_headers.copy()
|
||||
http_headers.update(headers)
|
||||
http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers)
|
||||
return self._download_fragment(ctx, url, info_dict, http_headers, data)
|
||||
|
||||
def parse_actions_replay(live_chat_continuation):
|
||||
@ -129,7 +127,7 @@ class YoutubeLiveChatFD(FragmentFD):
|
||||
or frag_index == 1 and try_refresh_replay_beginning
|
||||
or parse_actions_replay)
|
||||
return (True, *func(live_chat_continuation))
|
||||
except urllib.error.HTTPError as err:
|
||||
except HTTPError as err:
|
||||
retry.error = err
|
||||
continue
|
||||
return False, None, None, None
|
||||
|
@ -15,7 +15,6 @@ from .youtube import ( # Youtube is moved to the top to improve performance
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeMusicSearchURLIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeStoriesIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeYtBeIE,
|
||||
@ -102,6 +101,7 @@ from .americastestkitchen import (
|
||||
AmericasTestKitchenIE,
|
||||
AmericasTestKitchenSeasonIE,
|
||||
)
|
||||
from .anchorfm import AnchorFMEpisodeIE
|
||||
from .angel import AngelIE
|
||||
from .anvato import AnvatoIE
|
||||
from .aol import AolIE
|
||||
@ -203,13 +203,18 @@ from .bfmtv import (
|
||||
BFMTVLiveIE,
|
||||
BFMTVArticleIE,
|
||||
)
|
||||
from .bibeltv import BibelTVIE
|
||||
from .bibeltv import (
|
||||
BibelTVLiveIE,
|
||||
BibelTVSeriesIE,
|
||||
BibelTVVideoIE,
|
||||
)
|
||||
from .bigflix import BigflixIE
|
||||
from .bigo import BigoIE
|
||||
from .bild import BildIE
|
||||
from .bilibili import (
|
||||
BiliBiliIE,
|
||||
BiliBiliBangumiIE,
|
||||
BiliBiliBangumiSeasonIE,
|
||||
BiliBiliBangumiMediaIE,
|
||||
BiliBiliSearchIE,
|
||||
BilibiliCategoryIE,
|
||||
@ -238,19 +243,28 @@ from .bleacherreport import (
|
||||
BleacherReportIE,
|
||||
BleacherReportCMSIE,
|
||||
)
|
||||
from .blerp import BlerpIE
|
||||
from .blogger import BloggerIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
from .bostonglobe import BostonGlobeIE
|
||||
from .box import BoxIE
|
||||
from .booyah import BooyahClipsIE
|
||||
from .boxcast import BoxCastVideoIE
|
||||
from .bpb import BpbIE
|
||||
from .br import (
|
||||
BRIE,
|
||||
BRMediathekIE,
|
||||
)
|
||||
from .bravotv import BravoTVIE
|
||||
from .brainpop import (
|
||||
BrainPOPIE,
|
||||
BrainPOPJrIE,
|
||||
BrainPOPELLIE,
|
||||
BrainPOPEspIE,
|
||||
BrainPOPFrIE,
|
||||
BrainPOPIlIE,
|
||||
)
|
||||
from .breakcom import BreakIE
|
||||
from .breitbart import BreitBartIE
|
||||
from .brightcove import (
|
||||
@ -270,6 +284,10 @@ from .camdemy import (
|
||||
CamdemyIE,
|
||||
CamdemyFolderIE
|
||||
)
|
||||
from .camfm import (
|
||||
CamFMEpisodeIE,
|
||||
CamFMShowIE
|
||||
)
|
||||
from .cammodels import CamModelsIE
|
||||
from .camsoda import CamsodaIE
|
||||
from .camtasia import CamtasiaEmbedIE
|
||||
@ -277,12 +295,6 @@ from .camwithher import CamWithHerIE
|
||||
from .canalalpha import CanalAlphaIE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .canvas import (
|
||||
CanvasIE,
|
||||
CanvasEenIE,
|
||||
VrtNUIE,
|
||||
DagelijkseKostIE,
|
||||
)
|
||||
from .carambatv import (
|
||||
CarambaTVIE,
|
||||
CarambaTVPageIE,
|
||||
@ -295,15 +307,18 @@ from .cbc import (
|
||||
CBCGemPlaylistIE,
|
||||
CBCGemLiveIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbslocal import (
|
||||
CBSLocalIE,
|
||||
CBSLocalArticleIE,
|
||||
from .cbs import (
|
||||
CBSIE,
|
||||
ParamountPressExpressIE,
|
||||
)
|
||||
from .cbsinteractive import CBSInteractiveIE
|
||||
from .cbsnews import (
|
||||
CBSNewsEmbedIE,
|
||||
CBSNewsIE,
|
||||
CBSLocalIE,
|
||||
CBSLocalArticleIE,
|
||||
CBSLocalLiveIE,
|
||||
CBSNewsLiveIE,
|
||||
CBSNewsLiveVideoIE,
|
||||
)
|
||||
from .cbssports import (
|
||||
@ -342,6 +357,7 @@ from .ciscolive import (
|
||||
)
|
||||
from .ciscowebex import CiscoWebexIE
|
||||
from .cjsw import CJSWIE
|
||||
from .clipchamp import ClipchampIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .clippit import ClippitIE
|
||||
from .cliprs import ClipRsIE
|
||||
@ -389,9 +405,12 @@ from .crowdbunker import (
|
||||
CrowdBunkerIE,
|
||||
CrowdBunkerChannelIE,
|
||||
)
|
||||
from .crtvg import CrtvgIE
|
||||
from .crunchyroll import (
|
||||
CrunchyrollBetaIE,
|
||||
CrunchyrollBetaShowIE,
|
||||
CrunchyrollMusicIE,
|
||||
CrunchyrollArtistIE,
|
||||
)
|
||||
from .cspan import CSpanIE, CSpanCongressIE
|
||||
from .ctsnews import CtsNewsIE
|
||||
@ -408,6 +427,10 @@ from .cybrary import (
|
||||
CybraryIE,
|
||||
CybraryCourseIE
|
||||
)
|
||||
from .dacast import (
|
||||
DacastVODIE,
|
||||
DacastPlaylistIE,
|
||||
)
|
||||
from .daftsex import DaftsexIE
|
||||
from .dailymail import DailyMailIE
|
||||
from .dailymotion import (
|
||||
@ -438,6 +461,10 @@ from .deezer import (
|
||||
)
|
||||
from .democracynow import DemocracynowIE
|
||||
from .detik import DetikEmbedIE
|
||||
from .dlf import (
|
||||
DLFIE,
|
||||
DLFCorpusIE,
|
||||
)
|
||||
from .dfb import DFBIE
|
||||
from .dhm import DHMIE
|
||||
from .digg import DiggIE
|
||||
@ -470,6 +497,7 @@ from .dplay import (
|
||||
DiscoveryPlusItalyIE,
|
||||
DiscoveryPlusItalyShowIE,
|
||||
DiscoveryPlusIndiaShowIE,
|
||||
GlobalCyclingNetworkPlusIE,
|
||||
)
|
||||
from .dreisat import DreiSatIE
|
||||
from .drbonanza import DRBonanzaIE
|
||||
@ -493,6 +521,7 @@ from .deuxm import (
|
||||
DeuxMNewsIE
|
||||
)
|
||||
from .digitalconcerthall import DigitalConcertHallIE
|
||||
from .discogs import DiscogsReleasePlaylistIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .disney import DisneyIE
|
||||
from .dispeak import DigitallySpeakingIE
|
||||
@ -507,6 +536,7 @@ from .dw import (
|
||||
)
|
||||
from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE
|
||||
from .ebaumsworld import EbaumsWorldIE
|
||||
from .ebay import EbayIE
|
||||
from .echomsk import EchoMskIE
|
||||
from .egghead import (
|
||||
EggheadCourseIE,
|
||||
@ -516,6 +546,7 @@ from .ehow import EHowIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .einthusan import EinthusanIE
|
||||
from .eitb import EitbIE
|
||||
from .elevensports import ElevenSportsIE
|
||||
from .ellentube import (
|
||||
EllenTubeIE,
|
||||
EllenTubeVideoIE,
|
||||
@ -549,6 +580,7 @@ from .espn import (
|
||||
ESPNCricInfoIE,
|
||||
)
|
||||
from .esri import EsriVideoIE
|
||||
from .ettutv import EttuTvIE
|
||||
from .europa import EuropaIE, EuroParlWebstreamIE
|
||||
from .europeantour import EuropeanTourIE
|
||||
from .eurosport import EurosportIE
|
||||
@ -635,6 +667,7 @@ from .funimation import (
|
||||
FunimationShowIE,
|
||||
)
|
||||
from .funk import FunkIE
|
||||
from .funker530 import Funker530IE
|
||||
from .fusion import FusionIE
|
||||
from .fuyintv import FuyinTVIE
|
||||
from .gab import (
|
||||
@ -670,10 +703,18 @@ from .gfycat import GfycatIE
|
||||
from .giantbomb import GiantBombIE
|
||||
from .giga import GigaIE
|
||||
from .glide import GlideIE
|
||||
from .globalplayer import (
|
||||
GlobalPlayerLiveIE,
|
||||
GlobalPlayerLivePlaylistIE,
|
||||
GlobalPlayerAudioIE,
|
||||
GlobalPlayerAudioEpisodeIE,
|
||||
GlobalPlayerVideoIE
|
||||
)
|
||||
from .globo import (
|
||||
GloboIE,
|
||||
GloboArticleIE,
|
||||
)
|
||||
from .gmanetwork import GMANetworkVideoIE
|
||||
from .go import GoIE
|
||||
from .godtube import GodTubeIE
|
||||
from .gofile import GofileIE
|
||||
@ -705,13 +746,16 @@ from .hearthisat import HearThisAtIE
|
||||
from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hgtv import HGTVComShowIE
|
||||
from .hketv import HKETVIE
|
||||
from .hidive import HiDiveIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hitrecord import HitRecordIE
|
||||
from .hollywoodreporter import (
|
||||
HollywoodReporterIE,
|
||||
HollywoodReporterPlaylistIE,
|
||||
)
|
||||
from .holodex import HolodexIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .hotstar import (
|
||||
@ -723,6 +767,7 @@ from .hotstar import (
|
||||
)
|
||||
from .howcast import HowcastIE
|
||||
from .howstuffworks import HowStuffWorksIE
|
||||
from .hrefli import HrefLiRedirectIE
|
||||
from .hrfensehen import HRFernsehenIE
|
||||
from .hrti import (
|
||||
HRTiIE,
|
||||
@ -745,12 +790,14 @@ from .hungama import (
|
||||
HungamaAlbumPlaylistIE,
|
||||
)
|
||||
from .hypem import HypemIE
|
||||
from .hypergryph import MonsterSirenHypergryphMusicIE
|
||||
from .hytale import HytaleIE
|
||||
from .icareus import IcareusIE
|
||||
from .ichinanalive import (
|
||||
IchinanaLiveIE,
|
||||
IchinanaLiveClipIE,
|
||||
)
|
||||
from .idolplus import IdolPlusIE
|
||||
from .ign import (
|
||||
IGNIE,
|
||||
IGNVideoIE,
|
||||
@ -835,6 +882,7 @@ from .japandiet import (
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jove import JoveIE
|
||||
from .joj import JojIE
|
||||
from .jstream import JStreamIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .kakao import KakaoIE
|
||||
from .kaltura import KalturaIE
|
||||
@ -844,7 +892,6 @@ from .karaoketv import KaraoketvIE
|
||||
from .karrierevideos import KarriereVideosIE
|
||||
from .keezmovies import KeezMoviesIE
|
||||
from .kelbyone import KelbyOneIE
|
||||
from .ketnet import KetnetIE
|
||||
from .khanacademy import (
|
||||
KhanAcademyIE,
|
||||
KhanAcademyUnitIE,
|
||||
@ -857,6 +904,7 @@ from .kicker import KickerIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
from .kommunetv import KommunetvIE
|
||||
from .kompas import KompasVideoIE
|
||||
from .konserthusetplay import KonserthusetPlayIE
|
||||
from .koo import KooIE
|
||||
@ -908,6 +956,10 @@ from .leeco import (
|
||||
LePlaylistIE,
|
||||
LetvCloudIE,
|
||||
)
|
||||
from .lefigaro import (
|
||||
LeFigaroVideoEmbedIE,
|
||||
LeFigaroVideoSectionIE,
|
||||
)
|
||||
from .lego import LEGOIE
|
||||
from .lemonde import LemondeIE
|
||||
from .lenta import LentaIE
|
||||
@ -926,10 +978,6 @@ from .limelight import (
|
||||
LimelightChannelIE,
|
||||
LimelightChannelListIE,
|
||||
)
|
||||
from .line import (
|
||||
LineLiveIE,
|
||||
LineLiveChannelIE,
|
||||
)
|
||||
from .linkedin import (
|
||||
LinkedInIE,
|
||||
LinkedInLearningIE,
|
||||
@ -956,11 +1004,15 @@ from .lrt import (
|
||||
LRTVODIE,
|
||||
LRTStreamIE
|
||||
)
|
||||
from .lumni import (
|
||||
LumniIE
|
||||
)
|
||||
from .lynda import (
|
||||
LyndaIE,
|
||||
LyndaCourseIE
|
||||
)
|
||||
from .m6 import M6IE
|
||||
from .magellantv import MagellanTVIE
|
||||
from .magentamusik360 import MagentaMusik360IE
|
||||
from .mailru import (
|
||||
MailRuIE,
|
||||
@ -1069,7 +1121,8 @@ from .mojvideo import MojvideoIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motherless import (
|
||||
MotherlessIE,
|
||||
MotherlessGroupIE
|
||||
MotherlessGroupIE,
|
||||
MotherlessGalleryIE,
|
||||
)
|
||||
from .motorsport import MotorsportIE
|
||||
from .movieclips import MovieClipsIE
|
||||
@ -1089,6 +1142,7 @@ from .mtv import (
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
from .murrtube import MurrtubeIE, MurrtubeUserIE
|
||||
from .museai import MuseAIIE
|
||||
from .musescore import MuseScoreIE
|
||||
from .musicdex import (
|
||||
MusicdexSongIE,
|
||||
@ -1110,6 +1164,7 @@ from .myvi import (
|
||||
)
|
||||
from .myvideoge import MyVideoGeIE
|
||||
from .myvidster import MyVidsterIE
|
||||
from .mzaalo import MzaaloIE
|
||||
from .n1 import (
|
||||
N1InfoAssetIE,
|
||||
N1InfoIIE,
|
||||
@ -1158,6 +1213,7 @@ from .nebula import (
|
||||
NebulaSubscriptionsIE,
|
||||
NebulaChannelIE,
|
||||
)
|
||||
from .nekohacker import NekoHackerIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .netzkino import NetzkinoIE
|
||||
from .neteasemusic import (
|
||||
@ -1206,6 +1262,9 @@ from .nhk import (
|
||||
NhkForSchoolBangumiIE,
|
||||
NhkForSchoolSubjectIE,
|
||||
NhkForSchoolProgramListIE,
|
||||
NhkRadioNewsPageIE,
|
||||
NhkRadiruIE,
|
||||
NhkRadiruLiveIE,
|
||||
)
|
||||
from .nhl import NHLIE
|
||||
from .nick import (
|
||||
@ -1225,6 +1284,7 @@ from .niconico import (
|
||||
NicovideoSearchIE,
|
||||
NicovideoSearchURLIE,
|
||||
NicovideoTagURLIE,
|
||||
NiconicoLiveIE,
|
||||
)
|
||||
from .ninecninemedia import (
|
||||
NineCNineMediaIE,
|
||||
@ -1282,6 +1342,7 @@ from .nrl import NRLTVIE
|
||||
from .ntvcojp import NTVCoJpCUIE
|
||||
from .ntvde import NTVDeIE
|
||||
from .ntvru import NTVRuIE
|
||||
from .nubilesporn import NubilesPornIE
|
||||
from .nytimes import (
|
||||
NYTimesIE,
|
||||
NYTimesArticleIE,
|
||||
@ -1292,6 +1353,7 @@ from .nzherald import NZHeraldIE
|
||||
from .nzonscreen import NZOnScreenIE
|
||||
from .nzz import NZZIE
|
||||
from .odatv import OdaTVIE
|
||||
from .odkmedia import OnDemandChinaEpisodeIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oftv import (
|
||||
OfTVIE,
|
||||
@ -1332,6 +1394,7 @@ from .orf import (
|
||||
ORFIPTVIE,
|
||||
)
|
||||
from .outsidetv import OutsideTVIE
|
||||
from .owncloud import OwnCloudIE
|
||||
from .packtpub import (
|
||||
PacktPubIE,
|
||||
PacktPubCourseIE,
|
||||
@ -1357,7 +1420,7 @@ from .patreon import (
|
||||
PatreonIE,
|
||||
PatreonCampaignIE
|
||||
)
|
||||
from .pbs import PBSIE
|
||||
from .pbs import PBSIE, PBSKidsIE
|
||||
from .pearvideo import PearVideoIE
|
||||
from .peekvids import PeekVidsIE, PlayVidsIE
|
||||
from .peertube import (
|
||||
@ -1375,6 +1438,7 @@ from .periscope import (
|
||||
PeriscopeIE,
|
||||
PeriscopeUserIE,
|
||||
)
|
||||
from .pgatour import PGATourIE
|
||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
@ -1432,7 +1496,6 @@ from .polskieradio import (
|
||||
PolskieRadioPlayerIE,
|
||||
PolskieRadioPodcastIE,
|
||||
PolskieRadioPodcastListIE,
|
||||
PolskieRadioRadioKierowcowIE,
|
||||
)
|
||||
from .popcorntimes import PopcorntimesIE
|
||||
from .popcorntv import PopcornTVIE
|
||||
@ -1455,6 +1518,7 @@ from .puhutv import (
|
||||
PuhuTVIE,
|
||||
PuhuTVSerieIE,
|
||||
)
|
||||
from .pr0gramm import Pr0grammStaticIE, Pr0grammIE
|
||||
from .prankcast import PrankCastIE
|
||||
from .premiershiprugby import PremiershipRugbyIE
|
||||
from .presstv import PressTVIE
|
||||
@ -1469,6 +1533,7 @@ from .prx import (
|
||||
)
|
||||
from .puls4 import Puls4IE
|
||||
from .pyvideo import PyvideoIE
|
||||
from .qdance import QDanceIE
|
||||
from .qingting import QingTingIE
|
||||
from .qqmusic import (
|
||||
QQMusicIE,
|
||||
@ -1501,6 +1566,8 @@ from .radlive import (
|
||||
RadLiveSeasonIE,
|
||||
)
|
||||
from .rai import (
|
||||
RaiIE,
|
||||
RaiCulturaIE,
|
||||
RaiPlayIE,
|
||||
RaiPlayLiveIE,
|
||||
RaiPlayPlaylistIE,
|
||||
@ -1509,13 +1576,16 @@ from .rai import (
|
||||
RaiPlaySoundPlaylistIE,
|
||||
RaiNewsIE,
|
||||
RaiSudtirolIE,
|
||||
RaiIE,
|
||||
)
|
||||
from .raywenderlich import (
|
||||
RayWenderlichIE,
|
||||
RayWenderlichCourseIE,
|
||||
)
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .rbgtum import (
|
||||
RbgTumIE,
|
||||
RbgTumCourseIE,
|
||||
)
|
||||
from .rcs import (
|
||||
RCSIE,
|
||||
RCSEmbedsIE,
|
||||
@ -1527,6 +1597,7 @@ from .rcti import (
|
||||
RCTIPlusTVIE,
|
||||
)
|
||||
from .rds import RDSIE
|
||||
from .recurbate import RecurbateIE
|
||||
from .redbee import ParliamentLiveUKIE, RTBFIE
|
||||
from .redbulltv import (
|
||||
RedBullTVIE,
|
||||
@ -1549,6 +1620,7 @@ from .rentv import (
|
||||
from .restudy import RestudyIE
|
||||
from .reuters import ReutersIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
from .rheinmaintv import RheinMainTVIE
|
||||
from .rice import RICEIE
|
||||
from .rmcdecouverte import RMCDecouverteIE
|
||||
from .rockstargames import RockstarGamesIE
|
||||
@ -1563,6 +1635,7 @@ from .rottentomatoes import RottenTomatoesIE
|
||||
from .rozhlas import (
|
||||
RozhlasIE,
|
||||
RozhlasVltavaIE,
|
||||
MujRozhlasIE,
|
||||
)
|
||||
from .rte import RteIE, RteRadioIE
|
||||
from .rtlnl import (
|
||||
@ -1586,6 +1659,11 @@ from .rtnews import (
|
||||
from .rtp import RTPIE
|
||||
from .rtrfm import RTRFMIE
|
||||
from .rts import RTSIE
|
||||
from .rtvcplay import (
|
||||
RTVCPlayIE,
|
||||
RTVCPlayEmbedIE,
|
||||
RTVCKalturaIE,
|
||||
)
|
||||
from .rtve import (
|
||||
RTVEALaCartaIE,
|
||||
RTVEAudioIE,
|
||||
@ -1631,6 +1709,7 @@ from .ruv import (
|
||||
RuvIE,
|
||||
RuvSpilaIE
|
||||
)
|
||||
from .s4c import S4CIE
|
||||
from .safari import (
|
||||
SafariIE,
|
||||
SafariApiIE,
|
||||
@ -1655,6 +1734,7 @@ from .scte import (
|
||||
)
|
||||
from .scrolller import ScrolllerIE
|
||||
from .seeker import SeekerIE
|
||||
from .senalcolombia import SenalColombiaLiveIE
|
||||
from .senategov import SenateISVPIE, SenateGovIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from .servus import ServusIE
|
||||
@ -1752,6 +1832,7 @@ from .spike import (
|
||||
BellatorIE,
|
||||
ParamountNetworkIE,
|
||||
)
|
||||
from .stageplus import StagePlusVODConcertIE
|
||||
from .startrek import StarTrekIE
|
||||
from .stitcher import (
|
||||
StitcherIE,
|
||||
@ -1777,6 +1858,10 @@ from .srgssr import (
|
||||
SRGSSRPlayIE,
|
||||
)
|
||||
from .srmediathek import SRMediathekIE
|
||||
from .stacommu import (
|
||||
StacommuLiveIE,
|
||||
StacommuVODIE,
|
||||
)
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .startv import StarTVIE
|
||||
from .steam import (
|
||||
@ -1789,7 +1874,6 @@ from .storyfire import (
|
||||
StoryFireSeriesIE,
|
||||
)
|
||||
from .streamable import StreamableIE
|
||||
from .streamanity import StreamanityIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streamff import StreamFFIE
|
||||
@ -1827,7 +1911,10 @@ from .teachertube import (
|
||||
TeacherTubeUserIE,
|
||||
)
|
||||
from .teachingchannel import TeachingChannelIE
|
||||
from .teamcoco import TeamcocoIE
|
||||
from .teamcoco import (
|
||||
TeamcocoIE,
|
||||
ConanClassicIE,
|
||||
)
|
||||
from .teamtreehouse import TeamTreeHouseIE
|
||||
from .techtalks import TechTalksIE
|
||||
from .ted import (
|
||||
@ -1839,6 +1926,7 @@ from .ted import (
|
||||
from .tele5 import Tele5IE
|
||||
from .tele13 import Tele13IE
|
||||
from .telebruxelles import TeleBruxellesIE
|
||||
from .telecaribe import TelecaribePlayIE
|
||||
from .telecinco import TelecincoIE
|
||||
from .telegraaf import TelegraafIE
|
||||
from .telegram import TelegramEmbedIE
|
||||
@ -1853,7 +1941,7 @@ from .telequebec import (
|
||||
)
|
||||
from .teletask import TeleTaskIE
|
||||
from .telewebion import TelewebionIE
|
||||
from .tempo import TempoIE
|
||||
from .tempo import TempoIE, IVXPlayerIE
|
||||
from .tencent import (
|
||||
IflixEpisodeIE,
|
||||
IflixSeriesIE,
|
||||
@ -1930,6 +2018,7 @@ from .traileraddict import TrailerAddictIE
|
||||
from .triller import (
|
||||
TrillerIE,
|
||||
TrillerUserIE,
|
||||
TrillerShortIE,
|
||||
)
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trovo import (
|
||||
@ -1951,10 +2040,9 @@ from .tubitv import (
|
||||
)
|
||||
from .tumblr import TumblrIE
|
||||
from .tunein import (
|
||||
TuneInClipIE,
|
||||
TuneInStationIE,
|
||||
TuneInProgramIE,
|
||||
TuneInTopicIE,
|
||||
TuneInPodcastIE,
|
||||
TuneInPodcastEpisodeIE,
|
||||
TuneInShortenerIE,
|
||||
)
|
||||
from .tunepk import TunePkIE
|
||||
@ -2022,7 +2110,6 @@ from .tvp import (
|
||||
)
|
||||
from .tvplay import (
|
||||
TVPlayIE,
|
||||
ViafreeIE,
|
||||
TVPlayHomeIE,
|
||||
)
|
||||
from .tvplayer import TVPlayerIE
|
||||
@ -2181,12 +2268,16 @@ from .viu import (
|
||||
ViuIE,
|
||||
ViuPlaylistIE,
|
||||
ViuOTTIE,
|
||||
ViuOTTIndonesiaIE,
|
||||
)
|
||||
from .vk import (
|
||||
VKIE,
|
||||
VKUserVideosIE,
|
||||
VKWallPostIE,
|
||||
VKPlayIE,
|
||||
VKPlayLiveIE,
|
||||
)
|
||||
from .vocaroo import VocarooIE
|
||||
from .vodlocker import VodlockerIE
|
||||
from .vodpl import VODPlIE
|
||||
from .vodplatform import VODPlatformIE
|
||||
@ -2204,7 +2295,12 @@ from .voxmedia import (
|
||||
VoxMediaVolumeIE,
|
||||
VoxMediaIE,
|
||||
)
|
||||
from .vrt import VRTIE
|
||||
from .vrt import (
|
||||
VRTIE,
|
||||
VrtNUIE,
|
||||
KetnetIE,
|
||||
DagelijkseKostIE,
|
||||
)
|
||||
from .vrak import VrakIE
|
||||
from .vrv import (
|
||||
VRVIE,
|
||||
@ -2255,8 +2351,20 @@ from .weibo import (
|
||||
WeiboMobileIE
|
||||
)
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .weverse import (
|
||||
WeverseIE,
|
||||
WeverseMediaIE,
|
||||
WeverseMomentIE,
|
||||
WeverseLiveTabIE,
|
||||
WeverseMediaTabIE,
|
||||
WeverseLiveIE,
|
||||
)
|
||||
from .wevidi import WeVidiIE
|
||||
from .weyyak import WeyyakIE
|
||||
from .whyp import WhypIE
|
||||
from .wikimedia import WikimediaIE
|
||||
from .willow import WillowIE
|
||||
from .wimbledon import WimbledonIE
|
||||
from .wimtv import WimTVIE
|
||||
from .whowatch import WhoWatchIE
|
||||
from .wistia import (
|
||||
@ -2282,6 +2390,12 @@ from .wsj import (
|
||||
WSJArticleIE,
|
||||
)
|
||||
from .wwe import WWEIE
|
||||
from .wykop import (
|
||||
WykopDigIE,
|
||||
WykopDigCommentIE,
|
||||
WykopPostIE,
|
||||
WykopPostCommentIE,
|
||||
)
|
||||
from .xanimu import XanimuIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
@ -2301,13 +2415,14 @@ from .xnxx import XNXXIE
|
||||
from .xstream import XstreamIE
|
||||
from .xtube import XTubeUserIE, XTubeIE
|
||||
from .xuite import XuiteIE
|
||||
from .xvideos import XVideosIE
|
||||
from .xvideos import (
|
||||
XVideosIE,
|
||||
XVideosQuickiesIE
|
||||
)
|
||||
from .xxxymovies import XXXYMoviesIE
|
||||
from .yahoo import (
|
||||
YahooIE,
|
||||
YahooSearchIE,
|
||||
YahooGyaOPlayerIE,
|
||||
YahooGyaOIE,
|
||||
YahooJapanNewsIE,
|
||||
)
|
||||
from .yandexdisk import YandexDiskIE
|
||||
@ -2325,6 +2440,10 @@ from .yandexvideo import (
|
||||
ZenYandexChannelIE,
|
||||
)
|
||||
from .yapfiles import YapFilesIE
|
||||
from .yappy import (
|
||||
YappyIE,
|
||||
YappyProfileIE,
|
||||
)
|
||||
from .yesjapan import YesJapanIE
|
||||
from .yinyuetai import YinYueTaiIE
|
||||
from .yle_areena import YleAreenaIE
|
||||
@ -2342,6 +2461,10 @@ from .younow import (
|
||||
from .youporn import YouPornIE
|
||||
from .yourporn import YourPornIE
|
||||
from .yourupload import YourUploadIE
|
||||
from .zaiko import (
|
||||
ZaikoIE,
|
||||
ZaikoETicketIE,
|
||||
)
|
||||
from .zapiks import ZapiksIE
|
||||
from .zattoo import (
|
||||
BBVTVIE,
|
||||
@ -2399,6 +2522,7 @@ from .zingmp3 import (
|
||||
ZingMp3WeekChartIE,
|
||||
ZingMp3ChartMusicVideoIE,
|
||||
ZingMp3UserIE,
|
||||
ZingMp3HubIE,
|
||||
)
|
||||
from .zoom import ZoomIE
|
||||
from .zype import ZypeIE
|
||||
|
@ -12,6 +12,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
@ -85,6 +86,15 @@ class ABCIE(InfoExtractor):
|
||||
'uploader': 'Behind the News',
|
||||
'uploader_id': 'behindthenews',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
|
||||
'info_dict': {
|
||||
'id': '102520540',
|
||||
'title': 'Wagner Group retreating from Russia, leader Prigozhin to move to Belarus',
|
||||
'ext': 'mp4',
|
||||
'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
|
||||
'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -107,7 +117,7 @@ class ABCIE(InfoExtractor):
|
||||
video = True
|
||||
|
||||
if mobj is None:
|
||||
mobj = re.search(r'(?P<type>)"sources": (?P<json_data>\[[^\]]+\]),', webpage)
|
||||
mobj = re.search(r'(?P<type>)"(?:sources|files|renditions)":\s*(?P<json_data>\[[^\]]+\])', webpage)
|
||||
if mobj is None:
|
||||
mobj = re.search(
|
||||
r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
|
||||
@ -121,7 +131,8 @@ class ABCIE(InfoExtractor):
|
||||
urls_info = self._parse_json(
|
||||
mobj.group('json_data'), video_id, transform_source=js_to_json)
|
||||
youtube = mobj.group('type') == 'YouTube'
|
||||
video = mobj.group('type') == 'Video' or urls_info[0]['contentType'] == 'video/mp4'
|
||||
video = mobj.group('type') == 'Video' or traverse_obj(
|
||||
urls_info, (0, ('contentType', 'MIMEType')), get_all=False) == 'video/mp4'
|
||||
|
||||
if not isinstance(urls_info, list):
|
||||
urls_info = [urls_info]
|
||||
|
@ -22,80 +22,23 @@ from ..utils import (
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
OnDemandPagedList,
|
||||
request_to_url,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
# NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862)
|
||||
|
||||
|
||||
def add_opener(ydl, handler):
|
||||
''' Add a handler for opening URLs, like _download_webpage '''
|
||||
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
|
||||
"""Add a handler for opening URLs, like _download_webpage"""
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
|
||||
assert isinstance(ydl._opener, urllib.request.OpenerDirector)
|
||||
ydl._opener.add_handler(handler)
|
||||
|
||||
|
||||
def remove_opener(ydl, handler):
|
||||
'''
|
||||
Remove handler(s) for opening URLs
|
||||
@param handler Either handler object itself or handler type.
|
||||
Specifying handler type will remove all handler which isinstance returns True.
|
||||
'''
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
|
||||
opener = ydl._opener
|
||||
assert isinstance(ydl._opener, urllib.request.OpenerDirector)
|
||||
if isinstance(handler, (type, tuple)):
|
||||
find_cp = lambda x: isinstance(x, handler)
|
||||
else:
|
||||
find_cp = lambda x: x is handler
|
||||
|
||||
removed = []
|
||||
for meth in dir(handler):
|
||||
if meth in ["redirect_request", "do_open", "proxy_open"]:
|
||||
# oops, coincidental match
|
||||
continue
|
||||
|
||||
i = meth.find("_")
|
||||
protocol = meth[:i]
|
||||
condition = meth[i + 1:]
|
||||
|
||||
if condition.startswith("error"):
|
||||
j = condition.find("_") + i + 1
|
||||
kind = meth[j + 1:]
|
||||
try:
|
||||
kind = int(kind)
|
||||
except ValueError:
|
||||
pass
|
||||
lookup = opener.handle_error.get(protocol, {})
|
||||
opener.handle_error[protocol] = lookup
|
||||
elif condition == "open":
|
||||
kind = protocol
|
||||
lookup = opener.handle_open
|
||||
elif condition == "response":
|
||||
kind = protocol
|
||||
lookup = opener.process_response
|
||||
elif condition == "request":
|
||||
kind = protocol
|
||||
lookup = opener.process_request
|
||||
else:
|
||||
continue
|
||||
|
||||
handlers = lookup.setdefault(kind, [])
|
||||
if handlers:
|
||||
handlers[:] = [x for x in handlers if not find_cp(x)]
|
||||
|
||||
removed.append(x for x in handlers if find_cp(x))
|
||||
|
||||
if removed:
|
||||
for x in opener.handlers:
|
||||
if find_cp(x):
|
||||
x.add_parent(None)
|
||||
opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
|
||||
rh = ydl._request_director.handlers['Urllib']
|
||||
if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
|
||||
return
|
||||
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies)
|
||||
assert isinstance(opener, urllib.request.OpenerDirector)
|
||||
opener.add_handler(handler)
|
||||
rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
|
||||
|
||||
|
||||
class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
@ -137,11 +80,11 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
|
||||
def abematv_license_open(self, url):
|
||||
url = request_to_url(url)
|
||||
url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
|
||||
ticket = urllib.parse.urlparse(url).netloc
|
||||
response_data = self._get_videokey_from_ticket(ticket)
|
||||
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
|
||||
'Content-Length': len(response_data),
|
||||
'Content-Length': str(len(response_data)),
|
||||
}, url=url, code=200)
|
||||
|
||||
|
||||
@ -213,10 +156,7 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
})
|
||||
AbemaTVBaseIE._USERTOKEN = user_data['token']
|
||||
|
||||
# don't allow adding it 2 times or more, though it's guarded
|
||||
remove_opener(self._downloader, AbemaLicenseHandler)
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
|
||||
return self._USERTOKEN
|
||||
|
||||
def _get_media_token(self, invalidate=False, to_show=True):
|
||||
@ -436,6 +376,16 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
if 3 not in ondemand_types:
|
||||
# cannot acquire decryption key for these streams
|
||||
self.report_warning('This is a premium-only stream')
|
||||
info.update(traverse_obj(api_response, {
|
||||
'series': ('series', 'title'),
|
||||
'season': ('season', 'title'),
|
||||
'season_number': ('season', 'sequence'),
|
||||
'episode_number': ('episode', 'number'),
|
||||
}))
|
||||
if not title:
|
||||
title = traverse_obj(api_response, ('episode', 'title'))
|
||||
if not description:
|
||||
description = traverse_obj(api_response, ('episode', 'content'))
|
||||
|
||||
m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
|
||||
elif video_type == 'slots':
|
||||
|
@ -40,28 +40,33 @@ class ACastBaseIE(InfoExtractor):
|
||||
|
||||
class ACastIE(ACastBaseIE):
|
||||
IE_NAME = 'acast'
|
||||
_VALID_URL = r'''(?x)
|
||||
_VALID_URL = r'''(?x:
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:embed|www)\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<channel>[^/]+)/(?P<id>[^/#?]+)
|
||||
'''
|
||||
(?P<channel>[^/]+)/(?P<id>[^/#?"]+)
|
||||
)'''
|
||||
_EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'md5': 'f5598f3ad1e4776fed12ec1407153e4b',
|
||||
'info_dict': {
|
||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'ext': 'mp3',
|
||||
'title': '2. Raggarmordet - Röster ur det förflutna',
|
||||
'description': 'md5:a992ae67f4d98f1c0141598f7bebbf67',
|
||||
'description': 'md5:013959207e05011ad14a222cf22278cc',
|
||||
'timestamp': 1477346700,
|
||||
'upload_date': '20161024',
|
||||
'duration': 2766,
|
||||
'creator': 'Anton Berg & Martin Johnson',
|
||||
'creator': 'Third Ear Studio',
|
||||
'series': 'Spår',
|
||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||
'thumbnail': 'https://assets.pippa.io/shows/616ebe1886d7b1398620b943/616ebe33c7e6e70013cae7da.jpg',
|
||||
'episode_number': 2,
|
||||
'display_id': '2.raggarmordet-rosterurdetforflutna',
|
||||
'season_number': 4,
|
||||
'season': 'Season 4',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
|
||||
@ -73,6 +78,23 @@ class ACastIE(ACastBaseIE):
|
||||
'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://ausi.anu.edu.au/news/democracy-sausage-episode-can-labor-be-long-form-government',
|
||||
'info_dict': {
|
||||
'id': '646c68fb21fbf20011e9c651',
|
||||
'ext': 'mp3',
|
||||
'creator': 'The Australian National University',
|
||||
'display_id': 'can-labor-be-a-long-form-government',
|
||||
'duration': 2618,
|
||||
'thumbnail': 'https://assets.pippa.io/shows/6113e8578b4903809f16f7e5/1684821529295-515b9520db9ce53275b995eb302f941c.jpeg',
|
||||
'title': 'Can Labor be a long-form government?',
|
||||
'episode': 'Can Labor be a long-form government?',
|
||||
'upload_date': '20230523',
|
||||
'series': 'Democracy Sausage with Mark Kenny',
|
||||
'timestamp': 1684826362,
|
||||
'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = self._match_valid_url(url).groups()
|
||||
|
@ -6,10 +6,8 @@ import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_b64decode,
|
||||
)
|
||||
from ..compat import compat_b64decode
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ass_subtitles_timecode,
|
||||
bytes_to_intlist,
|
||||
@ -142,9 +140,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
self._HEADERS = {'authorization': 'Bearer ' + access_token}
|
||||
except ExtractorError as e:
|
||||
message = None
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
resp = self._parse_json(
|
||||
e.cause.read().decode(), None, fatal=False) or {}
|
||||
e.cause.response.read().decode(), None, fatal=False) or {}
|
||||
message = resp.get('message') or resp.get('code')
|
||||
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
|
||||
|
||||
@ -195,14 +193,14 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
})
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, compat_HTTPError):
|
||||
if not isinstance(e.cause, HTTPError):
|
||||
raise e
|
||||
|
||||
if e.cause.code == 401:
|
||||
if e.cause.status == 401:
|
||||
# This usually goes away with a different random pkcs1pad, so retry
|
||||
continue
|
||||
|
||||
error = self._parse_json(e.cause.read(), video_id)
|
||||
error = self._parse_json(e.cause.response.read(), video_id)
|
||||
message = error.get('message')
|
||||
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
|
@ -2,11 +2,11 @@ import getpass
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import urllib.error
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
ExtractorError,
|
||||
@ -1394,7 +1394,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
form_page, urlh = form_page_res
|
||||
post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
|
||||
if not re.match(r'https?://', post_url):
|
||||
post_url = compat_urlparse.urljoin(urlh.geturl(), post_url)
|
||||
post_url = compat_urlparse.urljoin(urlh.url, post_url)
|
||||
form_data = self._hidden_inputs(form_page)
|
||||
form_data.update(data)
|
||||
return self._download_webpage_handle(
|
||||
@ -1473,7 +1473,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
elif 'automatically signed in with' in provider_redirect_page:
|
||||
# Seems like comcast is rolling up new way of automatically signing customers
|
||||
oauth_redirect_url = self._html_search_regex(
|
||||
r'continue:\s*"(https://oauth.xfinity.com/oauth/authorize\?.+)"', provider_redirect_page,
|
||||
r'continue:\s*"(https://oauth\.xfinity\.com/oauth/authorize\?.+)"', provider_redirect_page,
|
||||
'oauth redirect (signed)')
|
||||
# Just need to process the request. No useful data comes back
|
||||
self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login')
|
||||
@ -1573,7 +1573,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
elif mso_id == 'Spectrum':
|
||||
elif mso_id in ('Spectrum', 'Charter_Direct'):
|
||||
# Spectrum's login for is dynamically loaded via JS so we need to hardcode the flow
|
||||
# as a one-off implementation.
|
||||
provider_redirect_page, urlh = provider_redirect_page_res
|
||||
@ -1619,7 +1619,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
hidden_data['history'] = 1
|
||||
|
||||
provider_login_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending first bookend',
|
||||
urlh.url, video_id, 'Sending first bookend',
|
||||
query=hidden_data)
|
||||
|
||||
provider_association_redirect, urlh = post_form(
|
||||
@ -1629,7 +1629,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
})
|
||||
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_association_redirect, url=urlh.geturl())
|
||||
provider_association_redirect, url=urlh.url)
|
||||
|
||||
last_bookend_page, urlh = self._download_webpage_handle(
|
||||
provider_refresh_redirect_url, video_id,
|
||||
@ -1638,7 +1638,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
hidden_data['history'] = 3
|
||||
|
||||
mvpd_confirm_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending final bookend',
|
||||
urlh.url, video_id, 'Sending final bookend',
|
||||
query=hidden_data)
|
||||
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
@ -1652,7 +1652,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
hidden_data['history_val'] = 1
|
||||
|
||||
provider_login_redirect_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending First Bookend',
|
||||
urlh.url, video_id, 'Sending First Bookend',
|
||||
query=hidden_data)
|
||||
|
||||
provider_login_redirect_page, urlh = provider_login_redirect_page_res
|
||||
@ -1680,7 +1680,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
})
|
||||
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_association_redirect, url=urlh.geturl())
|
||||
provider_association_redirect, url=urlh.url)
|
||||
|
||||
last_bookend_page, urlh = self._download_webpage_handle(
|
||||
provider_refresh_redirect_url, video_id,
|
||||
@ -1690,7 +1690,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
hidden_data['history_val'] = 3
|
||||
|
||||
mvpd_confirm_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending Final Bookend',
|
||||
urlh.url, video_id, 'Sending Final Bookend',
|
||||
query=hidden_data)
|
||||
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
@ -1699,7 +1699,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
# based redirect that should be followed.
|
||||
provider_redirect_page, urlh = provider_redirect_page_res
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_redirect_page, url=urlh.geturl())
|
||||
provider_redirect_page, url=urlh.url)
|
||||
if provider_refresh_redirect_url:
|
||||
provider_redirect_page_res = self._download_webpage_handle(
|
||||
provider_refresh_redirect_url, video_id,
|
||||
@ -1724,7 +1724,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
'requestor_id': requestor_id,
|
||||
}), headers=mvpd_headers)
|
||||
except ExtractorError as e:
|
||||
if not mso_id and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
|
||||
if not mso_id and isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
raise_mvpd_required()
|
||||
raise
|
||||
if '<pendingLogout' in session:
|
||||
|
@ -170,8 +170,10 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
continue
|
||||
ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
|
||||
if ext == 'm3u8':
|
||||
info['formats'].extend(self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
info['formats'].extend(fmts)
|
||||
self._merge_subtitles(subs, target=info['subtitles'])
|
||||
elif ext == 'f4m':
|
||||
continue
|
||||
# info['formats'].extend(self._extract_f4m_formats(
|
||||
|
@ -3,6 +3,8 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
remove_start,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
@ -72,7 +74,14 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
requestor_id, brand = self._DOMAIN_MAP[domain]
|
||||
result = self._download_json(
|
||||
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
|
||||
filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
|
||||
filter_value, query={'filter[%s]' % filter_key: filter_value})
|
||||
result = traverse_obj(
|
||||
result, ('results',
|
||||
lambda k, v: k == 0 and v[filter_key] == filter_value),
|
||||
get_all=False)
|
||||
if not result:
|
||||
raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
|
||||
video_id=remove_start(filter_value, '/'))
|
||||
title = result['title']
|
||||
video_id = result['id']
|
||||
media_url = result['publicUrl']
|
||||
@ -123,7 +132,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': 'This video is only available for users of participating TV providers.',
|
||||
'skip': 'Geo-restricted - This content is not available in your location.'
|
||||
}, {
|
||||
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
||||
'info_dict': {
|
||||
@ -140,6 +149,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': 'This video is only available for users of participating TV providers.',
|
||||
}, {
|
||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||
'only_matching': True
|
||||
@ -303,6 +313,7 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||
class HistoryPlayerIE(AENetworksBaseIE):
|
||||
IE_NAME = 'history:player'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, video_id = self._match_valid_url(url).groups()
|
||||
|
@ -1,5 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import ExtractorError, traverse_obj, url_or_none
|
||||
|
||||
|
||||
class AeonCoIE(InfoExtractor):
|
||||
@ -19,22 +20,55 @@ class AeonCoIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
|
||||
'md5': '4e5f3dad9dbda0dbfa2da41a851e631e',
|
||||
'md5': '03582d795382e49f2fd0b427b55de409',
|
||||
'info_dict': {
|
||||
'id': '728595228',
|
||||
'id': '759576926',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wrought',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1484618528-c91452611f9a4e4497735a533da60d45b2fe472deb0c880f0afaab0cd2efb22a-d_1280',
|
||||
'uploader': 'Biofilm Productions',
|
||||
'uploader_id': 'user140352216',
|
||||
'uploader_url': 'https://vimeo.com/user140352216',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1525599692-84614af88e446612f49ca966cf8f80eab2c73376bedd80555741c521c26f9a3e-d_1280',
|
||||
'uploader': 'Aeon Video',
|
||||
'uploader_id': 'aeonvideo',
|
||||
'uploader_url': 'https://vimeo.com/aeonvideo',
|
||||
'duration': 1344
|
||||
}
|
||||
}, {
|
||||
'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out',
|
||||
'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b',
|
||||
'info_dict': {
|
||||
'id': 'emyi4z-O0ls',
|
||||
'ext': 'mp4',
|
||||
'title': 'How to outsmart the Prisoner’s Dilemma - Lucas Husted',
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/emyi4z-O0ls/maxresdefault.webp',
|
||||
'uploader': 'TED-Ed',
|
||||
'uploader_id': '@TEDEd',
|
||||
'uploader_url': 'https://www.youtube.com/@TEDEd',
|
||||
'duration': 344,
|
||||
'upload_date': '20200827',
|
||||
'channel_id': 'UCsooa4yRKGN_zEE8iknghZA',
|
||||
'playable_in_embed': True,
|
||||
'description': 'md5:c0959524f08cb60f96fd010f3dfb17f3',
|
||||
'categories': ['Education'],
|
||||
'like_count': int,
|
||||
'channel': 'TED-Ed',
|
||||
'chapters': 'count:7',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCsooa4yRKGN_zEE8iknghZA',
|
||||
'tags': 'count:26',
|
||||
'availability': 'public',
|
||||
'channel_follower_count': int,
|
||||
'view_count': int,
|
||||
'age_limit': 0,
|
||||
'live_status': 'not_live',
|
||||
'comment_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
vimeo_id = self._search_regex(r'hosterId":\s*"(?P<id>[0-9]+)', webpage, 'vimeo id')
|
||||
vimeo_url = VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{vimeo_id}', 'https://aeon.co')
|
||||
return self.url_result(vimeo_url, VimeoIE)
|
||||
embed_url = traverse_obj(self._yield_json_ld(webpage, video_id), (
|
||||
lambda _, v: v['@type'] == 'VideoObject', 'embedUrl', {url_or_none}), get_all=False)
|
||||
if not embed_url:
|
||||
raise ExtractorError('No embed URL found in webpage')
|
||||
if 'player.vimeo.com' in embed_url:
|
||||
embed_url = VimeoIE._smuggle_referrer(embed_url, 'https://aeon.co/')
|
||||
return self.url_result(embed_url)
|
||||
|
@ -76,59 +76,6 @@ class AfreecaTVIE(InfoExtractor):
|
||||
},
|
||||
}],
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
|
||||
'info_dict': {
|
||||
'id': '18650793',
|
||||
'ext': 'mp4',
|
||||
'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': '윈아디',
|
||||
'uploader_id': 'badkids',
|
||||
'duration': 107,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
|
||||
'info_dict': {
|
||||
'id': '10481652',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'duration': 6492,
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': 'd8b7c174568da61d774ef0203159bf97',
|
||||
'info_dict': {
|
||||
'id': '20160502_c4c62b9d_174361386_1',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'upload_date': '20160502',
|
||||
'duration': 3601,
|
||||
},
|
||||
}, {
|
||||
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
|
||||
'info_dict': {
|
||||
'id': '20160502_39e739bb_174361386_2',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'upload_date': '20160502',
|
||||
'duration': 2891,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# non standard key
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
|
||||
@ -146,8 +93,8 @@ class AfreecaTVIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# PARTIAL_ADULT
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439',
|
||||
# adult content
|
||||
'url': 'https://vod.afreecatv.com/player/97267690',
|
||||
'info_dict': {
|
||||
'id': '20180327_27901457_202289533_1',
|
||||
'ext': 'mp4',
|
||||
@ -161,16 +108,25 @@ class AfreecaTVIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['adult content'],
|
||||
'skip': 'The VOD does not exist',
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vod.afreecatv.com/player/15055030',
|
||||
'only_matching': True,
|
||||
'url': 'https://vod.afreecatv.com/player/96753363',
|
||||
'info_dict': {
|
||||
'id': '20230108_9FF5BEE1_244432674_1',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'rlantnghks',
|
||||
'uploader': '페이즈으',
|
||||
'duration': 10840,
|
||||
'thumbnail': 'http://videoimg.afreecatv.com/php/SnapshotLoad.php?rowKey=20230108_9FF5BEE1_244432674_1_r',
|
||||
'upload_date': '20230108',
|
||||
'title': '젠지 페이즈',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@ -223,26 +179,21 @@ class AfreecaTVIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if re.search(r'alert\(["\']This video has been deleted', webpage):
|
||||
raise ExtractorError(
|
||||
'Video %s has been deleted' % video_id, expected=True)
|
||||
|
||||
station_id = self._search_regex(
|
||||
r'nStationNo\s*=\s*(\d+)', webpage, 'station')
|
||||
bbs_id = self._search_regex(
|
||||
r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
|
||||
video_id = self._search_regex(
|
||||
r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
|
||||
|
||||
partial_view = False
|
||||
adult_view = False
|
||||
for _ in range(2):
|
||||
data = self._download_json(
|
||||
'https://api.m.afreecatv.com/station/video/a/view',
|
||||
video_id, headers={'Referer': url}, data=urlencode_postdata({
|
||||
'nTitleNo': video_id,
|
||||
'nApiLevel': 10,
|
||||
}))['data']
|
||||
if traverse_obj(data, ('code', {int})) == -6221:
|
||||
raise ExtractorError('The VOD does not exist', expected=True)
|
||||
query = {
|
||||
'nTitleNo': video_id,
|
||||
'nStationNo': station_id,
|
||||
'nBbsNo': bbs_id,
|
||||
'nStationNo': data['station_no'],
|
||||
'nBbsNo': data['bbs_no'],
|
||||
}
|
||||
if partial_view:
|
||||
query['partialView'] = 'SKIP_ADULT'
|
||||
|
@ -191,7 +191,7 @@ query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!,
|
||||
class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE):
|
||||
IE_NAME = 'amazonminitv:season'
|
||||
_VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
|
||||
IE_DESC = 'Amazon MiniTV Series, "minitv:season:" prefix'
|
||||
IE_DESC = 'Amazon MiniTV Season, "minitv:season:" prefix'
|
||||
_TESTS = [{
|
||||
'url': 'amazonminitv:season:amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
|
||||
'playlist_mincount': 6,
|
||||
@ -250,6 +250,7 @@ query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonI
|
||||
class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE):
|
||||
IE_NAME = 'amazonminitv:series'
|
||||
_VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
|
||||
IE_DESC = 'Amazon MiniTV Series, "minitv:series:" prefix'
|
||||
_TESTS = [{
|
||||
'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
|
||||
'playlist_mincount': 3,
|
||||
|
@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AmericasTestKitchenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
|
||||
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
||||
@ -72,6 +72,12 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -100,7 +106,7 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
|
||||
|
||||
class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com(?P<show>/cookscountry)?/episodes/browse/season_(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|(?P<cooks>cooks(?:country|illustrated)))\.com(?:(?:/(?P<show2>cooks(?:country|illustrated)))?(?:/?$|(?<!ated)(?<!ated\.com)/episodes/browse/season_(?P<season>\d+)))'
|
||||
_TESTS = [{
|
||||
# ATK Season
|
||||
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
|
||||
@ -117,29 +123,73 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
'title': 'Season 12',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
# America's Test Kitchen Series
|
||||
'url': 'https://www.americastestkitchen.com/',
|
||||
'info_dict': {
|
||||
'id': 'americastestkitchen',
|
||||
'title': 'America\'s Test Kitchen',
|
||||
},
|
||||
'playlist_count': 558,
|
||||
}, {
|
||||
# Cooks Country Series
|
||||
'url': 'https://www.americastestkitchen.com/cookscountry',
|
||||
'info_dict': {
|
||||
'id': 'cookscountry',
|
||||
'title': 'Cook\'s Country',
|
||||
},
|
||||
'playlist_count': 199,
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/cookscountry/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cookscountry.com/episodes/browse/season_12',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cookscountry.com',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/cooksillustrated/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cooksillustrated.com',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_path, season_number = self._match_valid_url(url).group('show', 'id')
|
||||
season_number = int(season_number)
|
||||
season_number, show1, show = self._match_valid_url(url).group('season', 'show', 'show2')
|
||||
show_path = ('/' + show) if show else ''
|
||||
show = show or show1
|
||||
season_number = int_or_none(season_number)
|
||||
|
||||
slug = 'cco' if show_path == '/cookscountry' else 'atk'
|
||||
slug, title = {
|
||||
'americastestkitchen': ('atk', 'America\'s Test Kitchen'),
|
||||
'cookscountry': ('cco', 'Cook\'s Country'),
|
||||
'cooksillustrated': ('cio', 'Cook\'s Illustrated'),
|
||||
}[show]
|
||||
|
||||
season = 'Season %d' % season_number
|
||||
facet_filters = [
|
||||
'search_document_klass:episode',
|
||||
'search_show_slug:' + slug,
|
||||
]
|
||||
|
||||
if season_number:
|
||||
playlist_id = 'season_%d' % season_number
|
||||
playlist_title = 'Season %d' % season_number
|
||||
facet_filters.append('search_season_list:' + playlist_title)
|
||||
else:
|
||||
playlist_id = show
|
||||
playlist_title = title
|
||||
|
||||
season_search = self._download_json(
|
||||
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
|
||||
season, headers={
|
||||
playlist_id, headers={
|
||||
'Origin': 'https://www.americastestkitchen.com',
|
||||
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
|
||||
'X-Algolia-Application-Id': 'Y1FNZXUI30',
|
||||
}, query={
|
||||
'facetFilters': json.dumps([
|
||||
'search_season_list:' + season,
|
||||
'search_document_klass:episode',
|
||||
'search_show_slug:' + slug,
|
||||
]),
|
||||
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
|
||||
'facetFilters': json.dumps(facet_filters),
|
||||
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
|
||||
'attributesToHighlight': '',
|
||||
'hitsPerPage': 1000,
|
||||
})
|
||||
@ -162,4 +212,4 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
}
|
||||
|
||||
return self.playlist_result(
|
||||
entries(), 'season_%d' % season_number, season)
|
||||
entries(), playlist_id, playlist_title)
|
||||
|
@ -5,6 +5,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
strip_jsonp,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
@ -15,7 +16,7 @@ class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
def _extract_feed_info(self, url):
|
||||
feed = self._download_json(
|
||||
url, None, 'Downloading Akamai AMP feed',
|
||||
'Unable to download Akamai AMP feed')
|
||||
'Unable to download Akamai AMP feed', transform_source=strip_jsonp)
|
||||
item = feed.get('channel', {}).get('item')
|
||||
if not item:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
|
||||
@ -73,8 +74,10 @@ class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
|
||||
|
98
plugins/youtube_download/yt_dlp/extractor/anchorfm.py
Normal file
98
plugins/youtube_download/yt_dlp/extractor/anchorfm.py
Normal file
@ -0,0 +1,98 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp
|
||||
)
|
||||
|
||||
|
||||
class AnchorFMEpisodeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://anchor\.fm/(?P<channel_name>\w+)/(?:embed/)?episodes/[\w-]+-(?P<episode_id>\w+)'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://anchor.fm/lovelyti/episodes/Chrisean-Rock-takes-to-twitter-to-announce-shes-pregnant--Blueface-denies-he-is-the-father-e1tpt3d',
|
||||
'info_dict': {
|
||||
'id': 'e1tpt3d',
|
||||
'ext': 'mp3',
|
||||
'title': ' Chrisean Rock takes to twitter to announce she\'s pregnant, Blueface denies he is the father!',
|
||||
'description': 'md5:207d167de3e28ceb4ddc1ebf5a30044c',
|
||||
'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_nologo/1034827/1034827-1658438968460-5f3bfdf3601e8.jpg',
|
||||
'duration': 624.718,
|
||||
'uploader': 'Lovelyti ',
|
||||
'uploader_id': '991541',
|
||||
'channel': 'lovelyti',
|
||||
'modified_date': '20230121',
|
||||
'modified_timestamp': 1674285178,
|
||||
'release_date': '20230121',
|
||||
'release_timestamp': 1674285179,
|
||||
'episode_id': 'e1tpt3d',
|
||||
}
|
||||
}, {
|
||||
# embed url
|
||||
'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd',
|
||||
'info_dict': {
|
||||
'id': 'e1shjqd',
|
||||
'ext': 'mp3',
|
||||
'title': 'S2E75 Perang Bintang di Balik Kasus Ferdy Sambo dan Ismail Bolong',
|
||||
'description': 'md5:9e95ad9293bf00178bf8d33e9cb92c41',
|
||||
'duration': 1042.008,
|
||||
'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
|
||||
'release_date': '20221221',
|
||||
'release_timestamp': 1671595916,
|
||||
'modified_date': '20221221',
|
||||
'modified_timestamp': 1671590834,
|
||||
'channel': 'apakatatempo',
|
||||
'uploader': 'Podcast Tempo',
|
||||
'uploader_id': '2585461',
|
||||
'season': 'Season 2',
|
||||
'season_number': 2,
|
||||
'episode_id': 'e1shjqd',
|
||||
}
|
||||
}]
|
||||
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://podcast.tempo.co/podcast/192/perang-bintang-di-balik-kasus-ferdy-sambo-dan-ismail-bolong',
|
||||
'info_dict': {
|
||||
'id': 'e1shjqd',
|
||||
'ext': 'mp3',
|
||||
'release_date': '20221221',
|
||||
'duration': 1042.008,
|
||||
'season': 'Season 2',
|
||||
'modified_timestamp': 1671590834,
|
||||
'uploader_id': '2585461',
|
||||
'modified_date': '20221221',
|
||||
'description': 'md5:9e95ad9293bf00178bf8d33e9cb92c41',
|
||||
'season_number': 2,
|
||||
'title': 'S2E75 Perang Bintang di Balik Kasus Ferdy Sambo dan Ismail Bolong',
|
||||
'release_timestamp': 1671595916,
|
||||
'episode_id': 'e1shjqd',
|
||||
'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
|
||||
'uploader': 'Podcast Tempo',
|
||||
'channel': 'apakatatempo',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_name, episode_id = self._match_valid_url(url).group('channel_name', 'episode_id')
|
||||
api_data = self._download_json(f'https://anchor.fm/api/v3/episodes/{episode_id}', episode_id)
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'title': traverse_obj(api_data, ('episode', 'title')),
|
||||
'url': traverse_obj(api_data, ('episode', 'episodeEnclosureUrl'), ('episodeAudios', 0, 'url')),
|
||||
'ext': 'mp3',
|
||||
'vcodec': 'none',
|
||||
'thumbnail': traverse_obj(api_data, ('episode', 'episodeImage')),
|
||||
'description': clean_html(traverse_obj(api_data, ('episode', ('description', 'descriptionPreview')), get_all=False)),
|
||||
'duration': float_or_none(traverse_obj(api_data, ('episode', 'duration')), 1000),
|
||||
'modified_timestamp': unified_timestamp(traverse_obj(api_data, ('episode', 'modified'))),
|
||||
'release_timestamp': int_or_none(traverse_obj(api_data, ('episode', 'publishOnUnixTimestamp'))),
|
||||
'episode_id': episode_id,
|
||||
'uploader': traverse_obj(api_data, ('creator', 'name')),
|
||||
'uploader_id': str_or_none(traverse_obj(api_data, ('creator', 'userId'))),
|
||||
'season_number': int_or_none(traverse_obj(api_data, ('episode', 'podcastSeasonNumber'))),
|
||||
'channel': channel_name or traverse_obj(api_data, ('creator', 'vanitySlug')),
|
||||
}
|
@ -1,8 +1,8 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
scale_thumbnails_to_max_format_width,
|
||||
@ -121,7 +121,7 @@ class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
|
||||
canonical_url = self._request_webpage(
|
||||
HEADRequest(url), video_id,
|
||||
note='Resolve canonical player URL',
|
||||
errnote='Could not resolve canonical player URL').geturl()
|
||||
errnote='Could not resolve canonical player URL').url
|
||||
_, netloc, _, _, query, _ = urllib.parse.urlparse(canonical_url)
|
||||
cid = urllib.parse.parse_qs(query)['cid'][0]
|
||||
|
||||
|
@ -336,7 +336,7 @@ class AnvatoIE(InfoExtractor):
|
||||
elif media_format == 'm3u8-variant' or ext == 'm3u8':
|
||||
# For some videos the initial m3u8 URL returns JSON instead
|
||||
manifest_json = self._download_json(
|
||||
video_url, video_id, note='Downloading manifest JSON', errnote=False)
|
||||
video_url, video_id, note='Downloading manifest JSON', fatal=False)
|
||||
if manifest_json:
|
||||
video_url = manifest_json.get('master_m3u8')
|
||||
if not video_url:
|
||||
@ -392,14 +392,6 @@ class AnvatoIE(InfoExtractor):
|
||||
url = smuggle_url(url, {'token': anvplayer_data['token']})
|
||||
yield cls.url_result(url, AnvatoIE, video_id)
|
||||
|
||||
def _extract_anvato_videos(self, webpage, video_id):
|
||||
anvplayer_data = self._parse_json(
|
||||
self._html_search_regex(
|
||||
self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
|
||||
video_id)
|
||||
return self._get_anvato_videos(
|
||||
anvplayer_data['accessKey'], anvplayer_data['video'], 'default') # cbslocal token = 'default'
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
self._initialize_geo_bypass({
|
||||
|
@ -1,16 +1,16 @@
|
||||
import json
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .naver import NaverBaseIE
|
||||
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
|
||||
from ..compat import compat_HTTPError, compat_urllib_parse_unquote
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
dict_get,
|
||||
@ -899,7 +899,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
video_id, note='Fetching archived video file url', expected_status=True)
|
||||
except ExtractorError as e:
|
||||
# HTTP Error 404 is expected if the video is not saved.
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
|
||||
self.raise_no_formats(
|
||||
'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True)
|
||||
else:
|
||||
@ -926,7 +926,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
info['thumbnails'] = self._extract_thumbnails(video_id)
|
||||
|
||||
if urlh:
|
||||
url = compat_urllib_parse_unquote(urlh.geturl())
|
||||
url = compat_urllib_parse_unquote(urlh.url)
|
||||
video_file_url_qs = parse_qs(url)
|
||||
# Attempt to recover any ext & format info from playback url & response headers
|
||||
format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
|
||||
@ -1052,7 +1052,7 @@ class VLiveWebArchiveIE(InfoExtractor):
|
||||
try:
|
||||
return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
|
||||
raise ExtractorError('Page was not archived', expected=True)
|
||||
retry.error = e
|
||||
continue
|
||||
|
@ -13,6 +13,7 @@ from ..utils import (
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
@ -408,6 +409,23 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
|
||||
'md5': '3fd5fead7a370a819341129c8d713136',
|
||||
'info_dict': {
|
||||
'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
|
||||
'id': '12172961',
|
||||
'title': 'Wolfsland - Die traurigen Schwestern',
|
||||
'description': r're:^Als der Polizeiobermeister Raaben',
|
||||
'duration': 5241,
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
|
||||
'timestamp': 1670710500,
|
||||
'upload_date': '20221210',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 12,
|
||||
'episode': 'Wolfsland - Die traurigen Schwestern',
|
||||
'series': 'Filme im MDR'
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
||||
'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
|
||||
'info_dict': {
|
||||
@ -424,7 +442,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
'skip': 'Error',
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/tagesschau-oder-tagesschau-20-00-uhr/das-erste/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
|
||||
'md5': 'f1837e563323b8a642a8ddeff0131f51',
|
||||
'md5': '1e73ded21cb79bac065117e80c81dc88',
|
||||
'info_dict': {
|
||||
'id': '10049223',
|
||||
'ext': 'mp4',
|
||||
@ -432,13 +450,11 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
'timestamp': 1636398000,
|
||||
'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
|
||||
'upload_date': '20211108',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/sendung/beforeigners/beforeigners/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw/1',
|
||||
'playlist_count': 6,
|
||||
'info_dict': {
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw',
|
||||
'title': 'beforeigners/beforeigners/staffel-1',
|
||||
'display_id': 'tagesschau-oder-tagesschau-20-00-uhr/das-erste',
|
||||
'duration': 915,
|
||||
'episode': 'tagesschau, 20:00 Uhr',
|
||||
'series': 'tagesschau',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
@ -602,6 +618,9 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
show {
|
||||
title
|
||||
}
|
||||
image {
|
||||
src
|
||||
}
|
||||
synopsis
|
||||
title
|
||||
tracking {
|
||||
@ -640,6 +659,15 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
'description': description,
|
||||
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
|
||||
'series': try_get(player_page, lambda x: x['show']['title']),
|
||||
'thumbnail': (media_collection.get('_previewImage')
|
||||
or try_get(player_page, lambda x: update_url(x['image']['src'], query=None, fragment=None))
|
||||
or self.get_thumbnail_from_html(display_id, url)),
|
||||
})
|
||||
info.update(self._ARD_extract_episode_info(info['title']))
|
||||
return info
|
||||
|
||||
def get_thumbnail_from_html(self, display_id, url):
|
||||
webpage = self._download_webpage(url, display_id, fatal=False) or ''
|
||||
return (
|
||||
self._og_search_thumbnail(webpage, default=None)
|
||||
or self._html_search_meta('thumbnailUrl', webpage, default=None))
|
||||
|
@ -1,5 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@ -34,8 +34,8 @@ class AtresPlayerIE(InfoExtractor):
|
||||
_API_BASE = 'https://api.atresplayer.com/'
|
||||
|
||||
def _handle_error(self, e, code):
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
|
||||
error = self._parse_json(e.cause.read(), None)
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == code:
|
||||
error = self._parse_json(e.cause.response.read(), None)
|
||||
if error.get('error') == 'required_registered':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
|
@ -2,11 +2,11 @@ import functools
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import urllib.error
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError, compat_str, compat_urlparse
|
||||
from ..compat import compat_str, compat_urlparse
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
@ -277,7 +277,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
post_url, None, 'Logging in', data=urlencode_postdata(login_form),
|
||||
headers={'Referer': self._LOGIN_URL})
|
||||
|
||||
if self._LOGIN_URL in urlh.geturl():
|
||||
if self._LOGIN_URL in urlh.url:
|
||||
error = clean_html(get_element_by_class('form-message', response))
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
@ -388,8 +388,8 @@ class BBCCoUkIE(InfoExtractor):
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
except ExtractorError as e:
|
||||
if not (isinstance(e.exc_info[1], urllib.error.HTTPError)
|
||||
and e.exc_info[1].code in (403, 404)):
|
||||
if not (isinstance(e.exc_info[1], HTTPError)
|
||||
and e.exc_info[1].status in (403, 404)):
|
||||
raise
|
||||
fmts = []
|
||||
formats.extend(fmts)
|
||||
@ -472,7 +472,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
return programme_id, title, description, duration, formats, subtitles
|
||||
except ExtractorError as ee:
|
||||
if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
|
||||
if not (isinstance(ee.cause, HTTPError) and ee.cause.status == 404):
|
||||
raise
|
||||
|
||||
# fallback to legacy playlist
|
||||
@ -983,7 +983,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
# Some playlist URL may fail with 500, at the same time
|
||||
# the other one may work fine (e.g.
|
||||
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 500:
|
||||
continue
|
||||
raise
|
||||
if entry:
|
||||
|
@ -1,27 +1,197 @@
|
||||
from functools import partial
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
format_field,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BibelTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?:crn/)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bibeltv.de/mediathek/videos/329703-sprachkurs-in-malaiisch',
|
||||
'md5': '252f908192d611de038b8504b08bf97f',
|
||||
'info_dict': {
|
||||
'id': 'ref:329703',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprachkurs in Malaiisch',
|
||||
'description': 'md5:3e9f197d29ee164714e67351cf737dfe',
|
||||
'timestamp': 1608316701,
|
||||
'uploader_id': '5840105145001',
|
||||
'upload_date': '20201218',
|
||||
class BibelTVBaseIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['AT', 'CH', 'DE']
|
||||
_GEO_BYPASS = False
|
||||
|
||||
API_URL = 'https://www.bibeltv.de/mediathek/api'
|
||||
AUTH_TOKEN = 'j88bRXY8DsEqJ9xmTdWhrByVi5Hm'
|
||||
|
||||
def _extract_formats_and_subtitles(self, data, crn_id, *, is_live=False):
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for media_url in traverse_obj(data, (..., 'src', {url_or_none})):
|
||||
media_ext = determine_ext(media_url)
|
||||
if media_ext == 'm3u8':
|
||||
m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
|
||||
media_url, crn_id, live=is_live)
|
||||
formats.extend(m3u8_formats)
|
||||
subtitles.update(m3u8_subs)
|
||||
elif media_ext == 'mpd':
|
||||
mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(media_url, crn_id)
|
||||
formats.extend(mpd_formats)
|
||||
subtitles.update(mpd_subs)
|
||||
elif media_ext == 'mp4':
|
||||
formats.append({'url': media_url})
|
||||
else:
|
||||
self.report_warning(f'Unknown format {media_ext!r}')
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
@staticmethod
|
||||
def _extract_base_info(data):
|
||||
return {
|
||||
'id': data['crn'],
|
||||
**traverse_obj(data, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('duration', {partial(int_or_none, scale=1000)}),
|
||||
'timestamp': ('schedulingStart', {parse_iso8601}),
|
||||
'season_number': 'seasonNumber',
|
||||
'episode_number': 'episodeNumber',
|
||||
'view_count': 'viewCount',
|
||||
'like_count': 'likeCount',
|
||||
}),
|
||||
'thumbnails': orderedSet(traverse_obj(data, ('images', ..., {
|
||||
'url': ('url', {url_or_none}),
|
||||
}))),
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.bibeltv.de/mediathek/videos/crn/326374',
|
||||
'only_matching': True,
|
||||
|
||||
def _extract_url_info(self, data):
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': format_field(data, 'slug', 'https://www.bibeltv.de/mediathek/videos/%s'),
|
||||
**self._extract_base_info(data),
|
||||
}
|
||||
|
||||
def _extract_video_info(self, data):
|
||||
crn_id = data['crn']
|
||||
|
||||
if data.get('drm'):
|
||||
self.report_drm(crn_id)
|
||||
|
||||
json_data = self._download_json(
|
||||
format_field(data, 'id', f'{self.API_URL}/video/%s'), crn_id,
|
||||
headers={'Authorization': self.AUTH_TOKEN}, fatal=False,
|
||||
errnote='No formats available') or {}
|
||||
|
||||
formats, subtitles = self._extract_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('video', 'videoUrls', ...)), crn_id)
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
**self._extract_base_info(data),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class BibelTVVideoIE(BibelTVBaseIE):
|
||||
IE_DESC = 'BibelTV single video'
|
||||
_VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?P<id>\d+)[\w-]+'
|
||||
IE_NAME = 'bibeltv:video'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bibeltv.de/mediathek/videos/344436-alte-wege',
|
||||
'md5': 'ec1c07efe54353780512e8a4103b612e',
|
||||
'info_dict': {
|
||||
'id': '344436',
|
||||
'ext': 'mp4',
|
||||
'title': 'Alte Wege',
|
||||
'description': 'md5:2f4eb7294c9797a47b8fd13cccca22e9',
|
||||
'timestamp': 1677877071,
|
||||
'duration': 150.0,
|
||||
'upload_date': '20230303',
|
||||
'thumbnail': r're:https://bibeltv\.imgix\.net/[\w-]+\.jpg',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
'params': {
|
||||
'format': '6',
|
||||
},
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5840105145001/default_default/index.html?videoId=ref:%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
crn_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % crn_id, 'BrightcoveNew')
|
||||
video_data = traverse_obj(
|
||||
self._search_nextjs_data(self._download_webpage(url, crn_id), crn_id),
|
||||
('props', 'pageProps', 'videoPageData', 'videos', 0, {dict}))
|
||||
if not video_data:
|
||||
raise ExtractorError('Missing video data.')
|
||||
|
||||
return self._extract_video_info(video_data)
|
||||
|
||||
|
||||
class BibelTVSeriesIE(BibelTVBaseIE):
|
||||
IE_DESC = 'BibelTV series playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/serien/(?P<id>\d+)[\w-]+'
|
||||
IE_NAME = 'bibeltv:series'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bibeltv.de/mediathek/serien/333485-ein-wunder-fuer-jeden-tag',
|
||||
'playlist_mincount': 400,
|
||||
'info_dict': {
|
||||
'id': '333485',
|
||||
'title': 'Ein Wunder für jeden Tag',
|
||||
'description': 'Tägliche Kurzandacht mit Déborah Rosenkranz.',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
crn_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, crn_id)
|
||||
nextjs_data = self._search_nextjs_data(webpage, crn_id)
|
||||
series_data = traverse_obj(nextjs_data, ('props', 'pageProps', 'seriePageData', {dict}))
|
||||
if not series_data:
|
||||
raise ExtractorError('Missing series data.')
|
||||
|
||||
return self.playlist_result(
|
||||
traverse_obj(series_data, ('videos', ..., {dict}, {self._extract_url_info})),
|
||||
crn_id, series_data.get('title'), clean_html(series_data.get('description')))
|
||||
|
||||
|
||||
class BibelTVLiveIE(BibelTVBaseIE):
|
||||
IE_DESC = 'BibelTV live program'
|
||||
_VALID_URL = r'https?://(?:www\.)?bibeltv\.de/livestreams/(?P<id>[\w-]+)'
|
||||
IE_NAME = 'bibeltv:live'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bibeltv.de/livestreams/bibeltv/',
|
||||
'info_dict': {
|
||||
'id': 'bibeltv',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Bibel TV',
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': 'https://streampreview.bibeltv.de/bibeltv.webp',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.bibeltv.de/livestreams/impuls/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
stream_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, stream_id)
|
||||
stream_data = self._search_json(
|
||||
r'\\"video\\":', webpage, 'bibeltvData', stream_id,
|
||||
transform_source=lambda jstring: js_to_json(jstring.replace('\\"', '"')))
|
||||
|
||||
formats, subtitles = self._extract_formats_and_subtitles(
|
||||
traverse_obj(stream_data, ('src', ...)), stream_id, is_live=True)
|
||||
|
||||
return {
|
||||
'id': stream_id,
|
||||
'title': stream_data.get('title'),
|
||||
'thumbnail': stream_data.get('poster'),
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@ -1,12 +1,14 @@
|
||||
import base64
|
||||
import functools
|
||||
import hashlib
|
||||
import itertools
|
||||
import math
|
||||
import urllib.error
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..dependencies import Cryptodome
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
@ -16,6 +18,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
make_archive_id,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
@ -26,6 +29,8 @@ from ..utils import (
|
||||
srt_subtitles_timecode,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
@ -81,7 +86,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
f'{line["content"]}\n\n')
|
||||
return srt_data
|
||||
|
||||
def _get_subtitles(self, video_id, initial_state, cid):
|
||||
def _get_subtitles(self, video_id, aid, cid):
|
||||
subtitles = {
|
||||
'danmaku': [{
|
||||
'ext': 'xml',
|
||||
@ -89,7 +94,8 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
}]
|
||||
}
|
||||
|
||||
for s in traverse_obj(initial_state, ('videoData', 'subtitle', 'list')) or []:
|
||||
video_info_json = self._download_json(f'https://api.bilibili.com/x/player/v2?aid={aid}&cid={cid}', video_id)
|
||||
for s in traverse_obj(video_info_json, ('data', 'subtitle', 'subtitles', ...)):
|
||||
subtitles.setdefault(s['lan'], []).append({
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
|
||||
@ -130,9 +136,20 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
|
||||
yield from children
|
||||
|
||||
def _get_episodes_from_season(self, ss_id, url):
|
||||
season_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/web/season/section', ss_id,
|
||||
note='Downloading season info', query={'season_id': ss_id},
|
||||
headers={'Referer': url, **self.geo_verification_headers()})
|
||||
|
||||
for entry in traverse_obj(season_info, (
|
||||
'result', 'main_section', 'episodes',
|
||||
lambda _, v: url_or_none(v['share_url']) and v['id'])):
|
||||
yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}')
|
||||
|
||||
|
||||
class BiliBiliIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/video/[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/video/BV13x41117TL',
|
||||
@ -280,19 +297,60 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'note': 'video redirects to festival page',
|
||||
'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
|
||||
'info_dict': {
|
||||
'id': 'BV1wP4y1P72h',
|
||||
'ext': 'mp4',
|
||||
'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
|
||||
'timestamp': 1643947497,
|
||||
'upload_date': '20220204',
|
||||
'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
|
||||
'uploader': '叨叨冯聊音乐',
|
||||
'duration': 246.719,
|
||||
'uploader_id': '528182630',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'note': 'newer festival video',
|
||||
'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
|
||||
'info_dict': {
|
||||
'id': 'BV1ay4y1d77f',
|
||||
'ext': 'mp4',
|
||||
'title': '【崩坏3新春剧场】为特别的你送上祝福!',
|
||||
'timestamp': 1674273600,
|
||||
'upload_date': '20230121',
|
||||
'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
|
||||
'uploader': '果蝇轰',
|
||||
'duration': 1111.722,
|
||||
'uploader_id': '8469526',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
|
||||
|
||||
is_festival = 'videoData' not in initial_state
|
||||
if is_festival:
|
||||
video_data = initial_state['videoInfo']
|
||||
else:
|
||||
play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
|
||||
video_data = initial_state['videoData']
|
||||
|
||||
video_id, title = video_data['bvid'], video_data.get('title')
|
||||
|
||||
# Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
|
||||
page_list_json = traverse_obj(
|
||||
page_list_json = not is_festival and traverse_obj(
|
||||
self._download_json(
|
||||
'https://api.bilibili.com/x/player/pagelist', video_id,
|
||||
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
|
||||
@ -315,99 +373,135 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
|
||||
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
|
||||
|
||||
festival_info = {}
|
||||
if is_festival:
|
||||
play_info = self._download_json(
|
||||
'https://api.bilibili.com/x/player/playurl', video_id,
|
||||
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
|
||||
note='Extracting festival video formats')['data']
|
||||
|
||||
festival_info = traverse_obj(initial_state, {
|
||||
'uploader': ('videoInfo', 'upName'),
|
||||
'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
|
||||
'like_count': ('videoStatus', 'like', {int_or_none}),
|
||||
'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
|
||||
}, get_all=False)
|
||||
|
||||
return {
|
||||
**traverse_obj(initial_state, {
|
||||
'uploader': ('upData', 'name'),
|
||||
'uploader_id': ('upData', 'mid', {str_or_none}),
|
||||
'like_count': ('videoData', 'stat', 'like', {int_or_none}),
|
||||
'tags': ('tags', ..., 'tag_name'),
|
||||
'thumbnail': ('videoData', 'pic', {url_or_none}),
|
||||
}),
|
||||
**festival_info,
|
||||
**traverse_obj(video_data, {
|
||||
'description': 'desc',
|
||||
'timestamp': ('pubdate', {int_or_none}),
|
||||
'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
|
||||
'comment_count': ('stat', 'reply', {int_or_none}),
|
||||
}, get_all=False),
|
||||
'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
|
||||
'formats': self.extract_formats(play_info),
|
||||
'_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
|
||||
'title': title,
|
||||
'description': traverse_obj(initial_state, ('videoData', 'desc')),
|
||||
'view_count': traverse_obj(initial_state, ('videoData', 'stat', 'view')),
|
||||
'uploader': traverse_obj(initial_state, ('upData', 'name')),
|
||||
'uploader_id': traverse_obj(initial_state, ('upData', 'mid')),
|
||||
'like_count': traverse_obj(initial_state, ('videoData', 'stat', 'like')),
|
||||
'comment_count': traverse_obj(initial_state, ('videoData', 'stat', 'reply')),
|
||||
'tags': traverse_obj(initial_state, ('tags', ..., 'tag_name')),
|
||||
'thumbnail': traverse_obj(initial_state, ('videoData', 'pic')),
|
||||
'timestamp': traverse_obj(initial_state, ('videoData', 'pubdate')),
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, initial_state, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, aid, cid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
'http_headers': {'Referer': url},
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P<id>(?:ss|ep)\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ss897',
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ep267851',
|
||||
'info_dict': {
|
||||
'id': 'ss897',
|
||||
'id': '267851',
|
||||
'ext': 'mp4',
|
||||
'series': '神的记事本',
|
||||
'season': '神的记事本',
|
||||
'season_id': 897,
|
||||
'series': '鬼灭之刃',
|
||||
'series_id': '4358',
|
||||
'season': '鬼灭之刃',
|
||||
'season_id': '26801',
|
||||
'season_number': 1,
|
||||
'episode': '你与旅行包',
|
||||
'episode_number': 2,
|
||||
'title': '神的记事本:第2话 你与旅行包',
|
||||
'duration': 1428.487,
|
||||
'timestamp': 1310809380,
|
||||
'upload_date': '20110716',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'episode': '残酷',
|
||||
'episode_id': '267851',
|
||||
'episode_number': 1,
|
||||
'title': '1 残酷',
|
||||
'duration': 1425.256,
|
||||
'timestamp': 1554566400,
|
||||
'upload_date': '20190406',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ep508406',
|
||||
'only_matching': True,
|
||||
'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
episode_id = video_id[2:]
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if '您所在的地区无法观看本片' in webpage:
|
||||
raise GeoRestrictedError('This video is restricted')
|
||||
elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage
|
||||
or '正在观看预览,大会员免费看全片' in webpage):
|
||||
elif '正在观看预览,大会员免费看全片' in webpage:
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
|
||||
play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
|
||||
headers = {'Referer': url, **self.geo_verification_headers()}
|
||||
play_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id,
|
||||
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
|
||||
headers=headers)
|
||||
premium_only = play_info.get('code') == -10403
|
||||
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
|
||||
|
||||
formats = self.extract_formats(play_info)
|
||||
if (not formats and '成为大会员抢先看' in webpage
|
||||
and play_info.get('durl') and not play_info.get('dash')):
|
||||
if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
bangumi_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details',
|
||||
query={'ep_id': episode_id}, headers=headers)['result']
|
||||
|
||||
season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id'))
|
||||
episode_number, episode_info = next((
|
||||
(idx, ep) for idx, ep in enumerate(traverse_obj(
|
||||
bangumi_info, ('episodes', ..., {dict})), 1)
|
||||
if str_or_none(ep.get('id')) == episode_id), (1, {}))
|
||||
|
||||
season_id = bangumi_info.get('season_id')
|
||||
season_number = season_id and next((
|
||||
idx + 1 for idx, e in enumerate(
|
||||
traverse_obj(initial_state, ('mediaInfo', 'seasons', ...)))
|
||||
traverse_obj(bangumi_info, ('seasons', ...)))
|
||||
if e.get('season_id') == season_id
|
||||
), None)
|
||||
|
||||
aid = episode_info.get('aid')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': traverse_obj(initial_state, 'h1Title'),
|
||||
'episode': traverse_obj(initial_state, ('epInfo', 'long_title')),
|
||||
'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))),
|
||||
'series': traverse_obj(initial_state, ('mediaInfo', 'series')),
|
||||
'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')),
|
||||
'season_id': season_id,
|
||||
**traverse_obj(bangumi_info, {
|
||||
'series': ('series', 'series_title', {str}),
|
||||
'series_id': ('series', 'series_id', {str_or_none}),
|
||||
'thumbnail': ('square_cover', {url_or_none}),
|
||||
}),
|
||||
'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info),
|
||||
'episode': episode_info.get('long_title'),
|
||||
'episode_id': episode_id,
|
||||
'episode_number': int_or_none(episode_info.get('title')) or episode_number,
|
||||
'season_id': str_or_none(season_id),
|
||||
'season_number': season_number,
|
||||
'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')),
|
||||
'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')),
|
||||
'timestamp': int_or_none(episode_info.get('pub_time')),
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'subtitles': self.extract_subtitles(
|
||||
video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))),
|
||||
'__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))),
|
||||
'http_headers': {'Referer': url, **self.geo_verification_headers()},
|
||||
'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
'http_headers': headers,
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiMediaIE(InfoExtractor):
|
||||
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/bangumi/media/md24097891',
|
||||
@ -420,16 +514,26 @@ class BiliBiliBangumiMediaIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, media_id)
|
||||
ss_id = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)['mediaInfo']['season_id']
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
|
||||
episode_list = self._download_json(
|
||||
'https://api.bilibili.com/pgc/web/season/section', media_id,
|
||||
query={'season_id': initial_state['mediaInfo']['season_id']},
|
||||
note='Downloading season info')['result']['main_section']['episodes']
|
||||
return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id)
|
||||
|
||||
return self.playlist_result((
|
||||
self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid'])
|
||||
for entry in episode_list), media_id)
|
||||
|
||||
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ss26801',
|
||||
'info_dict': {
|
||||
'id': '26801'
|
||||
},
|
||||
'playlist_mincount': 26
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
ss_id = self._match_id(url)
|
||||
|
||||
return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id)
|
||||
|
||||
|
||||
class BilibiliSpaceBaseIE(InfoExtractor):
|
||||
@ -452,21 +556,65 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
'id': '3985676',
|
||||
},
|
||||
'playlist_mincount': 178,
|
||||
}, {
|
||||
'url': 'https://space.bilibili.com/313580179/video',
|
||||
'info_dict': {
|
||||
'id': '313580179',
|
||||
},
|
||||
'playlist_mincount': 92,
|
||||
}]
|
||||
|
||||
def _extract_signature(self, playlist_id):
|
||||
session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
|
||||
|
||||
key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
|
||||
img_key = traverse_obj(
|
||||
session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
|
||||
sub_key = traverse_obj(
|
||||
session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
|
||||
|
||||
session_key = img_key + sub_key
|
||||
|
||||
signature_values = []
|
||||
for position in (
|
||||
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
|
||||
12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
|
||||
57, 62, 11, 36, 20, 34, 44, 52
|
||||
):
|
||||
char_at_position = try_call(lambda: session_key[position])
|
||||
if char_at_position:
|
||||
signature_values.append(char_at_position)
|
||||
|
||||
return ''.join(signature_values)[:32]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
|
||||
if not is_video_url:
|
||||
self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
|
||||
'To download audios, add a "/audio" to the URL')
|
||||
|
||||
signature = self._extract_signature(playlist_id)
|
||||
|
||||
def fetch_page(page_idx):
|
||||
query = {
|
||||
'keyword': '',
|
||||
'mid': playlist_id,
|
||||
'order': 'pubdate',
|
||||
'order_avoided': 'true',
|
||||
'platform': 'web',
|
||||
'pn': page_idx + 1,
|
||||
'ps': 30,
|
||||
'tid': 0,
|
||||
'web_location': 1550101,
|
||||
'wts': int(time.time()),
|
||||
}
|
||||
query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
|
||||
|
||||
try:
|
||||
response = self._download_json('https://api.bilibili.com/x/space/arc/search',
|
||||
playlist_id, note=f'Downloading page {page_idx}',
|
||||
query={'mid': playlist_id, 'pn': page_idx + 1, 'jsonp': 'jsonp'})
|
||||
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
|
||||
playlist_id, note=f'Downloading page {page_idx}', query=query)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 412:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
||||
raise ExtractorError(
|
||||
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
||||
raise
|
||||
@ -494,9 +642,9 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/3985676/audio',
|
||||
'url': 'https://space.bilibili.com/313580179/audio',
|
||||
'info_dict': {
|
||||
'id': '3985676',
|
||||
'id': '313580179',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}]
|
||||
@ -894,15 +1042,15 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if not Cryptodome:
|
||||
if not Cryptodome.RSA:
|
||||
raise ExtractorError('pycryptodomex not found. Please install', expected=True)
|
||||
|
||||
key_data = self._download_json(
|
||||
'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
|
||||
note='Downloading login key', errnote='Unable to download login key')['data']
|
||||
|
||||
public_key = Cryptodome.PublicKey.RSA.importKey(key_data['key'])
|
||||
password_hash = Cryptodome.Cipher.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
|
||||
public_key = Cryptodome.RSA.importKey(key_data['key'])
|
||||
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
|
||||
login_post = self._download_json(
|
||||
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
|
||||
'username': username,
|
||||
@ -995,6 +1143,53 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
|
||||
'upload_date': '20221212',
|
||||
'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
|
||||
},
|
||||
}, {
|
||||
# episode comment extraction
|
||||
'url': 'https://www.bilibili.tv/en/play/34580/340317',
|
||||
'info_dict': {
|
||||
'id': '340317',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1604057820,
|
||||
'upload_date': '20201030',
|
||||
'episode_number': 5,
|
||||
'title': 'E5 - My Own Steel',
|
||||
'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
|
||||
'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
|
||||
'episode': 'Episode 5',
|
||||
'comment_count': int,
|
||||
'chapters': [{
|
||||
'start_time': 0,
|
||||
'end_time': 61.0,
|
||||
'title': '<Untitled Chapter 1>'
|
||||
}, {
|
||||
'start_time': 61.0,
|
||||
'end_time': 134.0,
|
||||
'title': 'Intro'
|
||||
}, {
|
||||
'start_time': 1290.0,
|
||||
'end_time': 1379.0,
|
||||
'title': 'Outro'
|
||||
}],
|
||||
},
|
||||
'params': {
|
||||
'getcomments': True
|
||||
}
|
||||
}, {
|
||||
# user generated content comment extraction
|
||||
'url': 'https://www.bilibili.tv/en/video/2045730385',
|
||||
'info_dict': {
|
||||
'id': '2045730385',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
|
||||
'timestamp': 1667891924,
|
||||
'upload_date': '20221108',
|
||||
'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
|
||||
},
|
||||
'params': {
|
||||
'getcomments': True
|
||||
}
|
||||
}, {
|
||||
# episode id without intro and outro
|
||||
@ -1054,11 +1249,69 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
|
||||
# XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
|
||||
return merge_dicts(
|
||||
self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id), {
|
||||
self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
|
||||
'title': self._html_search_meta('og:title', webpage),
|
||||
'description': self._html_search_meta('og:description', webpage)
|
||||
})
|
||||
|
||||
def _get_comments_reply(self, root_id, next_id=0, display_id=None):
|
||||
comment_api_raw_data = self._download_json(
|
||||
'https://api.bilibili.tv/reply/web/detail', display_id,
|
||||
note=f'Downloading reply comment of {root_id} - {next_id}',
|
||||
query={
|
||||
'platform': 'web',
|
||||
'ps': 20, # comment's reply per page (default: 3)
|
||||
'root': root_id,
|
||||
'next': next_id,
|
||||
})
|
||||
|
||||
for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
|
||||
yield {
|
||||
'author': traverse_obj(replies, ('member', 'name')),
|
||||
'author_id': traverse_obj(replies, ('member', 'mid')),
|
||||
'author_thumbnail': traverse_obj(replies, ('member', 'face')),
|
||||
'text': traverse_obj(replies, ('content', 'message')),
|
||||
'id': replies.get('rpid'),
|
||||
'like_count': int_or_none(replies.get('like_count')),
|
||||
'parent': replies.get('parent'),
|
||||
'timestamp': unified_timestamp(replies.get('ctime_text'))
|
||||
}
|
||||
|
||||
if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
|
||||
yield from self._get_comments_reply(
|
||||
root_id, comment_api_raw_data['data']['cursor']['next'], display_id)
|
||||
|
||||
def _get_comments(self, video_id, ep_id):
|
||||
for i in itertools.count(0):
|
||||
comment_api_raw_data = self._download_json(
|
||||
'https://api.bilibili.tv/reply/web/root', video_id,
|
||||
note=f'Downloading comment page {i + 1}',
|
||||
query={
|
||||
'platform': 'web',
|
||||
'pn': i, # page number
|
||||
'ps': 20, # comment per page (default: 20)
|
||||
'oid': video_id,
|
||||
'type': 3 if ep_id else 1, # 1: user generated content, 3: series content
|
||||
'sort_type': 1, # 1: best, 2: recent
|
||||
})
|
||||
|
||||
for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
|
||||
yield {
|
||||
'author': traverse_obj(replies, ('member', 'name')),
|
||||
'author_id': traverse_obj(replies, ('member', 'mid')),
|
||||
'author_thumbnail': traverse_obj(replies, ('member', 'face')),
|
||||
'text': traverse_obj(replies, ('content', 'message')),
|
||||
'id': replies.get('rpid'),
|
||||
'like_count': int_or_none(replies.get('like_count')),
|
||||
'timestamp': unified_timestamp(replies.get('ctime_text')),
|
||||
'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
|
||||
}
|
||||
if replies.get('count'):
|
||||
yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)
|
||||
|
||||
if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
|
||||
video_id = ep_id or aid
|
||||
@ -1086,7 +1339,8 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
**self._extract_video_metadata(url, video_id, season_id),
|
||||
'formats': self._get_formats(ep_id=ep_id, aid=aid),
|
||||
'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
|
||||
'chapters': chapters
|
||||
'chapters': chapters,
|
||||
'__post_extractor': self.extract_comments(video_id, ep_id)
|
||||
}
|
||||
|
||||
|
||||
|
@ -2,9 +2,9 @@ import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
@ -77,7 +77,10 @@ class BitChuteIE(InfoExtractor):
|
||||
def _check_format(self, video_url, video_id):
|
||||
urls = orderedSet(
|
||||
re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url)
|
||||
for host in (r'\g<2>', 'seed150', 'seed151', 'seed152', 'seed153'))
|
||||
for host in (r'\g<2>', 'seed122', 'seed125', 'seed126', 'seed128',
|
||||
'seed132', 'seed150', 'seed151', 'seed152', 'seed153',
|
||||
'seed167', 'seed171', 'seed177', 'seed305', 'seed307',
|
||||
'seedp29xb', 'zb10-7gsop1v78'))
|
||||
for url in urls:
|
||||
try:
|
||||
response = self._request_webpage(
|
||||
|
167
plugins/youtube_download/yt_dlp/extractor/blerp.py
Normal file
167
plugins/youtube_download/yt_dlp/extractor/blerp.py
Normal file
@ -0,0 +1,167 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import strip_or_none, traverse_obj
|
||||
|
||||
|
||||
class BlerpIE(InfoExtractor):
|
||||
IE_NAME = 'blerp'
|
||||
_VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',
|
||||
'info_dict': {
|
||||
'id': '6320fe8745636cb4dd677a5a',
|
||||
'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',
|
||||
'uploader': 'luminousaj',
|
||||
'uploader_id': '5fb81e51aa66ae000c395478',
|
||||
'ext': 'mp3',
|
||||
'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
|
||||
}
|
||||
}, {
|
||||
'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
|
||||
'info_dict': {
|
||||
'id': '5bc94ef4796001000498429f',
|
||||
'title': 'Yee',
|
||||
'uploader': '179617322678353920',
|
||||
'uploader_id': '5ba99cf71386730004552c42',
|
||||
'ext': 'mp3',
|
||||
'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
|
||||
}
|
||||
}]
|
||||
|
||||
_GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
|
||||
_GRAPHQL_QUERY = (
|
||||
'''query webBitePageGetBite($_id: MongoID!) {
|
||||
web {
|
||||
biteById(_id: $_id) {
|
||||
...bitePageFrag
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
}
|
||||
|
||||
fragment bitePageFrag on Bite {
|
||||
_id
|
||||
title
|
||||
userKeywords
|
||||
keywords
|
||||
color
|
||||
visibility
|
||||
isPremium
|
||||
owned
|
||||
price
|
||||
extraReview
|
||||
isAudioExists
|
||||
image {
|
||||
filename
|
||||
original {
|
||||
url
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
userReactions {
|
||||
_id
|
||||
reactions
|
||||
createdAt
|
||||
__typename
|
||||
}
|
||||
topReactions
|
||||
totalSaveCount
|
||||
saved
|
||||
blerpLibraryType
|
||||
license
|
||||
licenseMetaData
|
||||
playCount
|
||||
totalShareCount
|
||||
totalFavoriteCount
|
||||
totalAddedToBoardCount
|
||||
userCategory
|
||||
userAudioQuality
|
||||
audioCreationState
|
||||
transcription
|
||||
userTranscription
|
||||
description
|
||||
createdAt
|
||||
updatedAt
|
||||
author
|
||||
listingType
|
||||
ownerObject {
|
||||
_id
|
||||
username
|
||||
profileImage {
|
||||
filename
|
||||
original {
|
||||
url
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
transcription
|
||||
favorited
|
||||
visibility
|
||||
isCurated
|
||||
sourceUrl
|
||||
audienceRating
|
||||
strictAudienceRating
|
||||
ownerId
|
||||
reportObject {
|
||||
reportedContentStatus
|
||||
__typename
|
||||
}
|
||||
giphy {
|
||||
mp4
|
||||
gif
|
||||
__typename
|
||||
}
|
||||
audio {
|
||||
filename
|
||||
original {
|
||||
url
|
||||
__typename
|
||||
}
|
||||
mp3 {
|
||||
url
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
''')
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
|
||||
data = {
|
||||
'operationName': self._GRAPHQL_OPERATIONNAME,
|
||||
'query': self._GRAPHQL_QUERY,
|
||||
'variables': {
|
||||
'_id': audio_id
|
||||
}
|
||||
}
|
||||
|
||||
headers = {
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
|
||||
json_result = self._download_json('https://api.blerp.com/graphql',
|
||||
audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)
|
||||
|
||||
bite_json = json_result['data']['web']['biteById']
|
||||
|
||||
info_dict = {
|
||||
'id': bite_json['_id'],
|
||||
'url': bite_json['audio']['mp3']['url'],
|
||||
'title': bite_json['title'],
|
||||
'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
|
||||
'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
|
||||
'ext': 'mp3',
|
||||
'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)
|
||||
}
|
||||
|
||||
return info_dict
|
@ -1,86 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, str_or_none, traverse_obj
|
||||
|
||||
|
||||
class BooyahBaseIE(InfoExtractor):
|
||||
_BOOYAH_SESSION_KEY = None
|
||||
|
||||
def _real_initialize(self):
|
||||
BooyahBaseIE._BOOYAH_SESSION_KEY = self._request_webpage(
|
||||
'https://booyah.live/api/v3/auths/sessions', None, data=b'').getheader('booyah-session-key')
|
||||
|
||||
def _get_comments(self, video_id):
|
||||
comment_json = self._download_json(
|
||||
f'https://booyah.live/api/v3/playbacks/{video_id}/comments/tops', video_id,
|
||||
headers={'Booyah-Session-Key': self._BOOYAH_SESSION_KEY}, fatal=False) or {}
|
||||
|
||||
return [{
|
||||
'id': comment.get('comment_id'),
|
||||
'author': comment.get('from_nickname'),
|
||||
'author_id': comment.get('from_uid'),
|
||||
'author_thumbnail': comment.get('from_thumbnail'),
|
||||
'text': comment.get('content'),
|
||||
'timestamp': comment.get('create_time'),
|
||||
'like_count': comment.get('like_cnt'),
|
||||
} for comment in comment_json.get('comment_list') or ()]
|
||||
|
||||
|
||||
class BooyahClipsIE(BooyahBaseIE):
|
||||
_VALID_URL = r'https?://booyah.live/clips/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://booyah.live/clips/13887261322952306617',
|
||||
'info_dict': {
|
||||
'id': '13887261322952306617',
|
||||
'ext': 'mp4',
|
||||
'view_count': int,
|
||||
'duration': 30,
|
||||
'channel_id': 90565760,
|
||||
'like_count': int,
|
||||
'title': 'Cayendo con estilo 😎',
|
||||
'uploader': '♡LɪꜱGΛMER',
|
||||
'comment_count': int,
|
||||
'uploader_id': '90565760',
|
||||
'thumbnail': 'https://resmambet-a.akamaihd.net/mambet-storage/Clip/90565760/90565760-27204374-fba0-409d-9d7b-63a48b5c0e75.jpg',
|
||||
'upload_date': '20220617',
|
||||
'timestamp': 1655490556,
|
||||
'modified_timestamp': 1655490556,
|
||||
'modified_date': '20220617',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
json_data = self._download_json(
|
||||
f'https://booyah.live/api/v3/playbacks/{video_id}', video_id,
|
||||
headers={'Booyah-Session-key': self._BOOYAH_SESSION_KEY})
|
||||
|
||||
formats = []
|
||||
for video_data in json_data['playback']['endpoint_list']:
|
||||
formats.extend(({
|
||||
'url': video_data.get('stream_url'),
|
||||
'ext': 'mp4',
|
||||
'height': video_data.get('resolution'),
|
||||
}, {
|
||||
'url': video_data.get('download_url'),
|
||||
'ext': 'mp4',
|
||||
'format_note': 'Watermarked',
|
||||
'height': video_data.get('resolution'),
|
||||
'preference': -10,
|
||||
}))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': traverse_obj(json_data, ('playback', 'name')),
|
||||
'thumbnail': traverse_obj(json_data, ('playback', 'thumbnail_url')),
|
||||
'formats': formats,
|
||||
'view_count': traverse_obj(json_data, ('playback', 'views')),
|
||||
'like_count': traverse_obj(json_data, ('playback', 'likes')),
|
||||
'duration': traverse_obj(json_data, ('playback', 'duration')),
|
||||
'comment_count': traverse_obj(json_data, ('playback', 'comment_cnt')),
|
||||
'channel_id': traverse_obj(json_data, ('playback', 'channel_id')),
|
||||
'uploader': traverse_obj(json_data, ('user', 'nickname')),
|
||||
'uploader_id': str_or_none(traverse_obj(json_data, ('user', 'uid'))),
|
||||
'modified_timestamp': int_or_none(traverse_obj(json_data, ('playback', 'update_time_ms')), 1000),
|
||||
'timestamp': int_or_none(traverse_obj(json_data, ('playback', 'create_time_ms')), 1000),
|
||||
'__post_extractor': self.extract_comments(video_id, self._get_comments(video_id)),
|
||||
}
|
102
plugins/youtube_download/yt_dlp/extractor/boxcast.py
Normal file
102
plugins/youtube_download/yt_dlp/extractor/boxcast.py
Normal file
@ -0,0 +1,102 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
unified_timestamp
|
||||
)
|
||||
|
||||
|
||||
class BoxCastVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://boxcast\.tv/(?:
|
||||
view-embed/|
|
||||
channel/\w+\?(?:[^#]+&)?b=|
|
||||
video-portal/(?:\w+/){2}
|
||||
)(?P<id>[\w-]+)'''
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://boxcast\.tv/view-embed/[\w-]+)']
|
||||
_TESTS = [{
|
||||
'url': 'https://boxcast.tv/view-embed/in-the-midst-of-darkness-light-prevails-an-interdisciplinary-symposium-ozmq5eclj50ujl4bmpwx',
|
||||
'info_dict': {
|
||||
'id': 'da1eqqgkacngd5djlqld',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w+-]+/){3}.+\.png$',
|
||||
'title': 'In the Midst of Darkness Light Prevails: An Interdisciplinary Symposium',
|
||||
'release_timestamp': 1670686812,
|
||||
'release_date': '20221210',
|
||||
'uploader_id': 're8w0v8hohhvpqtbskpe',
|
||||
'uploader': 'Children\'s Health Defense',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://boxcast.tv/video-portal/vctwevwntun3o0ikq7af/rvyblnn0fxbfjx5nwxhl/otbpltj2kzkveo2qz3ad',
|
||||
'info_dict': {
|
||||
'id': 'otbpltj2kzkveo2qz3ad',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'vctwevwntun3o0ikq7af',
|
||||
'uploader': 'Legacy Christian Church',
|
||||
'title': 'The Quest | 1: Beginner\'s Bay | Jamie Schools',
|
||||
'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg'
|
||||
}
|
||||
}, {
|
||||
'url': 'https://boxcast.tv/channel/z03fqwaeaby5lnaawox2?b=ssihlw5gvfij2by8tkev',
|
||||
'info_dict': {
|
||||
'id': 'ssihlw5gvfij2by8tkev',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg$',
|
||||
'release_date': '20230101',
|
||||
'uploader_id': 'ds25vaazhlu4ygcvffid',
|
||||
'release_timestamp': 1672543201,
|
||||
'uploader': 'Lighthouse Ministries International - Beltsville, Maryland',
|
||||
'description': 'md5:ac23e3d01b0b0be592e8f7fe0ec3a340',
|
||||
'title': 'New Year\'s Eve CROSSOVER Service at LHMI | December 31, 2022',
|
||||
}
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://childrenshealthdefense.eu/live-stream/',
|
||||
'info_dict': {
|
||||
'id': 'da1eqqgkacngd5djlqld',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w+-]+/){3}.+\.png$',
|
||||
'title': 'In the Midst of Darkness Light Prevails: An Interdisciplinary Symposium',
|
||||
'release_timestamp': 1670686812,
|
||||
'release_date': '20221210',
|
||||
'uploader_id': 're8w0v8hohhvpqtbskpe',
|
||||
'uploader': 'Children\'s Health Defense',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
webpage_json_data = self._search_json(
|
||||
r'var\s*BOXCAST_PRELOAD\s*=', webpage, 'broadcast data', display_id,
|
||||
transform_source=js_to_json, default={})
|
||||
|
||||
# Ref: https://support.boxcast.com/en/articles/4235158-build-a-custom-viewer-experience-with-boxcast-api
|
||||
broadcast_json_data = (
|
||||
traverse_obj(webpage_json_data, ('broadcast', 'data'))
|
||||
or self._download_json(f'https://api.boxcast.com/broadcasts/{display_id}', display_id))
|
||||
view_json_data = (
|
||||
traverse_obj(webpage_json_data, ('view', 'data'))
|
||||
or self._download_json(f'https://api.boxcast.com/broadcasts/{display_id}/view',
|
||||
display_id, fatal=False) or {})
|
||||
|
||||
formats, subtitles = [], {}
|
||||
if view_json_data.get('status') == 'recorded':
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
view_json_data['playlist'], display_id)
|
||||
|
||||
return {
|
||||
'id': str(broadcast_json_data['id']),
|
||||
'title': (broadcast_json_data.get('name')
|
||||
or self._html_search_meta(['og:title', 'twitter:title'], webpage)),
|
||||
'description': (broadcast_json_data.get('description')
|
||||
or self._html_search_meta(['og:description', 'twitter:description'], webpage)
|
||||
or None),
|
||||
'thumbnail': (broadcast_json_data.get('preview')
|
||||
or self._html_search_meta(['og:image', 'twitter:image'], webpage)),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'release_timestamp': unified_timestamp(broadcast_json_data.get('streamed_at')),
|
||||
'uploader': broadcast_json_data.get('account_name'),
|
||||
'uploader_id': broadcast_json_data.get('account_id'),
|
||||
}
|
318
plugins/youtube_download/yt_dlp/extractor/brainpop.py
Normal file
318
plugins/youtube_download/yt_dlp/extractor/brainpop.py
Normal file
@ -0,0 +1,318 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
classproperty,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
urljoin
|
||||
)
|
||||
|
||||
|
||||
class BrainPOPBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'brainpop'
|
||||
_ORIGIN = '' # So that _VALID_URL doesn't crash
|
||||
_LOGIN_ERRORS = {
|
||||
1502: 'The username and password you entered did not match.', # LOGIN_FAILED
|
||||
1503: 'Payment method is expired.', # LOGIN_FAILED_ACCOUNT_NOT_ACTIVE
|
||||
1506: 'Your BrainPOP plan has expired.', # LOGIN_FAILED_ACCOUNT_EXPIRED
|
||||
1507: 'Terms not accepted.', # LOGIN_FAILED_TERMS_NOT_ACCEPTED
|
||||
1508: 'Account not activated.', # LOGIN_FAILED_SUBSCRIPTION_NOT_ACTIVE
|
||||
1512: 'The maximum number of devices permitted are logged in with your account right now.', # LOGIN_FAILED_LOGIN_LIMIT_REACHED
|
||||
1513: 'You are trying to access your account from outside of its allowed IP range.', # LOGIN_FAILED_INVALID_IP
|
||||
1514: 'Individual accounts are not included in your plan. Try again with your shared username and password.', # LOGIN_FAILED_MBP_DISABLED
|
||||
1515: 'Account not activated.', # LOGIN_FAILED_TEACHER_NOT_ACTIVE
|
||||
1523: 'That username and password won\'t work on this BrainPOP site.', # LOGIN_FAILED_NO_ACCESS
|
||||
1524: 'You\'ll need to join a class before you can login.', # LOGIN_FAILED_STUDENT_NO_PERIOD
|
||||
1526: 'Your account is locked. Reset your password, or ask a teacher or administrator for help.', # LOGIN_FAILED_ACCOUNT_LOCKED
|
||||
}
|
||||
|
||||
@classproperty
|
||||
def _VALID_URL(cls):
|
||||
root = re.escape(cls._ORIGIN).replace(r'https:', r'https?:').replace(r'www\.', r'(?:www\.)?')
|
||||
return rf'{root}/(?P<slug>[^/]+/[^/]+/(?P<id>[^/?#&]+))'
|
||||
|
||||
def _assemble_formats(self, slug, format_id, display_id, token='', extra_fields={}):
|
||||
formats = []
|
||||
formats = self._extract_m3u8_formats(
|
||||
f'{urljoin(self._HLS_URL, slug)}.m3u8?{token}',
|
||||
display_id, 'mp4', m3u8_id=f'{format_id}-hls', fatal=False)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': f'{urljoin(self._VIDEO_URL, slug)}?{token}',
|
||||
})
|
||||
for f in formats:
|
||||
f.update(extra_fields)
|
||||
return formats
|
||||
|
||||
def _extract_adaptive_formats(self, data, token, display_id, key_format='%s', extra_fields={}):
|
||||
formats = []
|
||||
additional_key_formats = {
|
||||
'%s': {},
|
||||
'ad_%s': {
|
||||
'format_note': 'Audio description',
|
||||
'source_preference': -2
|
||||
}
|
||||
}
|
||||
for additional_key_format, additional_key_fields in additional_key_formats.items():
|
||||
for key_quality, key_index in enumerate(('high', 'low')):
|
||||
full_key_index = additional_key_format % (key_format % key_index)
|
||||
if data.get(full_key_index):
|
||||
formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, {
|
||||
'quality': -1 - key_quality,
|
||||
**additional_key_fields,
|
||||
**extra_fields
|
||||
}))
|
||||
return formats
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_res = self._download_json(
|
||||
'https://api.brainpop.com/api/login', None,
|
||||
data=json.dumps({'username': username, 'password': password}).encode(),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Referer': self._ORIGIN
|
||||
}, note='Logging in', errnote='Unable to log in', expected_status=400)
|
||||
status_code = int_or_none(login_res['status_code'])
|
||||
if status_code != 1505:
|
||||
self.report_warning(
|
||||
f'Unable to login: {self._LOGIN_ERRORS.get(status_code) or login_res.get("message")}'
|
||||
or f'Got status code {status_code}')
|
||||
|
||||
|
||||
class BrainPOPIE(BrainPOPBaseIE):
|
||||
_ORIGIN = 'https://www.brainpop.com'
|
||||
_VIDEO_URL = 'https://svideos.brainpop.com'
|
||||
_HLS_URL = 'https://hls.brainpop.com'
|
||||
_CDN_URL = 'https://cdn.brainpop.com'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.brainpop.com/health/conflictresolution/martinlutherkingjr/movie?ref=null',
|
||||
'md5': '3ead374233ae74c7f1b0029a01c972f0',
|
||||
'info_dict': {
|
||||
'id': '1f3259fa457292b4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Martin Luther King, Jr.',
|
||||
'display_id': 'martinlutherkingjr',
|
||||
'description': 'md5:f403dbb2bf3ccc7cf4c59d9e43e3c349',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.brainpop.com/science/space/bigbang/',
|
||||
'md5': '9a1ff0e77444dd9e437354eb669c87ec',
|
||||
'info_dict': {
|
||||
'id': 'acae52cd48c99acf',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Bang',
|
||||
'display_id': 'bigbang',
|
||||
'description': 'md5:3e53b766b0f116f631b13f4cae185d38',
|
||||
},
|
||||
'skip': 'Requires login',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
slug, display_id = self._match_valid_url(url).group('slug', 'id')
|
||||
movie_data = self._download_json(
|
||||
f'https://api.brainpop.com/api/content/published/bp/en/{slug}/movie?full=1', display_id,
|
||||
'Downloading movie data JSON', 'Unable to download movie data')['data']
|
||||
topic_data = traverse_obj(self._download_json(
|
||||
f'https://api.brainpop.com/api/content/published/bp/en/{slug}?full=1', display_id,
|
||||
'Downloading topic data JSON', 'Unable to download topic data', fatal=False),
|
||||
('data', 'topic'), expected_type=dict) or movie_data['topic']
|
||||
|
||||
if not traverse_obj(movie_data, ('access', 'allow')):
|
||||
reason = traverse_obj(movie_data, ('access', 'reason'))
|
||||
if 'logged' in reason:
|
||||
self.raise_login_required(reason, metadata_available=True)
|
||||
else:
|
||||
self.raise_no_formats(reason, video_id=display_id)
|
||||
movie_feature = movie_data['feature']
|
||||
movie_feature_data = movie_feature['data']
|
||||
|
||||
formats, subtitles = [], {}
|
||||
formats.extend(self._extract_adaptive_formats(movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', {
|
||||
'language': movie_feature.get('language') or 'en',
|
||||
'language_preference': 10
|
||||
}))
|
||||
for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items():
|
||||
formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', {
|
||||
'language': lang,
|
||||
'language_preference': -10
|
||||
}))
|
||||
|
||||
# TODO: Do localization fields also have subtitles?
|
||||
for name, url in movie_feature_data.items():
|
||||
lang = self._search_regex(
|
||||
r'^subtitles_(?P<lang>\w+)$', name, 'subtitle metadata', default=None)
|
||||
if lang and url:
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': urljoin(self._CDN_URL, url)
|
||||
})
|
||||
|
||||
return {
|
||||
'id': topic_data['topic_id'],
|
||||
'display_id': display_id,
|
||||
'title': topic_data.get('name'),
|
||||
'description': topic_data.get('synopsis'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class BrainPOPLegacyBaseIE(BrainPOPBaseIE):
|
||||
def _parse_js_topic_data(self, topic_data, display_id, token):
|
||||
movie_data = topic_data['movies']
|
||||
# TODO: Are there non-burned subtitles?
|
||||
formats = self._extract_adaptive_formats(movie_data, token, display_id)
|
||||
|
||||
return {
|
||||
'id': topic_data['EntryID'],
|
||||
'display_id': display_id,
|
||||
'title': topic_data.get('name'),
|
||||
'alt_title': topic_data.get('title'),
|
||||
'description': topic_data.get('synopsis'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
slug, display_id = self._match_valid_url(url).group('slug', 'id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
topic_data = self._search_json(
|
||||
r'var\s+content\s*=\s*', webpage, 'content data',
|
||||
display_id, end_pattern=';')['category']['unit']['topic']
|
||||
token = self._search_regex(r'ec_token\s*:\s*[\'"]([^\'"]+)', webpage, 'video token')
|
||||
return self._parse_js_topic_data(topic_data, display_id, token)
|
||||
|
||||
|
||||
class BrainPOPJrIE(BrainPOPLegacyBaseIE):
|
||||
_ORIGIN = 'https://jr.brainpop.com'
|
||||
_VIDEO_URL = 'https://svideos-jr.brainpop.com'
|
||||
_HLS_URL = 'https://hls-jr.brainpop.com'
|
||||
_CDN_URL = 'https://cdn-jr.brainpop.com'
|
||||
_TESTS = [{
|
||||
'url': 'https://jr.brainpop.com/health/feelingsandsel/emotions/',
|
||||
'md5': '04e0561bb21770f305a0ce6cf0d869ab',
|
||||
'info_dict': {
|
||||
'id': '347',
|
||||
'ext': 'mp4',
|
||||
'title': 'Emotions',
|
||||
'display_id': 'emotions',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://jr.brainpop.com/science/habitats/arctichabitats/',
|
||||
'md5': 'b0ed063bbd1910df00220ee29340f5d6',
|
||||
'info_dict': {
|
||||
'id': '29',
|
||||
'ext': 'mp4',
|
||||
'title': 'Arctic Habitats',
|
||||
'display_id': 'arctichabitats',
|
||||
},
|
||||
'skip': 'Requires login',
|
||||
}]
|
||||
|
||||
|
||||
class BrainPOPELLIE(BrainPOPLegacyBaseIE):
|
||||
_ORIGIN = 'https://ell.brainpop.com'
|
||||
_VIDEO_URL = 'https://svideos-esl.brainpop.com'
|
||||
_HLS_URL = 'https://hls-esl.brainpop.com'
|
||||
_CDN_URL = 'https://cdn-esl.brainpop.com'
|
||||
_TESTS = [{
|
||||
'url': 'https://ell.brainpop.com/level1/unit1/lesson1/',
|
||||
'md5': 'a2012700cfb774acb7ad2e8834eed0d0',
|
||||
'info_dict': {
|
||||
'id': '1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lesson 1',
|
||||
'display_id': 'lesson1',
|
||||
'alt_title': 'Personal Pronouns',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://ell.brainpop.com/level3/unit6/lesson5/',
|
||||
'md5': 'be19c8292c87b24aacfb5fda2f3f8363',
|
||||
'info_dict': {
|
||||
'id': '101',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lesson 5',
|
||||
'display_id': 'lesson5',
|
||||
'alt_title': 'Review: Unit 6',
|
||||
},
|
||||
'skip': 'Requires login',
|
||||
}]
|
||||
|
||||
|
||||
class BrainPOPEspIE(BrainPOPLegacyBaseIE):
|
||||
IE_DESC = 'BrainPOP Español'
|
||||
_ORIGIN = 'https://esp.brainpop.com'
|
||||
_VIDEO_URL = 'https://svideos.brainpop.com'
|
||||
_HLS_URL = 'https://hls.brainpop.com'
|
||||
_CDN_URL = 'https://cdn.brainpop.com/mx'
|
||||
_TESTS = [{
|
||||
'url': 'https://esp.brainpop.com/ciencia/la_diversidad_de_la_vida/ecosistemas/',
|
||||
'md5': 'cb3f062db2b3c5240ddfcfde7108f8c9',
|
||||
'info_dict': {
|
||||
'id': '3893',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ecosistemas',
|
||||
'display_id': 'ecosistemas',
|
||||
'description': 'md5:80fc55b07e241f8c8f2aa8d74deaf3c3',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://esp.brainpop.com/espanol/la_escritura/emily_dickinson/',
|
||||
'md5': '98c1b9559e0e33777209c425cda7dac4',
|
||||
'info_dict': {
|
||||
'id': '7146',
|
||||
'ext': 'mp4',
|
||||
'title': 'Emily Dickinson',
|
||||
'display_id': 'emily_dickinson',
|
||||
'description': 'md5:2795ad87b1d239c9711c1e92ab5a978b',
|
||||
},
|
||||
'skip': 'Requires login',
|
||||
}]
|
||||
|
||||
|
||||
class BrainPOPFrIE(BrainPOPLegacyBaseIE):
|
||||
IE_DESC = 'BrainPOP Français'
|
||||
_ORIGIN = 'https://fr.brainpop.com'
|
||||
_VIDEO_URL = 'https://svideos.brainpop.com'
|
||||
_HLS_URL = 'https://hls.brainpop.com'
|
||||
_CDN_URL = 'https://cdn.brainpop.com/fr'
|
||||
_TESTS = [{
|
||||
'url': 'https://fr.brainpop.com/sciencesdelaterre/energie/sourcesdenergie/',
|
||||
'md5': '97e7f48af8af93f8a2be11709f239371',
|
||||
'info_dict': {
|
||||
'id': '1651',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sources d\'énergie',
|
||||
'display_id': 'sourcesdenergie',
|
||||
'description': 'md5:7eece350f019a21ef9f64d4088b2d857',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://fr.brainpop.com/francais/ecrire/plagiat/',
|
||||
'md5': '0cf2b4f89804d0dd4a360a51310d445a',
|
||||
'info_dict': {
|
||||
'id': '5803',
|
||||
'ext': 'mp4',
|
||||
'title': 'Plagiat',
|
||||
'display_id': 'plagiat',
|
||||
'description': 'md5:4496d87127ace28e8b1eda116e77cd2b',
|
||||
},
|
||||
'skip': 'Requires login',
|
||||
}]
|
||||
|
||||
|
||||
class BrainPOPIlIE(BrainPOPLegacyBaseIE):
|
||||
IE_DESC = 'BrainPOP Hebrew'
|
||||
_ORIGIN = 'https://il.brainpop.com'
|
||||
_VIDEO_URL = 'https://svideos.brainpop.com'
|
||||
_HLS_URL = 'https://hls.brainpop.com'
|
||||
_CDN_URL = 'https://cdn.brainpop.com/he'
|
||||
_TESTS = [{
|
||||
'url': 'https://il.brainpop.com/category_9/subcategory_150/subjects_3782/',
|
||||
'md5': '9e4ea9dc60ecd385a6e5ca12ccf31641',
|
||||
'info_dict': {
|
||||
'id': '3782',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:e993632fcda0545d9205602ec314ad67',
|
||||
'display_id': 'subjects_3782',
|
||||
'description': 'md5:4cc084a8012beb01f037724423a4d4ed',
|
||||
},
|
||||
}]
|
@ -1,117 +1,189 @@
|
||||
import re
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
try_get,
|
||||
dict_get,
|
||||
get_element_html_by_class,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BravoTVIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
|
||||
'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
|
||||
'info_dict': {
|
||||
'id': 'epL0pmK1kQlT',
|
||||
'id': '3923059',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Top Chef Season 16 Winner Is...',
|
||||
'description': 'Find out who takes the title of Top Chef!',
|
||||
'uploader': 'NBCU-BRAV',
|
||||
'upload_date': '20190314',
|
||||
'timestamp': 1552591860,
|
||||
'season_number': 16,
|
||||
'episode_number': 15,
|
||||
'series': 'Top Chef',
|
||||
'episode': 'The Top Chef Season 16 Winner Is...',
|
||||
'duration': 190.0,
|
||||
}
|
||||
'duration': 190.357,
|
||||
'season': 'Season 16',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||
'only_matching': True,
|
||||
'url': 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling',
|
||||
'info_dict': {
|
||||
'id': '9000234570',
|
||||
'ext': 'mp4',
|
||||
'title': 'London Calling',
|
||||
'description': 'md5:5af95a8cbac1856bd10e7562f86bb759',
|
||||
'upload_date': '20230310',
|
||||
'timestamp': 1678410000,
|
||||
'season_number': 20,
|
||||
'episode_number': 1,
|
||||
'series': 'Top Chef',
|
||||
'episode': 'London Calling',
|
||||
'duration': 3266.03,
|
||||
'season': 'Season 20',
|
||||
'chapters': 'count:7',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}, {
|
||||
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night',
|
||||
'info_dict': {
|
||||
'id': '3692045',
|
||||
'ext': 'mp4',
|
||||
'title': 'Closing Night',
|
||||
'description': 'md5:3170065c5c2f19548d72a4cbc254af63',
|
||||
'upload_date': '20180401',
|
||||
'timestamp': 1522623600,
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
'series': 'In Ice Cold Blood',
|
||||
'episode': 'Closing Night',
|
||||
'duration': 2629.051,
|
||||
'season': 'Season 1',
|
||||
'chapters': 'count:6',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}, {
|
||||
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
|
||||
'info_dict': {
|
||||
'id': '3974019',
|
||||
'ext': 'mp4',
|
||||
'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
|
||||
'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5',
|
||||
'upload_date': '20190617',
|
||||
'timestamp': 1560790800,
|
||||
'season_number': 2,
|
||||
'episode_number': 16,
|
||||
'series': 'In Ice Cold Blood',
|
||||
'episode': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
|
||||
'duration': 68.235,
|
||||
'season': 'Season 2',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
site, display_id = self._match_valid_url(url).groups()
|
||||
site, display_id = self._match_valid_url(url).group('site', 'id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
settings = self._parse_json(self._search_regex(
|
||||
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
|
||||
display_id)
|
||||
info = {}
|
||||
settings = self._search_json(
|
||||
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>', webpage, 'settings', display_id)
|
||||
tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '')
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
'formats': 'm3u,mpeg4',
|
||||
}
|
||||
account_pid, release_pid = [None] * 2
|
||||
tve = settings.get('ls_tve')
|
||||
|
||||
if tve:
|
||||
query['manifest'] = 'm3u'
|
||||
mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage)
|
||||
if mobj:
|
||||
account_pid, tp_path = mobj.groups()
|
||||
release_pid = tp_path.strip('/').split('/')[-1]
|
||||
else:
|
||||
account_pid = 'HNK2IC'
|
||||
tp_path = release_pid = tve['release_pid']
|
||||
if tve.get('entitlement') == 'auth':
|
||||
adobe_pass = settings.get('tve_adobe_auth', {})
|
||||
if site == 'bravotv':
|
||||
site = 'bravo'
|
||||
account_pid = tve.get('data-mpx-media-account-pid') or 'HNK2IC'
|
||||
account_id = tve['data-mpx-media-account-id']
|
||||
metadata = self._parse_json(
|
||||
tve.get('data-normalized-video', ''), display_id, fatal=False, transform_source=unescapeHTML)
|
||||
video_id = tve.get('data-guid') or metadata['guid']
|
||||
if tve.get('data-entitlement') == 'auth':
|
||||
auth = traverse_obj(settings, ('tve_adobe_auth', {dict})) or {}
|
||||
site = remove_end(site, 'tv')
|
||||
release_pid = tve['data-release-pid']
|
||||
resource = self._get_mvpd_resource(
|
||||
adobe_pass.get('adobePassResourceId') or site,
|
||||
tve['title'], release_pid, tve.get('rating'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, release_pid,
|
||||
adobe_pass.get('adobePassRequestorId') or site, resource)
|
||||
tve.get('data-adobe-pass-resource-id') or auth.get('adobePassResourceId') or site,
|
||||
tve['data-title'], release_pid, tve.get('data-rating'))
|
||||
query.update({
|
||||
'switch': 'HLSServiceSecure',
|
||||
'auth': self._extract_mvpd_auth(
|
||||
url, release_pid, auth.get('adobePassRequestorId') or site, resource),
|
||||
})
|
||||
|
||||
else:
|
||||
shared_playlist = settings['ls_playlist']
|
||||
account_pid = shared_playlist['account_pid']
|
||||
metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']]
|
||||
tp_path = release_pid = metadata.get('release_pid')
|
||||
if not release_pid:
|
||||
release_pid = metadata['guid']
|
||||
tp_path = 'media/guid/2140479951/' + release_pid
|
||||
info.update({
|
||||
'title': metadata['title'],
|
||||
'description': metadata.get('description'),
|
||||
'season_number': int_or_none(metadata.get('season_num')),
|
||||
'episode_number': int_or_none(metadata.get('episode_num')),
|
||||
})
|
||||
query['switch'] = 'progressive'
|
||||
|
||||
tp_url = 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path)
|
||||
ls_playlist = traverse_obj(settings, ('ls_playlist', ..., {dict}), get_all=False) or {}
|
||||
account_pid = ls_playlist.get('mpxMediaAccountPid') or 'PHSl-B'
|
||||
account_id = ls_playlist['mpxMediaAccountId']
|
||||
video_id = ls_playlist['defaultGuid']
|
||||
metadata = traverse_obj(
|
||||
ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, {dict}), get_all=False)
|
||||
|
||||
tp_url = f'https://link.theplatform.com/s/{account_pid}/media/guid/{account_id}/{video_id}'
|
||||
tp_metadata = self._download_json(
|
||||
update_url_query(tp_url, {'format': 'preview'}),
|
||||
display_id, fatal=False)
|
||||
if tp_metadata:
|
||||
info.update({
|
||||
'title': tp_metadata.get('title'),
|
||||
'description': tp_metadata.get('description'),
|
||||
'duration': float_or_none(tp_metadata.get('duration'), 1000),
|
||||
'season_number': int_or_none(
|
||||
dict_get(tp_metadata, ('pl1$seasonNumber', 'nbcu$seasonNumber'))),
|
||||
'episode_number': int_or_none(
|
||||
dict_get(tp_metadata, ('pl1$episodeNumber', 'nbcu$episodeNumber'))),
|
||||
# For some reason the series is sometimes wrapped into a single element array.
|
||||
'series': try_get(
|
||||
dict_get(tp_metadata, ('pl1$show', 'nbcu$show')),
|
||||
lambda x: x[0] if isinstance(x, list) else x,
|
||||
expected_type=str),
|
||||
'episode': dict_get(
|
||||
tp_metadata, ('pl1$episodeName', 'nbcu$episodeName', 'title')),
|
||||
})
|
||||
update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
|
||||
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'id': release_pid,
|
||||
'url': smuggle_url(update_url_query(tp_url, query), {'force_smil_url': True}),
|
||||
'ie_key': 'ThePlatform',
|
||||
})
|
||||
return info
|
||||
seconds_or_none = lambda x: float_or_none(x, 1000)
|
||||
chapters = traverse_obj(tp_metadata, ('chapters', ..., {
|
||||
'start_time': ('startTime', {seconds_or_none}),
|
||||
'end_time': ('endTime', {seconds_or_none}),
|
||||
}))
|
||||
# prune pointless single chapters that span the entire duration from short videos
|
||||
if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
|
||||
chapters = None
|
||||
|
||||
m3u8_url = self._request_webpage(HEADRequest(
|
||||
update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').url
|
||||
if 'mpeg_cenc' in m3u8_url:
|
||||
self.report_drm(video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': chapters,
|
||||
**merge_dicts(traverse_obj(tp_metadata, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('duration', {seconds_or_none}),
|
||||
'timestamp': ('pubDate', {seconds_or_none}),
|
||||
'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
|
||||
'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
|
||||
'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
|
||||
'episode': (('title', 'pl1$episodeNumber', 'nbcu$episodeNumber'), {str_or_none}),
|
||||
'age_limit': ('ratings', ..., 'rating', {parse_age_limit}),
|
||||
}, get_all=False), traverse_obj(metadata, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('durationInSeconds', {int_or_none}),
|
||||
'timestamp': ('airDate', {unified_timestamp}),
|
||||
'thumbnail': ('thumbnailUrl', {url_or_none}),
|
||||
'season_number': ('seasonNumber', {int_or_none}),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'episode': 'episodeTitle',
|
||||
'series': 'show',
|
||||
}))
|
||||
}
|
||||
|
@ -7,10 +7,10 @@ from .adobepass import AdobePassIE
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_HTTPError,
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
dict_get,
|
||||
@ -575,6 +575,7 @@ class BrightcoveNewBaseIE(AdobePassIE):
|
||||
self.raise_no_formats(
|
||||
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||
|
||||
headers.pop('Authorization', None) # or else http formats will give error 400
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {}).update(headers)
|
||||
|
||||
@ -895,8 +896,9 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
store_pk(policy_key)
|
||||
return policy_key
|
||||
|
||||
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
|
||||
headers = {}
|
||||
token = smuggled_data.get('token')
|
||||
api_url = f'https://{"edge-auth" if token else "edge"}.api.brightcove.com/playback/v1/accounts/{account_id}/{content_type}s/{video_id}'
|
||||
headers = {'Authorization': f'Bearer {token}'} if token else {}
|
||||
referrer = smuggled_data.get('referrer') # XXX: notice the spelling/case of the key
|
||||
if referrer:
|
||||
headers.update({
|
||||
@ -913,8 +915,8 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
json_data = self._download_json(api_url, video_id, headers=headers)
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
|
||||
json_data = self._parse_json(e.cause.response.read().decode(), video_id)[0]
|
||||
message = json_data.get('message') or json_data['error_code']
|
||||
if json_data.get('error_subcode') == 'CLIENT_GEO':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
|
@ -1,9 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
traverse_obj,
|
||||
float_or_none,
|
||||
int_or_none
|
||||
)
|
||||
from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj
|
||||
|
||||
|
||||
class CallinIE(InfoExtractor):
|
||||
@ -35,6 +31,54 @@ class CallinIE(InfoExtractor):
|
||||
'episode_number': 1,
|
||||
'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd'
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
|
||||
'md5': '14ede27ee2c957b7e4db93140fc0745c',
|
||||
'info_dict': {
|
||||
'id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
|
||||
'ext': 'ts',
|
||||
'title': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
|
||||
'description': 'Or, why the government doesn’t like SpaceX',
|
||||
'channel': 'The Pull Request',
|
||||
'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa',
|
||||
'duration': 3182.472,
|
||||
'series_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638',
|
||||
'uploader_url': 'http://thepullrequest.com',
|
||||
'upload_date': '20220902',
|
||||
'episode': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
|
||||
'display_id': 'fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
|
||||
'series': 'The Pull Request',
|
||||
'channel_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638',
|
||||
'view_count': int,
|
||||
'uploader': 'Antonio García Martínez',
|
||||
'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/1ade9142625344045dc17cf523469ced1d93610762f4c886d06aa190a2f979e8.png',
|
||||
'episode_id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
|
||||
'timestamp': 1662100688.005,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
|
||||
'md5': '16f704ddbf82a27e3930533b12062f07',
|
||||
'info_dict': {
|
||||
'id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
|
||||
'ext': 'ts',
|
||||
'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
|
||||
'description': 'Let’s talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.',
|
||||
'channel': 'The DEBRIEF With Briahna Joy Gray',
|
||||
'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm',
|
||||
'duration': 10043.16,
|
||||
'series_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7',
|
||||
'uploader_url': 'http://patreon.com/badfaithpodcast',
|
||||
'upload_date': '20220826',
|
||||
'episode': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
|
||||
'display_id': 'episode-',
|
||||
'series': 'The DEBRIEF With Briahna Joy Gray',
|
||||
'channel_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7',
|
||||
'view_count': int,
|
||||
'uploader': 'Briahna Gray',
|
||||
'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/461ea0d86172cb6aff7d6c80fd49259cf5e64bdf737a4650f8bc24cf392ca218.png',
|
||||
'episode_id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
|
||||
'timestamp': 1661476708.282,
|
||||
}
|
||||
}]
|
||||
|
||||
def try_get_user_name(self, d):
|
||||
@ -86,6 +130,7 @@ class CallinIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': id,
|
||||
'_old_archive_ids': [make_archive_id(self, display_id.rsplit('-', 1)[-1])],
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
|
85
plugins/youtube_download/yt_dlp/extractor/camfm.py
Normal file
85
plugins/youtube_download/yt_dlp/extractor/camfm.py
Normal file
@ -0,0 +1,85 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
get_elements_by_class,
|
||||
join_nonempty,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class CamFMShowIE(InfoExtractor):
|
||||
_VALID_URL = r'https://(?:www\.)?camfm\.co\.uk/shows/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'playlist_mincount': 5,
|
||||
'url': 'https://camfm.co.uk/shows/soul-mining/',
|
||||
'info_dict': {
|
||||
'id': 'soul-mining',
|
||||
'thumbnail': 'md5:6a873091f92c936f23bdcce80f75e66a',
|
||||
'title': 'Soul Mining',
|
||||
'description': 'Telling the stories of jazz, funk and soul from all corners of the world.',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
page = self._download_webpage(url, show_id)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': show_id,
|
||||
'entries': [self.url_result(urljoin('https://camfm.co.uk', i), CamFMEpisodeIE)
|
||||
for i in re.findall(r"javascript:popup\('(/player/[^']+)', 'listen'", page)],
|
||||
'thumbnail': urljoin('https://camfm.co.uk', self._search_regex(
|
||||
r'<img[^>]+class="thumb-expand"[^>]+src="([^"]+)"', page, 'thumbnail', fatal=False)),
|
||||
'title': self._html_search_regex('<h1>([^<]+)</h1>', page, 'title', fatal=False),
|
||||
'description': clean_html(get_element_by_class('small-12 medium-8 cell', page))
|
||||
}
|
||||
|
||||
|
||||
class CamFMEpisodeIE(InfoExtractor):
|
||||
_VALID_URL = r'https://(?:www\.)?camfm\.co\.uk/player/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://camfm.co.uk/player/43336',
|
||||
'skip': 'Episode will expire - don\'t actually know when, but it will go eventually',
|
||||
'info_dict': {
|
||||
'id': '43336',
|
||||
'title': 'AITAA: Am I the Agony Aunt? - 19:00 Tue 16/05/2023',
|
||||
'ext': 'mp3',
|
||||
'upload_date': '20230516',
|
||||
'description': 'md5:f165144f94927c0f1bfa2ee6e6ab7bbf',
|
||||
'timestamp': 1684263600,
|
||||
'series': 'AITAA: Am I the Agony Aunt?',
|
||||
'thumbnail': 'md5:5980a831360d0744c3764551be3d09c1',
|
||||
'categories': ['Entertainment'],
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
page = self._download_webpage(url, episode_id)
|
||||
audios = self._parse_html5_media_entries('https://audio.camfm.co.uk', page, episode_id)
|
||||
|
||||
caption = get_element_by_class('caption', page)
|
||||
series = clean_html(re.sub(r'<span[^<]+<[^<]+>', '', caption))
|
||||
|
||||
card_section = get_element_by_class('card-section', page)
|
||||
date = self._html_search_regex('>Aired at ([^<]+)<', card_section, 'air date', fatal=False)
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'title': join_nonempty(series, date, delim=' - '),
|
||||
'formats': traverse_obj(audios, (..., 'formats', ...)),
|
||||
'timestamp': unified_timestamp(date), # XXX: Does not account for UK's daylight savings
|
||||
'series': series,
|
||||
'description': clean_html(re.sub(r'<b>[^<]+</b><br[^>]+/>', '', card_section)),
|
||||
'thumbnail': urljoin('https://camfm.co.uk', self._search_regex(
|
||||
r'<div[^>]+class="cover-art"[^>]+style="[^"]+url\(\'([^\']+)',
|
||||
page, 'thumbnail', fatal=False)),
|
||||
'categories': get_elements_by_class('label', caption),
|
||||
'was_live': True,
|
||||
}
|
@ -1,9 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils import int_or_none, url_or_none
|
||||
|
||||
|
||||
class CamModelsIE(InfoExtractor):
|
||||
@ -17,32 +13,11 @@ class CamModelsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, user_id, headers=self.geo_verification_headers())
|
||||
|
||||
manifest_root = self._html_search_regex(
|
||||
r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
|
||||
|
||||
if not manifest_root:
|
||||
ERRORS = (
|
||||
("I'm offline, but let's stay connected", 'This user is currently offline'),
|
||||
('in a private show', 'This user is in a private show'),
|
||||
('is currently performing LIVE', 'This model is currently performing live'),
|
||||
)
|
||||
for pattern, message in ERRORS:
|
||||
if pattern in webpage:
|
||||
error = message
|
||||
expected = True
|
||||
break
|
||||
else:
|
||||
error = 'Unable to find manifest URL root'
|
||||
expected = False
|
||||
raise ExtractorError(error, expected=expected)
|
||||
|
||||
manifest = self._download_json(
|
||||
'%s%s.json' % (manifest_root, user_id), user_id)
|
||||
'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id)
|
||||
|
||||
formats = []
|
||||
thumbnails = []
|
||||
for format_id, format_dict in manifest['formats'].items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
@ -82,12 +57,20 @@ class CamModelsIE(InfoExtractor):
|
||||
'quality': -10,
|
||||
})
|
||||
else:
|
||||
if format_id == 'jpeg':
|
||||
thumbnails.append({
|
||||
'url': f['url'],
|
||||
'width': f['width'],
|
||||
'height': f['height'],
|
||||
'format_id': f['format_id'],
|
||||
})
|
||||
continue
|
||||
formats.append(f)
|
||||
|
||||
return {
|
||||
'id': user_id,
|
||||
'title': user_id,
|
||||
'thumbnails': thumbnails,
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
'age_limit': 18
|
||||
|
@ -64,7 +64,7 @@ class CanalplusIE(InfoExtractor):
|
||||
# response = self._request_webpage(
|
||||
# HEADRequest(fmt_url), video_id,
|
||||
# 'Checking if the video is georestricted')
|
||||
# if '/blocage' in response.geturl():
|
||||
# if '/blocage' in response.url:
|
||||
# raise ExtractorError(
|
||||
# 'The video is not available in your country',
|
||||
# expected=True)
|
||||
|
@ -1,383 +0,0 @@
|
||||
import json
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .gigya import GigyaBaseIE
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata
|
||||
)
|
||||
|
||||
|
||||
class CanvasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
|
||||
'info_dict': {
|
||||
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nachtwacht: De Greystook',
|
||||
'description': 'Nachtwacht: De Greystook',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1468.02,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}, {
|
||||
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_HLS_ENTRY_PROTOCOLS_MAP = {
|
||||
'HLS': 'm3u8_native',
|
||||
'HLS_AES': 'm3u8_native',
|
||||
}
|
||||
_REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
site_id, video_id = mobj.group('site_id'), mobj.group('id')
|
||||
|
||||
data = None
|
||||
if site_id != 'vrtvideo':
|
||||
# Old API endpoint, serves more formats but may fail for some videos
|
||||
data = self._download_json(
|
||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||
% (site_id, video_id), video_id, 'Downloading asset JSON',
|
||||
'Unable to download asset JSON', fatal=False)
|
||||
|
||||
# New API endpoint
|
||||
if not data:
|
||||
vrtnutoken = self._download_json('https://token.vrt.be/refreshtoken',
|
||||
video_id, note='refreshtoken: Retrieve vrtnutoken',
|
||||
errnote='refreshtoken failed')['vrtnutoken']
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({'Content-Type': 'application/json; charset=utf-8'})
|
||||
vrtPlayerToken = self._download_json(
|
||||
'%s/tokens' % self._REST_API_BASE, video_id,
|
||||
'Downloading token', headers=headers, data=json.dumps({
|
||||
'identityToken': vrtnutoken
|
||||
}).encode('utf-8'))['vrtPlayerToken']
|
||||
data = self._download_json(
|
||||
'%s/videos/%s' % (self._REST_API_BASE, video_id),
|
||||
video_id, 'Downloading video JSON', query={
|
||||
'vrtPlayerToken': vrtPlayerToken,
|
||||
'client': 'null',
|
||||
}, expected_status=400)
|
||||
if 'title' not in data:
|
||||
code = data.get('code')
|
||||
if code == 'AUTHENTICATION_REQUIRED':
|
||||
self.raise_login_required()
|
||||
elif code == 'INVALID_LOCATION':
|
||||
self.raise_geo_restricted(countries=['BE'])
|
||||
raise ExtractorError(data.get('message') or code, expected=True)
|
||||
|
||||
# Note: The title may be an empty string
|
||||
title = data['title'] or f'{site_id} {video_id}'
|
||||
description = data.get('description')
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for target in data['targetUrls']:
|
||||
format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type'))
|
||||
if not format_url or not format_type:
|
||||
continue
|
||||
format_type = format_type.upper()
|
||||
if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
|
||||
m3u8_id=format_type, fatal=False)
|
||||
formats.extend(fmts)
|
||||
subtitles = self._merge_subtitles(subtitles, subs)
|
||||
elif format_type == 'HDS':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, video_id, f4m_id=format_type, fatal=False))
|
||||
elif format_type == 'MPEG_DASH':
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
format_url, video_id, mpd_id=format_type, fatal=False)
|
||||
formats.extend(fmts)
|
||||
subtitles = self._merge_subtitles(subtitles, subs)
|
||||
elif format_type == 'HSS':
|
||||
fmts, subs = self._extract_ism_formats_and_subtitles(
|
||||
format_url, video_id, ism_id='mss', fatal=False)
|
||||
formats.extend(fmts)
|
||||
subtitles = self._merge_subtitles(subtitles, subs)
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_type,
|
||||
'url': format_url,
|
||||
})
|
||||
|
||||
subtitle_urls = data.get('subtitleUrls')
|
||||
if isinstance(subtitle_urls, list):
|
||||
for subtitle in subtitle_urls:
|
||||
subtitle_url = subtitle.get('url')
|
||||
if subtitle_url and subtitle.get('type') == 'CLOSED':
|
||||
subtitles.setdefault('nl', []).append({'url': subtitle_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration'), 1000),
|
||||
'thumbnail': data.get('posterImageUrl'),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class CanvasEenIE(InfoExtractor):
|
||||
IE_DESC = 'canvas.be and een.be'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
|
||||
'md5': 'ed66976748d12350b118455979cca293',
|
||||
'info_dict': {
|
||||
'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
|
||||
'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
|
||||
'ext': 'flv',
|
||||
'title': 'De afspraak veilt voor de Warmste Week',
|
||||
'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 49.02,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}, {
|
||||
# with subtitles
|
||||
'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
|
||||
'info_dict': {
|
||||
'id': 'mz-ast-5240ff21-2d30-4101-bba6-92b5ec67c625',
|
||||
'display_id': 'pieter-0167',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pieter 0167',
|
||||
'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 2553.08,
|
||||
'subtitles': {
|
||||
'nl': [{
|
||||
'ext': 'vtt',
|
||||
}],
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Pagina niet gevonden',
|
||||
}, {
|
||||
'url': 'https://www.een.be/thuis/emma-pakt-thilly-aan',
|
||||
'info_dict': {
|
||||
'id': 'md-ast-3a24ced2-64d7-44fb-b4ed-ed1aafbf90b8',
|
||||
'display_id': 'emma-pakt-thilly-aan',
|
||||
'ext': 'mp4',
|
||||
'title': 'Emma pakt Thilly aan',
|
||||
'description': 'md5:c5c9b572388a99b2690030afa3f3bad7',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 118.24,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}, {
|
||||
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
site_id, display_id = mobj.group('site_id'), mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = strip_or_none(self._search_regex(
|
||||
r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
webpage, default=None))
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
|
||||
group='id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
|
||||
|
||||
class VrtNUIE(GigyaBaseIE):
|
||||
IE_DESC = 'VrtNU.be'
|
||||
_VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
# Available via old API endpoint
|
||||
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1989/postbus-x-s1989a1/',
|
||||
'info_dict': {
|
||||
'id': 'pbs-pub-e8713dac-899e-41de-9313-81269f4c04ac$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
|
||||
'ext': 'mp4',
|
||||
'title': 'Postbus X - Aflevering 1 (Seizoen 1989)',
|
||||
'description': 'md5:b704f669eb9262da4c55b33d7c6ed4b7',
|
||||
'duration': 1457.04,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'series': 'Postbus X',
|
||||
'season': 'Seizoen 1989',
|
||||
'season_number': 1989,
|
||||
'episode': 'De zwarte weduwe',
|
||||
'episode_number': 1,
|
||||
'timestamp': 1595822400,
|
||||
'upload_date': '20200727',
|
||||
},
|
||||
'skip': 'This video is only available for registered users',
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}, {
|
||||
# Only available via new API endpoint
|
||||
'url': 'https://www.vrt.be/vrtnu/a-z/kamp-waes/1/kamp-waes-s1a5/',
|
||||
'info_dict': {
|
||||
'id': 'pbs-pub-0763b56c-64fb-4d38-b95b-af60bf433c71$vid-ad36a73c-4735-4f1f-b2c0-a38e6e6aa7e1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aflevering 5',
|
||||
'description': 'Wie valt door de mand tijdens een missie?',
|
||||
'duration': 2967.06,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode_number': 5,
|
||||
},
|
||||
'skip': 'This video is only available for registered users',
|
||||
'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
|
||||
}]
|
||||
_NETRC_MACHINE = 'vrtnu'
|
||||
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
|
||||
_CONTEXT_ID = 'R3595707040'
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
auth_info = self._gigya_login({
|
||||
'APIKey': self._APIKEY,
|
||||
'targetEnv': 'jssdk',
|
||||
'loginID': username,
|
||||
'password': password,
|
||||
'authMode': 'cookie',
|
||||
})
|
||||
|
||||
if auth_info.get('errorDetails'):
|
||||
raise ExtractorError('Unable to login: VrtNU said: ' + auth_info.get('errorDetails'), expected=True)
|
||||
|
||||
# Sometimes authentication fails for no good reason, retry
|
||||
login_attempt = 1
|
||||
while login_attempt <= 3:
|
||||
try:
|
||||
self._request_webpage('https://token.vrt.be/vrtnuinitlogin',
|
||||
None, note='Requesting XSRF Token', errnote='Could not get XSRF Token',
|
||||
query={'provider': 'site', 'destination': 'https://www.vrt.be/vrtnu/'})
|
||||
|
||||
post_data = {
|
||||
'UID': auth_info['UID'],
|
||||
'UIDSignature': auth_info['UIDSignature'],
|
||||
'signatureTimestamp': auth_info['signatureTimestamp'],
|
||||
'_csrf': self._get_cookies('https://login.vrt.be').get('OIDCXSRF').value,
|
||||
}
|
||||
|
||||
self._request_webpage(
|
||||
'https://login.vrt.be/perform_login',
|
||||
None, note='Performing login', errnote='perform login failed',
|
||||
headers={}, query={
|
||||
'client_id': 'vrtnu-site'
|
||||
}, data=urlencode_postdata(post_data))
|
||||
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
login_attempt += 1
|
||||
self.report_warning('Authentication failed')
|
||||
self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
|
||||
else:
|
||||
raise e
|
||||
else:
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
attrs = extract_attributes(self._search_regex(
|
||||
r'(<nui-media[^>]+>)', webpage, 'media element'))
|
||||
video_id = attrs['videoid']
|
||||
publication_id = attrs.get('publicationid')
|
||||
if publication_id:
|
||||
video_id = publication_id + '$' + video_id
|
||||
|
||||
page = (self._parse_json(self._search_regex(
|
||||
r'digitalData\s*=\s*({.+?});', webpage, 'digial data',
|
||||
default='{}'), video_id, fatal=False) or {}).get('page') or {}
|
||||
|
||||
info = self._search_json_ld(webpage, display_id, default={})
|
||||
return merge_dicts(info, {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'season_number': int_or_none(page.get('episode_season')),
|
||||
})
|
||||
|
||||
|
||||
class DagelijkseKostIE(InfoExtractor):
|
||||
IE_DESC = 'dagelijksekost.een.be'
|
||||
_VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
|
||||
'md5': '30bfffc323009a3e5f689bef6efa2365',
|
||||
'info_dict': {
|
||||
'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
|
||||
'display_id': 'hachis-parmentier-met-witloof',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hachis parmentier met witloof',
|
||||
'description': 'md5:9960478392d87f63567b5b117688cdc5',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 283.02,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = strip_or_none(get_element_by_class(
|
||||
'dish-metadata__title', webpage
|
||||
) or self._html_search_meta(
|
||||
'twitter:title', webpage))
|
||||
|
||||
description = clean_html(get_element_by_class(
|
||||
'dish-description', webpage)
|
||||
) or self._html_search_meta(
|
||||
('description', 'twitter:description', 'og:description'),
|
||||
webpage)
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
|
||||
group='id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/dako/assets/%s' % video_id,
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
@ -8,14 +8,16 @@ from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@ -159,7 +161,7 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'upload_date': '20160210',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
'skip': 'Geo-restricted to Canada and no longer available',
|
||||
}, {
|
||||
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||
'url': 'http://www.cbc.ca/player/play/2657631896',
|
||||
@ -172,6 +174,9 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'timestamp': 1425704400,
|
||||
'upload_date': '20150307',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
||||
'chapters': [],
|
||||
'duration': 494.811,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cbc.ca/player/play/2164402062',
|
||||
@ -184,6 +189,28 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'timestamp': 1320410746,
|
||||
'upload_date': '20111104',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
||||
'chapters': [],
|
||||
'duration': 186.867,
|
||||
},
|
||||
}, {
|
||||
# Has subtitles
|
||||
# These broadcasts expire after ~1 month, can find new test URL here:
|
||||
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
|
||||
'url': 'http://www.cbc.ca/player/play/2249992771553',
|
||||
'md5': '2f2fb675dd4f0f8a5bb7588d1b13bacd',
|
||||
'info_dict': {
|
||||
'id': '2249992771553',
|
||||
'ext': 'mp4',
|
||||
'title': 'The National | Women’s soccer pay, Florida seawater, Swift quake',
|
||||
'description': 'md5:adba28011a56cfa47a080ff198dad27a',
|
||||
'timestamp': 1690596000,
|
||||
'duration': 2716.333,
|
||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/481/326/thumbnail.jpeg',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'chapters': 'count:5',
|
||||
'upload_date': '20230729',
|
||||
},
|
||||
}]
|
||||
|
||||
@ -197,12 +224,13 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'force_smil_url': True
|
||||
}),
|
||||
'id': video_id,
|
||||
'_format_sort_fields': ('res', 'proto') # Prioritize direct http formats over HLS
|
||||
}
|
||||
|
||||
|
||||
class CBCGemIE(InfoExtractor):
|
||||
IE_NAME = 'gem.cbc.ca'
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/media/(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'
|
||||
_TESTS = [{
|
||||
# This is a normal, public, TV show video
|
||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
|
||||
@ -245,6 +273,9 @@ class CBCGemIE(InfoExtractor):
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_GEO_COUNTRIES = ['CA']
|
||||
@ -346,7 +377,9 @@ class CBCGemIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_info = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/assets/' + video_id, video_id)
|
||||
video_info = self._download_json(
|
||||
f'https://services.radio-canada.ca/ott/cbc-api/v2/assets/{video_id}',
|
||||
video_id, expected_status=426)
|
||||
|
||||
email, password = self._get_login_info()
|
||||
if email and password:
|
||||
@ -401,7 +434,7 @@ class CBCGemIE(InfoExtractor):
|
||||
|
||||
class CBCGemPlaylistIE(InfoExtractor):
|
||||
IE_NAME = 'gem.cbc.ca:playlist'
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/media/(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
# TV show playlist, all public videos
|
||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s06',
|
||||
@ -411,6 +444,9 @@ class CBCGemPlaylistIE(InfoExtractor):
|
||||
'title': 'Season 6',
|
||||
'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://gem.cbc.ca/schitts-creek/s06',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/'
|
||||
|
||||
@ -418,7 +454,7 @@ class CBCGemPlaylistIE(InfoExtractor):
|
||||
match = self._match_valid_url(url)
|
||||
season_id = match.group('id')
|
||||
show = match.group('show')
|
||||
show_info = self._download_json(self._API_BASE + show, season_id)
|
||||
show_info = self._download_json(self._API_BASE + show, season_id, expected_status=426)
|
||||
season = int(match.group('season'))
|
||||
|
||||
season_info = next((s for s in show_info['seasons'] if s.get('season') == season), None)
|
||||
@ -470,8 +506,9 @@ class CBCGemPlaylistIE(InfoExtractor):
|
||||
|
||||
class CBCGemLiveIE(InfoExtractor):
|
||||
IE_NAME = 'gem.cbc.ca:live'
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/live/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/live(?:-event)?/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://gem.cbc.ca/live/920604739687',
|
||||
'info_dict': {
|
||||
'title': 'Ottawa',
|
||||
@ -485,34 +522,74 @@ class CBCGemLiveIE(InfoExtractor):
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'skip': 'Live might have ended',
|
||||
},
|
||||
{
|
||||
'url': 'https://gem.cbc.ca/live/44',
|
||||
'info_dict': {
|
||||
'id': '44',
|
||||
'ext': 'mp4',
|
||||
'is_live': True,
|
||||
'title': r're:^Ottawa [0-9\-: ]+',
|
||||
'description': 'The live TV channel and local programming from Ottawa',
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*'
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'Live might have ended',
|
||||
},
|
||||
{
|
||||
'url': 'https://gem.cbc.ca/live-event/10835',
|
||||
'info_dict': {
|
||||
'id': '10835',
|
||||
'ext': 'mp4',
|
||||
'is_live': True,
|
||||
'title': r're:^The National \| Biden’s trip wraps up, Paltrow testifies, Bird flu [0-9\-: ]+',
|
||||
'description': 'March 24, 2023 | President Biden’s Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.',
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*',
|
||||
'timestamp': 1679706000,
|
||||
'upload_date': '20230325',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'Live might have ended',
|
||||
}
|
||||
|
||||
# It's unclear where the chars at the end come from, but they appear to be
|
||||
# constant. Might need updating in the future.
|
||||
# There are two URLs, some livestreams are in one, and some
|
||||
# in the other. The JSON schema is the same for both.
|
||||
_API_URLS = ['https://tpfeed.cbc.ca/f/ExhSPC/t_t3UKJR6MAT', 'https://tpfeed.cbc.ca/f/ExhSPC/FNiv9xQx_BnT']
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data']
|
||||
|
||||
for api_url in self._API_URLS:
|
||||
video_info = next((
|
||||
stream for stream in self._download_json(api_url, video_id)['entries']
|
||||
if stream.get('guid') == video_id), None)
|
||||
if video_info:
|
||||
break
|
||||
else:
|
||||
# Two types of metadata JSON
|
||||
if not video_info.get('formattedIdMedia'):
|
||||
video_info = traverse_obj(
|
||||
video_info, (('freeTv', ('streams', ...)), 'items', lambda _, v: v['key'] == video_id, {dict}),
|
||||
get_all=False, default={})
|
||||
|
||||
video_stream_id = video_info.get('formattedIdMedia')
|
||||
if not video_stream_id:
|
||||
raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)
|
||||
|
||||
stream_data = self._download_json(
|
||||
'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
|
||||
'appCode': 'mpx',
|
||||
'connectionType': 'hd',
|
||||
'deviceType': 'ipad',
|
||||
'idMedia': video_stream_id,
|
||||
'multibitrate': 'true',
|
||||
'output': 'json',
|
||||
'tech': 'hls',
|
||||
'manifestType': 'desktop',
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': video_info['content'][0]['url'],
|
||||
'id': video_id,
|
||||
'title': video_info.get('title'),
|
||||
'description': video_info.get('description'),
|
||||
'tags': try_get(video_info, lambda x: x['keywords'].split(', ')),
|
||||
'thumbnail': video_info.get('cbc$staticImage'),
|
||||
'formats': self._extract_m3u8_formats(stream_data['url'], video_id, 'mp4', live=True),
|
||||
'is_live': True,
|
||||
**traverse_obj(video_info, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'thumbnail': ('images', 'card', 'url'),
|
||||
'timestamp': ('airDate', {parse_iso8601}),
|
||||
})
|
||||
}
|
||||
|
@ -1,8 +1,14 @@
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformFeedIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
get_element_html_by_id,
|
||||
int_or_none,
|
||||
find_xpath_attr,
|
||||
smuggle_url,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
@ -162,3 +168,110 @@ class CBSIE(CBSBaseIE):
|
||||
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
|
||||
'thumbnail': url_or_none(xpath_text(video_data, 'previewImageURL')),
|
||||
})
|
||||
|
||||
|
||||
class ParamountPressExpressIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?paramountpressexpress\.com(?:/[\w-]+)+/(?P<yt>yt-)?video/?\?watch=(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.paramountpressexpress.com/cbs-entertainment/shows/survivor/video/?watch=pnzew7e2hx',
|
||||
'md5': '56631dbcadaab980d1fc47cb7b76cba4',
|
||||
'info_dict': {
|
||||
'id': '6322981580112',
|
||||
'ext': 'mp4',
|
||||
'title': 'I’m Felicia',
|
||||
'description': 'md5:88fad93f8eede1c9c8f390239e4c6290',
|
||||
'uploader_id': '6055873637001',
|
||||
'upload_date': '20230320',
|
||||
'timestamp': 1679334960,
|
||||
'duration': 49.557,
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.paramountpressexpress.com/cbs-entertainment/video/?watch=2s5eh8kppc',
|
||||
'md5': 'edcb03e3210b88a3e56c05aa863e0e5b',
|
||||
'info_dict': {
|
||||
'id': '6323036027112',
|
||||
'ext': 'mp4',
|
||||
'title': '‘Y&R’ Set Visit: Jerry O’Connell Quizzes Cast on Pre-Love Scene Rituals and More',
|
||||
'description': 'md5:b929867a357aac5544b783d834c78383',
|
||||
'uploader_id': '6055873637001',
|
||||
'upload_date': '20230321',
|
||||
'timestamp': 1679430180,
|
||||
'duration': 132.032,
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.paramountpressexpress.com/paramount-plus/yt-video/?watch=OX9wJWOcqck',
|
||||
'info_dict': {
|
||||
'id': 'OX9wJWOcqck',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rugrats | Season 2 Official Trailer | Paramount+',
|
||||
'description': 'md5:1f7e26f5625a9f0d6564d9ad97a9f7de',
|
||||
'uploader': 'Paramount Plus',
|
||||
'uploader_id': '@paramountplus',
|
||||
'uploader_url': 'http://www.youtube.com/@paramountplus',
|
||||
'channel': 'Paramount Plus',
|
||||
'channel_id': 'UCrRttZIypNTA1Mrfwo745Sg',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCrRttZIypNTA1Mrfwo745Sg',
|
||||
'upload_date': '20230316',
|
||||
'duration': 88,
|
||||
'age_limit': 0,
|
||||
'availability': 'public',
|
||||
'live_status': 'not_live',
|
||||
'playable_in_embed': True,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'channel_follower_count': int,
|
||||
'thumbnail': 'https://i.ytimg.com/vi/OX9wJWOcqck/maxresdefault.jpg',
|
||||
'categories': ['Entertainment'],
|
||||
'tags': ['Rugrats'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.paramountpressexpress.com/showtime/yt-video/?watch=_ljssSoDLkw',
|
||||
'info_dict': {
|
||||
'id': '_ljssSoDLkw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lavell Crawford: THEE Lavell Crawford Comedy Special Official Trailer | SHOWTIME',
|
||||
'description': 'md5:39581bcc3fd810209b642609f448af70',
|
||||
'uploader': 'SHOWTIME',
|
||||
'uploader_id': '@Showtime',
|
||||
'uploader_url': 'http://www.youtube.com/@Showtime',
|
||||
'channel': 'SHOWTIME',
|
||||
'channel_id': 'UCtwMWJr2BFPkuJTnSvCESSQ',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCtwMWJr2BFPkuJTnSvCESSQ',
|
||||
'upload_date': '20230209',
|
||||
'duration': 49,
|
||||
'age_limit': 0,
|
||||
'availability': 'public',
|
||||
'live_status': 'not_live',
|
||||
'playable_in_embed': True,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'channel_follower_count': int,
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/_ljssSoDLkw/maxresdefault.webp',
|
||||
'categories': ['People & Blogs'],
|
||||
'tags': 'count:27',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, is_youtube = self._match_valid_url(url).group('id', 'yt')
|
||||
if is_youtube:
|
||||
return self.url_result(display_id, YoutubeIE)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'\bvideo_id\s*=\s*["\'](\d+)["\']\s*,', webpage, 'Brightcove ID')
|
||||
token = self._search_regex(r'\btoken\s*=\s*["\']([\w.-]+)["\']', webpage, 'token')
|
||||
|
||||
player = extract_attributes(get_element_html_by_id('vcbrightcoveplayer', webpage) or '')
|
||||
account_id = player.get('data-account') or '6055873637001'
|
||||
player_id = player.get('data-player') or 'OtLKgXlO9F'
|
||||
embed = player.get('data-embed') or 'default'
|
||||
|
||||
return self.url_result(smuggle_url(
|
||||
f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}',
|
||||
{'token': token}), BrightcoveNewIE)
|
||||
|
@ -1,116 +0,0 @@
|
||||
from .anvato import AnvatoIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class CBSLocalIE(AnvatoIE): # XXX: Do not subclass from concrete IE
|
||||
_VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
|
||||
_VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
||||
'info_dict': {
|
||||
'id': '3580809',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Very Blue Anniversary',
|
||||
'description': 'CBS2’s Cindy Hsu has more.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': int,
|
||||
'upload_date': r're:^\d{8}$',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
'categories': [
|
||||
'Stations\\Spoken Word\\WCBSTV',
|
||||
'Syndication\\AOL',
|
||||
'Syndication\\MSN',
|
||||
'Syndication\\NDN',
|
||||
'Syndication\\Yahoo',
|
||||
'Content\\News',
|
||||
'Content\\News\\Local News',
|
||||
],
|
||||
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mcp_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
|
||||
|
||||
|
||||
class CBSLocalArticleIE(AnvatoIE): # XXX: Do not subclass from concrete IE
|
||||
_VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Anvato backend
|
||||
'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
|
||||
'md5': 'f0ee3081e3843f575fccef901199b212',
|
||||
'info_dict': {
|
||||
'id': '3401037',
|
||||
'ext': 'mp4',
|
||||
'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
|
||||
'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': 1463440500,
|
||||
'upload_date': '20160516',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
'categories': [
|
||||
'Stations\\Spoken Word\\KCBSTV',
|
||||
'Syndication\\MSN',
|
||||
'Syndication\\NDN',
|
||||
'Syndication\\AOL',
|
||||
'Syndication\\Yahoo',
|
||||
'Syndication\\Tribune',
|
||||
'Syndication\\Curb.tv',
|
||||
'Content\\News'
|
||||
],
|
||||
'tags': ['CBS 2 News Evening'],
|
||||
},
|
||||
}, {
|
||||
# SendtoNews embed
|
||||
'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
|
||||
'info_dict': {
|
||||
'id': 'GxfCe0Zo7D-175909-5588',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
sendtonews_url = SendtoNewsIE._extract_url(webpage)
|
||||
if sendtonews_url:
|
||||
return self.url_result(
|
||||
compat_urlparse.urljoin(url, sendtonews_url),
|
||||
ie=SendtoNewsIE.ie_key())
|
||||
|
||||
info_dict = self._extract_anvato_videos(webpage, display_id)
|
||||
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage,
|
||||
'released date', default=None)) or parse_iso8601(
|
||||
self._html_search_meta('uploadDate', webpage))
|
||||
|
||||
info_dict.update({
|
||||
'display_id': display_id,
|
||||
'timestamp': timestamp,
|
||||
})
|
||||
|
||||
return info_dict
|
@ -1,36 +1,153 @@
|
||||
import base64
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import zlib
|
||||
|
||||
from .anvato import AnvatoIE
|
||||
from .common import InfoExtractor
|
||||
from .cbs import CBSIE
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from .paramountplus import ParamountPlusIE
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UserNotLive,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
make_archive_id,
|
||||
mimetype2ext,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CBSNewsEmbedIE(CBSIE): # XXX: Do not subclass from concrete IE
|
||||
class CBSNewsBaseIE(InfoExtractor):
|
||||
_LOCALES = {
|
||||
'atlanta': None,
|
||||
'baltimore': 'BAL',
|
||||
'boston': 'BOS',
|
||||
'chicago': 'CHI',
|
||||
'colorado': 'DEN',
|
||||
'detroit': 'DET',
|
||||
'losangeles': 'LA',
|
||||
'miami': 'MIA',
|
||||
'minnesota': 'MIN',
|
||||
'newyork': 'NY',
|
||||
'philadelphia': 'PHI',
|
||||
'pittsburgh': 'PIT',
|
||||
'sacramento': 'SAC',
|
||||
'sanfrancisco': 'SF',
|
||||
'texas': 'DAL',
|
||||
}
|
||||
_LOCALE_RE = '|'.join(map(re.escape, _LOCALES))
|
||||
_ANVACK = '5VD6Eyd6djewbCmNwBFnsJj17YAvGRwl'
|
||||
|
||||
def _get_item(self, webpage, display_id):
|
||||
return traverse_obj(self._search_json(
|
||||
r'CBSNEWS\.defaultPayload\s*=', webpage, 'payload', display_id,
|
||||
default={}), ('items', 0, {dict})) or {}
|
||||
|
||||
def _get_video_url(self, item):
|
||||
return traverse_obj(item, 'video', 'video2', expected_type=url_or_none)
|
||||
|
||||
def _extract_playlist(self, webpage, playlist_id):
|
||||
entries = [self.url_result(embed_url, CBSNewsEmbedIE) for embed_url in re.findall(
|
||||
r'<iframe[^>]+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage)]
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, self._html_search_meta(['og:title', 'twitter:title'], webpage),
|
||||
self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage))
|
||||
|
||||
def _extract_video(self, item, video_url, video_id):
|
||||
if mimetype2ext(item.get('format'), default=determine_ext(video_url)) == 'mp4':
|
||||
formats = [{'url': video_url, 'ext': 'mp4'}]
|
||||
|
||||
else:
|
||||
manifest = self._download_webpage(video_url, video_id, note='Downloading m3u8 information')
|
||||
|
||||
anvato_id = self._search_regex(r'anvato-(\d+)', manifest, 'Anvato ID', default=None)
|
||||
# Prefer Anvato if available; cbsnews.com m3u8 formats are re-encoded from Anvato source
|
||||
if anvato_id:
|
||||
return self.url_result(
|
||||
smuggle_url(f'anvato:{self._ANVACK}:{anvato_id}', {'token': 'default'}),
|
||||
AnvatoIE, url_transparent=True, _old_archive_ids=[make_archive_id(self, anvato_id)])
|
||||
|
||||
formats, _ = self._parse_m3u8_formats_and_subtitles(
|
||||
manifest, video_url, 'mp4', m3u8_id='hls', video_id=video_id)
|
||||
|
||||
def get_subtitles(subs_url):
|
||||
return {
|
||||
'en': [{
|
||||
'url': subs_url,
|
||||
'ext': 'dfxp', # TTAF1
|
||||
}],
|
||||
} if url_or_none(subs_url) else None
|
||||
|
||||
episode_meta = traverse_obj(item, {
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
}) if item.get('isFullEpisode') else {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(item, {
|
||||
'title': (None, ('fulltitle', 'title')),
|
||||
'description': 'dek',
|
||||
'timestamp': ('timestamp', {lambda x: float_or_none(x, 1000)}),
|
||||
'duration': ('duration', {float_or_none}),
|
||||
'subtitles': ('captions', {get_subtitles}),
|
||||
'thumbnail': ('images', ('hd', 'sd'), {url_or_none}),
|
||||
'is_live': ('type', {lambda x: x == 'live'}),
|
||||
}, get_all=False),
|
||||
**episode_meta,
|
||||
}
|
||||
|
||||
|
||||
class CBSNewsEmbedIE(CBSNewsBaseIE):
|
||||
IE_NAME = 'cbsnews:embed'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P<id>.+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi%2F%2FvuJNkNhmHdGxgM2NL57vjd6zt%2B8PngdN%2Fyg79qeGvhzN%2FLGrS%2F%2BuBLB531V28%2B%2BO7Qg7%2Fy97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf%2F89NM9Hi9fXifhpr8sr%2FlP848tn%2BTdXycX25zh4cdX%2FvHl6PmmPqnWQv9w8Ed%2B9GjYRim07bFEqdG%2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k%2By91mjspGsJ%2BwVae4%2BsvdnaO1p73HkXs%2FVisUDTGm7R8IcdnOROeq%2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC%2FdK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd%2FLewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf%2Fhbbpzs0IB3vb8KkyzJQ%2BywOAgCrMpgRrz%2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu%2BBUs9bam%2Fds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4%2B1u867cY5u4344n%2FxXjCqAjucdTHgLKojNKmSfO8KRsOFY%2FzKEYCKEJBzv90QA9nfm9gL%2BHulaFqUkz9ULUYxl62B3U%2FRVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB%2FhKfwCKOzEk%2B4k6rWd4uuT%2FwTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp%2F3GPcXn0VXHdSS029%2Fj99UC%2FwJUvyKQ%2FzKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe%2FQ%2FBT%2BMpf8d5GN02Ox%2FfiGsl7TA7POu1xZ5%2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVviLT8kY9h2CHH6Y%2Bix6609weFtey4ESp60WtyeWMy%2BsmBuhsoKIyuoT%2Bq2R%2FrW5qi3g%2FvzS2j40DoixDP8%2BKP0yUdpXJ4l6Vla%2Bg9vce%2BC4yM5YlUcbA%2F0jLKdpmTwvsdN5z88nAIe08%2F0HgxeG1iv%2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y%2FRa3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk%2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2%2FkOE833sfeG%2Ff5cfHtEFNXgYes0%2FXj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02%2ByXsCTk%2B9zhzswIN04YueNSK%2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690%2F0BJdXtDZ5ITuM4xj3f4oUHGzc5JeJmZKpp%2FjwKh4wMV%2FV1yx3emLoR0MwbG4K%2F%2BZgVep3PnzXGDHZ6a3i%2Fk%2BJrONDN13%2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro%2FE6JfE6d5RydbiR76k%2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL%2BwgboUVtUFRI%2FLemgTpdtF59ii7pab08kuPcfWzs0l%2FRI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl%2FimXI599Ev7fWjoPD78A',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': '6ZP4cXvo9FaX3VLH7MF4CgY30JFpY_GA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cops investigate gorilla incident at Cincinnati Zoo',
|
||||
'description': 'md5:fee7441ab8aaeb3c693482394738102b',
|
||||
'duration': 350,
|
||||
'timestamp': 1464719713,
|
||||
'upload_date': '20160531',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
item = self._parse_json(zlib.decompress(compat_b64decode(
|
||||
compat_urllib_parse_unquote(self._match_id(url))),
|
||||
-zlib.MAX_WBITS).decode('utf-8'), None)['video']['items'][0]
|
||||
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
|
||||
item = traverse_obj(self._parse_json(zlib.decompress(base64.b64decode(
|
||||
urllib.parse.unquote(self._match_id(url))),
|
||||
-zlib.MAX_WBITS).decode(), None), ('video', 'items', 0, {dict})) or {}
|
||||
|
||||
video_id = item['mpxRefId']
|
||||
video_url = self._get_video_url(item)
|
||||
if not video_url:
|
||||
# Old embeds redirect user to ParamountPlus but most links are 404
|
||||
pplus_url = f'https://www.paramountplus.com/shows/video/{video_id}'
|
||||
try:
|
||||
self._request_webpage(HEADRequest(pplus_url), video_id)
|
||||
return self.url_result(pplus_url, ParamountPlusIE)
|
||||
except ExtractorError:
|
||||
self.raise_no_formats('This video is no longer available', True, video_id)
|
||||
|
||||
return self._extract_video(item, video_url, video_id)
|
||||
|
||||
|
||||
class CBSNewsIE(CBSIE): # XXX: Do not subclass from concrete IE
|
||||
class CBSNewsIE(CBSNewsBaseIE):
|
||||
IE_NAME = 'cbsnews'
|
||||
IE_DESC = 'CBS News'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P<id>[\da-z_-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P<id>[\w-]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@ -47,10 +164,7 @@ class CBSNewsIE(CBSIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1476046464,
|
||||
'upload_date': '20161009',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This video is no longer available',
|
||||
},
|
||||
{
|
||||
'url': 'https://www.cbsnews.com/video/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||
@ -61,48 +175,234 @@ class CBSNewsIE(CBSIE): # XXX: Do not subclass from concrete IE
|
||||
'description': 'md5:4a6983e480542d8b333a947bfc64ddc7',
|
||||
'upload_date': '20140404',
|
||||
'timestamp': 1396650660,
|
||||
'uploader': 'CBSI-NEW',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 205,
|
||||
'subtitles': {
|
||||
'en': [{
|
||||
'ext': 'ttml',
|
||||
'ext': 'dfxp',
|
||||
}],
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
},
|
||||
{
|
||||
# 48 hours
|
||||
'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/',
|
||||
'info_dict': {
|
||||
'id': 'maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved',
|
||||
'title': 'Cold as Ice',
|
||||
'description': 'Can a childhood memory solve the 1957 murder of 7-year-old Maria Ridulph?',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.cbsnews.com/video/032823-cbs-evening-news/',
|
||||
'info_dict': {
|
||||
'id': '_2wuO7hD9LwtyM_TwSnVwnKp6kxlcXgE',
|
||||
'ext': 'mp4',
|
||||
'title': 'CBS Evening News, March 28, 2023',
|
||||
'description': 'md5:db20615aae54adc1d55a1fd69dc75d13',
|
||||
'duration': 1189,
|
||||
'timestamp': 1680042600,
|
||||
'upload_date': '20230328',
|
||||
'season': 'Season 2023',
|
||||
'season_number': 2023,
|
||||
'episode': 'Episode 83',
|
||||
'episode_number': 83,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
entries = []
|
||||
for embed_url in re.findall(r'<iframe[^>]+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage):
|
||||
entries.append(self.url_result(embed_url, CBSNewsEmbedIE.ie_key()))
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, playlist_title=self._html_search_meta(['og:title', 'twitter:title'], webpage),
|
||||
playlist_description=self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage))
|
||||
playlist = self._extract_playlist(webpage, display_id)
|
||||
if playlist:
|
||||
return playlist
|
||||
|
||||
item = self._parse_json(self._html_search_regex(
|
||||
r'CBSNEWS\.defaultPayload\s*=\s*({.+})',
|
||||
webpage, 'video JSON info'), display_id)['items'][0]
|
||||
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
|
||||
item = self._get_item(webpage, display_id)
|
||||
video_id = item.get('mpxRefId') or display_id
|
||||
video_url = self._get_video_url(item)
|
||||
if not video_url:
|
||||
self.raise_no_formats('No video content was found', expected=True, video_id=video_id)
|
||||
|
||||
return self._extract_video(item, video_url, video_id)
|
||||
|
||||
|
||||
class CBSLocalBaseIE(CBSNewsBaseIE):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
item = self._get_item(webpage, display_id)
|
||||
video_id = item.get('mpxRefId') or display_id
|
||||
anvato_id = None
|
||||
video_url = self._get_video_url(item)
|
||||
|
||||
if not video_url:
|
||||
anv_params = self._search_regex(
|
||||
r'<iframe[^>]+\bdata-src="https?://w3\.mp\.lura\.live/player/prod/v3/anvload\.html\?key=([^"]+)"',
|
||||
webpage, 'Anvato URL', default=None)
|
||||
|
||||
if not anv_params:
|
||||
playlist = self._extract_playlist(webpage, display_id)
|
||||
if playlist:
|
||||
return playlist
|
||||
self.raise_no_formats('No video content was found', expected=True, video_id=video_id)
|
||||
|
||||
anv_data = self._parse_json(base64.urlsafe_b64decode(f'{anv_params}===').decode(), video_id)
|
||||
anvato_id = anv_data['v']
|
||||
return self.url_result(
|
||||
smuggle_url(f'anvato:{anv_data.get("anvack") or self._ANVACK}:{anvato_id}', {
|
||||
'token': anv_data.get('token') or 'default',
|
||||
}), AnvatoIE, url_transparent=True, _old_archive_ids=[make_archive_id(self, anvato_id)])
|
||||
|
||||
return self._extract_video(item, video_url, video_id)
|
||||
|
||||
|
||||
class CBSLocalIE(CBSLocalBaseIE):
|
||||
_VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?:{CBSNewsBaseIE._LOCALE_RE})/(?:live/)?video/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
# Anvato video via defaultPayload JSON
|
||||
'url': 'https://www.cbsnews.com/newyork/video/1st-cannabis-dispensary-opens-in-queens/',
|
||||
'info_dict': {
|
||||
'id': '6376747',
|
||||
'ext': 'mp4',
|
||||
'title': '1st cannabis dispensary opens in Queens',
|
||||
'description': 'The dispensary is women-owned and located in Jamaica.',
|
||||
'uploader': 'CBS',
|
||||
'duration': 20,
|
||||
'timestamp': 1680193657,
|
||||
'upload_date': '20230330',
|
||||
'categories': ['Stations\\Spoken Word\\WCBSTV', 'Content\\Google', 'Content\\News', 'Content\\News\\Local News'],
|
||||
'tags': 'count:11',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'_old_archive_ids': ['cbslocal 6376747'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# cbsnews.com video via defaultPayload JSON
|
||||
'url': 'https://www.cbsnews.com/newyork/live/video/20230330171655-the-city-is-sounding-the-alarm-on-dangerous-social-media-challenges/',
|
||||
'info_dict': {
|
||||
'id': 'sJqfw7YvgSC6ant2zVmzt3y1jYKoL5J3',
|
||||
'ext': 'mp4',
|
||||
'title': 'the city is sounding the alarm on dangerous social media challenges',
|
||||
'description': 'md5:8eccc9b1b73be5138a52e9c4350d2cd6',
|
||||
'thumbnail': 'https://images-cbsn.cbsnews.com/prod/2023/03/30/story_22509622_1680196925.jpg',
|
||||
'duration': 41.0,
|
||||
'timestamp': 1680196615,
|
||||
'upload_date': '20230330',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
|
||||
class CBSLocalArticleIE(CBSLocalBaseIE):
|
||||
_VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?:{CBSNewsBaseIE._LOCALE_RE})/news/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
# Anvato video via iframe embed
|
||||
'url': 'https://www.cbsnews.com/newyork/news/mta-station-agents-leaving-their-booths-to-provide-more-direct-customer-service/',
|
||||
'playlist_count': 2,
|
||||
'info_dict': {
|
||||
'id': 'mta-station-agents-leaving-their-booths-to-provide-more-direct-customer-service',
|
||||
'title': 'MTA station agents begin leaving their booths to provide more direct customer service',
|
||||
'description': 'The more than 2,200 agents will provide face-to-face customer service to passengers.',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cbsnews.com/losangeles/news/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis/',
|
||||
'md5': 'f0ee3081e3843f575fccef901199b212',
|
||||
'info_dict': {
|
||||
'id': '3401037',
|
||||
'ext': 'mp4',
|
||||
'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': 1463440500,
|
||||
'upload_date': '20160516',
|
||||
},
|
||||
'skip': 'Video has been removed',
|
||||
}]
|
||||
|
||||
|
||||
class CBSNewsLiveBaseIE(CBSNewsBaseIE):
|
||||
def _get_id(self, url):
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._get_id(url)
|
||||
if not video_id:
|
||||
raise ExtractorError('Livestream is not available', expected=True)
|
||||
|
||||
data = traverse_obj(self._download_json(
|
||||
'https://feeds-cbsn.cbsnews.com/2.0/rundown/', video_id, query={
|
||||
'partner': 'cbsnsite',
|
||||
'edition': video_id,
|
||||
'type': 'live',
|
||||
}), ('navigation', 'data', 0, {dict}))
|
||||
|
||||
video_url = traverse_obj(data, (('videoUrlDAI', ('videoUrl', 'base')), {url_or_none}), get_all=False)
|
||||
if not video_url:
|
||||
raise UserNotLive(video_id=video_id)
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'is_live': True,
|
||||
**traverse_obj(data, {
|
||||
'title': 'headline',
|
||||
'description': 'rundown_slug',
|
||||
'thumbnail': ('images', 'thumbnail_url_hd', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class CBSLocalLiveIE(CBSNewsLiveBaseIE):
|
||||
_VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?P<id>{CBSNewsBaseIE._LOCALE_RE})/live/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbsnews.com/losangeles/live/',
|
||||
'info_dict': {
|
||||
'id': 'CBSN-LA',
|
||||
'ext': 'mp4',
|
||||
'title': str,
|
||||
'description': r're:KCBS/CBSN_LA.CRISPIN.\w+.RUNDOWN \w+ \w+',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _get_id(self, url):
|
||||
return format_field(self._LOCALES, self._match_id(url), 'CBSN-%s')
|
||||
|
||||
|
||||
class CBSNewsLiveIE(CBSNewsLiveBaseIE):
|
||||
IE_NAME = 'cbsnews:live'
|
||||
IE_DESC = 'CBS News Livestream'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbsnews.com/live/',
|
||||
'info_dict': {
|
||||
'id': 'CBSN-US',
|
||||
'ext': 'mp4',
|
||||
'title': str,
|
||||
'description': r're:\w+ \w+ CRISPIN RUNDOWN',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _get_id(self, url):
|
||||
return 'CBSN-US'
|
||||
|
||||
|
||||
class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
@ -111,7 +411,7 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[^/?#]+)'
|
||||
|
||||
# Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
|
||||
'info_dict': {
|
||||
'id': 'clinton-sanders-prepare-to-face-off-in-nh',
|
||||
@ -120,7 +420,7 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
'duration': 334,
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@ -131,13 +431,13 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
'dvr_slug': display_id,
|
||||
})
|
||||
|
||||
formats = self._extract_akamai_formats(video_info['url'], display_id)
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'display_id': display_id,
|
||||
'title': video_info['headline'],
|
||||
'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'),
|
||||
'duration': parse_duration(video_info.get('segmentDur')),
|
||||
'formats': formats,
|
||||
'formats': self._extract_akamai_formats(video_info['url'], display_id),
|
||||
**traverse_obj(video_info, {
|
||||
'title': 'headline',
|
||||
'thumbnail': ('thumbnail_url_hd', {url_or_none}),
|
||||
'duration': ('segmentDur', {parse_duration}),
|
||||
}),
|
||||
}
|
||||
|
@ -1,20 +1,20 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
|
||||
from ..networking import Request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
urlencode_postdata,
|
||||
USER_AGENTS,
|
||||
)
|
||||
|
||||
USER_AGENTS = {
|
||||
'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
|
||||
}
|
||||
|
||||
|
||||
class CeskaTelevizeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
|
||||
@ -97,7 +97,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage, urlh = self._download_webpage_handle(url, playlist_id)
|
||||
parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
|
||||
parsed_url = compat_urllib_parse_urlparse(urlh.url)
|
||||
site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
|
||||
playlist_title = self._og_search_title(webpage, default=None)
|
||||
if site_name and playlist_title:
|
||||
@ -163,16 +163,16 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
entries = []
|
||||
|
||||
for user_agent in (None, USER_AGENTS['Safari']):
|
||||
req = sanitized_Request(
|
||||
req = Request(
|
||||
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
|
||||
data=urlencode_postdata(data))
|
||||
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
req.add_header('x-addr', '127.0.0.1')
|
||||
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
req.headers['Content-type'] = 'application/x-www-form-urlencoded'
|
||||
req.headers['x-addr'] = '127.0.0.1'
|
||||
req.headers['X-Requested-With'] = 'XMLHttpRequest'
|
||||
if user_agent:
|
||||
req.add_header('User-Agent', user_agent)
|
||||
req.add_header('Referer', url)
|
||||
req.headers['User-Agent'] = user_agent
|
||||
req.headers['Referer'] = url
|
||||
|
||||
playlistpage = self._download_json(req, playlist_id, fatal=False)
|
||||
|
||||
@ -183,8 +183,8 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
if playlist_url == 'error_region':
|
||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||
|
||||
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
|
||||
req.add_header('Referer', url)
|
||||
req = Request(compat_urllib_parse_unquote(playlist_url))
|
||||
req.headers['Referer'] = url
|
||||
|
||||
playlist = self._download_json(req, playlist_id, fatal=False)
|
||||
if not playlist:
|
||||
|
@ -1,93 +1,123 @@
|
||||
import json
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class ChilloutzoneIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w-]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
|
||||
'url': 'https://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
|
||||
'md5': 'a76f3457e813ea0037e5244f509e66d1',
|
||||
'info_dict': {
|
||||
'id': 'enemene-meck-alle-katzen-weg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Enemene Meck - Alle Katzen weg',
|
||||
'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
|
||||
'duration': 24,
|
||||
},
|
||||
}, {
|
||||
'note': 'Video hosted at YouTube',
|
||||
'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
|
||||
'url': 'https://www.chilloutzone.net/video/eine-sekunde-bevor.html',
|
||||
'info_dict': {
|
||||
'id': '1YVQaAgHyRU',
|
||||
'ext': 'mp4',
|
||||
'title': '16 Photos Taken 1 Second Before Disaster',
|
||||
'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
|
||||
'uploader': 'BuzzFeedVideo',
|
||||
'uploader_id': 'BuzzFeedVideo',
|
||||
'uploader_id': '@BuzzFeedVideo',
|
||||
'upload_date': '20131105',
|
||||
'availability': 'public',
|
||||
'thumbnail': 'https://i.ytimg.com/vi/1YVQaAgHyRU/maxresdefault.jpg',
|
||||
'tags': 'count:41',
|
||||
'like_count': int,
|
||||
'playable_in_embed': True,
|
||||
'channel_url': 'https://www.youtube.com/channel/UCpko_-a4wgz2u_DgDgd9fqA',
|
||||
'chapters': 'count:6',
|
||||
'live_status': 'not_live',
|
||||
'view_count': int,
|
||||
'categories': ['Entertainment'],
|
||||
'age_limit': 0,
|
||||
'channel_id': 'UCpko_-a4wgz2u_DgDgd9fqA',
|
||||
'duration': 100,
|
||||
'uploader_url': 'http://www.youtube.com/@BuzzFeedVideo',
|
||||
'channel_follower_count': int,
|
||||
'channel': 'BuzzFeedVideo',
|
||||
},
|
||||
}, {
|
||||
'note': 'Video hosted at Vimeo',
|
||||
'url': 'http://www.chilloutzone.net/video/icon-blending.html',
|
||||
'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
|
||||
'url': 'https://www.chilloutzone.net/video/icon-blending.html',
|
||||
'md5': '2f9d6850ec567b24f0f4fa143b9aa2f9',
|
||||
'info_dict': {
|
||||
'id': '85523671',
|
||||
'id': 'LLNkHpSjBfc',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Sunday Times - Icons',
|
||||
'description': 're:(?s)^Watch the making of - makingoficons.com.{300,}',
|
||||
'uploader': 'Us',
|
||||
'uploader_id': 'usfilms',
|
||||
'upload_date': '20140131'
|
||||
'title': 'The Sunday Times Making of Icons',
|
||||
'description': 'md5:b9259fcf63a1669e42001e5db677f02a',
|
||||
'uploader': 'MadFoxUA',
|
||||
'uploader_id': '@MadFoxUA',
|
||||
'upload_date': '20140204',
|
||||
'channel_id': 'UCSZa9Y6-Vl7c11kWMcbAfCw',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCSZa9Y6-Vl7c11kWMcbAfCw',
|
||||
'comment_count': int,
|
||||
'uploader_url': 'http://www.youtube.com/@MadFoxUA',
|
||||
'duration': 66,
|
||||
'live_status': 'not_live',
|
||||
'channel_follower_count': int,
|
||||
'playable_in_embed': True,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.ytimg.com/vi/LLNkHpSjBfc/maxresdefault.jpg',
|
||||
'categories': ['Comedy'],
|
||||
'availability': 'public',
|
||||
'tags': [],
|
||||
'channel': 'MadFoxUA',
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.chilloutzone.net/video/ordentlich-abgeschuettelt.html',
|
||||
'info_dict': {
|
||||
'id': 'ordentlich-abgeschuettelt',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ordentlich abgeschüttelt',
|
||||
'description': 'md5:d41541966b75d3d1e8ea77a94ea0d329',
|
||||
'duration': 18,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
b64_data = self._html_search_regex(
|
||||
r'var cozVidData\s*=\s*"([^"]+)"', webpage, 'video data')
|
||||
info = self._parse_json(base64.b64decode(b64_data).decode(), video_id)
|
||||
|
||||
base64_video_info = self._html_search_regex(
|
||||
r'var cozVidData = "(.+?)";', webpage, 'video data')
|
||||
decoded_video_info = compat_b64decode(base64_video_info).decode('utf-8')
|
||||
video_info_dict = json.loads(decoded_video_info)
|
||||
video_url = info.get('mediaUrl')
|
||||
native_platform = info.get('nativePlatform')
|
||||
|
||||
# get video information from dict
|
||||
video_url = video_info_dict['mediaUrl']
|
||||
description = clean_html(video_info_dict.get('description'))
|
||||
title = video_info_dict['title']
|
||||
native_platform = video_info_dict['nativePlatform']
|
||||
native_video_id = video_info_dict['nativeVideoId']
|
||||
source_priority = video_info_dict['sourcePriority']
|
||||
|
||||
# If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
|
||||
if native_platform is None:
|
||||
youtube_url = YoutubeIE._extract_url(webpage)
|
||||
if youtube_url:
|
||||
return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
|
||||
|
||||
# Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
|
||||
# the own CDN
|
||||
if source_priority == 'native':
|
||||
if native_platform and info.get('sourcePriority') == 'native':
|
||||
native_video_id = info['nativeVideoId']
|
||||
if native_platform == 'youtube':
|
||||
return self.url_result(native_video_id, ie='Youtube')
|
||||
if native_platform == 'vimeo':
|
||||
return self.url_result(
|
||||
'http://vimeo.com/' + native_video_id, ie='Vimeo')
|
||||
return self.url_result(native_video_id, 'Youtube')
|
||||
elif native_platform == 'vimeo':
|
||||
return self.url_result(f'https://vimeo.com/{native_video_id}', 'Vimeo')
|
||||
|
||||
if not video_url:
|
||||
raise ExtractorError('No video found')
|
||||
elif not video_url:
|
||||
# Possibly a standard youtube embed?
|
||||
# TODO: Investigate if site still does this (there are no tests for it)
|
||||
return self.url_result(url, 'Generic')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': title,
|
||||
'description': description,
|
||||
**traverse_obj(info, {
|
||||
'title': 'title',
|
||||
'description': ('description', {clean_html}),
|
||||
'duration': ('videoLength', {int_or_none}),
|
||||
'width': ('videoWidth', {int_or_none}),
|
||||
'height': ('videoHeight', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
import json
|
||||
import urllib.error
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
@ -40,7 +40,7 @@ class CinetecaMilanoIE(InfoExtractor):
|
||||
'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or ''
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if ((isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 500)
|
||||
if ((isinstance(e.cause, HTTPError) and e.cause.status == 500)
|
||||
or isinstance(e.cause, json.JSONDecodeError)):
|
||||
self.raise_login_required(method='cookies')
|
||||
raise
|
||||
|
@ -33,7 +33,7 @@ class CiscoWebexIE(InfoExtractor):
|
||||
if rcid:
|
||||
webpage = self._download_webpage(url, None, note='Getting video ID')
|
||||
url = self._search_regex(self._VALID_URL, webpage, 'redirection url', group='url')
|
||||
url = self._request_webpage(url, None, note='Resolving final URL').geturl()
|
||||
url = self._request_webpage(url, None, note='Resolving final URL').url
|
||||
mobj = self._match_valid_url(url)
|
||||
subdomain = mobj.group('subdomain')
|
||||
siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2')
|
||||
|
61
plugins/youtube_download/yt_dlp/extractor/clipchamp.py
Normal file
61
plugins/youtube_download/yt_dlp/extractor/clipchamp.py
Normal file
@ -0,0 +1,61 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ClipchampIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU',
|
||||
'info_dict': {
|
||||
'id': 'gRXZ4ZhdDaU',
|
||||
'ext': 'mp4',
|
||||
'title': 'Untitled video',
|
||||
'uploader': 'Alexander Schwartz',
|
||||
'timestamp': 1680805580,
|
||||
'upload_date': '20230406',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
_STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
|
||||
_STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video']
|
||||
|
||||
storage_location = data.get('storage_location')
|
||||
if storage_location != 'cf_stream':
|
||||
raise ExtractorError(f'Unsupported clip storage location "{storage_location}"')
|
||||
|
||||
path = data['download_url']
|
||||
iframe = self._download_webpage(
|
||||
f'https://iframe.cloudflarestream.com/{path}', video_id, 'Downloading player iframe')
|
||||
subdomain = self._search_regex(
|
||||
r'\bcustomer-domain-prefix=["\']([\w-]+)["\']', iframe,
|
||||
'subdomain', fatal=False) or 'customer-2ut9yn3y6fta1yxe'
|
||||
|
||||
formats = self._extract_mpd_formats(
|
||||
self._STREAM_URL_TMPL % (subdomain, path, 'mpd'), video_id,
|
||||
query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash')
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
self._STREAM_URL_TMPL % (subdomain, path, 'm3u8'), video_id, 'mp4',
|
||||
query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'uploader': ' '.join(traverse_obj(data, ('creator', ('first_name', 'last_name'), {str}))) or None,
|
||||
**traverse_obj(data, {
|
||||
'title': ('project', 'project_name', {str}),
|
||||
'timestamp': ('created_at', {unified_timestamp}),
|
||||
'thumbnail': ('thumbnail_url', {url_or_none}),
|
||||
}),
|
||||
}
|
@ -2,7 +2,7 @@ from .mtv import MTVServicesInfoExtractor
|
||||
|
||||
|
||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?|collection-playlist)/(?P<id>[0-9a-z]{6})'
|
||||
_VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?|collection-playlist|movies)/(?P<id>[0-9a-z]{6})'
|
||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
@ -25,6 +25,9 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.cc.com/collection-playlist/cosnej/stand-up-specials/t6vtjb',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cc.com/movies/tkp406/a-cluesterfuenke-christmas',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
|
@ -13,6 +13,7 @@ import netrc
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import types
|
||||
@ -21,9 +22,21 @@ import urllib.request
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from ..compat import functools # isort: split
|
||||
from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
compat_os_name,
|
||||
urllib_req_to_req,
|
||||
)
|
||||
from ..cookies import LenientSimpleCookie
|
||||
from ..downloader.f4m import get_base_url, remove_encrypted_media
|
||||
from ..downloader.hls import HlsFD
|
||||
from ..networking import HEADRequest, Request
|
||||
from ..networking.exceptions import (
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
network_exceptions,
|
||||
)
|
||||
from ..utils import (
|
||||
IDENTITY,
|
||||
JSON_LD_RE,
|
||||
@ -32,8 +45,8 @@ from ..utils import (
|
||||
FormatSorter,
|
||||
GeoRestrictedError,
|
||||
GeoUtils,
|
||||
HEADRequest,
|
||||
LenientJSONDecoder,
|
||||
Popen,
|
||||
RegexNotFoundError,
|
||||
RetryManager,
|
||||
UnsupportedError,
|
||||
@ -56,7 +69,7 @@ from ..utils import (
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
network_exceptions,
|
||||
netrc_from_content,
|
||||
orderedSet,
|
||||
parse_bitrate,
|
||||
parse_codecs,
|
||||
@ -66,7 +79,6 @@ from ..utils import (
|
||||
parse_resolution,
|
||||
sanitize_filename,
|
||||
sanitize_url,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
@ -78,8 +90,6 @@ from ..utils import (
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_Request,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
@ -132,6 +142,7 @@ class InfoExtractor:
|
||||
is parsed from a string (in case of
|
||||
fragmented media)
|
||||
for MSS - URL of the ISM manifest.
|
||||
* request_data Data to send in POST request to the URL
|
||||
* manifest_url
|
||||
The URL of the manifest file in case of
|
||||
fragmented media:
|
||||
@ -219,7 +230,8 @@ class InfoExtractor:
|
||||
width : height ratio as float.
|
||||
* no_resume The server does not support resuming the
|
||||
(HTTP or RTMP) download. Boolean.
|
||||
* has_drm The format has DRM and cannot be downloaded. Boolean
|
||||
* has_drm True if the format has DRM and cannot be downloaded.
|
||||
'maybe' if the format may have DRM and has to be tested before download.
|
||||
* extra_param_to_segment_url A query string to append to each
|
||||
fragment's URL, or to update each existing query string
|
||||
with. Only applied by the native HLS/DASH downloaders.
|
||||
@ -285,6 +297,7 @@ class InfoExtractor:
|
||||
channel_id: Id of the channel.
|
||||
channel_url: Full URL to a channel webpage.
|
||||
channel_follower_count: Number of followers of the channel.
|
||||
channel_is_verified: Whether the channel is verified on the platform.
|
||||
location: Physical location where the video was filmed.
|
||||
subtitles: The available subtitles as a dictionary in the format
|
||||
{tag: subformats}. "tag" is usually a language code, and
|
||||
@ -313,6 +326,11 @@ class InfoExtractor:
|
||||
* "author" - human-readable name of the comment author
|
||||
* "author_id" - user ID of the comment author
|
||||
* "author_thumbnail" - The thumbnail of the comment author
|
||||
* "author_url" - The url to the comment author's page
|
||||
* "author_is_verified" - Whether the author is verified
|
||||
on the platform
|
||||
* "author_is_uploader" - Whether the comment is made by
|
||||
the video uploader
|
||||
* "id" - Comment ID
|
||||
* "html" - Comment as HTML
|
||||
* "text" - Plain text of the comment
|
||||
@ -324,8 +342,8 @@ class InfoExtractor:
|
||||
* "dislike_count" - Number of negative ratings of the comment
|
||||
* "is_favorited" - Whether the comment is marked as
|
||||
favorite by the video uploader
|
||||
* "author_is_uploader" - Whether the comment is made by
|
||||
the video uploader
|
||||
* "is_pinned" - Whether the comment is pinned to
|
||||
the top of the comments
|
||||
age_limit: Age restriction for the video, as an integer (years)
|
||||
webpage_url: The URL to the video webpage, if given to yt-dlp it
|
||||
should allow to get the same result again. (It will be set
|
||||
@ -349,6 +367,10 @@ class InfoExtractor:
|
||||
* "start_time" - The start time of the chapter in seconds
|
||||
* "end_time" - The end time of the chapter in seconds
|
||||
* "title" (optional, string)
|
||||
heatmap: A list of dictionaries, with the following entries:
|
||||
* "start_time" - The start time of the data point in seconds
|
||||
* "end_time" - The end time of the data point in seconds
|
||||
* "value" - The normalized value of the data point (float between 0 and 1)
|
||||
playable_in_embed: Whether this video is allowed to play in embedded
|
||||
players on other sites. Can be True (=always allowed),
|
||||
False (=never allowed), None (=unknown), or a string
|
||||
@ -460,8 +482,8 @@ class InfoExtractor:
|
||||
|
||||
|
||||
Subclasses of this should also be added to the list of extractors and
|
||||
should define a _VALID_URL regexp and, re-define the _real_extract() and
|
||||
(optionally) _real_initialize() methods.
|
||||
should define _VALID_URL as a regexp or a Sequence of regexps, and
|
||||
re-define the _real_extract() and (optionally) _real_initialize() methods.
|
||||
|
||||
Subclasses may also override suitable() if necessary, but ensure the function
|
||||
signature is preserved and that this function imports everything it needs
|
||||
@ -524,7 +546,7 @@ class InfoExtractor:
|
||||
_EMBED_REGEX = []
|
||||
|
||||
def _login_hint(self, method=NO_DEFAULT, netrc=None):
|
||||
password_hint = f'--username and --password, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
|
||||
password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
|
||||
return {
|
||||
None: '',
|
||||
'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
|
||||
@ -551,8 +573,8 @@ class InfoExtractor:
|
||||
# we have cached the regexp for *this* class, whereas getattr would also
|
||||
# match the superclass
|
||||
if '_VALID_URL_RE' not in cls.__dict__:
|
||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||
return cls._VALID_URL_RE.match(url)
|
||||
cls._VALID_URL_RE = tuple(map(re.compile, variadic(cls._VALID_URL)))
|
||||
return next(filter(None, (regex.match(url) for regex in cls._VALID_URL_RE)), None)
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
@ -707,11 +729,11 @@ class InfoExtractor:
|
||||
except UnsupportedError:
|
||||
raise
|
||||
except ExtractorError as e:
|
||||
e.video_id = e.video_id or self.get_temp_id(url),
|
||||
e.video_id = e.video_id or self.get_temp_id(url)
|
||||
e.ie = e.ie or self.IE_NAME,
|
||||
e.traceback = e.traceback or sys.exc_info()[2]
|
||||
raise
|
||||
except http.client.IncompleteRead as e:
|
||||
except IncompleteRead as e:
|
||||
raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
|
||||
except (KeyError, StopIteration) as e:
|
||||
raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
|
||||
@ -770,20 +792,25 @@ class InfoExtractor:
|
||||
|
||||
@staticmethod
|
||||
def __can_accept_status_code(err, expected_status):
|
||||
assert isinstance(err, urllib.error.HTTPError)
|
||||
assert isinstance(err, HTTPError)
|
||||
if expected_status is None:
|
||||
return False
|
||||
elif callable(expected_status):
|
||||
return expected_status(err.code) is True
|
||||
return expected_status(err.status) is True
|
||||
else:
|
||||
return err.code in variadic(expected_status)
|
||||
return err.status in variadic(expected_status)
|
||||
|
||||
def _create_request(self, url_or_request, data=None, headers=None, query=None):
|
||||
if isinstance(url_or_request, urllib.request.Request):
|
||||
return update_Request(url_or_request, data=data, headers=headers, query=query)
|
||||
if query:
|
||||
url_or_request = update_url_query(url_or_request, query)
|
||||
return sanitized_Request(url_or_request, data, headers or {})
|
||||
self._downloader.deprecation_warning(
|
||||
'Passing a urllib.request.Request to _create_request() is deprecated. '
|
||||
'Use yt_dlp.networking.common.Request instead.')
|
||||
url_or_request = urllib_req_to_req(url_or_request)
|
||||
elif not isinstance(url_or_request, Request):
|
||||
url_or_request = Request(url_or_request)
|
||||
|
||||
url_or_request.update(data=data, headers=headers, query=query)
|
||||
return url_or_request
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
|
||||
"""
|
||||
@ -819,14 +846,9 @@ class InfoExtractor:
|
||||
try:
|
||||
return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
|
||||
except network_exceptions as err:
|
||||
if isinstance(err, urllib.error.HTTPError):
|
||||
if isinstance(err, HTTPError):
|
||||
if self.__can_accept_status_code(err, expected_status):
|
||||
# Retain reference to error to prevent file object from
|
||||
# being closed before it can be read. Works around the
|
||||
# effects of <https://bugs.python.org/issue15002>
|
||||
# introduced in Python 3.4.1.
|
||||
err.fp._error = err
|
||||
return err.fp
|
||||
return err.response
|
||||
|
||||
if errnote is False:
|
||||
return False
|
||||
@ -958,11 +980,11 @@ class InfoExtractor:
|
||||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
if self.get_param('dump_intermediate_pages', False):
|
||||
self.to_screen('Dumping request to ' + urlh.geturl())
|
||||
self.to_screen('Dumping request to ' + urlh.url)
|
||||
dump = base64.b64encode(webpage_bytes).decode('ascii')
|
||||
self._downloader.to_screen(dump)
|
||||
if self.get_param('write_pages'):
|
||||
filename = self._request_dump_filename(urlh.geturl(), video_id)
|
||||
filename = self._request_dump_filename(urlh.url, video_id)
|
||||
self.to_screen(f'Saving request to {filename}')
|
||||
with open(filename, 'wb') as outf:
|
||||
outf.write(webpage_bytes)
|
||||
@ -1020,7 +1042,7 @@ class InfoExtractor:
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
||||
if self.get_param('load_pages'):
|
||||
url_or_request = self._create_request(url_or_request, data, headers, query)
|
||||
filename = self._request_dump_filename(url_or_request.full_url, video_id)
|
||||
filename = self._request_dump_filename(url_or_request.url, video_id)
|
||||
self.to_screen(f'Loading request from {filename}')
|
||||
try:
|
||||
with open(filename, 'rb') as dumpf:
|
||||
@ -1094,7 +1116,7 @@ class InfoExtractor:
|
||||
while True:
|
||||
try:
|
||||
return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
|
||||
except http.client.IncompleteRead as e:
|
||||
except IncompleteRead as e:
|
||||
try_count += 1
|
||||
if try_count >= tries:
|
||||
raise e
|
||||
@ -1280,45 +1302,48 @@ class InfoExtractor:
|
||||
return clean_html(res)
|
||||
|
||||
def _get_netrc_login_info(self, netrc_machine=None):
|
||||
username = None
|
||||
password = None
|
||||
netrc_machine = netrc_machine or self._NETRC_MACHINE
|
||||
|
||||
if self.get_param('usenetrc', False):
|
||||
try:
|
||||
cmd = self.get_param('netrc_cmd')
|
||||
if cmd:
|
||||
cmd = cmd.replace('{}', netrc_machine)
|
||||
self.to_screen(f'Executing command: {cmd}')
|
||||
stdout, _, ret = Popen.run(cmd, text=True, shell=True, stdout=subprocess.PIPE)
|
||||
if ret != 0:
|
||||
raise OSError(f'Command returned error code {ret}')
|
||||
info = netrc_from_content(stdout).authenticators(netrc_machine)
|
||||
|
||||
elif self.get_param('usenetrc', False):
|
||||
netrc_file = compat_expanduser(self.get_param('netrc_location') or '~')
|
||||
if os.path.isdir(netrc_file):
|
||||
netrc_file = os.path.join(netrc_file, '.netrc')
|
||||
info = netrc.netrc(file=netrc_file).authenticators(netrc_machine)
|
||||
if info is not None:
|
||||
username = info[0]
|
||||
password = info[2]
|
||||
else:
|
||||
raise netrc.NetrcParseError(
|
||||
'No authenticators for %s' % netrc_machine)
|
||||
except (OSError, netrc.NetrcParseError) as err:
|
||||
self.report_warning(
|
||||
'parsing .netrc: %s' % error_to_compat_str(err))
|
||||
info = netrc.netrc(netrc_file).authenticators(netrc_machine)
|
||||
|
||||
return username, password
|
||||
else:
|
||||
return None, None
|
||||
if not info:
|
||||
raise netrc.NetrcParseError(f'No authenticators for {netrc_machine}')
|
||||
return info[0], info[2]
|
||||
|
||||
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
|
||||
"""
|
||||
Get the login info as (username, password)
|
||||
First look for the manually specified credentials using username_option
|
||||
and password_option as keys in params dictionary. If no such credentials
|
||||
available look in the netrc file using the netrc_machine or _NETRC_MACHINE
|
||||
value.
|
||||
are available try the netrc_cmd if it is defined or look in the
|
||||
netrc file using the netrc_machine or _NETRC_MACHINE value.
|
||||
If there's no info available, return (None, None)
|
||||
"""
|
||||
|
||||
# Attempt to use provided username and password or .netrc data
|
||||
username = self.get_param(username_option)
|
||||
if username is not None:
|
||||
password = self.get_param(password_option)
|
||||
else:
|
||||
try:
|
||||
username, password = self._get_netrc_login_info(netrc_machine)
|
||||
|
||||
except (OSError, netrc.NetrcParseError) as err:
|
||||
self.report_warning(f'Failed to parse .netrc: {err}')
|
||||
return None, None
|
||||
return username, password
|
||||
|
||||
def _get_tfa_info(self, note='two-factor verification code'):
|
||||
@ -1338,7 +1363,7 @@ class InfoExtractor:
|
||||
# Helper functions for extracting OpenGraph info
|
||||
@staticmethod
|
||||
def _og_regexes(prop):
|
||||
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
|
||||
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))'
|
||||
property_re = (r'(?:name|property)=(?:\'og%(sep)s%(prop)s\'|"og%(sep)s%(prop)s"|\s*og%(sep)s%(prop)s\b)'
|
||||
% {'prop': re.escape(prop), 'sep': '(?::|[:-])'})
|
||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||
@ -1788,7 +1813,7 @@ class InfoExtractor:
|
||||
return []
|
||||
|
||||
manifest, urlh = res
|
||||
manifest_url = urlh.geturl()
|
||||
manifest_url = urlh.url
|
||||
|
||||
return self._parse_f4m_formats(
|
||||
manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
|
||||
@ -1947,7 +1972,7 @@ class InfoExtractor:
|
||||
return [], {}
|
||||
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
m3u8_url = urlh.url
|
||||
|
||||
return self._parse_m3u8_formats_and_subtitles(
|
||||
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
|
||||
@ -1961,11 +1986,7 @@ class InfoExtractor:
|
||||
errnote=None, fatal=True, data=None, headers={}, query={},
|
||||
video_id=None):
|
||||
formats, subtitles = [], {}
|
||||
|
||||
has_drm = re.search('|'.join([
|
||||
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
|
||||
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
|
||||
]), m3u8_doc)
|
||||
has_drm = HlsFD._has_drm(m3u8_doc)
|
||||
|
||||
def format_url(url):
|
||||
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
@ -2063,6 +2084,7 @@ class InfoExtractor:
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
'quality': quality,
|
||||
'has_drm': has_drm,
|
||||
'vcodec': 'none' if media_type == 'AUDIO' else None,
|
||||
} for idx in _extract_m3u8_playlist_indices(manifest_url))
|
||||
|
||||
@ -2122,6 +2144,7 @@ class InfoExtractor:
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
'quality': quality,
|
||||
'has_drm': has_drm,
|
||||
}
|
||||
resolution = last_stream_inf.get('RESOLUTION')
|
||||
if resolution:
|
||||
@ -2225,18 +2248,10 @@ class InfoExtractor:
|
||||
if res is False:
|
||||
assert not fatal
|
||||
return [], {}
|
||||
|
||||
smil, urlh = res
|
||||
smil_url = urlh.geturl()
|
||||
|
||||
namespace = self._parse_smil_namespace(smil)
|
||||
|
||||
fmts = self._parse_smil_formats(
|
||||
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
|
||||
subs = self._parse_smil_subtitles(
|
||||
smil, namespace=namespace)
|
||||
|
||||
return fmts, subs
|
||||
return self._parse_smil_formats_and_subtitles(smil, urlh.url, video_id, f4m_params=f4m_params,
|
||||
namespace=self._parse_smil_namespace(smil))
|
||||
|
||||
def _extract_smil_formats(self, *args, **kwargs):
|
||||
fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)
|
||||
@ -2250,7 +2265,7 @@ class InfoExtractor:
|
||||
return {}
|
||||
|
||||
smil, urlh = res
|
||||
smil_url = urlh.geturl()
|
||||
smil_url = urlh.url
|
||||
|
||||
return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
|
||||
|
||||
@ -2262,9 +2277,8 @@ class InfoExtractor:
|
||||
def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
|
||||
namespace = self._parse_smil_namespace(smil)
|
||||
|
||||
formats = self._parse_smil_formats(
|
||||
formats, subtitles = self._parse_smil_formats_and_subtitles(
|
||||
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
|
||||
subtitles = self._parse_smil_subtitles(smil, namespace=namespace)
|
||||
|
||||
video_id = os.path.splitext(url_basename(smil_url))[0]
|
||||
title = None
|
||||
@ -2303,7 +2317,14 @@ class InfoExtractor:
|
||||
return self._search_regex(
|
||||
r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
|
||||
|
||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
def _parse_smil_formats(self, *args, **kwargs):
|
||||
fmts, subs = self._parse_smil_formats_and_subtitles(*args, **kwargs)
|
||||
if subs:
|
||||
self._report_ignoring_subs('SMIL')
|
||||
return fmts
|
||||
|
||||
def _parse_smil_formats_and_subtitles(
|
||||
self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
base = smil_url
|
||||
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
|
||||
b = meta.get('base') or meta.get('httpBase')
|
||||
@ -2311,7 +2332,7 @@ class InfoExtractor:
|
||||
base = b
|
||||
break
|
||||
|
||||
formats = []
|
||||
formats, subtitles = [], {}
|
||||
rtmp_count = 0
|
||||
http_count = 0
|
||||
m3u8_count = 0
|
||||
@ -2359,8 +2380,9 @@ class InfoExtractor:
|
||||
src_url = src_url.strip()
|
||||
|
||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
|
||||
src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
|
||||
self._merge_subtitles(m3u8_subs, target=subtitles)
|
||||
if len(m3u8_formats) == 1:
|
||||
m3u8_count += 1
|
||||
m3u8_formats[0].update({
|
||||
@ -2381,11 +2403,15 @@ class InfoExtractor:
|
||||
f4m_url += urllib.parse.urlencode(f4m_params)
|
||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
|
||||
elif src_ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src_url, video_id, mpd_id='dash', fatal=False))
|
||||
mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(
|
||||
src_url, video_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(mpd_formats)
|
||||
self._merge_subtitles(mpd_subs, target=subtitles)
|
||||
elif re.search(r'\.ism/[Mm]anifest', src_url):
|
||||
formats.extend(self._extract_ism_formats(
|
||||
src_url, video_id, ism_id='mss', fatal=False))
|
||||
ism_formats, ism_subs = self._extract_ism_formats_and_subtitles(
|
||||
src_url, video_id, ism_id='mss', fatal=False)
|
||||
formats.extend(ism_formats)
|
||||
self._merge_subtitles(ism_subs, target=subtitles)
|
||||
elif src_url.startswith('http') and self._is_valid_url(src, video_id):
|
||||
http_count += 1
|
||||
formats.append({
|
||||
@ -2416,7 +2442,10 @@ class InfoExtractor:
|
||||
'format_note': 'SMIL storyboards',
|
||||
})
|
||||
|
||||
return formats
|
||||
smil_subs = self._parse_smil_subtitles(smil, namespace=namespace)
|
||||
self._merge_subtitles(smil_subs, target=subtitles)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||
urls = []
|
||||
@ -2442,7 +2471,7 @@ class InfoExtractor:
|
||||
return []
|
||||
|
||||
xspf, urlh = res
|
||||
xspf_url = urlh.geturl()
|
||||
xspf_url = urlh.url
|
||||
|
||||
return self._parse_xspf(
|
||||
xspf, playlist_id, xspf_url=xspf_url,
|
||||
@ -2513,7 +2542,7 @@ class InfoExtractor:
|
||||
return [], {}
|
||||
|
||||
# We could have been redirected to a new url when we retrieved our mpd file.
|
||||
mpd_url = urlh.geturl()
|
||||
mpd_url = urlh.url
|
||||
mpd_base_url = base_url(mpd_url)
|
||||
|
||||
return self._parse_mpd_formats_and_subtitles(
|
||||
@ -2884,7 +2913,7 @@ class InfoExtractor:
|
||||
if ism_doc is None:
|
||||
return [], {}
|
||||
|
||||
return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
|
||||
return self._parse_ism_formats_and_subtitles(ism_doc, urlh.url, ism_id)
|
||||
|
||||
def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
|
||||
"""
|
||||
@ -2980,6 +3009,8 @@ class InfoExtractor:
|
||||
'protocol': 'ism',
|
||||
'fragments': fragments,
|
||||
'has_drm': ism_doc.find('Protection') is not None,
|
||||
'language': stream_language,
|
||||
'audio_channels': int_or_none(track.get('Channels')),
|
||||
'_download_params': {
|
||||
'stream_type': stream_type,
|
||||
'duration': duration,
|
||||
@ -3435,7 +3466,7 @@ class InfoExtractor:
|
||||
|
||||
def _get_cookies(self, url):
|
||||
""" Return a http.cookies.SimpleCookie with the cookies for the url """
|
||||
return LenientSimpleCookie(self._downloader._calc_cookies(url))
|
||||
return LenientSimpleCookie(self._downloader.cookiejar.get_cookie_header(url))
|
||||
|
||||
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
||||
"""
|
||||
@ -3510,7 +3541,7 @@ class InfoExtractor:
|
||||
@classmethod
|
||||
def is_single_video(cls, url):
|
||||
"""Returns whether the URL is of a single video, None if unknown"""
|
||||
assert cls.suitable(url), 'The URL must be suitable for the extractor'
|
||||
if cls.suitable(url):
|
||||
return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE)
|
||||
|
||||
@classmethod
|
||||
@ -3524,7 +3555,7 @@ class InfoExtractor:
|
||||
desc = ''
|
||||
if cls._NETRC_MACHINE:
|
||||
if markdown:
|
||||
desc += f' [<abbr title="netrc machine"><em>{cls._NETRC_MACHINE}</em></abbr>]'
|
||||
desc += f' [*{cls._NETRC_MACHINE}*](## "netrc machine")'
|
||||
else:
|
||||
desc += f' [{cls._NETRC_MACHINE}]'
|
||||
if cls.IE_DESC is False:
|
||||
@ -3646,6 +3677,42 @@ class InfoExtractor:
|
||||
or urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
|
||||
or default)
|
||||
|
||||
def _extract_chapters_helper(self, chapter_list, start_function, title_function, duration, strict=True):
|
||||
if not duration:
|
||||
return
|
||||
chapter_list = [{
|
||||
'start_time': start_function(chapter),
|
||||
'title': title_function(chapter),
|
||||
} for chapter in chapter_list or []]
|
||||
if strict:
|
||||
warn = self.report_warning
|
||||
else:
|
||||
warn = self.write_debug
|
||||
chapter_list.sort(key=lambda c: c['start_time'] or 0)
|
||||
|
||||
chapters = [{'start_time': 0}]
|
||||
for idx, chapter in enumerate(chapter_list):
|
||||
if chapter['start_time'] is None:
|
||||
warn(f'Incomplete chapter {idx}')
|
||||
elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
|
||||
chapters.append(chapter)
|
||||
elif chapter not in chapters:
|
||||
issue = (f'{chapter["start_time"]} > {duration}' if chapter['start_time'] > duration
|
||||
else f'{chapter["start_time"]} < {chapters[-1]["start_time"]}')
|
||||
warn(f'Invalid start time ({issue}) for chapter "{chapter["title"]}"')
|
||||
return chapters[1:]
|
||||
|
||||
def _extract_chapters_from_description(self, description, duration):
|
||||
duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
|
||||
sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
|
||||
return self._extract_chapters_helper(
|
||||
re.findall(sep_re % (duration_re, r'.+?'), description or ''),
|
||||
start_function=lambda x: parse_duration(x[0]), title_function=lambda x: x[1],
|
||||
duration=duration, strict=False) or self._extract_chapters_helper(
|
||||
re.findall(sep_re % (r'.+?', duration_re), description or ''),
|
||||
start_function=lambda x: parse_duration(x[1]), title_function=lambda x: x[0],
|
||||
duration=duration, strict=False)
|
||||
|
||||
@staticmethod
|
||||
def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
|
||||
all_known = all(map(
|
||||
|
@ -4,7 +4,7 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
@ -113,7 +113,7 @@ class CrackleIE(InfoExtractor):
|
||||
errnote='Unable to download media JSON')
|
||||
except ExtractorError as e:
|
||||
# 401 means geo restriction, trying next country
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
continue
|
||||
raise
|
||||
|
||||
|
34
plugins/youtube_download/yt_dlp/extractor/crtvg.py
Normal file
34
plugins/youtube_download/yt_dlp/extractor/crtvg.py
Normal file
@ -0,0 +1,34 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_end
|
||||
|
||||
|
||||
class CrtvgIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?crtvg\.es/tvg/a-carta/[^/#?]+-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.crtvg.es/tvg/a-carta/os-caimans-do-tea-5839623',
|
||||
'md5': 'c0958d9ff90e4503a75544358758921d',
|
||||
'info_dict': {
|
||||
'id': '5839623',
|
||||
'title': 'Os caimáns do Tea',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:f71cfba21ae564f0a6f415b31de1f842',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._search_regex(r'var\s+url\s*=\s*["\']([^"\']+)', webpage, 'video url')
|
||||
formats = self._extract_m3u8_formats(video_url + '/playlist.m3u8', video_id, fatal=False)
|
||||
formats.extend(self._extract_mpd_formats(video_url + '/manifest.mpd', video_id, fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': remove_end(self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage, 'title', default=None), ' | CRTVG'),
|
||||
'description': self._html_search_meta('description', webpage, 'description', default=None),
|
||||
'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage, 'thumbnail', default=None),
|
||||
}
|
@ -1,28 +1,50 @@
|
||||
import base64
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_age_limit,
|
||||
parse_count,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
remove_start,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class CrunchyrollBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.crunchyroll.com/welcome/login'
|
||||
_BASE_URL = 'https://www.crunchyroll.com'
|
||||
_API_BASE = 'https://api.crunchyroll.com'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
params = None
|
||||
_AUTH_HEADERS = None
|
||||
_API_ENDPOINT = None
|
||||
_BASIC_AUTH = None
|
||||
_CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
|
||||
_LOCALE_LOOKUP = {
|
||||
'ar': 'ar-SA',
|
||||
'de': 'de-DE',
|
||||
'': 'en-US',
|
||||
'es': 'es-419',
|
||||
'es-es': 'es-ES',
|
||||
'fr': 'fr-FR',
|
||||
'it': 'it-IT',
|
||||
'pt-br': 'pt-BR',
|
||||
'pt-pt': 'pt-PT',
|
||||
'ru': 'ru-RU',
|
||||
'hi': 'hi-IN',
|
||||
}
|
||||
|
||||
@property
|
||||
def is_logged_in(self):
|
||||
return self._get_cookies(self._LOGIN_URL).get('etp_rt')
|
||||
return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self.is_logged_in:
|
||||
@ -35,7 +57,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
'device_id': 'whatvalueshouldbeforweb',
|
||||
'device_type': 'com.crunchyroll.static',
|
||||
'access_token': 'giKq5eY27ny3cqz',
|
||||
'referer': self._LOGIN_URL
|
||||
'referer': f'{self._BASE_URL}/welcome/login'
|
||||
})
|
||||
if upsell_response['code'] != 'ok':
|
||||
raise ExtractorError('Could not get session id')
|
||||
@ -43,149 +65,89 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
|
||||
login_response = self._download_json(
|
||||
f'{self._API_BASE}/login.1.json', None, 'Logging in',
|
||||
data=urllib.parse.urlencode({
|
||||
data=urlencode_postdata({
|
||||
'account': username,
|
||||
'password': password,
|
||||
'session_id': session_id
|
||||
}).encode('ascii'))
|
||||
}))
|
||||
if login_response['code'] != 'ok':
|
||||
raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
|
||||
if not self.is_logged_in:
|
||||
raise ExtractorError('Login succeeded but did not set etp_rt cookie')
|
||||
|
||||
def _get_embedded_json(self, webpage, display_id):
|
||||
initial_state = self._parse_json(self._search_regex(
|
||||
r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), display_id)
|
||||
app_config = self._parse_json(self._search_regex(
|
||||
r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), display_id)
|
||||
return initial_state, app_config
|
||||
def _update_auth(self):
|
||||
if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
|
||||
return
|
||||
|
||||
def _get_params(self, lang):
|
||||
if not CrunchyrollBaseIE.params:
|
||||
if self._get_cookies(f'https://www.crunchyroll.com/{lang}').get('etp_rt'):
|
||||
grant_type, key = 'etp_rt_cookie', 'accountAuthClientId'
|
||||
else:
|
||||
grant_type, key = 'client_id', 'anonClientId'
|
||||
|
||||
initial_state, app_config = self._get_embedded_json(self._download_webpage(
|
||||
f'https://www.crunchyroll.com/{lang}', None, note='Retrieving main page'), None)
|
||||
api_domain = app_config['cxApiParams']['apiDomain'].replace('beta.crunchyroll.com', 'www.crunchyroll.com')
|
||||
if not CrunchyrollBaseIE._BASIC_AUTH:
|
||||
cx_api_param = self._CLIENT_ID[self.is_logged_in]
|
||||
self.write_debug(f'Using cxApiParam={cx_api_param}')
|
||||
CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
|
||||
|
||||
grant_type = 'etp_rt_cookie' if self.is_logged_in else 'client_id'
|
||||
try:
|
||||
auth_response = self._download_json(
|
||||
f'{api_domain}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
|
||||
headers={
|
||||
'Authorization': 'Basic ' + str(base64.b64encode(('%s:' % app_config['cxApiParams'][key]).encode('ascii')), 'ascii')
|
||||
}, data=f'grant_type={grant_type}'.encode('ascii'))
|
||||
policy_response = self._download_json(
|
||||
f'{api_domain}/index/v2', None, note='Retrieving signed policy',
|
||||
headers={
|
||||
'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
|
||||
})
|
||||
cms = policy_response.get('cms_web')
|
||||
bucket = cms['bucket']
|
||||
params = {
|
||||
'Policy': cms['policy'],
|
||||
'Signature': cms['signature'],
|
||||
'Key-Pair-Id': cms['key_pair_id']
|
||||
}
|
||||
locale = traverse_obj(initial_state, ('localization', 'locale'))
|
||||
f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
|
||||
headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 403:
|
||||
raise ExtractorError(
|
||||
'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
|
||||
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
|
||||
'and your browser\'s User-Agent (with --user-agent)', expected=True)
|
||||
raise
|
||||
|
||||
CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
|
||||
CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
|
||||
|
||||
def _locale_from_language(self, language):
|
||||
config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
|
||||
return config_locale[0] if config_locale else self._LOCALE_LOOKUP.get(language)
|
||||
|
||||
def _call_base_api(self, endpoint, internal_id, lang, note=None, query={}):
|
||||
self._update_auth()
|
||||
|
||||
if not endpoint.startswith('/'):
|
||||
endpoint = f'/{endpoint}'
|
||||
|
||||
query = query.copy()
|
||||
locale = self._locale_from_language(lang)
|
||||
if locale:
|
||||
params['locale'] = locale
|
||||
CrunchyrollBaseIE.params = (api_domain, bucket, params)
|
||||
return CrunchyrollBaseIE.params
|
||||
query['locale'] = locale
|
||||
|
||||
return self._download_json(
|
||||
f'{self._BASE_URL}{endpoint}', internal_id, note or f'Calling API: {endpoint}',
|
||||
headers=CrunchyrollBaseIE._AUTH_HEADERS, query=query)
|
||||
|
||||
class CrunchyrollBetaIE(CrunchyrollBaseIE):
|
||||
IE_NAME = 'crunchyroll'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:beta|www)\.crunchyroll\.com/
|
||||
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
|
||||
watch/(?P<id>\w+)
|
||||
(?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
|
||||
'info_dict': {
|
||||
'id': 'GY2P1Q98Y',
|
||||
'ext': 'mp4',
|
||||
'duration': 1380.241,
|
||||
'timestamp': 1459632600,
|
||||
'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
|
||||
'title': 'World Trigger Episode 73 – To the Future',
|
||||
'upload_date': '20160402',
|
||||
'series': 'World Trigger',
|
||||
'series_id': 'GR757DMKY',
|
||||
'season': 'World Trigger',
|
||||
'season_id': 'GR9P39NJ6',
|
||||
'season_number': 1,
|
||||
'episode': 'To the Future',
|
||||
'episode_number': 73,
|
||||
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
|
||||
'chapters': 'count:2',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
|
||||
'info_dict': {
|
||||
'id': 'GYE5WKQGR',
|
||||
'ext': 'mp4',
|
||||
'duration': 366.459,
|
||||
'timestamp': 1476788400,
|
||||
'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
|
||||
'title': 'SHELTER Episode – Porter Robinson presents Shelter the Animation',
|
||||
'upload_date': '20161018',
|
||||
'series': 'SHELTER',
|
||||
'series_id': 'GYGG09WWY',
|
||||
'season': 'SHELTER',
|
||||
'season_id': 'GR09MGK4R',
|
||||
'season_number': 1,
|
||||
'episode': 'Porter Robinson presents Shelter the Animation',
|
||||
'episode_number': 0,
|
||||
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
|
||||
'chapters': 'count:0',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'Video is Premium only',
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
|
||||
'only_matching': True,
|
||||
}]
|
||||
def _call_api(self, path, internal_id, lang, note='api', query={}):
|
||||
if not path.startswith(f'/content/v2/{self._API_ENDPOINT}/'):
|
||||
path = f'/content/v2/{self._API_ENDPOINT}/{path}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
|
||||
api_domain, bucket, params = self._get_params(lang)
|
||||
try:
|
||||
result = self._call_base_api(
|
||||
path, internal_id, lang, f'Downloading {note} JSON ({self._API_ENDPOINT})', query=query)
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 404:
|
||||
return None
|
||||
raise
|
||||
|
||||
episode_response = self._download_json(
|
||||
f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
|
||||
note='Retrieving episode metadata', query=params)
|
||||
if episode_response.get('is_premium_only') and not bucket.endswith('crunchyroll'):
|
||||
if self.is_logged_in:
|
||||
raise ExtractorError('This video is for premium members only', expected=True)
|
||||
else:
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
if not result:
|
||||
raise ExtractorError(f'Unexpected response when downloading {note} JSON')
|
||||
return result
|
||||
|
||||
stream_response = self._download_json(
|
||||
f'{api_domain}{episode_response["__links__"]["streams"]["href"]}', display_id,
|
||||
note='Retrieving stream info', query=params)
|
||||
get_streams = lambda name: (traverse_obj(stream_response, name) or {}).items()
|
||||
|
||||
requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
|
||||
hardsub_preference = qualities(requested_hardsubs[::-1])
|
||||
def _extract_formats(self, stream_response, display_id=None):
|
||||
requested_formats = self._configuration_arg('format') or ['adaptive_hls']
|
||||
|
||||
available_formats = {}
|
||||
for stream_type, streams in get_streams('streams'):
|
||||
for stream_type, streams in traverse_obj(
|
||||
stream_response, (('streams', ('data', 0)), {dict.items}, ...)):
|
||||
if stream_type not in requested_formats:
|
||||
continue
|
||||
for stream in streams.values():
|
||||
if not stream.get('url'):
|
||||
continue
|
||||
for stream in traverse_obj(streams, lambda _, v: v['url']):
|
||||
hardsub_lang = stream.get('hardsub_locale') or ''
|
||||
format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
|
||||
available_formats[hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url'])
|
||||
|
||||
requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
|
||||
if '' in available_formats and 'all' not in requested_hardsubs:
|
||||
full_format_langs = set(requested_hardsubs)
|
||||
self.to_screen(
|
||||
@ -196,6 +158,8 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE):
|
||||
else:
|
||||
full_format_langs = set(map(str.lower, available_formats))
|
||||
|
||||
audio_locale = traverse_obj(stream_response, ((None, 'meta'), 'audio_locale'), get_all=False)
|
||||
hardsub_preference = qualities(requested_hardsubs[::-1])
|
||||
formats = []
|
||||
for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
|
||||
if stream_type.endswith('hls'):
|
||||
@ -214,63 +178,292 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE):
|
||||
continue
|
||||
for f in adaptive_formats:
|
||||
if f.get('acodec') != 'none':
|
||||
f['language'] = stream_response.get('audio_locale')
|
||||
f['language'] = audio_locale
|
||||
f['quality'] = hardsub_preference(hardsub_lang.lower())
|
||||
formats.extend(adaptive_formats)
|
||||
|
||||
chapters = None
|
||||
return formats
|
||||
|
||||
def _extract_subtitles(self, data):
|
||||
subtitles = {}
|
||||
|
||||
for locale, subtitle in traverse_obj(data, ((None, 'meta'), 'subtitles', {dict.items}, ...)):
|
||||
subtitles[locale] = [traverse_obj(subtitle, {'url': 'url', 'ext': 'format'})]
|
||||
|
||||
return subtitles
|
||||
|
||||
|
||||
class CrunchyrollCmsBaseIE(CrunchyrollBaseIE):
|
||||
_API_ENDPOINT = 'cms'
|
||||
_CMS_EXPIRY = None
|
||||
|
||||
def _call_cms_api_signed(self, path, internal_id, lang, note='api'):
|
||||
if not CrunchyrollCmsBaseIE._CMS_EXPIRY or CrunchyrollCmsBaseIE._CMS_EXPIRY <= time_seconds():
|
||||
response = self._call_base_api('index/v2', None, lang, 'Retrieving signed policy')['cms_web']
|
||||
CrunchyrollCmsBaseIE._CMS_QUERY = {
|
||||
'Policy': response['policy'],
|
||||
'Signature': response['signature'],
|
||||
'Key-Pair-Id': response['key_pair_id'],
|
||||
}
|
||||
CrunchyrollCmsBaseIE._CMS_BUCKET = response['bucket']
|
||||
CrunchyrollCmsBaseIE._CMS_EXPIRY = parse_iso8601(response['expires']) - 10
|
||||
|
||||
if not path.startswith('/cms/v2'):
|
||||
path = f'/cms/v2{CrunchyrollCmsBaseIE._CMS_BUCKET}/{path}'
|
||||
|
||||
return self._call_base_api(
|
||||
path, internal_id, lang, f'Downloading {note} JSON (signed cms)', query=CrunchyrollCmsBaseIE._CMS_QUERY)
|
||||
|
||||
|
||||
class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
||||
IE_NAME = 'crunchyroll'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:beta\.|www\.)?crunchyroll\.com/
|
||||
(?:(?P<lang>\w{2}(?:-\w{2})?)/)?
|
||||
watch/(?!concert|musicvideo)(?P<id>\w+)'''
|
||||
_TESTS = [{
|
||||
# Premium only
|
||||
'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
|
||||
'info_dict': {
|
||||
'id': 'GY2P1Q98Y',
|
||||
'ext': 'mp4',
|
||||
'duration': 1380.241,
|
||||
'timestamp': 1459632600,
|
||||
'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
|
||||
'title': 'World Trigger Episode 73 – To the Future',
|
||||
'upload_date': '20160402',
|
||||
'series': 'World Trigger',
|
||||
'series_id': 'GR757DMKY',
|
||||
'season': 'World Trigger',
|
||||
'season_id': 'GR9P39NJ6',
|
||||
'season_number': 1,
|
||||
'episode': 'To the Future',
|
||||
'episode_number': 73,
|
||||
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'chapters': 'count:2',
|
||||
'age_limit': 14,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
|
||||
}, {
|
||||
# Premium only
|
||||
'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
|
||||
'info_dict': {
|
||||
'id': 'GYE5WKQGR',
|
||||
'ext': 'mp4',
|
||||
'duration': 366.459,
|
||||
'timestamp': 1476788400,
|
||||
'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
|
||||
'title': 'SHELTER – Porter Robinson presents Shelter the Animation',
|
||||
'upload_date': '20161018',
|
||||
'series': 'SHELTER',
|
||||
'series_id': 'GYGG09WWY',
|
||||
'season': 'SHELTER',
|
||||
'season_id': 'GR09MGK4R',
|
||||
'season_number': 1,
|
||||
'episode': 'Porter Robinson presents Shelter the Animation',
|
||||
'episode_number': 0,
|
||||
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'age_limit': 14,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/GJWU2VKK3/cherry-blossom-meeting-and-a-coming-blizzard',
|
||||
'info_dict': {
|
||||
'id': 'GJWU2VKK3',
|
||||
'ext': 'mp4',
|
||||
'duration': 1420.054,
|
||||
'description': 'md5:2d1c67c0ec6ae514d9c30b0b99a625cd',
|
||||
'title': 'The Ice Guy and His Cool Female Colleague Episode 1 – Cherry Blossom Meeting and a Coming Blizzard',
|
||||
'series': 'The Ice Guy and His Cool Female Colleague',
|
||||
'series_id': 'GW4HM75NP',
|
||||
'season': 'The Ice Guy and His Cool Female Colleague',
|
||||
'season_id': 'GY9PC21VE',
|
||||
'season_number': 1,
|
||||
'episode': 'Cherry Blossom Meeting and a Coming Blizzard',
|
||||
'episode_number': 1,
|
||||
'chapters': 'count:2',
|
||||
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'timestamp': 1672839000,
|
||||
'upload_date': '20230104',
|
||||
'age_limit': 14,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/GM8F313NQ',
|
||||
'info_dict': {
|
||||
'id': 'GM8F313NQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Garakowa -Restore the World-',
|
||||
'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608',
|
||||
'duration': 3996.104,
|
||||
'age_limit': 13,
|
||||
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/G62PEZ2E6',
|
||||
'info_dict': {
|
||||
'id': 'G62PEZ2E6',
|
||||
'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608',
|
||||
'age_limit': 13,
|
||||
'duration': 65.138,
|
||||
'title': 'Garakowa -Restore the World-',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/de/watch/GY2P1Q98Y',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
|
||||
'only_matching': True,
|
||||
}]
|
||||
# We want to support lazy playlist filtering and movie listings cannot be inside a playlist
|
||||
_RETURN_TYPE = 'video'
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, internal_id = self._match_valid_url(url).group('lang', 'id')
|
||||
|
||||
# We need to use unsigned API call to allow ratings query string
|
||||
response = traverse_obj(self._call_api(
|
||||
f'objects/{internal_id}', internal_id, lang, 'object info', {'ratings': 'true'}), ('data', 0, {dict}))
|
||||
if not response:
|
||||
raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)
|
||||
|
||||
object_type = response.get('type')
|
||||
if object_type == 'episode':
|
||||
result = self._transform_episode_response(response)
|
||||
|
||||
elif object_type == 'movie':
|
||||
result = self._transform_movie_response(response)
|
||||
|
||||
elif object_type == 'movie_listing':
|
||||
first_movie_id = traverse_obj(response, ('movie_listing_metadata', 'first_movie_id'))
|
||||
if not self._yes_playlist(internal_id, first_movie_id):
|
||||
return self.url_result(f'{self._BASE_URL}/{lang}watch/{first_movie_id}', CrunchyrollBetaIE, first_movie_id)
|
||||
|
||||
def entries():
|
||||
movies = self._call_api(f'movie_listings/{internal_id}/movies', internal_id, lang, 'movie list')
|
||||
for movie_response in traverse_obj(movies, ('data', ...)):
|
||||
yield self.url_result(
|
||||
f'{self._BASE_URL}/{lang}watch/{movie_response["id"]}',
|
||||
CrunchyrollBetaIE, **self._transform_movie_response(movie_response))
|
||||
|
||||
return self.playlist_result(entries(), **self._transform_movie_response(response))
|
||||
|
||||
else:
|
||||
raise ExtractorError(f'Unknown object type {object_type}')
|
||||
|
||||
# There might be multiple audio languages for one object (`<object>_metadata.versions`),
|
||||
# so we need to get the id from `streams_link` instead or we dont know which language to choose
|
||||
streams_link = response.get('streams_link')
|
||||
if not streams_link and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
|
||||
message = f'This {object_type} is for premium members only'
|
||||
if self.is_logged_in:
|
||||
raise ExtractorError(message, expected=True)
|
||||
self.raise_login_required(message)
|
||||
|
||||
# We need go from unsigned to signed api to avoid getting soft banned
|
||||
stream_response = self._call_cms_api_signed(remove_start(
|
||||
streams_link, '/content/v2/cms/'), internal_id, lang, 'stream info')
|
||||
result['formats'] = self._extract_formats(stream_response, internal_id)
|
||||
result['subtitles'] = self._extract_subtitles(stream_response)
|
||||
|
||||
# if no intro chapter is available, a 403 without usable data is returned
|
||||
intro_chapter = self._download_json(f'https://static.crunchyroll.com/datalab-intro-v2/{internal_id}.json',
|
||||
display_id, fatal=False, errnote=False)
|
||||
intro_chapter = self._download_json(
|
||||
f'https://static.crunchyroll.com/datalab-intro-v2/{internal_id}.json',
|
||||
internal_id, note='Downloading chapter info', fatal=False, errnote=False)
|
||||
if isinstance(intro_chapter, dict):
|
||||
chapters = [{
|
||||
result['chapters'] = [{
|
||||
'title': 'Intro',
|
||||
'start_time': float_or_none(intro_chapter.get('startTime')),
|
||||
'end_time': float_or_none(intro_chapter.get('endTime'))
|
||||
'end_time': float_or_none(intro_chapter.get('endTime')),
|
||||
}]
|
||||
|
||||
def calculate_count(item):
|
||||
return parse_count(''.join((item['displayed'], item.get('unit') or '')))
|
||||
|
||||
result.update(traverse_obj(response, ('rating', {
|
||||
'like_count': ('up', {calculate_count}),
|
||||
'dislike_count': ('down', {calculate_count}),
|
||||
})))
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _transform_episode_response(data):
|
||||
metadata = traverse_obj(data, (('episode_metadata', None), {dict}), get_all=False) or {}
|
||||
return {
|
||||
'id': internal_id,
|
||||
'title': '%s Episode %s – %s' % (
|
||||
episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
|
||||
'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')),
|
||||
'duration': float_or_none(episode_response.get('duration_ms'), 1000),
|
||||
'timestamp': parse_iso8601(episode_response.get('upload_date')),
|
||||
'series': episode_response.get('series_title'),
|
||||
'series_id': episode_response.get('series_id'),
|
||||
'season': episode_response.get('season_title'),
|
||||
'season_id': episode_response.get('season_id'),
|
||||
'season_number': episode_response.get('season_number'),
|
||||
'episode': episode_response.get('title'),
|
||||
'episode_number': episode_response.get('sequence_number'),
|
||||
'formats': formats,
|
||||
'thumbnails': [{
|
||||
'url': thumb.get('source'),
|
||||
'width': thumb.get('width'),
|
||||
'height': thumb.get('height'),
|
||||
} for thumb in traverse_obj(episode_response, ('images', 'thumbnail', ..., ...)) or []],
|
||||
'subtitles': {
|
||||
lang: [{
|
||||
'url': subtitle_data.get('url'),
|
||||
'ext': subtitle_data.get('format')
|
||||
}] for lang, subtitle_data in get_streams('subtitles')
|
||||
},
|
||||
'chapters': chapters
|
||||
'id': data['id'],
|
||||
'title': ' \u2013 '.join((
|
||||
('%s%s' % (
|
||||
format_field(metadata, 'season_title'),
|
||||
format_field(metadata, 'episode', ' Episode %s'))),
|
||||
format_field(data, 'title'))),
|
||||
**traverse_obj(data, {
|
||||
'episode': ('title', {str}),
|
||||
'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
|
||||
'thumbnails': ('images', 'thumbnail', ..., ..., {
|
||||
'url': ('source', {url_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
}),
|
||||
**traverse_obj(metadata, {
|
||||
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
|
||||
'timestamp': ('upload_date', {parse_iso8601}),
|
||||
'series': ('series_title', {str}),
|
||||
'series_id': ('series_id', {str}),
|
||||
'season': ('season_title', {str}),
|
||||
'season_id': ('season_id', {str}),
|
||||
'season_number': ('season_number', ({int}, {float_or_none})),
|
||||
'episode_number': ('sequence_number', ({int}, {float_or_none})),
|
||||
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
|
||||
'language': ('audio_locale', {str}),
|
||||
}, get_all=False),
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _transform_movie_response(data):
|
||||
metadata = traverse_obj(data, (('movie_metadata', 'movie_listing_metadata', None), {dict}), get_all=False) or {}
|
||||
return {
|
||||
'id': data['id'],
|
||||
**traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
|
||||
'thumbnails': ('images', 'thumbnail', ..., ..., {
|
||||
'url': ('source', {url_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
}),
|
||||
**traverse_obj(metadata, {
|
||||
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
|
||||
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
|
||||
class CrunchyrollBetaShowIE(CrunchyrollCmsBaseIE):
|
||||
IE_NAME = 'crunchyroll:playlist'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:beta|www)\.crunchyroll\.com/
|
||||
https?://(?:beta\.|www\.)?crunchyroll\.com/
|
||||
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
|
||||
series/(?P<id>\w+)
|
||||
(?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
|
||||
series/(?P<id>\w+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
|
||||
'info_dict': {
|
||||
'id': 'GY19NQ2QR',
|
||||
'title': 'Girl Friend BETA',
|
||||
'description': 'md5:99c1b22ee30a74b536a8277ced8eb750',
|
||||
# XXX: `thumbnail` does not get set from `thumbnails` in playlist
|
||||
# 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}, {
|
||||
@ -279,41 +472,179 @@ class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
|
||||
api_domain, bucket, params = self._get_params(lang)
|
||||
|
||||
series_response = self._download_json(
|
||||
f'{api_domain}/cms/v2{bucket}/series/{internal_id}', display_id,
|
||||
note='Retrieving series metadata', query=params)
|
||||
|
||||
seasons_response = self._download_json(
|
||||
f'{api_domain}/cms/v2{bucket}/seasons?series_id={internal_id}', display_id,
|
||||
note='Retrieving season list', query=params)
|
||||
lang, internal_id = self._match_valid_url(url).group('lang', 'id')
|
||||
|
||||
def entries():
|
||||
for season in seasons_response['items']:
|
||||
episodes_response = self._download_json(
|
||||
f'{api_domain}/cms/v2{bucket}/episodes?season_id={season["id"]}', display_id,
|
||||
note=f'Retrieving episode list for {season.get("slug_title")}', query=params)
|
||||
for episode in episodes_response['items']:
|
||||
episode_id = episode['id']
|
||||
episode_display_id = episode['slug_title']
|
||||
yield {
|
||||
'_type': 'url',
|
||||
'url': f'https://www.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}',
|
||||
'ie_key': CrunchyrollBetaIE.ie_key(),
|
||||
'id': episode_id,
|
||||
'title': '%s Episode %s – %s' % (episode.get('season_title'), episode.get('episode'), episode.get('title')),
|
||||
'description': try_get(episode, lambda x: x['description'].replace(r'\r\n', '\n')),
|
||||
'duration': float_or_none(episode.get('duration_ms'), 1000),
|
||||
'series': episode.get('series_title'),
|
||||
'series_id': episode.get('series_id'),
|
||||
'season': episode.get('season_title'),
|
||||
'season_id': episode.get('season_id'),
|
||||
'season_number': episode.get('season_number'),
|
||||
'episode': episode.get('title'),
|
||||
'episode_number': episode.get('sequence_number'),
|
||||
'language': episode.get('audio_locale'),
|
||||
seasons_response = self._call_cms_api_signed(f'seasons?series_id={internal_id}', internal_id, lang, 'seasons')
|
||||
for season in traverse_obj(seasons_response, ('items', ..., {dict})):
|
||||
episodes_response = self._call_cms_api_signed(
|
||||
f'episodes?season_id={season["id"]}', season["id"], lang, 'episode list')
|
||||
for episode_response in traverse_obj(episodes_response, ('items', ..., {dict})):
|
||||
yield self.url_result(
|
||||
f'{self._BASE_URL}/{lang}watch/{episode_response["id"]}',
|
||||
CrunchyrollBetaIE, **CrunchyrollBetaIE._transform_episode_response(episode_response))
|
||||
|
||||
return self.playlist_result(
|
||||
entries(), internal_id,
|
||||
**traverse_obj(self._call_api(f'series/{internal_id}', internal_id, lang, 'series'), ('data', 0, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {lambda x: x.replace(r'\r\n', '\n')}),
|
||||
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
|
||||
'thumbnails': ('images', ..., ..., ..., {
|
||||
'url': ('source', {url_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
})
|
||||
})))
|
||||
|
||||
|
||||
class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
||||
IE_NAME = 'crunchyroll:music'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?crunchyroll\.com/
|
||||
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
|
||||
watch/(?P<type>concert|musicvideo)/(?P<id>\w+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': 'MV5B02C79',
|
||||
'display_id': 'egaono-hana',
|
||||
'title': 'Egaono Hana',
|
||||
'track': 'Egaono Hana',
|
||||
'artist': 'Goose house',
|
||||
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'genre': ['J-Pop'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': 'MV88BB7F2C',
|
||||
'display_id': 'crossing-field',
|
||||
'title': 'Crossing Field',
|
||||
'track': 'Crossing Field',
|
||||
'artist': 'LiSA',
|
||||
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'genre': ['Anime'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': 'MC2E2AC135',
|
||||
'display_id': 'live-is-smile-always-364joker-at-yokohama-arena',
|
||||
'title': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
|
||||
'track': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
|
||||
'artist': 'LiSA',
|
||||
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'description': 'md5:747444e7e6300907b7a43f0a0503072e',
|
||||
'genre': ['J-Pop'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79/egaono-hana',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_ENDPOINT = 'music'
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, internal_id, object_type = self._match_valid_url(url).group('lang', 'id', 'type')
|
||||
path, name = {
|
||||
'concert': ('concerts', 'concert info'),
|
||||
'musicvideo': ('music_videos', 'music video info'),
|
||||
}[object_type]
|
||||
response = traverse_obj(self._call_api(f'{path}/{internal_id}', internal_id, lang, name), ('data', 0, {dict}))
|
||||
if not response:
|
||||
raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)
|
||||
|
||||
streams_link = response.get('streams_link')
|
||||
if not streams_link and response.get('isPremiumOnly'):
|
||||
message = f'This {response.get("type") or "media"} is for premium members only'
|
||||
if self.is_logged_in:
|
||||
raise ExtractorError(message, expected=True)
|
||||
self.raise_login_required(message)
|
||||
|
||||
result = self._transform_music_response(response)
|
||||
stream_response = self._call_api(streams_link, internal_id, lang, 'stream info')
|
||||
result['formats'] = self._extract_formats(stream_response, internal_id)
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _transform_music_response(data):
|
||||
return {
|
||||
'id': data['id'],
|
||||
**traverse_obj(data, {
|
||||
'display_id': 'slug',
|
||||
'title': 'title',
|
||||
'track': 'title',
|
||||
'artist': ('artist', 'name'),
|
||||
'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n') or None}),
|
||||
'thumbnails': ('images', ..., ..., {
|
||||
'url': ('source', {url_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
'genre': ('genres', ..., 'displayValue'),
|
||||
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
|
||||
}),
|
||||
}
|
||||
|
||||
return self.playlist_result(entries(), internal_id, series_response.get('title'))
|
||||
|
||||
class CrunchyrollArtistIE(CrunchyrollBaseIE):
|
||||
IE_NAME = 'crunchyroll:artist'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?crunchyroll\.com/
|
||||
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
|
||||
artist/(?P<id>\w{10})'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.crunchyroll.com/artist/MA179CB50D',
|
||||
'info_dict': {
|
||||
'id': 'MA179CB50D',
|
||||
'title': 'LiSA',
|
||||
'genre': ['J-Pop', 'Anime', 'Rock'],
|
||||
'description': 'md5:16d87de61a55c3f7d6c454b73285938e',
|
||||
},
|
||||
'playlist_mincount': 83,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/artist/MA179CB50D/lisa',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_ENDPOINT = 'music'
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, internal_id = self._match_valid_url(url).group('lang', 'id')
|
||||
response = traverse_obj(self._call_api(
|
||||
f'artists/{internal_id}', internal_id, lang, 'artist info'), ('data', 0))
|
||||
|
||||
def entries():
|
||||
for attribute, path in [('concerts', 'concert'), ('videos', 'musicvideo')]:
|
||||
for internal_id in traverse_obj(response, (attribute, ...)):
|
||||
yield self.url_result(f'{self._BASE_URL}/watch/{path}/{internal_id}', CrunchyrollMusicIE, internal_id)
|
||||
|
||||
return self.playlist_result(entries(), **self._transform_artist_response(response))
|
||||
|
||||
@staticmethod
|
||||
def _transform_artist_response(data):
|
||||
return {
|
||||
'id': data['id'],
|
||||
**traverse_obj(data, {
|
||||
'title': 'name',
|
||||
'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
|
||||
'thumbnails': ('images', ..., ..., {
|
||||
'url': ('source', {url_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
'genre': ('genres', ..., 'displayValue'),
|
||||
}),
|
||||
}
|
||||
|
@ -1,10 +1,8 @@
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
HEADRequest,
|
||||
)
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CultureUnpluggedIE(InfoExtractor):
|
||||
|
158
plugins/youtube_download/yt_dlp/extractor/dacast.py
Normal file
158
plugins/youtube_download/yt_dlp/extractor/dacast.py
Normal file
@ -0,0 +1,158 @@
|
||||
import hashlib
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
classproperty,
|
||||
float_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DacastBaseIE(InfoExtractor):
|
||||
_URL_TYPE = None
|
||||
|
||||
@classproperty
|
||||
def _VALID_URL(cls):
|
||||
return fr'https?://iframe\.dacast\.com/{cls._URL_TYPE}/(?P<user_id>[\w-]+)/(?P<id>[\w-]+)'
|
||||
|
||||
@classproperty
|
||||
def _EMBED_REGEX(cls):
|
||||
return [rf'<iframe[^>]+\bsrc=["\'](?P<url>{cls._VALID_URL})']
|
||||
|
||||
_API_INFO_URL = 'https://playback.dacast.com/content/info'
|
||||
|
||||
@classmethod
|
||||
def _get_url_from_id(cls, content_id):
|
||||
user_id, media_id = content_id.split(f'-{cls._URL_TYPE}-')
|
||||
return f'https://iframe.dacast.com/{cls._URL_TYPE}/{user_id}/{media_id}'
|
||||
|
||||
@classmethod
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
yield from super()._extract_embed_urls(url, webpage)
|
||||
for content_id in re.findall(
|
||||
rf'<script[^>]+\bsrc=["\']https://player\.dacast\.com/js/player\.js\?contentId=([\w-]+-{cls._URL_TYPE}-[\w-]+)["\']', webpage):
|
||||
yield cls._get_url_from_id(content_id)
|
||||
|
||||
|
||||
class DacastVODIE(DacastBaseIE):
|
||||
_URL_TYPE = 'vod'
|
||||
_TESTS = [{
|
||||
'url': 'https://iframe.dacast.com/vod/acae82153ef4d7a7344ae4eaa86af534/1c6143e3-5a06-371d-8695-19b96ea49090',
|
||||
'info_dict': {
|
||||
'id': '1c6143e3-5a06-371d-8695-19b96ea49090',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'acae82153ef4d7a7344ae4eaa86af534',
|
||||
'title': '2_4||Adnexal mass characterisation: O-RADS US and MRI||N. Bharwani, London/UK',
|
||||
'thumbnail': 'https://universe-files.dacast.com/26137208-5858-65c1-5e9a-9d6b6bd2b6c2',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.dacast.com/support/knowledgebase/how-can-i-embed-a-video-on-my-website/',
|
||||
'info_dict': {
|
||||
'id': 'b6674869-f08a-23c5-1d7b-81f5309e1a90',
|
||||
'ext': 'mp4',
|
||||
'title': '4-HowToEmbedVideo.mp4',
|
||||
'uploader_id': '3b67c4a9-3886-4eb1-d0eb-39b23b14bef3',
|
||||
'thumbnail': 'https://universe-files.dacast.com/d26ab48f-a52a-8783-c42e-a90290ba06b6.png',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://gist.githubusercontent.com/bashonly/4ad249ef2910346fbdf3809b220f11ee/raw/87349778d4af1a80b1fcc3beb9c88108de5858f5/dacast_embeds.html',
|
||||
'info_dict': {
|
||||
'id': 'e7df418e-a83b-7a7f-7b5e-1a667981e8fa',
|
||||
'ext': 'mp4',
|
||||
'title': 'Evening Service 2-5-23',
|
||||
'uploader_id': '943bb1ab3c03695ba85330d92d6d226e',
|
||||
'thumbnail': 'https://universe-files.dacast.com/337472b3-e92c-2ea4-7eb7-5700da477f67',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
|
||||
query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'}
|
||||
info = self._download_json(self._API_INFO_URL, video_id, query=query, fatal=False)
|
||||
access = self._download_json(
|
||||
'https://playback.dacast.com/content/access', video_id,
|
||||
note='Downloading access JSON', query=query, expected_status=403)
|
||||
|
||||
error = access.get('error')
|
||||
if error in ('Broadcaster has been blocked', 'Content is offline'):
|
||||
raise ExtractorError(error, expected=True)
|
||||
elif error:
|
||||
raise ExtractorError(f'Dacast API says "{error}"')
|
||||
|
||||
hls_url = access['hls']
|
||||
hls_aes = {}
|
||||
|
||||
if 'DRM_EXT' in hls_url:
|
||||
self.report_drm(video_id)
|
||||
elif '/uspaes/' in hls_url:
|
||||
# From https://player.dacast.com/js/player.js
|
||||
ts = int(time.time())
|
||||
signature = hashlib.sha1(
|
||||
f'{10413792000 - ts}{ts}YfaKtquEEpDeusCKbvYszIEZnWmBcSvw').digest().hex()
|
||||
hls_aes['uri'] = f'https://keys.dacast.com/uspaes/{video_id}.key?s={signature}&ts={ts}'
|
||||
|
||||
for retry in self.RetryManager():
|
||||
try:
|
||||
formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4', m3u8_id='hls')
|
||||
except ExtractorError as e:
|
||||
# CDN will randomly respond with 403
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
retry.error = e
|
||||
continue
|
||||
raise
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'uploader_id': user_id,
|
||||
'formats': formats,
|
||||
'hls_aes': hls_aes or None,
|
||||
**traverse_obj(info, ('contentInfo', {
|
||||
'title': 'title',
|
||||
'duration': ('duration', {float_or_none}),
|
||||
'thumbnail': ('thumbnailUrl', {url_or_none}),
|
||||
})),
|
||||
}
|
||||
|
||||
|
||||
class DacastPlaylistIE(DacastBaseIE):
|
||||
_URL_TYPE = 'playlist'
|
||||
_TESTS = [{
|
||||
'url': 'https://iframe.dacast.com/playlist/943bb1ab3c03695ba85330d92d6d226e/b632eb053cac17a9c9a02bcfc827f2d8',
|
||||
'playlist_mincount': 28,
|
||||
'info_dict': {
|
||||
'id': 'b632eb053cac17a9c9a02bcfc827f2d8',
|
||||
'title': 'Archive Sermons',
|
||||
},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://gist.githubusercontent.com/bashonly/7efb606f49f3c6e07ea0327de5a661d1/raw/05a16eac830245ea301fb0a585023bec71e6093c/dacast_playlist_embed.html',
|
||||
'playlist_mincount': 28,
|
||||
'info_dict': {
|
||||
'id': 'b632eb053cac17a9c9a02bcfc827f2d8',
|
||||
'title': 'Archive Sermons',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id, playlist_id = self._match_valid_url(url).group('user_id', 'id')
|
||||
info = self._download_json(
|
||||
self._API_INFO_URL, playlist_id, note='Downloading playlist JSON', query={
|
||||
'contentId': f'{user_id}-playlist-{playlist_id}',
|
||||
'provider': 'universe',
|
||||
})['contentInfo']
|
||||
|
||||
def entries(info):
|
||||
for video in traverse_obj(info, ('features', 'playlist', 'contents', lambda _, v: v['id'])):
|
||||
yield self.url_result(
|
||||
DacastVODIE._get_url_from_id(video['id']), DacastVODIE, video['id'], video.get('title'))
|
||||
|
||||
return self.playlist_result(entries(info), playlist_id, info.get('title'))
|
@ -1,6 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_count,
|
||||
@ -12,21 +13,24 @@ from ..utils import (
|
||||
|
||||
|
||||
class DaftsexIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?daftsex\.com/watch/(?P<id>-?\d+_\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?daft\.sex/watch/(?P<id>-?\d+_\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://daftsex.com/watch/-35370899_456246186',
|
||||
'md5': 'd95135e6cea2d905bea20dbe82cda64a',
|
||||
'url': 'https://daft.sex/watch/-35370899_456246186',
|
||||
'md5': '64c04ef7b4c7b04b308f3b0c78efe7cd',
|
||||
'info_dict': {
|
||||
'id': '-35370899_456246186',
|
||||
'ext': 'mp4',
|
||||
'title': 'just relaxing',
|
||||
'description': 'just relaxing - Watch video Watch video in high quality',
|
||||
'description': 'just relaxing – Watch video Watch video in high quality',
|
||||
'upload_date': '20201113',
|
||||
'timestamp': 1605261911,
|
||||
'thumbnail': r're:https://[^/]+/impf/-43BuMDIawmBGr3GLcZ93CYwWf2PBv_tVWoS1A/dnu41DnARU4\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=6af2c26ff4a45e55334189301c867384&type=video_thumb',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
'duration': 15.0,
|
||||
'view_count': int
|
||||
},
|
||||
}, {
|
||||
'url': 'https://daftsex.com/watch/-156601359_456242791',
|
||||
'url': 'https://daft.sex/watch/-156601359_456242791',
|
||||
'info_dict': {
|
||||
'id': '-156601359_456242791',
|
||||
'ext': 'mp4',
|
||||
@ -36,6 +40,7 @@ class DaftsexIE(InfoExtractor):
|
||||
'timestamp': 1600250735,
|
||||
'thumbnail': 'https://psv153-1.crazycloud.ru/videos/-156601359/456242791/thumb.jpg?extra=i3D32KaBbBFf9TqDRMAVmQ',
|
||||
},
|
||||
'skip': 'deleted / private'
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -60,7 +65,7 @@ class DaftsexIE(InfoExtractor):
|
||||
webpage, 'player color', fatal=False) or ''
|
||||
|
||||
embed_page = self._download_webpage(
|
||||
'https://daxab.com/player/%s?color=%s' % (player_hash, player_color),
|
||||
'https://dxb.to/player/%s?color=%s' % (player_hash, player_color),
|
||||
video_id, headers={'Referer': url})
|
||||
video_params = self._parse_json(
|
||||
self._search_regex(
|
||||
@ -94,15 +99,19 @@ class DaftsexIE(InfoExtractor):
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
||||
item = self._download_json(
|
||||
items = self._download_json(
|
||||
f'{server_domain}/method/video.get/{video_id}', video_id,
|
||||
headers={'Referer': url}, query={
|
||||
'token': video_params['video']['access_token'],
|
||||
'videos': video_id,
|
||||
'ckey': video_params['c_key'],
|
||||
'credentials': video_params['video']['credentials'],
|
||||
})['response']['items'][0]
|
||||
})['response']['items']
|
||||
|
||||
if not items:
|
||||
raise ExtractorError('Video is not available', video_id=video_id, expected=True)
|
||||
|
||||
item = items[0]
|
||||
formats = []
|
||||
for f_id, f_url in item.get('files', {}).items():
|
||||
if f_id == 'external':
|
||||
|
@ -3,7 +3,7 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
@ -68,9 +68,9 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
None, 'Downloading Access Token',
|
||||
data=urlencode_postdata(data))['access_token']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||
raise ExtractorError(self._parse_json(
|
||||
e.cause.read().decode(), xid)['error_description'], expected=True)
|
||||
e.cause.response.read().decode(), xid)['error_description'], expected=True)
|
||||
raise
|
||||
self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
|
||||
self._HEADERS['Authorization'] = 'Bearer ' + token
|
||||
|
@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
class DigitalConcertHallIE(InfoExtractor):
|
||||
IE_DESC = 'DigitalConcertHall extractor'
|
||||
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/concert/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert)/(?P<id>[0-9]+)'
|
||||
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
|
||||
_ACCESS_TOKEN = None
|
||||
_NETRC_MACHINE = 'digitalconcerthall'
|
||||
@ -40,6 +40,19 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://www.digitalconcerthall.com/en/film/388',
|
||||
'info_dict': {
|
||||
'id': '388',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Berliner Philharmoniker and Frank Peter Zimmermann',
|
||||
'description': 'md5:cfe25a7044fa4be13743e5089b5b5eb2',
|
||||
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
|
||||
'upload_date': '20220714',
|
||||
'timestamp': 1657785600,
|
||||
'album_artist': 'Frank Peter Zimmermann / Benedikt von Bernstorff / Jakob von Bernstorff',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
@ -75,7 +88,7 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
if not self._ACCESS_TOKEN:
|
||||
self.raise_login_required(method='password')
|
||||
|
||||
def _entries(self, items, language, **kwargs):
|
||||
def _entries(self, items, language, type_, **kwargs):
|
||||
for item in items:
|
||||
video_id = item['id']
|
||||
stream_info = self._download_json(
|
||||
@ -103,11 +116,11 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'start_time': chapter.get('time'),
|
||||
'end_time': try_get(chapter, lambda x: x['time'] + x['duration']),
|
||||
'title': chapter.get('text'),
|
||||
} for chapter in item['cuepoints']] if item.get('cuepoints') else None,
|
||||
} for chapter in item['cuepoints']] if item.get('cuepoints') and type_ == 'concert' else None,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
language, video_id = self._match_valid_url(url).group('language', 'id')
|
||||
language, type_, video_id = self._match_valid_url(url).group('language', 'type', 'id')
|
||||
if not language:
|
||||
language = 'en'
|
||||
|
||||
@ -120,18 +133,18 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
vid_info = self._download_json(
|
||||
f'https://api.digitalconcerthall.com/v2/concert/{video_id}', video_id, headers={
|
||||
f'https://api.digitalconcerthall.com/v2/{type_}/{video_id}', video_id, headers={
|
||||
'Accept': 'application/json',
|
||||
'Accept-Language': language
|
||||
})
|
||||
album_artist = ' / '.join(traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) or '')
|
||||
videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': video_id,
|
||||
'title': vid_info.get('title'),
|
||||
'entries': self._entries(traverse_obj(vid_info, ('_embedded', ..., ...)), language,
|
||||
thumbnails=thumbnails, album_artist=album_artist),
|
||||
'entries': self._entries(videos, language, thumbnails=thumbnails, album_artist=album_artist, type_=type_),
|
||||
'thumbnails': thumbnails,
|
||||
'album_artist': album_artist,
|
||||
}
|
||||
|
35
plugins/youtube_download/yt_dlp/extractor/discogs.py
Normal file
35
plugins/youtube_download/yt_dlp/extractor/discogs.py
Normal file
@ -0,0 +1,35 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import traverse_obj
|
||||
|
||||
|
||||
class DiscogsReleasePlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?discogs\.com/(?P<type>release|master)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discogs.com/release/1-The-Persuader-Stockholm',
|
||||
'info_dict': {
|
||||
'id': 'release1',
|
||||
'title': 'Stockholm',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}, {
|
||||
'url': 'https://www.discogs.com/master/113-Vince-Watson-Moments-In-Time',
|
||||
'info_dict': {
|
||||
'id': 'master113',
|
||||
'title': 'Moments In Time',
|
||||
},
|
||||
'playlist_mincount': 53,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')
|
||||
|
||||
display_id = f'{playlist_type}{playlist_id}'
|
||||
response = self._download_json(
|
||||
f'https://api.discogs.com/{playlist_type}s/{playlist_id}', display_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(video['uri'], YoutubeIE, video_title=video.get('title'))
|
||||
for video in traverse_obj(response, ('videos', lambda _, v: YoutubeIE.suitable(v['uri'])))]
|
||||
|
||||
return self.playlist_result(entries, display_id, response.get('title'))
|
@ -3,8 +3,8 @@ import string
|
||||
|
||||
from .discoverygo import DiscoveryGoBaseIE
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import ExtractorError
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
@ -100,9 +100,9 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
self._API_BASE_URL + 'streaming/video/' + video_id,
|
||||
display_id, 'Downloading streaming JSON metadata', headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
|
||||
e_description = self._parse_json(
|
||||
e.cause.read().decode(), display_id)['description']
|
||||
e.cause.response.read().decode(), display_id)['description']
|
||||
if 'resource not available for country' in e_description:
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
if 'Authorized Networks' in e_description:
|
||||
|
192
plugins/youtube_download/yt_dlp/extractor/dlf.py
Normal file
192
plugins/youtube_download/yt_dlp/extractor/dlf.py
Normal file
@ -0,0 +1,192 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DLFBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?deutschlandfunk\.de/'
|
||||
_BUTTON_REGEX = r'(<button[^>]+alt="Anhören"[^>]+data-audio-diraid[^>]*>)'
|
||||
|
||||
def _parse_button_attrs(self, button, audio_id=None):
|
||||
attrs = extract_attributes(button)
|
||||
audio_id = audio_id or attrs['data-audio-diraid']
|
||||
|
||||
url = traverse_obj(
|
||||
attrs, 'data-audio-download-src', 'data-audio', 'data-audioreference',
|
||||
'data-audio-src', expected_type=url_or_none)
|
||||
ext = determine_ext(url)
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'extractor_key': DLFIE.ie_key(),
|
||||
'extractor': DLFIE.IE_NAME,
|
||||
**traverse_obj(attrs, {
|
||||
'title': (('data-audiotitle', 'data-audio-title', 'data-audio-download-tracking-title'), {str}),
|
||||
'duration': (('data-audioduration', 'data-audio-duration'), {int_or_none}),
|
||||
'thumbnail': ('data-audioimage', {url_or_none}),
|
||||
'uploader': 'data-audio-producer',
|
||||
'series': 'data-audio-series',
|
||||
'channel': 'data-audio-origin-site-name',
|
||||
'webpage_url': ('data-audio-download-tracking-path', {url_or_none}),
|
||||
}, get_all=False),
|
||||
'formats': (self._extract_m3u8_formats(url, audio_id, fatal=False)
|
||||
if ext == 'm3u8' else [{'url': url, 'ext': ext, 'vcodec': 'none'}])
|
||||
}
|
||||
|
||||
|
||||
class DLFIE(DLFBaseIE):
|
||||
IE_NAME = 'dlf'
|
||||
_VALID_URL = DLFBaseIE._VALID_URL_BASE + r'[\w-]+-dlf-(?P<id>[\da-f]{8})-100\.html'
|
||||
_TESTS = [
|
||||
# Audio as an HLS stream
|
||||
{
|
||||
'url': 'https://www.deutschlandfunk.de/tanz-der-saiteninstrumente-das-wild-strings-trio-aus-slowenien-dlf-03a3eb19-100.html',
|
||||
'info_dict': {
|
||||
'id': '03a3eb19',
|
||||
'title': r're:Tanz der Saiteninstrumente [-/] Das Wild Strings Trio aus Slowenien',
|
||||
'ext': 'm4a',
|
||||
'duration': 3298,
|
||||
'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
|
||||
'uploader': 'Deutschlandfunk',
|
||||
'series': 'On Stage',
|
||||
'channel': 'deutschlandfunk'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8'
|
||||
},
|
||||
'skip': 'This webpage no longer exists'
|
||||
}, {
|
||||
'url': 'https://www.deutschlandfunk.de/russische-athleten-kehren-zurueck-auf-die-sportbuehne-ein-gefaehrlicher-tueroeffner-dlf-d9cc1856-100.html',
|
||||
'info_dict': {
|
||||
'id': 'd9cc1856',
|
||||
'title': 'Russische Athleten kehren zurück auf die Sportbühne: Ein gefährlicher Türöffner',
|
||||
'ext': 'mp3',
|
||||
'duration': 291,
|
||||
'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
|
||||
'uploader': 'Deutschlandfunk',
|
||||
'series': 'Kommentare und Themen der Woche',
|
||||
'channel': 'deutschlandfunk'
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, audio_id)
|
||||
|
||||
return self._parse_button_attrs(
|
||||
self._search_regex(self._BUTTON_REGEX, webpage, 'button'), audio_id)
|
||||
|
||||
|
||||
class DLFCorpusIE(DLFBaseIE):
|
||||
IE_NAME = 'dlf:corpus'
|
||||
IE_DESC = 'DLF Multi-feed Archives'
|
||||
_VALID_URL = DLFBaseIE._VALID_URL_BASE + r'(?P<id>(?![\w-]+-dlf-[\da-f]{8})[\w-]+-\d+)\.html'
|
||||
_TESTS = [
|
||||
# Recorded news broadcast with referrals to related broadcasts
|
||||
{
|
||||
'url': 'https://www.deutschlandfunk.de/fechten-russland-belarus-ukraine-protest-100.html',
|
||||
'info_dict': {
|
||||
'id': 'fechten-russland-belarus-ukraine-protest-100',
|
||||
'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
|
||||
'description': 'md5:91340aab29c71aa7518ad5be13d1e8ad'
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '1fc5d64a',
|
||||
'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
|
||||
'ext': 'mp3',
|
||||
'duration': 252,
|
||||
'thumbnail': 'https://assets.deutschlandfunk.de/aad16241-6b76-4a09-958b-96d0ee1d6f57/512x512.jpg?t=1679480020313',
|
||||
'uploader': 'Deutschlandfunk',
|
||||
'series': 'Sport',
|
||||
'channel': 'deutschlandfunk'
|
||||
}
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '2ada145f',
|
||||
'title': r're:(?:Sportpolitik / )?Fechtverband votiert für Rückkehr russischer Athleten',
|
||||
'ext': 'mp3',
|
||||
'duration': 336,
|
||||
'thumbnail': 'https://assets.deutschlandfunk.de/FILE_93982766f7317df30409b8a184ac044a/512x512.jpg?t=1678547581005',
|
||||
'uploader': 'Deutschlandfunk',
|
||||
'series': 'Deutschlandfunk Nova',
|
||||
'channel': 'deutschlandfunk-nova'
|
||||
}
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '5e55e8c9',
|
||||
'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
|
||||
'ext': 'mp3',
|
||||
'duration': 187,
|
||||
'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
|
||||
'uploader': 'Deutschlandfunk',
|
||||
'series': 'Sport am Samstag',
|
||||
'channel': 'deutschlandfunk'
|
||||
}
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '47e1a096',
|
||||
'title': r're:Rückkehr Russlands im Fechten [-/] "Fassungslos, dass es einfach so passiert ist"',
|
||||
'ext': 'mp3',
|
||||
'duration': 602,
|
||||
'thumbnail': 'https://assets.deutschlandfunk.de/da4c494a-21cc-48b4-9cc7-40e09fd442c2/512x512.jpg?t=1678562155770',
|
||||
'uploader': 'Deutschlandfunk',
|
||||
'series': 'Sport am Samstag',
|
||||
'channel': 'deutschlandfunk'
|
||||
}
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '5e55e8c9',
|
||||
'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
|
||||
'ext': 'mp3',
|
||||
'duration': 187,
|
||||
'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
|
||||
'uploader': 'Deutschlandfunk',
|
||||
'series': 'Sport am Samstag',
|
||||
'channel': 'deutschlandfunk'
|
||||
}
|
||||
}]
|
||||
},
|
||||
# Podcast feed with tag buttons, playlist count fluctuates
|
||||
{
|
||||
'url': 'https://www.deutschlandfunk.de/kommentare-und-themen-der-woche-100.html',
|
||||
'info_dict': {
|
||||
'id': 'kommentare-und-themen-der-woche-100',
|
||||
'title': 'Meinung - Kommentare und Themen der Woche',
|
||||
'description': 'md5:2901bbd65cd2d45e116d399a099ce5d5',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
},
|
||||
# Podcast feed with no description
|
||||
{
|
||||
'url': 'https://www.deutschlandfunk.de/podcast-tolle-idee-100.html',
|
||||
'info_dict': {
|
||||
'id': 'podcast-tolle-idee-100',
|
||||
'title': 'Wissenschaftspodcast - Tolle Idee! - Was wurde daraus?',
|
||||
},
|
||||
'playlist_mincount': 11,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'description': self._html_search_meta(
|
||||
['description', 'og:description', 'twitter:description'], webpage, default=None),
|
||||
'title': self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage, default=None),
|
||||
'entries': map(self._parse_button_attrs, re.findall(self._BUTTON_REGEX, webpage)),
|
||||
}
|
@ -2,7 +2,7 @@ import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@ -39,7 +39,7 @@ class DPlayBaseIE(InfoExtractor):
|
||||
return f'Bearer {token}'
|
||||
|
||||
def _process_errors(self, e, geo_countries):
|
||||
info = self._parse_json(e.cause.read().decode('utf-8'), None)
|
||||
info = self._parse_json(e.cause.response.read().decode('utf-8'), None)
|
||||
error = info['errors'][0]
|
||||
error_code = error.get('code')
|
||||
if error_code == 'access.denied.geoblocked':
|
||||
@ -65,6 +65,7 @@ class DPlayBaseIE(InfoExtractor):
|
||||
return streaming_list
|
||||
|
||||
def _get_disco_api_info(self, url, display_id, disco_host, realm, country, domain=''):
|
||||
country = self.get_param('geo_bypass_country') or country
|
||||
geo_countries = [country.upper()]
|
||||
self._initialize_geo_bypass({
|
||||
'countries': geo_countries,
|
||||
@ -86,7 +87,7 @@ class DPlayBaseIE(InfoExtractor):
|
||||
'include': 'images,primaryChannel,show,tags'
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||
self._process_errors(e, geo_countries)
|
||||
raise
|
||||
video_id = video['data']['id']
|
||||
@ -98,7 +99,7 @@ class DPlayBaseIE(InfoExtractor):
|
||||
streaming = self._download_video_playback_info(
|
||||
disco_base, video_id, headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
self._process_errors(e, geo_countries)
|
||||
raise
|
||||
for format_dict in streaming:
|
||||
@ -745,7 +746,7 @@ class MotorTrendIE(DiscoveryPlusBaseIE):
|
||||
|
||||
|
||||
class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?motortrendondemand\.com/detail' + DPlayBaseIE._PATH_REGEX
|
||||
_VALID_URL = r'https?://(?:www\.)?motortrend(?:ondemand\.com|\.com/plus)/detail' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.motortrendondemand.com/detail/wheelstanding-dump-truck-stubby-bobs-comeback/37699/784',
|
||||
'info_dict': {
|
||||
@ -766,6 +767,25 @@ class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
|
||||
'upload_date': '20140101',
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.motortrend.com/plus/detail/roadworthy-rescues-teaser-trailer/4922860/',
|
||||
'info_dict': {
|
||||
'id': '4922860',
|
||||
'ext': 'mp4',
|
||||
'title': 'Roadworthy Rescues | Teaser Trailer',
|
||||
'description': 'Derek Bieri helps Freiburger and Finnegan with their \'68 big-block Dart.',
|
||||
'display_id': 'roadworthy-rescues-teaser-trailer/4922860',
|
||||
'creator': 'Originals',
|
||||
'series': 'Roadworthy Rescues',
|
||||
'thumbnail': r're:^https?://.+\.jpe?g$',
|
||||
'upload_date': '20220907',
|
||||
'timestamp': 1662523200,
|
||||
'duration': 1066.356,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.motortrend.com/plus/detail/ugly-duckling/2450033/12439',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PRODUCT = 'MTOD'
|
||||
@ -1001,3 +1021,39 @@ class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE):
|
||||
_SHOW_STR = 'show'
|
||||
_INDEX = 4
|
||||
_VIDEO_IE = DiscoveryPlusIndiaIE
|
||||
|
||||
|
||||
class GlobalCyclingNetworkPlusIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://plus\.globalcyclingnetwork\.com/watch/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://plus.globalcyclingnetwork.com/watch/1397691',
|
||||
'info_dict': {
|
||||
'id': '1397691',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Athertons: Mountain Biking\'s Fastest Family',
|
||||
'description': 'md5:75a81937fcd8b989eec6083a709cd837',
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/04/eb9e3026-4849-3001-8281-9356466f0557.png',
|
||||
'series': 'gcn',
|
||||
'creator': 'Gcn',
|
||||
'upload_date': '20210309',
|
||||
'timestamp': 1615248000,
|
||||
'duration': 2531.0,
|
||||
'tags': [],
|
||||
},
|
||||
'skip': 'Subscription required',
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
_PRODUCT = 'web'
|
||||
_DISCO_API_PARAMS = {
|
||||
'disco_host': 'disco-api-prod.globalcyclingnetwork.com',
|
||||
'realm': 'gcn',
|
||||
'country': 'us',
|
||||
}
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers.update({
|
||||
'x-disco-params': f'realm={realm}',
|
||||
'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:27.3.2',
|
||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||
})
|
||||
|
@ -1,13 +1,17 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VHXEmbedIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
get_elements_by_class,
|
||||
get_elements_html_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
)
|
||||
@ -162,12 +166,13 @@ class DropoutIE(InfoExtractor):
|
||||
|
||||
|
||||
class DropoutSeasonIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:[0-9]+/?$)'
|
||||
_PAGE_SIZE = 24
|
||||
_VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:(?P<season>[0-9]+)/?$)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1',
|
||||
'note': 'Multi-season series with the season in the url',
|
||||
'playlist_count': 17,
|
||||
'playlist_count': 24,
|
||||
'info_dict': {
|
||||
'id': 'dimension-20-fantasy-high-season-1',
|
||||
'title': 'Dimension 20 Fantasy High - Season 1'
|
||||
@ -176,7 +181,7 @@ class DropoutSeasonIE(InfoExtractor):
|
||||
{
|
||||
'url': 'https://www.dropout.tv/dimension-20-fantasy-high',
|
||||
'note': 'Multi-season series with the season not in the url',
|
||||
'playlist_count': 17,
|
||||
'playlist_count': 24,
|
||||
'info_dict': {
|
||||
'id': 'dimension-20-fantasy-high-season-1',
|
||||
'title': 'Dimension 20 Fantasy High - Season 1'
|
||||
@ -190,29 +195,30 @@ class DropoutSeasonIE(InfoExtractor):
|
||||
'id': 'dimension-20-shriek-week-season-1',
|
||||
'title': 'Dimension 20 Shriek Week - Season 1'
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://www.dropout.tv/breaking-news-no-laugh-newsroom/season:3',
|
||||
'note': 'Multi-season series with season in the url that requires pagination',
|
||||
'playlist_count': 25,
|
||||
'info_dict': {
|
||||
'id': 'breaking-news-no-laugh-newsroom-season-3',
|
||||
'title': 'Breaking News No Laugh Newsroom - Season 3'
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _fetch_page(self, url, season_id, page):
|
||||
page += 1
|
||||
webpage = self._download_webpage(
|
||||
f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400})
|
||||
yield from [self.url_result(item_url, DropoutIE) for item_url in traverse_obj(
|
||||
get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))]
|
||||
|
||||
def _real_extract(self, url):
|
||||
season_id = self._match_id(url)
|
||||
season_num = self._match_valid_url(url).group('season') or 1
|
||||
season_title = season_id.replace('-', ' ').title()
|
||||
webpage = self._download_webpage(url, season_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
url=self._search_regex(r'<a href=["\'](.+?)["\'] class=["\']browse-item-link["\']',
|
||||
item, 'item_url'),
|
||||
ie=DropoutIE.ie_key()
|
||||
) for item in get_elements_by_class('js-collection-item', webpage)
|
||||
]
|
||||
|
||||
seasons = (get_element_by_class('select-dropdown-wrapper', webpage) or '').strip().replace('\n', '')
|
||||
current_season = self._search_regex(r'<option[^>]+selected>([^<]+)</option>',
|
||||
seasons, 'current_season', default='').strip()
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': join_nonempty(season_id, current_season.lower().replace(' ', '-')),
|
||||
'title': join_nonempty(season_title, current_season, delim=' - '),
|
||||
'entries': entries
|
||||
}
|
||||
return self.playlist_result(
|
||||
OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE),
|
||||
f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}')
|
||||
|
@ -12,7 +12,6 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
@ -25,7 +24,7 @@ class DRTVIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?dr\.dk/(?:tv/se|nyheder|(?:radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*|
|
||||
(?:www\.)?dr\.dk/(?:tv/se|nyheder|(?P<radio>radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*|
|
||||
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
|
||||
)
|
||||
(?P<id>[\da-z_-]+)
|
||||
@ -80,7 +79,7 @@ class DRTVIE(InfoExtractor):
|
||||
'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
|
||||
'timestamp': 1546628400,
|
||||
'upload_date': '20190104',
|
||||
'duration': 3504.618,
|
||||
'duration': 3504.619,
|
||||
'formats': 'mincount:20',
|
||||
'release_year': 2017,
|
||||
'season_id': 'urn:dr:mu:bundle:5afc03ad6187a4065ca5fd35',
|
||||
@ -101,14 +100,16 @@ class DRTVIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Bonderøven 2019 (1:8)',
|
||||
'description': 'md5:b6dcfe9b6f0bea6703e9a0092739a5bd',
|
||||
'timestamp': 1603188600,
|
||||
'upload_date': '20201020',
|
||||
'timestamp': 1654856100,
|
||||
'upload_date': '20220610',
|
||||
'duration': 2576.6,
|
||||
'season': 'Bonderøven 2019',
|
||||
'season_id': 'urn:dr:mu:bundle:5c201667a11fa01ca4528ce5',
|
||||
'release_year': 2019,
|
||||
'season_number': 2019,
|
||||
'series': 'Frank & Kastaniegaarden'
|
||||
'series': 'Frank & Kastaniegaarden',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -140,10 +141,26 @@ class DRTVIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'this video has been removed',
|
||||
}, {
|
||||
'url': 'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/regionale-nyheder-2023-03-14-10-30-9',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': '14802310112',
|
||||
'timestamp': 1678786200,
|
||||
'duration': 120.043,
|
||||
'season_id': 'urn:dr:mu:bundle:63a4f7c87140143504b6710f',
|
||||
'series': 'P4 København regionale nyheder',
|
||||
'upload_date': '20230314',
|
||||
'release_year': 0,
|
||||
'description': 'Hør seneste regionale nyheder fra P4 København.',
|
||||
'season': 'Regionale nyheder',
|
||||
'title': 'Regionale nyheder',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
raw_video_id = self._match_id(url)
|
||||
raw_video_id, is_radio_url = self._match_valid_url(url).group('id', 'radio')
|
||||
|
||||
webpage = self._download_webpage(url, raw_video_id)
|
||||
|
||||
@ -170,15 +187,17 @@ class DRTVIE(InfoExtractor):
|
||||
programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id)
|
||||
else:
|
||||
programcard_url = _PROGRAMCARD_BASE
|
||||
page = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data\s*=\s*({.+?})\s*(?:;|</script)', webpage,
|
||||
'data'), '1')['cache']['page']
|
||||
page = page[list(page.keys())[0]]
|
||||
item = try_get(
|
||||
page, (lambda x: x['item'], lambda x: x['entries'][0]['item']),
|
||||
dict)
|
||||
video_id = item['customId'].split(':')[-1]
|
||||
if is_radio_url:
|
||||
video_id = self._search_nextjs_data(
|
||||
webpage, raw_video_id)['props']['pageProps']['episode']['productionNumber']
|
||||
else:
|
||||
json_data = self._search_json(
|
||||
r'window\.__data\s*=', webpage, 'data', raw_video_id)
|
||||
video_id = traverse_obj(json_data, (
|
||||
'cache', 'page', ..., (None, ('entries', 0)), 'item', 'customId',
|
||||
{lambda x: x.split(':')[-1]}), get_all=False)
|
||||
if not video_id:
|
||||
raise ExtractorError('Unable to extract video id')
|
||||
query['productionnumber'] = video_id
|
||||
|
||||
data = self._download_json(
|
||||
@ -269,10 +288,11 @@ class DRTVIE(InfoExtractor):
|
||||
f['vcodec'] = 'none'
|
||||
formats.extend(f4m_formats)
|
||||
elif target == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
uri, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
quality=preference, m3u8_id=format_id,
|
||||
fatal=False))
|
||||
quality=preference, m3u8_id=format_id, fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
bitrate = link.get('Bitrate')
|
||||
if bitrate:
|
||||
|
@ -1,12 +1,17 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class DumpertIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl(?:
|
||||
/(?:mediabase|embed|item)/|
|
||||
(?:/toppers|/latest|/?)\?selectedId=
|
||||
)(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
|
||||
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
|
||||
@ -16,6 +21,9 @@ class DumpertIE(InfoExtractor):
|
||||
'title': 'Ik heb nieuws voor je',
|
||||
'description': 'Niet schrikken hoor',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 9,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7',
|
||||
@ -26,6 +34,28 @@ class DumpertIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dumpert.nl/item/100031688_b317a185',
|
||||
'info_dict': {
|
||||
'id': '100031688/b317a185',
|
||||
'ext': 'mp4',
|
||||
'title': 'Epic schijnbeweging',
|
||||
'description': '<p>Die zag je niet eh</p>',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'duration': 12,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'}
|
||||
}, {
|
||||
'url': 'https://www.dumpert.nl/toppers?selectedId=100031688_b317a185',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dumpert.nl/latest?selectedId=100031688_b317a185',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dumpert.nl/?selectedId=100031688_b317a185',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -36,17 +66,22 @@ class DumpertIE(InfoExtractor):
|
||||
title = item['title']
|
||||
media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO')
|
||||
|
||||
quality = qualities(['flv', 'mobile', 'tablet', '720p'])
|
||||
quality = qualities(['flv', 'mobile', 'tablet', '720p', '1080p'])
|
||||
formats = []
|
||||
for variant in media.get('variants', []):
|
||||
uri = variant.get('uri')
|
||||
if not uri:
|
||||
continue
|
||||
version = variant.get('version')
|
||||
preference = quality(version)
|
||||
if determine_ext(uri) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
uri, video_id, 'mp4', m3u8_id=version, quality=preference))
|
||||
else:
|
||||
formats.append({
|
||||
'url': uri,
|
||||
'format_id': version,
|
||||
'quality': quality(version),
|
||||
'quality': preference,
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
|
@ -2,7 +2,7 @@ import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@ -111,8 +111,8 @@ class EaglePlatformIE(InfoExtractor):
|
||||
response = super(EaglePlatformIE, self)._download_json(
|
||||
url_or_request, video_id, *args, **kwargs)
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError):
|
||||
response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
|
||||
if isinstance(ee.cause, HTTPError):
|
||||
response = self._parse_json(ee.cause.response.read().decode('utf-8'), video_id)
|
||||
self._handle_error(response)
|
||||
raise
|
||||
return response
|
||||
|
36
plugins/youtube_download/yt_dlp/extractor/ebay.py
Normal file
36
plugins/youtube_download/yt_dlp/extractor/ebay.py
Normal file
@ -0,0 +1,36 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_end
|
||||
|
||||
|
||||
class EbayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ebay\.com/itm/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ebay.com/itm/194509326719',
|
||||
'info_dict': {
|
||||
'id': '194509326719',
|
||||
'ext': 'mp4',
|
||||
'title': 'WiFi internal antenna adhesive for wifi 2.4GHz wifi 5 wifi 6 wifi 6E full bands',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_json = self._search_json(r'"video":', webpage, 'video json', video_id)
|
||||
|
||||
formats = []
|
||||
for key, url in video_json['playlistMap'].items():
|
||||
if key == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(url, video_id, fatal=False))
|
||||
elif key == 'DASH':
|
||||
formats.extend(self._extract_mpd_formats(url, video_id, fatal=False))
|
||||
else:
|
||||
self.report_warning(f'Unsupported format {key}', video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': remove_end(self._html_extract_title(webpage), ' | eBay'),
|
||||
'formats': formats
|
||||
}
|
@ -1,10 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
from ..networking import Request
|
||||
from ..utils import float_or_none, int_or_none, parse_iso8601
|
||||
|
||||
|
||||
class EitbIE(InfoExtractor):
|
||||
@ -54,7 +50,7 @@ class EitbIE(InfoExtractor):
|
||||
|
||||
hls_url = media.get('HLS_SURL')
|
||||
if hls_url:
|
||||
request = sanitized_Request(
|
||||
request = Request(
|
||||
'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
|
||||
headers={'Referer': url})
|
||||
token_data = self._download_json(
|
||||
|
59
plugins/youtube_download/yt_dlp/extractor/elevensports.py
Normal file
59
plugins/youtube_download/yt_dlp/extractor/elevensports.py
Normal file
@ -0,0 +1,59 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ElevenSportsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?elevensports\.com/view/event/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://elevensports.com/view/event/clf46yr3kenn80jgrqsjmwefk',
|
||||
'md5': 'c0958d9ff90e4503a75544358758921d',
|
||||
'info_dict': {
|
||||
'id': 'clf46yr3kenn80jgrqsjmwefk',
|
||||
'title': 'Cleveland SC vs Lionsbridge FC',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:03b5238d6549f4ea1fddadf69b5e0b58',
|
||||
'upload_date': '20230323',
|
||||
'timestamp': 1679612400,
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'}
|
||||
}, {
|
||||
'url': 'https://elevensports.com/view/event/clhpyd53b06160jez74qhgkmf',
|
||||
'md5': 'c0958d9ff90e4503a75544358758921d',
|
||||
'info_dict': {
|
||||
'id': 'clhpyd53b06160jez74qhgkmf',
|
||||
'title': 'AJNLF vs ARRAF',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:c8c5e75c78f37c6d15cd6c475e43a8c1',
|
||||
'upload_date': '20230521',
|
||||
'timestamp': 1684684800,
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
event_id = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['event']['mclsEventId']
|
||||
event_data = self._download_json(
|
||||
f'https://mcls-api.mycujoo.tv/bff/events/v1beta1/{event_id}', video_id,
|
||||
headers={'Authorization': 'Bearer FBVKACGN37JQC5SFA0OVK8KKSIOP153G'})
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
event_data['streams'][0]['full_url'], video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(event_data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('start_time', {parse_iso8601}),
|
||||
'thumbnail': ('thumbnail_url', {url_or_none}),
|
||||
}),
|
||||
}
|
@ -61,6 +61,35 @@ class EmbedlyIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'http://www.permacultureetc.com/2022/12/comment-greffer-facilement-les-arbres-fruitiers.html',
|
||||
'info_dict': {
|
||||
'id': 'pfUK_ADTvgY',
|
||||
'ext': 'mp4',
|
||||
'title': 'Comment greffer facilement les arbres fruitiers ? (mois par mois)',
|
||||
'description': 'md5:d3a876995e522f138aabb48e040bfb4c',
|
||||
'view_count': int,
|
||||
'upload_date': '20221210',
|
||||
'comment_count': int,
|
||||
'live_status': 'not_live',
|
||||
'channel_id': 'UCsM4_jihNFYe4CtSkXvDR-Q',
|
||||
'channel_follower_count': int,
|
||||
'tags': ['permaculture', 'jardinage', 'dekarz', 'autonomie', 'greffe', 'fruitiers', 'arbres', 'jardin forêt', 'forêt comestible', 'damien'],
|
||||
'playable_in_embed': True,
|
||||
'uploader': 'permaculture agroécologie etc...',
|
||||
'channel': 'permaculture agroécologie etc...',
|
||||
'thumbnail': 'https://i.ytimg.com/vi/pfUK_ADTvgY/sddefault.jpg',
|
||||
'duration': 1526,
|
||||
'channel_url': 'https://www.youtube.com/channel/UCsM4_jihNFYe4CtSkXvDR-Q',
|
||||
'age_limit': 0,
|
||||
'uploader_id': 'permacultureetc',
|
||||
'like_count': int,
|
||||
'uploader_url': 'http://www.youtube.com/user/permacultureetc',
|
||||
'categories': ['Education'],
|
||||
'availability': 'public',
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _extract_from_webpage(cls, url, webpage):
|
||||
# Bypass "ie=cls" and suitable check
|
||||
|
@ -52,7 +52,7 @@ class EpornerIE(InfoExtractor):
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
|
||||
video_id = self._match_id(urlh.geturl())
|
||||
video_id = self._match_id(urlh.url)
|
||||
|
||||
hash = self._search_regex(
|
||||
r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash')
|
||||
|
@ -240,7 +240,7 @@ class FiveThirtyEightIE(InfoExtractor):
|
||||
|
||||
|
||||
class ESPNCricInfoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?espncricinfo\.com/video/[^#$&?/]+-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?espncricinfo\.com/(?:cricket-)?videos?/[^#$&?/]+-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.espncricinfo.com/video/finch-chasing-comes-with-risks-despite-world-cup-trend-1289135',
|
||||
'info_dict': {
|
||||
@ -252,6 +252,17 @@ class ESPNCricInfoIE(InfoExtractor):
|
||||
'duration': 96,
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
}, {
|
||||
'url': 'https://www.espncricinfo.com/cricket-videos/daryl-mitchell-mitchell-santner-is-one-of-the-best-white-ball-spinners-india-vs-new-zealand-1356225',
|
||||
'info_dict': {
|
||||
'id': '1356225',
|
||||
'ext': 'mp4',
|
||||
'description': '"Santner has done it for a long time for New Zealand - we\'re lucky to have him"',
|
||||
'upload_date': '20230128',
|
||||
'title': 'Mitchell: \'Santner is one of the best white-ball spinners at the moment\'',
|
||||
'duration': 87,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
60
plugins/youtube_download/yt_dlp/extractor/ettutv.py
Normal file
60
plugins/youtube_download/yt_dlp/extractor/ettutv.py
Normal file
@ -0,0 +1,60 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import bool_or_none, traverse_obj, unified_timestamp, url_or_none
|
||||
|
||||
|
||||
class EttuTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ettu\.tv/[^?#]+/playerpage/(?P<id>[0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ettu.tv/en-int/playerpage/1573849',
|
||||
'md5': '5874b7639a2aa866d1f6c3a4037c7c09',
|
||||
'info_dict': {
|
||||
'id': '1573849',
|
||||
'title': 'Ni Xia Lian - Shao Jieni',
|
||||
'description': 'ITTF Europe Top 16 Cup',
|
||||
'timestamp': 1677348600,
|
||||
'upload_date': '20230225',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ettu.tv/en-int/playerpage/1573753',
|
||||
'md5': '1fc094bf96cf2d5ec0f434d3a6dec9aa',
|
||||
'info_dict': {
|
||||
'id': '1573753',
|
||||
'title': 'Qiu Dang - Jorgic Darko',
|
||||
'description': 'ITTF Europe Top 16 Cup',
|
||||
'timestamp': 1677423600,
|
||||
'upload_date': '20230226',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_settings = self._download_json(
|
||||
f'https://www.ettu.tv/api/v3/contents/{video_id}/player-settings', video_id, query={
|
||||
'language': 'en',
|
||||
'showTitle': 'true',
|
||||
'device': 'desktop',
|
||||
})
|
||||
|
||||
stream_response = self._download_json(player_settings['streamAccess'], video_id, data=b'')
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
stream_response['data']['stream'], video_id, 'mp4')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(player_settings, {
|
||||
'title': 'title',
|
||||
'description': ('metaInformation', 'competition'),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
'timestamp': ('date', {unified_timestamp}),
|
||||
'is_live': ('isLivestream', {bool_or_none}),
|
||||
})
|
||||
}
|
@ -6,6 +6,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
qualities,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
xpath_text
|
||||
)
|
||||
@ -92,42 +93,17 @@ class EuropaIE(InfoExtractor):
|
||||
|
||||
class EuroParlWebstreamIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:multimedia|webstreaming)\.europarl\.europa\.eu/[^/#?]+/
|
||||
(?:embed/embed\.html\?event=|(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
|
||||
https?://multimedia\.europarl\.europa\.eu/[^/#?]+/
|
||||
(?:(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
|
||||
'info_dict': {
|
||||
'id': 'bcaa1db4-76ef-7e06-8da7-839bd0ad1dbe',
|
||||
'ext': 'mp4',
|
||||
'release_timestamp': 1663137900,
|
||||
'title': 'Plenary session',
|
||||
'release_date': '20220914',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/eu-cop27-un-climate-change-conference-in-sharm-el-sheikh-egypt-ep-delegation-meets-with-ngo-represen_20221114-1600-SPECIAL-OTHER',
|
||||
'info_dict': {
|
||||
'id': 'a8428de8-b9cd-6a2e-11e4-3805d9c9ff5c',
|
||||
'ext': 'mp4',
|
||||
'release_timestamp': 1668434400,
|
||||
'release_date': '20221114',
|
||||
'title': 'md5:d3550280c33cc70e0678652e3d52c028',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# embed webpage
|
||||
'url': 'https://webstreaming.europarl.europa.eu/ep/embed/embed.html?event=20220914-0900-PLENARY&language=en&autoplay=true&logo=true',
|
||||
'info_dict': {
|
||||
'id': 'bcaa1db4-76ef-7e06-8da7-839bd0ad1dbe',
|
||||
'id': '62388b15-d85b-4add-99aa-ba12ccf64f0d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Plenary session',
|
||||
'release_timestamp': 1663139069,
|
||||
'release_date': '20220914',
|
||||
'release_timestamp': 1663137900,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -144,30 +120,54 @@ class EuroParlWebstreamIE(InfoExtractor):
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'skip': 'not live anymore'
|
||||
}, {
|
||||
'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT',
|
||||
'info_dict': {
|
||||
'id': '7355662c-8eac-445e-4bb9-08db14b0ddd7',
|
||||
'ext': 'mp4',
|
||||
'release_date': '20230301',
|
||||
'title': 'Committee on Culture and Education',
|
||||
'release_timestamp': 1677666641,
|
||||
}
|
||||
}, {
|
||||
# live stream
|
||||
'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-environment-public-health-and-food-safety_20230524-0900-COMMITTEE-ENVI',
|
||||
'info_dict': {
|
||||
'id': 'e4255f56-10aa-4b3c-6530-08db56d5b0d9',
|
||||
'ext': 'mp4',
|
||||
'release_date': '20230524',
|
||||
'title': r're:Committee on Environment, Public Health and Food Safety \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}',
|
||||
'release_timestamp': 1684911541,
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'skip': 'Not live anymore'
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
webpage_nextjs = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
|
||||
|
||||
json_info = self._download_json(
|
||||
'https://vis-api.vuplay.co.uk/event/external', display_id,
|
||||
'https://acs-api.europarl.connectedviews.eu/api/FullMeeting', display_id,
|
||||
query={
|
||||
'player_key': 'europarl|718f822c-a48c-4841-9947-c9cb9bb1743c',
|
||||
'external_id': display_id,
|
||||
'api-version': 1.0,
|
||||
'tenantId': 'bae646ca-1fc8-4363-80ba-2c04f06b4968',
|
||||
'externalReference': display_id
|
||||
})
|
||||
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(json_info['streaming_url'], display_id)
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
json_info['streaming_url'].replace('.mpd', '.m3u8'), display_id)
|
||||
|
||||
formats.extend(fmts)
|
||||
formats, subtitles = [], {}
|
||||
for hls_url in traverse_obj(json_info, ((('meetingVideo'), ('meetingVideos', ...)), 'hlsUrl')):
|
||||
fmt, subs = self._extract_m3u8_formats_and_subtitles(hls_url, display_id)
|
||||
formats.extend(fmt)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'id': json_info['id'],
|
||||
'title': json_info.get('title'),
|
||||
'title': traverse_obj(webpage_nextjs, (('mediaItem', 'title'), ('title', )), get_all=False),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'release_timestamp': parse_iso8601(json_info.get('published_start')),
|
||||
'is_live': 'LIVE' in json_info.get('state', '')
|
||||
'release_timestamp': parse_iso8601(json_info.get('startDateTime')),
|
||||
'is_live': traverse_obj(webpage_nextjs, ('mediaItem', 'mediaSubType')) == 'Live'
|
||||
}
|
||||
|
@ -3,7 +3,7 @@ from ..utils import traverse_obj
|
||||
|
||||
|
||||
class EurosportIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.eurosport\.com/\w+/[\w-]+/\d+/[\w-]+_(?P<id>vid\d+)'
|
||||
_VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
|
||||
'info_dict': {
|
||||
@ -44,6 +44,32 @@ class EurosportIE(InfoExtractor):
|
||||
'description': 'md5:32bbe3a773ac132c57fb1e8cca4b7c71',
|
||||
'upload_date': '20220727',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.eurosport.com/football/champions-league/2022-2023/pep-guardiola-emotionally-destroyed-after-manchester-city-win-over-bayern-munich-in-champions-league_vid1896254/video.shtml',
|
||||
'info_dict': {
|
||||
'id': '3096477',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:82edc17370124c7a19b3cf518517583b',
|
||||
'duration': 84.0,
|
||||
'description': 'md5:b3f44ef7f5b5b95b24a273b163083feb',
|
||||
'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2023/04/12/3682873-74947393-2560-1440.jpg',
|
||||
'timestamp': 1681292028,
|
||||
'upload_date': '20230412',
|
||||
'display_id': 'vid1896254',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.eurosport.com/football/last-year-s-semi-final-pain-was-still-there-pep-guardiola-after-man-city-reach-cl-final_vid1914115/video.shtml',
|
||||
'info_dict': {
|
||||
'id': '3149108',
|
||||
'ext': 'mp4',
|
||||
'title': '\'Last year\'s semi-final pain was still there\' - Pep Guardiola after Man City reach CL final',
|
||||
'description': 'md5:89ef142fe0170a66abab77fac2955d8e',
|
||||
'display_id': 'vid1914115',
|
||||
'timestamp': 1684403618,
|
||||
'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2023/05/18/3707254-75435008-2560-1440.jpg',
|
||||
'duration': 105.0,
|
||||
'upload_date': '20230518',
|
||||
}
|
||||
}]
|
||||
|
||||
_TOKEN = None
|
||||
|
@ -8,6 +8,8 @@ from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import network_exceptions
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
@ -19,11 +21,10 @@ from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
network_exceptions,
|
||||
parse_count,
|
||||
parse_qs,
|
||||
qualities,
|
||||
sanitized_Request,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
url_or_none,
|
||||
@ -90,16 +91,16 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '274175099429670',
|
||||
'ext': 'mp4',
|
||||
'title': 'Asif Nawab Butt',
|
||||
'description': 'Asif Nawab Butt',
|
||||
'title': 'Asif',
|
||||
'description': '',
|
||||
'uploader': 'Asif Nawab Butt',
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader_id': 'pfbid04scW44U4P9iTyLZAGy8y8W3pR3i2VugvHCimiRudUAVbN3MPp9eXBaYFcgVworZwl',
|
||||
'duration': 131.03,
|
||||
'concurrent_view_count': int,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'title'
|
||||
]
|
||||
}, {
|
||||
'note': 'Video with DASH manifest',
|
||||
'url': 'https://www.facebook.com/video.php?v=957955867617029',
|
||||
@ -151,7 +152,7 @@ class FacebookIE(InfoExtractor):
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
# data.video.story.attachments[].media
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': '3f3798adb2b73423263e59376f1f5eb7',
|
||||
'md5': 'ca63897a90c9452efee5f8c40d080e25',
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
@ -162,6 +163,9 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': 'CNN',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'view_count': int,
|
||||
'uploader_id': '100059479812265',
|
||||
'concurrent_view_count': int,
|
||||
'duration': 44.478,
|
||||
},
|
||||
}, {
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
@ -170,12 +174,16 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1417995061575415',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ukrainian Scientists Worldwide | Довгоочікуване відео',
|
||||
'title': 'Довгоочікуване відео | By Yaroslav - Facebook',
|
||||
'description': 'Довгоочікуване відео',
|
||||
'timestamp': 1486648771,
|
||||
'timestamp': 1486648217,
|
||||
'upload_date': '20170209',
|
||||
'uploader': 'Yaroslav Korpan',
|
||||
'uploader_id': '100000948048708',
|
||||
'uploader_id': 'pfbid029y8j22EwH3ikeqgH3SEP9G3CAi9kmWKgXJJG9s5geV7mo3J2bvURqHCdgucRgAyhl',
|
||||
'concurrent_view_count': int,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'view_count': int,
|
||||
'duration': 11736.446,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -192,9 +200,7 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': 'La Guía Del Varón',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
|
||||
@ -208,9 +214,7 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': 'Elisabeth Ahtn',
|
||||
'uploader_id': '100013949973717',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
@ -252,7 +256,11 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': 1527084179,
|
||||
'upload_date': '20180523',
|
||||
'uploader': 'ESL One Dota 2',
|
||||
'uploader_id': '234218833769558',
|
||||
'uploader_id': '100066514874195',
|
||||
'duration': 4524.212,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'concurrent_view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -262,8 +270,17 @@ class FacebookIE(InfoExtractor):
|
||||
'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/',
|
||||
'info_dict': {
|
||||
'id': '106560053808006',
|
||||
'ext': 'mp4',
|
||||
'title': 'Josef',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'concurrent_view_count': int,
|
||||
'uploader_id': 'pfbid02gXHbDwxumkaKJQaTGUf3znYfYzTuidGEWawiramNx4YamSj2afwYSRkpcjtHtMRJl',
|
||||
'timestamp': 1549275572,
|
||||
'duration': 3.413,
|
||||
'uploader': 'Josef Novak',
|
||||
'description': '',
|
||||
'upload_date': '20190204',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# data.video.story.attachments[].media
|
||||
'url': 'https://www.facebook.com/watch/?v=647537299265662',
|
||||
@ -276,6 +293,7 @@ class FacebookIE(InfoExtractor):
|
||||
'id': '10157667649866271',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330',
|
||||
@ -319,7 +337,7 @@ class FacebookIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_page_req = sanitized_Request(self._LOGIN_URL)
|
||||
login_page_req = Request(self._LOGIN_URL)
|
||||
self._set_cookie('facebook.com', 'locale', 'en_US')
|
||||
login_page = self._download_webpage(login_page_req, None,
|
||||
note='Downloading login page',
|
||||
@ -340,8 +358,8 @@ class FacebookIE(InfoExtractor):
|
||||
'timezone': '-60',
|
||||
'trynum': '1',
|
||||
}
|
||||
request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
request = Request(self._LOGIN_URL, urlencode_postdata(login_form))
|
||||
request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
|
||||
try:
|
||||
login_results = self._download_webpage(request, None,
|
||||
note='Logging in', errnote='unable to fetch login page')
|
||||
@ -367,8 +385,8 @@ class FacebookIE(InfoExtractor):
|
||||
'h': h,
|
||||
'name_action_selected': 'dont_save',
|
||||
}
|
||||
check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
check_req = Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||
check_req.headers['Content-Type'] = 'application/x-www-form-urlencoded'
|
||||
check_response = self._download_webpage(check_req, None,
|
||||
note='Confirming login')
|
||||
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
|
||||
@ -390,7 +408,10 @@ class FacebookIE(InfoExtractor):
|
||||
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
|
||||
title = get_first(media, ('title', 'text'))
|
||||
description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
|
||||
uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {}
|
||||
uploader_data = (
|
||||
get_first(media, ('owner', {dict}))
|
||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, ('node', 'actors', ..., {dict})) or {})
|
||||
|
||||
page_title = title or self._html_search_regex((
|
||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
|
||||
@ -415,16 +436,17 @@ class FacebookIE(InfoExtractor):
|
||||
# in https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/
|
||||
if thumbnail and not re.search(r'\.(?:jpg|png)', thumbnail):
|
||||
thumbnail = None
|
||||
view_count = parse_count(self._search_regex(
|
||||
r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
|
||||
default=None))
|
||||
info_dict = {
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_data.get('id'),
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
'view_count': parse_count(self._search_regex(
|
||||
(r'\bviewCount\s*:\s*["\']([\d,.]+)', r'video_view_count["\']\s*:\s*(\d+)',),
|
||||
webpage, 'view count', default=None)),
|
||||
'concurrent_view_count': get_first(post, (
|
||||
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
|
||||
}
|
||||
|
||||
info_json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||
@ -459,7 +481,8 @@ class FacebookIE(InfoExtractor):
|
||||
dash_manifest = video.get('dash_manifest')
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest))))
|
||||
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
|
||||
mpd_url=video.get('dash_manifest_url')))
|
||||
|
||||
def process_formats(info):
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
@ -493,6 +516,13 @@ class FacebookIE(InfoExtractor):
|
||||
entries = []
|
||||
|
||||
def parse_graphql_video(video):
|
||||
v_id = video.get('videoId') or video.get('id') or video_id
|
||||
reel_info = traverse_obj(
|
||||
video, ('creation_story', 'short_form_video_context', 'playback_video', {dict}))
|
||||
if reel_info:
|
||||
video = video['creation_story']
|
||||
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
|
||||
video.update(reel_info)
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
|
||||
@ -509,15 +539,15 @@ class FacebookIE(InfoExtractor):
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
v_id = video.get('videoId') or video.get('id') or video_id
|
||||
info = {
|
||||
'id': v_id,
|
||||
'formats': formats,
|
||||
'thumbnail': traverse_obj(
|
||||
video, ('thumbnailImage', 'uri'), ('preferred_thumbnail', 'image', 'uri')),
|
||||
'uploader_id': try_get(video, lambda x: x['owner']['id']),
|
||||
'timestamp': int_or_none(video.get('publish_time')),
|
||||
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
|
||||
'uploader_id': traverse_obj(video, ('owner', 'id', {str_or_none})),
|
||||
'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
|
||||
'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
|
||||
or float_or_none(video.get('length_in_second'))),
|
||||
}
|
||||
process_formats(info)
|
||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||
@ -778,18 +808,18 @@ class FacebookReelIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/reel/1195289147628387',
|
||||
'md5': 'c4ff9a7182ff9ff7d6f7a83603bae831',
|
||||
'md5': 'f13dd37f2633595982db5ed8765474d3',
|
||||
'info_dict': {
|
||||
'id': '1195289147628387',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:9f5b142921b2dc57004fa13f76005f87',
|
||||
'description': 'md5:24ea7ef062215d295bdde64e778f5474',
|
||||
'uploader': 'Beast Camp Training',
|
||||
'uploader_id': '1738535909799870',
|
||||
'duration': 9.536,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
|
||||
'description': 'md5:22f03309b216ac84720183961441d8db',
|
||||
'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
|
||||
'uploader_id': '100040874179269',
|
||||
'duration': 9.579,
|
||||
'timestamp': 1637502609,
|
||||
'upload_date': '20211121',
|
||||
'timestamp': 1637502604,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
}
|
||||
}]
|
||||
|
||||
|
@ -3,11 +3,11 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..dependencies import websockets
|
||||
from ..networking import Request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
WebSocketsWrapper,
|
||||
js_to_json,
|
||||
sanitized_Request,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
@ -57,7 +57,7 @@ class FC2IE(InfoExtractor):
|
||||
}
|
||||
|
||||
login_data = urlencode_postdata(login_form_strs)
|
||||
request = sanitized_Request(
|
||||
request = Request(
|
||||
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
|
||||
|
||||
login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
|
||||
@ -66,7 +66,7 @@ class FC2IE(InfoExtractor):
|
||||
return False
|
||||
|
||||
# this is also needed
|
||||
login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
|
||||
login_redir = Request('http://id.fc2.com/?mode=redirect&login=done')
|
||||
self._download_webpage(
|
||||
login_redir, None, note='Login redirect', errnote='Login redirect failed')
|
||||
|
||||
|
@ -1,8 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
qualities,
|
||||
strip_or_none,
|
||||
@ -40,8 +38,8 @@ class FilmOnIE(InfoExtractor):
|
||||
'https://www.filmon.com/api/vod/movie?id=%s' % video_id,
|
||||
video_id)['response']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
errmsg = self._parse_json(e.cause.read().decode(), video_id)['reason']
|
||||
if isinstance(e.cause, HTTPError):
|
||||
errmsg = self._parse_json(e.cause.response.read().decode(), video_id)['reason']
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
|
||||
raise
|
||||
|
||||
@ -124,8 +122,8 @@ class FilmOnChannelIE(InfoExtractor):
|
||||
channel_data = self._download_json(
|
||||
'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
errmsg = self._parse_json(e.cause.read().decode(), channel_id)['message']
|
||||
if isinstance(e.cause, HTTPError):
|
||||
errmsg = self._parse_json(e.cause.response.read().decode(), channel_id)['message']
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
|
||||
raise
|
||||
|
||||
|
@ -3,10 +3,10 @@ import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@ -20,7 +20,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FOXIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?fox(?:sports)?\.com/(?:watch|replay)/(?P<id>[\da-fA-F]+)'
|
||||
_TESTS = [{
|
||||
# clip
|
||||
'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
|
||||
@ -50,6 +50,10 @@ class FOXIE(InfoExtractor):
|
||||
# sports event, geo-restricted
|
||||
'url': 'https://www.fox.com/watch/b057484dade738d1f373b3e46216fa2c/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# fox sports replay, geo-restricted
|
||||
'url': 'https://www.foxsports.com/replay/561f3e071347a24e5e877abc56b22e89',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_HOME_PAGE_URL = 'https://www.fox.com/'
|
||||
@ -68,9 +72,9 @@ class FOXIE(InfoExtractor):
|
||||
'https://api3.fox.com/v2.0/' + path,
|
||||
video_id, data=data, headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
entitlement_issues = self._parse_json(
|
||||
e.cause.read().decode(), video_id)['entitlementIssues']
|
||||
e.cause.response.read().decode(), video_id)['entitlementIssues']
|
||||
for e in entitlement_issues:
|
||||
if e.get('errorCode') == 1005:
|
||||
raise ExtractorError(
|
||||
@ -123,8 +127,8 @@ class FOXIE(InfoExtractor):
|
||||
try:
|
||||
m3u8_url = self._download_json(release_url, video_id)['playURL']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
error = self._parse_json(e.cause.read().decode(), video_id)
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
error = self._parse_json(e.cause.response.read().decode(), video_id)
|
||||
if error.get('exception') == 'GeoLocationBlocked':
|
||||
self.raise_geo_restricted(countries=['US'])
|
||||
raise ExtractorError(error['description'], expected=True)
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user