Updated ytdlp version

itdominator 2023-08-13 20:13:21 -05:00
parent 5264103f31
commit ee3e042b1b
344 changed files with 20808 additions and 7875 deletions

File diff suppressed because it is too large

View File

@@ -13,6 +13,7 @@ import optparse
 import os
 import re
 import sys
+import traceback
 
 from .compat import compat_shlex_quote
 from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
@@ -56,11 +57,11 @@ from .utils import (
     read_stdin,
     render_table,
     setproctitle,
-    std_headers,
     traverse_obj,
     variadic,
     write_string,
 )
+from .utils.networking import std_headers
 from .YoutubeDL import YoutubeDL
 
 _IN_CLI = False
@@ -187,8 +188,8 @@ def validate_options(opts):
         raise ValueError(f'{max_name} "{max_val}" must be must be greater than or equal to {min_name} "{min_val}"')
 
     # Usernames and passwords
-    validate(not opts.usenetrc or (opts.username is None and opts.password is None),
-             '.netrc', msg='using {name} conflicts with giving username/password')
+    validate(sum(map(bool, (opts.usenetrc, opts.netrc_cmd, opts.username))) <= 1, '.netrc',
+             msg='{name}, netrc command and username/password are mutually exclusive options')
     validate(opts.password is None or opts.username is not None, 'account username', msg='{name} missing')
     validate(opts.ap_password is None or opts.ap_username is not None,
              'TV Provider account username', msg='{name} missing')
@@ -318,31 +319,50 @@ def validate_options(opts):
     if outtmpl_default == '':
         opts.skip_download = None
         del opts.outtmpl['default']
-    if outtmpl_default and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio:
-        raise ValueError(
-            'Cannot download a video and extract audio into the same file! '
-            f'Use "{outtmpl_default}.%(ext)s" instead of "{outtmpl_default}" as the output template')
 
-    def parse_chapters(name, value):
-        chapters, ranges = [], []
+    def parse_chapters(name, value, advanced=False):
         parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x)
-        for regex in value or []:
-            if regex.startswith('*'):
-                for range_ in map(str.strip, regex[1:].split(',')):
-                    mobj = range_ != '-' and re.fullmatch(r'([^-]+)?\s*-\s*([^-]+)?', range_)
-                    dur = mobj and (parse_timestamp(mobj.group(1) or '0'), parse_timestamp(mobj.group(2) or 'inf'))
-                    if None in (dur or [None]):
-                        raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "*start-end"')
-                    ranges.append(dur)
-                continue
-            try:
-                chapters.append(re.compile(regex))
-            except re.error as err:
-                raise ValueError(f'invalid {name} regex "{regex}" - {err}')
-        return chapters, ranges
+        TIMESTAMP_RE = r'''(?x)(?:
+            (?P<start_sign>-?)(?P<start>[^-]+)
+        )?\s*-\s*(?:
+            (?P<end_sign>-?)(?P<end>[^-]+)
+        )?'''
+
+        chapters, ranges, from_url = [], [], False
+        for regex in value or []:
+            if advanced and regex == '*from-url':
+                from_url = True
+                continue
+            elif not regex.startswith('*'):
+                try:
+                    chapters.append(re.compile(regex))
+                except re.error as err:
+                    raise ValueError(f'invalid {name} regex "{regex}" - {err}')
+                continue
+
+            for range_ in map(str.strip, regex[1:].split(',')):
+                mobj = range_ != '-' and re.fullmatch(TIMESTAMP_RE, range_)
+                dur = mobj and [parse_timestamp(mobj.group('start') or '0'), parse_timestamp(mobj.group('end') or 'inf')]
+                signs = mobj and (mobj.group('start_sign'), mobj.group('end_sign'))
+
+                err = None
+                if None in (dur or [None]):
+                    err = 'Must be of the form "*start-end"'
+                elif not advanced and any(signs):
+                    err = 'Negative timestamps are not allowed'
+                else:
+                    dur[0] *= -1 if signs[0] else 1
+                    dur[1] *= -1 if signs[1] else 1
+                    if dur[1] == float('-inf'):
+                        err = '"-inf" is not a valid end'
+                if err:
+                    raise ValueError(f'invalid {name} time range "{regex}". {err}')
+                ranges.append(dur)
+
+        return chapters, ranges, from_url
 
-    opts.remove_chapters, opts.remove_ranges = parse_chapters('--remove-chapters', opts.remove_chapters)
-    opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges))
+    opts.remove_chapters, opts.remove_ranges, _ = parse_chapters('--remove-chapters', opts.remove_chapters)
+    opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges, True))
 
     # Cookies from browser
     if opts.cookiesfrombrowser:
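Note (not part of the commit): the rewritten `parse_chapters()` delegates all `*start-end` parsing to the new `TIMESTAMP_RE`, whose sign groups are what allow negative, from-the-end offsets when `advanced=True`. A minimal sketch of the capture behaviour, with the regex copied from the diff and an illustrative section spec:

```python
import re

TIMESTAMP_RE = r'''(?x)(?:
    (?P<start_sign>-?)(?P<start>[^-]+)
)?\s*-\s*(?:
    (?P<end_sign>-?)(?P<end>[^-]+)
)?'''

# '*-10--2' would mean "from 10 seconds before the end to 2 seconds before it"
m = re.fullmatch(TIMESTAMP_RE, '-10--2')
assert m.group('start_sign', 'start', 'end_sign', 'end') == ('-', '10', '-', '2')
```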
@@ -400,14 +420,19 @@ def validate_options(opts):
         except Exception as err:
             raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}')
 
-    geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country
-    if geo_bypass_code is not None:
+    opts.geo_bypass_country, opts.geo_bypass_ip_block = None, None
+    if opts.geo_bypass.lower() not in ('default', 'never'):
         try:
-            GeoUtils.random_ipv4(geo_bypass_code)
+            GeoUtils.random_ipv4(opts.geo_bypass)
         except Exception:
-            raise ValueError('unsupported geo-bypass country or ip-block')
+            raise ValueError(f'Unsupported --xff "{opts.geo_bypass}"')
+        if len(opts.geo_bypass) == 2:
+            opts.geo_bypass_country = opts.geo_bypass
+        else:
+            opts.geo_bypass_ip_block = opts.geo_bypass
+    opts.geo_bypass = opts.geo_bypass.lower() != 'never'
 
-    opts.match_filter = match_filter_func(opts.match_filter)
+    opts.match_filter = match_filter_func(opts.match_filter, opts.breaking_match_filter)
 
     if opts.download_archive is not None:
         opts.download_archive = expand_path(opts.download_archive)
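Note (not part of the commit): the reworked block folds the old country/IP-block options into a single `--xff` value. A sketch of the dispatch rule it applies, extracted as a pure function for illustration:

```python
def split_xff(value):
    # Mirrors the branch above: 'default'/'never' leave both fields unset;
    # a two-letter value is treated as a country code, anything else as an IP block
    if value.lower() in ('default', 'never'):
        return None, None
    return (value, None) if len(value) == 2 else (None, value)

assert split_xff('US') == ('US', None)
assert split_xff('198.51.100.0/24') == (None, '198.51.100.0/24')
```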
@@ -434,6 +459,10 @@ def validate_options(opts):
         elif ed and proto == 'default':
             default_downloader = ed.get_basename()
 
+    for policy in opts.color.values():
+        if policy not in ('always', 'auto', 'no_color', 'never'):
+            raise ValueError(f'"{policy}" is not a valid color policy')
+
     warnings, deprecation_warnings = [], []
 
     # Common mistake: -f best
@@ -708,7 +737,8 @@ def parse_options(argv=None):
         'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename',
         'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
     ))
-    opts.quiet = opts.quiet or any_getting or opts.print_json or bool(opts.forceprint)
+    if opts.quiet is None:
+        opts.quiet = any_getting or opts.print_json or bool(opts.forceprint)
 
     playlist_pps = [pp for pp in postprocessors if pp.get('when') == 'playlist']
     write_playlist_infojson = (opts.writeinfojson and not opts.clean_infojson
@@ -734,6 +764,7 @@ def parse_options(argv=None):
     return ParsedOptions(parser, opts, urls, {
         'usenetrc': opts.usenetrc,
        'netrc_location': opts.netrc_location,
+        'netrc_cmd': opts.netrc_cmd,
         'username': opts.username,
         'password': opts.password,
         'twofactor': opts.twofactor,
@@ -891,7 +922,7 @@ def parse_options(argv=None):
         'playlist_items': opts.playlist_items,
         'xattr_set_filesize': opts.xattr_set_filesize,
         'match_filter': opts.match_filter,
-        'no_color': opts.no_color,
+        'color': opts.color,
         'ffmpeg_location': opts.ffmpeg_location,
         'hls_prefer_native': opts.hls_prefer_native,
         'hls_use_mpegts': opts.hls_use_mpegts,
@@ -935,14 +966,18 @@ def _real_main(argv=None):
         if opts.rm_cachedir:
             ydl.cache.remove()
 
-        updater = Updater(ydl)
-        if opts.update_self and updater.update() and actual_use:
-            if updater.cmd:
-                return updater.restart()
-            # This code is reachable only for zip variant in py < 3.10
-            # It makes sense to exit here, but the old behavior is to continue
-            ydl.report_warning('Restart yt-dlp to use the updated version')
-            # return 100, 'ERROR: The program must exit for the update to complete'
+        try:
+            updater = Updater(ydl, opts.update_self)
+            if opts.update_self and updater.update() and actual_use:
+                if updater.cmd:
+                    return updater.restart()
+                # This code is reachable only for zip variant in py < 3.10
+                # It makes sense to exit here, but the old behavior is to continue
+                ydl.report_warning('Restart yt-dlp to use the updated version')
+                # return 100, 'ERROR: The program must exit for the update to complete'
+        except Exception:
+            traceback.print_exc()
+            ydl._download_retcode = 100
 
         if not actual_use:
             if pre_process:
@@ -956,6 +991,8 @@ def _real_main(argv=None):
     parser.destroy()
     try:
         if opts.load_info_filename is not None:
+            if all_urls:
+                ydl.report_warning('URLs are ignored due to --load-info-json')
             return ydl.download_with_info_file(expand_path(opts.load_info_filename))
         else:
             return ydl.download(all_urls)

View File

@@ -1,30 +1,8 @@
-import ast
-import os
 import sys
-from pathlib import Path
 
 from PyInstaller.utils.hooks import collect_submodules
 
 
-def find_attribute_accesses(node, name, path=()):
-    if isinstance(node, ast.Attribute):
-        path = [*path, node.attr]
-        if isinstance(node.value, ast.Name) and node.value.id == name:
-            yield path[::-1]
-    for child in ast.iter_child_nodes(node):
-        yield from find_attribute_accesses(child, name, path)
-
-
-def collect_used_submodules(name, level):
-    for dirpath, _, filenames in os.walk(Path(__file__).parent.parent):
-        for filename in filenames:
-            if not filename.endswith('.py'):
-                continue
-            with open(Path(dirpath) / filename, encoding='utf8') as f:
-                for submodule in find_attribute_accesses(ast.parse(f.read()), name):
-                    yield '.'.join(submodule[:level])
-
-
 def pycryptodome_module():
     try:
         import Cryptodome  # noqa: F401
@@ -40,13 +18,10 @@ def pycryptodome_module():
 
 
 def get_hidden_imports():
-    yield 'yt_dlp.compat._legacy'
+    yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated')
+    yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated')
+    yield pycryptodome_module()
     yield from collect_submodules('websockets')
-    crypto = pycryptodome_module()
-    for sm in set(collect_used_submodules('Cryptodome', 2)):
-        yield f'{crypto}.{sm}'
 
     # These are auto-detected, but explicitly add them just in case
     yield from ('mutagen', 'brotli', 'certifi')

View File

@@ -5,14 +5,14 @@ from .compat import compat_ord
 from .dependencies import Cryptodome
 from .utils import bytes_to_intlist, intlist_to_bytes
 
-if Cryptodome:
+if Cryptodome.AES:
     def aes_cbc_decrypt_bytes(data, key, iv):
         """ Decrypt bytes with AES-CBC using pycryptodome """
-        return Cryptodome.Cipher.AES.new(key, Cryptodome.Cipher.AES.MODE_CBC, iv).decrypt(data)
+        return Cryptodome.AES.new(key, Cryptodome.AES.MODE_CBC, iv).decrypt(data)
 
     def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
         """ Decrypt bytes with AES-GCM using pycryptodome """
-        return Cryptodome.Cipher.AES.new(key, Cryptodome.Cipher.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag)
+        return Cryptodome.AES.new(key, Cryptodome.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag)
 else:
     def aes_cbc_decrypt_bytes(data, key, iv):
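Note (not part of the commit): a runnable sketch of what the pycryptodome-backed branch does, assuming `pycryptodomex` is installed. When it is not, `Cryptodome.AES` is falsy and the pure-Python fallback below this branch is used instead:

```python
from Cryptodome.Cipher import AES

key = b'0123456789abcdef'   # 16-byte AES-128 key, illustrative
iv = b'\x00' * 16           # fixed IV, for illustration only
ct = AES.new(key, AES.MODE_CBC, iv).encrypt(b'sixteen byte msg')
assert AES.new(key, AES.MODE_CBC, iv).decrypt(ct) == b'sixteen byte msg'
```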

View File

@@ -1,5 +1,4 @@
 import contextlib
-import errno
 import json
 import os
 import re
@@ -39,11 +38,7 @@ class Cache:
         fn = self._get_cache_fn(section, key, dtype)
         try:
-            try:
-                os.makedirs(os.path.dirname(fn))
-            except OSError as ose:
-                if ose.errno != errno.EEXIST:
-                    raise
+            os.makedirs(os.path.dirname(fn), exist_ok=True)
             self._ydl.write_debug(f'Saving {section}.{key} to cache')
             write_json_file({'yt-dlp_version': __version__, 'data': data}, fn)
         except Exception:
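Note (not part of the commit): the change replaces the manual `errno.EEXIST` dance with the stdlib flag available since Python 3.2. A one-line equivalent, with an illustrative path:

```python
import os

# exist_ok=True makes the call a no-op when the directory already exists,
# while still raising on real failures such as permission errors
os.makedirs('/tmp/yt-dlp-cache/example', exist_ok=True)
```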

View File

@@ -0,0 +1,5 @@
+import warnings
+
+warnings.warn(DeprecationWarning(f'{__name__} is deprecated'))
+
+casefold = str.casefold

View File

@@ -1,14 +1,11 @@
 import os
 import sys
-import warnings
 import xml.etree.ElementTree as etree
 
-from ._deprecated import *  # noqa: F401, F403
 from .compat_utils import passthrough_module
 
-# XXX: Implement this the same way as other DeprecationWarnings without circular import
-passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn(
-    DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=5))
+passthrough_module(__name__, '._deprecated')
+del passthrough_module
 
 
 # HTMLParseError has been deprecated in Python 3.3 and removed in
@@ -70,3 +67,13 @@ if compat_os_name in ('nt', 'ce'):
             return userhome + path[i:]
 else:
     compat_expanduser = os.path.expanduser
+
+
+def urllib_req_to_req(urllib_request):
+    """Convert urllib Request to a networking Request"""
+    from ..networking import Request
+    from ..utils.networking import HTTPHeaderDict
+    return Request(
+        urllib_request.get_full_url(), data=urllib_request.data, method=urllib_request.get_method(),
+        headers=HTTPHeaderDict(urllib_request.headers, urllib_request.unredirected_hdrs),
+        extensions={'timeout': urllib_request.timeout} if hasattr(urllib_request, 'timeout') else None)
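Note (not part of the commit): hypothetical usage of the new helper. The `url`/`headers` attributes on the returned `yt_dlp.networking.Request` are assumptions based on the diff, not verified against this vendored copy:

```python
import urllib.request
from yt_dlp.compat import urllib_req_to_req

legacy = urllib.request.Request('https://example.com/', headers={'X-Demo': '1'})
req = urllib_req_to_req(legacy)   # networking Request carrying the same URL/headers
print(req.url, req.headers)
```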

View File

@@ -1,4 +1,12 @@
 """Deprecated - New code should avoid these"""
+import warnings
+
+from .compat_utils import passthrough_module
+
+# XXX: Implement this the same way as other DeprecationWarnings without circular import
+passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn(
+    DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
+del passthrough_module
 
 import base64
 import urllib.error
@@ -8,7 +16,6 @@ compat_str = str
 
 compat_b64decode = base64.b64decode
-compat_HTTPError = urllib.error.HTTPError
 
 compat_urlparse = urllib.parse
 compat_parse_qs = urllib.parse.parse_qs
 compat_urllib_parse_unquote = urllib.parse.unquote

View File

@@ -1,5 +1,6 @@
 """ Do not use! """
 
+import base64
 import collections
 import ctypes
 import getpass
@@ -15,12 +16,12 @@ import shlex
 import shutil
 import socket
 import struct
+import subprocess
 import tokenize
 import urllib.error
 import urllib.parse
 import urllib.request
 import xml.etree.ElementTree as etree
-from subprocess import DEVNULL
 
 # isort: split
 import asyncio  # noqa: F401
@@ -29,10 +30,11 @@ from asyncio import run as compat_asyncio_run  # noqa: F401
 from re import Pattern as compat_Pattern  # noqa: F401
 from re import match as compat_Match  # noqa: F401
 
+from . import compat_expanduser, compat_HTMLParseError, compat_realpath
 from .compat_utils import passthrough_module
-from ..dependencies import Cryptodome_AES as compat_pycrypto_AES  # noqa: F401
 from ..dependencies import brotli as compat_brotli  # noqa: F401
 from ..dependencies import websockets as compat_websockets  # noqa: F401
+from ..dependencies.Cryptodome import AES as compat_pycrypto_AES  # noqa: F401
 
 passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
@@ -47,41 +49,48 @@ def compat_setenv(key, value, env=os.environ):
     env[key] = value
 
 
+compat_base64_b64decode = base64.b64decode
 compat_basestring = str
 compat_casefold = str.casefold
 compat_chr = chr
 compat_collections_abc = collections.abc
-compat_cookiejar = http.cookiejar
-compat_cookiejar_Cookie = http.cookiejar.Cookie
-compat_cookies = http.cookies
-compat_cookies_SimpleCookie = http.cookies.SimpleCookie
-compat_etree_Element = etree.Element
-compat_etree_register_namespace = etree.register_namespace
+compat_cookiejar = compat_http_cookiejar = http.cookiejar
+compat_cookiejar_Cookie = compat_http_cookiejar_Cookie = http.cookiejar.Cookie
+compat_cookies = compat_http_cookies = http.cookies
+compat_cookies_SimpleCookie = compat_http_cookies_SimpleCookie = http.cookies.SimpleCookie
+compat_etree_Element = compat_xml_etree_ElementTree_Element = etree.Element
+compat_etree_register_namespace = compat_xml_etree_register_namespace = etree.register_namespace
 compat_filter = filter
 compat_get_terminal_size = shutil.get_terminal_size
 compat_getenv = os.getenv
-compat_getpass = getpass.getpass
+compat_getpass = compat_getpass_getpass = getpass.getpass
 compat_html_entities = html.entities
 compat_html_entities_html5 = html.entities.html5
-compat_HTMLParser = html.parser.HTMLParser
+compat_html_parser_HTMLParseError = compat_HTMLParseError
+compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
 compat_http_client = http.client
 compat_http_server = http.server
+compat_HTTPError = urllib.error.HTTPError
 compat_input = input
 compat_integer_types = (int, )
 compat_itertools_count = itertools.count
 compat_kwargs = lambda kwargs: kwargs
 compat_map = map
 compat_numeric_types = (int, float, complex)
+compat_os_path_expanduser = compat_expanduser
+compat_os_path_realpath = compat_realpath
 compat_print = print
 compat_shlex_split = shlex.split
 compat_socket_create_connection = socket.create_connection
 compat_Struct = struct.Struct
 compat_struct_pack = struct.pack
 compat_struct_unpack = struct.unpack
-compat_subprocess_get_DEVNULL = lambda: DEVNULL
+compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
 compat_tokenize_tokenize = tokenize.tokenize
 compat_urllib_error = urllib.error
+compat_urllib_HTTPError = urllib.error.HTTPError
 compat_urllib_parse = urllib.parse
+compat_urllib_parse_parse_qs = urllib.parse.parse_qs
 compat_urllib_parse_quote = urllib.parse.quote
 compat_urllib_parse_quote_plus = urllib.parse.quote_plus
 compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
@@ -90,8 +99,10 @@ compat_urllib_parse_urlunparse = urllib.parse.urlunparse
 compat_urllib_request = urllib.request
 compat_urllib_request_DataHandler = urllib.request.DataHandler
 compat_urllib_response = urllib.response
-compat_urlretrieve = urllib.request.urlretrieve
-compat_xml_parse_error = etree.ParseError
+compat_urlretrieve = compat_urllib_request_urlretrieve = urllib.request.urlretrieve
+compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseError
 compat_xpath = lambda xpath: xpath
 compat_zip = zip
 workaround_optparse_bug9161 = lambda: None
+
+legacy = []

View File

@@ -48,7 +48,7 @@ def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=la
     """Passthrough parent module into a child module, creating the parent if necessary"""
     def __getattr__(attr):
         if _is_package(parent):
-            with contextlib.suppress(ImportError):
+            with contextlib.suppress(ModuleNotFoundError):
                 return importlib.import_module(f'.{attr}', parent.__name__)
 
         ret = from_child(attr)
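Note (not part of the commit): narrowing the suppressed exception matters because a submodule that exists but fails while importing its own dependencies raises a plain `ImportError`, which should now propagate instead of being masked. A quick illustration of the case that is still suppressed:

```python
import importlib

try:
    importlib.import_module('json.no_such_submodule')
except ModuleNotFoundError:
    # genuinely missing submodule: safe to fall through to from_child()
    pass
```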

View File

@@ -0,0 +1,13 @@
+# flake8: noqa: F405
+from types import *  # noqa: F403
+
+from .compat_utils import passthrough_module
+
+passthrough_module(__name__, 'types')
+del passthrough_module
+
+try:
+    # NB: pypy has builtin NoneType, so checking NameError won't work
+    from types import NoneType  # >= 3.10
+except ImportError:
+    NoneType = type(None)

View File

@@ -0,0 +1,10 @@
+# flake8: noqa: F405
+from urllib import *  # noqa: F403
+
+del request
+from . import request  # noqa: F401
+
+from ..compat_utils import passthrough_module
+
+passthrough_module(__name__, 'urllib')
+del passthrough_module

View File

@@ -0,0 +1,40 @@
+# flake8: noqa: F405
+from urllib.request import *  # noqa: F403
+
+from ..compat_utils import passthrough_module
+
+passthrough_module(__name__, 'urllib.request')
+del passthrough_module
+
+
+from .. import compat_os_name
+
+if compat_os_name == 'nt':
+    # On older python versions, proxies are extracted from Windows registry erroneously. [1]
+    # If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2]
+    # It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade
+    # it to http on these older python versions to avoid issues
+    # This also applies for ftp proxy type, as ftp:// proxy scheme is not supported.
+    # 1: https://github.com/python/cpython/issues/86793
+    # 2: https://github.com/python/cpython/blob/51f1ae5ceb0673316c4e4b0175384e892e33cc6e/Lib/urllib/request.py#L2683-L2698
+    import sys
+    from urllib.request import getproxies_environment, getproxies_registry
+
+    def getproxies_registry_patched():
+        proxies = getproxies_registry()
+        if (
+            sys.version_info >= (3, 10, 5)  # https://docs.python.org/3.10/whatsnew/changelog.html#python-3-10-5-final
+            or (3, 9, 13) <= sys.version_info < (3, 10)  # https://docs.python.org/3.9/whatsnew/changelog.html#python-3-9-13-final
+        ):
+            return proxies
+
+        for scheme in ('https', 'ftp'):
+            if scheme in proxies and proxies[scheme].startswith(f'{scheme}://'):
+                proxies[scheme] = 'http' + proxies[scheme][len(scheme):]
+
+        return proxies
+
+    def getproxies():
+        return getproxies_environment() or getproxies_registry_patched()
+
+del compat_os_name
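Note (not part of the commit): the downgrade itself is a pure string rewrite over the proxy mapping; extracted as a standalone check with an illustrative registry value:

```python
proxies = {'https': 'https://127.0.0.1:8080'}
for scheme in ('https', 'ftp'):
    if scheme in proxies and proxies[scheme].startswith(f'{scheme}://'):
        # urllib cannot actually speak https:// or ftp:// to the proxy here,
        # so rewrite the scheme to plain http
        proxies[scheme] = 'http' + proxies[scheme][len(scheme):]
assert proxies['https'] == 'http://127.0.0.1:8080'
```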

View File

@@ -1,7 +1,9 @@
 import base64
+import collections
 import contextlib
 import http.cookiejar
 import http.cookies
+import io
 import json
 import os
 import re
@@ -11,6 +13,7 @@ import subprocess
 import sys
 import tempfile
 import time
+import urllib.request
 from datetime import datetime, timedelta, timezone
 from enum import Enum, auto
 from hashlib import pbkdf2_hmac
@@ -20,6 +23,7 @@ from .aes import (
     aes_gcm_decrypt_and_verify_bytes,
     unpad_pkcs7,
 )
+from .compat import functools
 from .dependencies import (
     _SECRETSTORAGE_UNAVAILABLE_REASON,
     secretstorage,
@@ -28,36 +32,24 @@ from .dependencies import (
 from .minicurses import MultilinePrinter, QuietMultilinePrinter
 from .utils import (
     Popen,
-    YoutubeDLCookieJar,
     error_to_str,
     expand_path,
     is_path_like,
+    sanitize_url,
+    str_or_none,
     try_call,
+    write_string,
 )
+from .utils._utils import _YDLLogger
+from .utils.networking import normalize_url
 
 CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
 SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
 
 
-class YDLLogger:
-    def __init__(self, ydl=None):
-        self._ydl = ydl
-
-    def debug(self, message):
-        if self._ydl:
-            self._ydl.write_debug(message)
-
-    def info(self, message):
-        if self._ydl:
-            self._ydl.to_screen(f'[Cookies] {message}')
-
-    def warning(self, message, only_once=False):
-        if self._ydl:
-            self._ydl.report_warning(message, only_once)
-
-    def error(self, message):
-        if self._ydl:
-            self._ydl.report_error(message)
+class YDLLogger(_YDLLogger):
    def warning(self, message, only_once=False):  # compat
        return super().warning(message, once=only_once)
 
 
 class ProgressBar(MultilinePrinter):
@@ -105,7 +97,7 @@ def load_cookies(cookie_file, browser_specification, ydl):
         jar = YoutubeDLCookieJar(cookie_file)
         if not is_filename or os.access(cookie_file, os.R_OK):
-            jar.load(ignore_discard=True, ignore_expires=True)
+            jar.load()
         cookie_jars.append(jar)
 
     return _merge_cookie_jars(cookie_jars)
@@ -146,7 +138,7 @@ def _extract_firefox_cookies(profile, container, logger):
     containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
     if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
         raise FileNotFoundError(f'could not read containers.json in {search_root}')
-    with open(containers_path) as containers:
+    with open(containers_path, encoding='utf8') as containers:
         identities = json.load(containers).get('identities', [])
     container_id = next((context.get('userContextId') for context in identities if container in (
         context.get('name'),
@@ -346,7 +338,9 @@ class ChromeCookieDecryptor:
     Linux:
     - cookies are either v10 or v11
     - v10: AES-CBC encrypted with a fixed key
+        - also attempts empty password if decryption fails
     - v11: AES-CBC encrypted with an OS protected key (keyring)
+        - also attempts empty password if decryption fails
     - v11 keys can be stored in various places depending on the activate desktop environment [2]
 
     Mac:
@@ -361,7 +355,7 @@ class ChromeCookieDecryptor:
     Sources:
     - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
-    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
+    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
         - KeyStorageLinux::CreateService
     """
@@ -383,32 +377,49 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
     def __init__(self, browser_keyring_name, logger, *, keyring=None):
         self._logger = logger
         self._v10_key = self.derive_key(b'peanuts')
-        password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
-        self._v11_key = None if password is None else self.derive_key(password)
+        self._empty_key = self.derive_key(b'')
         self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
+        self._browser_keyring_name = browser_keyring_name
+        self._keyring = keyring
+
+    @functools.cached_property
+    def _v11_key(self):
+        password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
+        return None if password is None else self.derive_key(password)
 
     @staticmethod
     def derive_key(password):
         # values from
-        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
+        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
         return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)
 
     def decrypt(self, encrypted_value):
+        """
+
+        following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
+        with an empty password. The failure detection is not the same as what chromium uses so the
+        results won't be perfect
+
+        References:
+            - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
+                - a bugfix to try an empty password as a fallback
+        """
         version = encrypted_value[:3]
         ciphertext = encrypted_value[3:]
 
         if version == b'v10':
             self._cookie_counts['v10'] += 1
-            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
+            return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)
 
         elif version == b'v11':
             self._cookie_counts['v11'] += 1
             if self._v11_key is None:
                 self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                 return None
-            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)
+            return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger)
 
         else:
+            self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
             self._cookie_counts['other'] += 1
             return None
@@ -423,7 +434,7 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
     @staticmethod
     def derive_key(password):
         # values from
-        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
+        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
         return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)
 
     def decrypt(self, encrypted_value):
@@ -436,12 +447,12 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
                 self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                 return None
 
-            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
+            return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger)
 
         else:
             self._cookie_counts['other'] += 1
             # other prefixes are considered 'old data' which were stored as plaintext
-            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
+            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
             return encrypted_value
@@ -461,7 +472,7 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
                 self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                 return None
 
-            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
+            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
             #   kNonceLength
             nonce_length = 96 // 8
             # boringssl
@@ -478,23 +489,27 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
         else:
             self._cookie_counts['other'] += 1
             # any other prefix means the data is DPAPI encrypted
-            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
+            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
             return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()
 
 
 def _extract_safari_cookies(profile, logger):
-    if profile is not None:
-        logger.error('safari does not support profiles')
     if sys.platform != 'darwin':
         raise ValueError(f'unsupported platform: {sys.platform}')
 
-    cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
-
-    if not os.path.isfile(cookies_path):
-        logger.debug('Trying secondary cookie location')
-        cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
+    if profile:
+        cookies_path = os.path.expanduser(profile)
         if not os.path.isfile(cookies_path):
-            raise FileNotFoundError('could not find safari cookies database')
+            raise FileNotFoundError('custom safari cookies database not found')
+
+    else:
+        cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
+
+        if not os.path.isfile(cookies_path):
+            logger.debug('Trying secondary cookie location')
+            cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
+            if not os.path.isfile(cookies_path):
+                raise FileNotFoundError('could not find safari cookies database')
 
     with open(cookies_path, 'rb') as f:
         cookies_data = f.read()
@@ -657,19 +672,27 @@ class _LinuxDesktopEnvironment(Enum):
     """
     OTHER = auto()
     CINNAMON = auto()
+    DEEPIN = auto()
     GNOME = auto()
-    KDE = auto()
+    KDE3 = auto()
+    KDE4 = auto()
+    KDE5 = auto()
+    KDE6 = auto()
     PANTHEON = auto()
+    UKUI = auto()
     UNITY = auto()
     XFCE = auto()
+    LXQT = auto()
 
 
 class _LinuxKeyring(Enum):
     """
-    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
+    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
     SelectedLinuxBackend
     """
-    KWALLET = auto()
+    KWALLET = auto()  # KDE4
+    KWALLET5 = auto()
+    KWALLET6 = auto()
     GNOMEKEYRING = auto()
     BASICTEXT = auto()
@@ -677,7 +700,7 @@ class _LinuxKeyring(Enum):
 SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
 
 
-def _get_linux_desktop_environment(env):
+def _get_linux_desktop_environment(env, logger):
     """
     https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
     GetDesktopEnvironment
@@ -692,51 +715,97 @@ def _get_linux_desktop_environment(env, logger):
                 return _LinuxDesktopEnvironment.GNOME
             else:
                 return _LinuxDesktopEnvironment.UNITY
+        elif xdg_current_desktop == 'Deepin':
+            return _LinuxDesktopEnvironment.DEEPIN
         elif xdg_current_desktop == 'GNOME':
             return _LinuxDesktopEnvironment.GNOME
         elif xdg_current_desktop == 'X-Cinnamon':
             return _LinuxDesktopEnvironment.CINNAMON
         elif xdg_current_desktop == 'KDE':
-            return _LinuxDesktopEnvironment.KDE
+            kde_version = env.get('KDE_SESSION_VERSION', None)
+            if kde_version == '5':
+                return _LinuxDesktopEnvironment.KDE5
+            elif kde_version == '6':
+                return _LinuxDesktopEnvironment.KDE6
+            elif kde_version == '4':
+                return _LinuxDesktopEnvironment.KDE4
+            else:
+                logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
+                return _LinuxDesktopEnvironment.KDE4
         elif xdg_current_desktop == 'Pantheon':
             return _LinuxDesktopEnvironment.PANTHEON
         elif xdg_current_desktop == 'XFCE':
             return _LinuxDesktopEnvironment.XFCE
+        elif xdg_current_desktop == 'UKUI':
+            return _LinuxDesktopEnvironment.UKUI
+        elif xdg_current_desktop == 'LXQt':
+            return _LinuxDesktopEnvironment.LXQT
+        else:
+            logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
+
     elif desktop_session is not None:
-        if desktop_session in ('mate', 'gnome'):
+        if desktop_session == 'deepin':
+            return _LinuxDesktopEnvironment.DEEPIN
+        elif desktop_session in ('mate', 'gnome'):
             return _LinuxDesktopEnvironment.GNOME
-        elif 'kde' in desktop_session:
-            return _LinuxDesktopEnvironment.KDE
-        elif 'xfce' in desktop_session:
+        elif desktop_session in ('kde4', 'kde-plasma'):
+            return _LinuxDesktopEnvironment.KDE4
+        elif desktop_session == 'kde':
+            if 'KDE_SESSION_VERSION' in env:
+                return _LinuxDesktopEnvironment.KDE4
+            else:
+                return _LinuxDesktopEnvironment.KDE3
+        elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
            return _LinuxDesktopEnvironment.XFCE
+        elif desktop_session == 'ukui':
+            return _LinuxDesktopEnvironment.UKUI
+        else:
+            logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')
+
     else:
         if 'GNOME_DESKTOP_SESSION_ID' in env:
             return _LinuxDesktopEnvironment.GNOME
         elif 'KDE_FULL_SESSION' in env:
-            return _LinuxDesktopEnvironment.KDE
+            if 'KDE_SESSION_VERSION' in env:
+                return _LinuxDesktopEnvironment.KDE4
+            else:
+                return _LinuxDesktopEnvironment.KDE3
     return _LinuxDesktopEnvironment.OTHER
 
 
 def _choose_linux_keyring(logger):
     """
-    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
-    SelectBackend
+    SelectBackend in [1]
+
+    There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
+    `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
+    does not appear to be called anywhere other than in tests, so the user would have to create this file manually
+    and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.
+
+    References:
+        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
     """
-    desktop_environment = _get_linux_desktop_environment(os.environ)
+    desktop_environment = _get_linux_desktop_environment(os.environ, logger)
     logger.debug(f'detected desktop environment: {desktop_environment.name}')
-    if desktop_environment == _LinuxDesktopEnvironment.KDE:
+    if desktop_environment == _LinuxDesktopEnvironment.KDE4:
         linux_keyring = _LinuxKeyring.KWALLET
-    elif desktop_environment == _LinuxDesktopEnvironment.OTHER:
+    elif desktop_environment == _LinuxDesktopEnvironment.KDE5:
+        linux_keyring = _LinuxKeyring.KWALLET5
+    elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
+        linux_keyring = _LinuxKeyring.KWALLET6
+    elif desktop_environment in (
+        _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
+    ):
         linux_keyring = _LinuxKeyring.BASICTEXT
     else:
         linux_keyring = _LinuxKeyring.GNOMEKEYRING
     return linux_keyring
 
 
-def _get_kwallet_network_wallet(logger):
+def _get_kwallet_network_wallet(keyring, logger):
     """ The name of the wallet used to store network passwords.
 
-    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
+    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
     KWalletDBus::NetworkWallet
     which does a dbus call to the following function:
     https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
@@ -744,10 +813,22 @@ def _get_kwallet_network_wallet(keyring, logger):
     """
     default_wallet = 'kdewallet'
     try:
+        if keyring == _LinuxKeyring.KWALLET:
+            service_name = 'org.kde.kwalletd'
+            wallet_path = '/modules/kwalletd'
+        elif keyring == _LinuxKeyring.KWALLET5:
+            service_name = 'org.kde.kwalletd5'
+            wallet_path = '/modules/kwalletd5'
+        elif keyring == _LinuxKeyring.KWALLET6:
+            service_name = 'org.kde.kwalletd6'
+            wallet_path = '/modules/kwalletd6'
+        else:
+            raise ValueError(keyring)
+
         stdout, _, returncode = Popen.run([
             'dbus-send', '--session', '--print-reply=literal',
-            '--dest=org.kde.kwalletd5',
-            '/modules/kwalletd5',
+            f'--dest={service_name}',
+            wallet_path,
             'org.kde.KWallet.networkWallet'
         ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
@@ -762,8 +843,8 @@ def _get_kwallet_network_wallet(keyring, logger):
         return default_wallet
 
 
-def _get_kwallet_password(browser_keyring_name, logger):
-    logger.debug('using kwallet-query to obtain password from kwallet')
+def _get_kwallet_password(browser_keyring_name, keyring, logger):
+    logger.debug(f'using kwallet-query to obtain password from {keyring.name}')
 
     if shutil.which('kwallet-query') is None:
         logger.error('kwallet-query command not found. KWallet and kwallet-query '
@@ -771,7 +852,7 @@ def _get_kwallet_password(browser_keyring_name, keyring, logger):
                      'included in the kwallet package for your distribution')
         return b''
 
-    network_wallet = _get_kwallet_network_wallet(logger)
+    network_wallet = _get_kwallet_network_wallet(keyring, logger)
 
     try:
         stdout, _, returncode = Popen.run([
@@ -793,8 +874,9 @@ def _get_kwallet_password(browser_keyring_name, keyring, logger):
             # checks hasEntry. To verify this:
             # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
             # while starting chrome.
-            # this may be a bug as the intended behaviour is to generate a random password and store
-            # it, but that doesn't matter here.
+            # this was identified as a bug later and fixed in
+            # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
+            # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
             return b''
         else:
             logger.debug('password found')
@@ -832,8 +914,8 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
     keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
     logger.debug(f'Chosen keyring: {keyring.name}')
 
-    if keyring == _LinuxKeyring.KWALLET:
-        return _get_kwallet_password(browser_keyring_name, logger)
+    if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
+        return _get_kwallet_password(browser_keyring_name, keyring, logger)
     elif keyring == _LinuxKeyring.GNOMEKEYRING:
         return _get_gnome_keyring_password(browser_keyring_name, logger)
     elif keyring == _LinuxKeyring.BASICTEXT:
@@ -861,6 +943,10 @@ def _get_mac_keyring_password(browser_keyring_name, logger):
 
 
 def _get_windows_v10_key(browser_root, logger):
+    """
+    References:
+        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
+    """
     path = _find_most_recently_used_file(browser_root, 'Local State', logger)
     if path is None:
         logger.error('could not find local state file')
@@ -869,11 +955,13 @@ def _get_windows_v10_key(browser_root, logger):
     with open(path, encoding='utf8') as f:
         data = json.load(f)
     try:
+        # kOsCryptEncryptedKeyPrefName in [1]
         base64_key = data['os_crypt']['encrypted_key']
     except KeyError:
         logger.error('no encrypted key in Local State')
         return None
     encrypted_key = base64.b64decode(base64_key)
+    # kDPAPIKeyPrefix in [1]
     prefix = b'DPAPI'
     if not encrypted_key.startswith(prefix):
         logger.error('invalid key')
@@ -885,13 +973,15 @@ def pbkdf2_sha1(password, salt, iterations, key_length):
     return pbkdf2_hmac('sha1', password, salt, iterations, key_length)
 
 
-def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
-    plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
-    try:
-        return plaintext.decode()
-    except UnicodeDecodeError:
-        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
-        return None
+def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
+    for key in keys:
+        plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
+        try:
+            return plaintext.decode()
+        except UnicodeDecodeError:
+            pass
+    logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
+    return None
 
 
 def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
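Note (not part of the commit): a runnable sketch of the multi-key fallback, assuming `pycryptodomex`. Both candidate keys are derived exactly as `derive_key()` does above, and each is tried until the plaintext decodes as UTF-8 (the real code additionally strips PKCS#7 padding via `unpad_pkcs7`):

```python
from hashlib import pbkdf2_hmac
from Cryptodome.Cipher import AES

# v10 key and the new empty-password fallback key (salt/iterations from os_crypt_linux.cc)
v10_key = pbkdf2_hmac('sha1', b'peanuts', b'saltysalt', 1, 16)
empty_key = pbkdf2_hmac('sha1', b'', b'saltysalt', 1, 16)

def try_decrypt(ciphertext, keys=(v10_key, empty_key), iv=b' ' * 16):
    for key in keys:
        plaintext = AES.new(key, AES.MODE_CBC, iv).decrypt(ciphertext)
        try:
            return plaintext.decode()   # a wrong key usually fails UTF-8 decoding here
        except UnicodeDecodeError:
            continue
    return None
```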
@@ -1085,3 +1175,150 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
                 else:
                     morsel = None
+
+
+class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
+    """
+    See [1] for cookie file format.
+
+    1. https://curl.haxx.se/docs/http-cookies.html
+    """
+    _HTTPONLY_PREFIX = '#HttpOnly_'
+    _ENTRY_LEN = 7
+    _HEADER = '''# Netscape HTTP Cookie File
+# This file is generated by yt-dlp. Do not edit.
+
+'''
+    _CookieFileEntry = collections.namedtuple(
+        'CookieFileEntry',
+        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
+
+    def __init__(self, filename=None, *args, **kwargs):
+        super().__init__(None, *args, **kwargs)
+        if is_path_like(filename):
+            filename = os.fspath(filename)
+        self.filename = filename
+
+    @staticmethod
+    def _true_or_false(cndn):
+        return 'TRUE' if cndn else 'FALSE'
+
+    @contextlib.contextmanager
+    def open(self, file, *, write=False):
+        if is_path_like(file):
+            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
+                yield f
+        else:
+            if write:
+                file.truncate(0)
+            yield file
+
+    def _really_save(self, f, ignore_discard, ignore_expires):
+        now = time.time()
+        for cookie in self:
+            if (not ignore_discard and cookie.discard
+                    or not ignore_expires and cookie.is_expired(now)):
+                continue
+            name, value = cookie.name, cookie.value
+            if value is None:
+                # cookies.txt regards 'Set-Cookie: foo' as a cookie
+                # with no name, whereas http.cookiejar regards it as a
+                # cookie with no value.
+                name, value = '', name
+            f.write('%s\n' % '\t'.join((
+                cookie.domain,
+                self._true_or_false(cookie.domain.startswith('.')),
+                cookie.path,
+                self._true_or_false(cookie.secure),
+                str_or_none(cookie.expires, default=''),
+                name, value
+            )))
+
+    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
+        """
+        Save cookies to a file.
+        Code is taken from CPython 3.6
+        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
+        if filename is None:
+            if self.filename is not None:
+                filename = self.filename
+            else:
+                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
+
+        # Store session cookies with `expires` set to 0 instead of an empty string
+        for cookie in self:
+            if cookie.expires is None:
+                cookie.expires = 0
+
+        with self.open(filename, write=True) as f:
+            f.write(self._HEADER)
+            self._really_save(f, ignore_discard, ignore_expires)
+
+    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
+        """Load cookies from a file."""
+        if filename is None:
+            if self.filename is not None:
+                filename = self.filename
+            else:
+                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
+
+        def prepare_line(line):
+            if line.startswith(self._HTTPONLY_PREFIX):
+                line = line[len(self._HTTPONLY_PREFIX):]
+            # comments and empty lines are fine
+            if line.startswith('#') or not line.strip():
+                return line
+            cookie_list = line.split('\t')
+            if len(cookie_list) != self._ENTRY_LEN:
+                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
+            cookie = self._CookieFileEntry(*cookie_list)
+            if cookie.expires_at and not cookie.expires_at.isdigit():
+                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
+            return line
+
+        cf = io.StringIO()
+        with self.open(filename) as f:
+            for line in f:
+                try:
+                    cf.write(prepare_line(line))
+                except http.cookiejar.LoadError as e:
+                    if f'{line.strip()} '[0] in '[{"':
+                        raise http.cookiejar.LoadError(
+                            'Cookies file must be Netscape formatted, not JSON. See '
+                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
+                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
+                    continue
+        cf.seek(0)
+        self._really_load(cf, filename, ignore_discard, ignore_expires)
+        # Session cookies are denoted by either `expires` field set to
+        # an empty string or 0. MozillaCookieJar only recognizes the former
+        # (see [1]). So we need force the latter to be recognized as session
+        # cookies on our own.
+        # Session cookies may be important for cookies-based authentication,
+        # e.g. usually, when user does not check 'Remember me' check box while
+        # logging in on a site, some important cookies are stored as session
+        # cookies so that not recognizing them will result in failed login.
+        # 1. https://bugs.python.org/issue17164
+        for cookie in self:
+            # Treat `expires=0` cookies as session cookies
+            if cookie.expires == 0:
+                cookie.expires = None
+                cookie.discard = True
+
+    def get_cookie_header(self, url):
+        """Generate a Cookie HTTP header for a given url"""
+        cookie_req = urllib.request.Request(normalize_url(sanitize_url(url)))
+        self.add_cookie_header(cookie_req)
+        return cookie_req.get_header('Cookie')
+
+    def get_cookies_for_url(self, url):
+        """Generate a list of Cookie objects for a given url"""
+        # Policy `_now` attribute must be set before calling `_cookies_for_request`
+        # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
+        self._policy._now = self._now = int(time.time())
+        return self._cookies_for_request(urllib.request.Request(normalize_url(sanitize_url(url))))
+
+    def clear(self, *args, **kwargs):
+        with contextlib.suppress(KeyError):
+            return super().clear(*args, **kwargs)
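Note (not part of the commit): hypothetical usage of the relocated jar and its new helpers. The file name is illustrative, and `load()` now defaults to keeping session and expired cookies:

```python
from yt_dlp.cookies import YoutubeDLCookieJar

jar = YoutubeDLCookieJar('cookies.txt')   # a Netscape-format cookies file
jar.load()
print(jar.get_cookie_header('https://example.com/'))
print(jar.get_cookies_for_url('https://example.com/'))
```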

View File

@@ -1,6 +1,3 @@
-import types
-
-from ..compat import functools
 from ..compat.compat_utils import passthrough_module
 
 try:
@@ -9,22 +6,33 @@ except ImportError:
     try:
         import Crypto as _parent
     except (ImportError, SyntaxError):  # Old Crypto gives SyntaxError in newer Python
-        _parent = types.ModuleType('no_Cryptodome')
+        _parent = passthrough_module(__name__, 'no_Cryptodome')
         __bool__ = lambda: False
 
-passthrough_module(__name__, _parent, (..., '__version__'))
 del passthrough_module
 
+__version__ = ''
+AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None
+try:
+    if _parent.__name__ == 'Cryptodome':
+        from Cryptodome import __version__
+        from Cryptodome.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5
+        from Cryptodome.Hash import CMAC, SHA1
+        from Cryptodome.PublicKey import RSA
+    elif _parent.__name__ == 'Crypto':
+        from Crypto import __version__
+        from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5  # noqa: F401
+        from Crypto.Hash import CMAC, SHA1  # noqa: F401
+        from Crypto.PublicKey import RSA  # noqa: F401
+except ImportError:
+    __version__ = f'broken {__version__}'.strip()
 
-@property
-@functools.cache
-def _yt_dlp__identifier():
-    if _parent.__name__ == 'Crypto':
-        from Crypto.Cipher import AES
-        try:
-            # In pycrypto, mode defaults to ECB. See:
-            # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode
-            AES.new(b'abcdefghijklmnop')
-        except TypeError:
-            return 'pycrypto'
-    return _parent.__name__
+
+_yt_dlp__identifier = _parent.__name__
+if AES and _yt_dlp__identifier == 'Crypto':
+    try:
+        # In pycrypto, mode defaults to ECB. See:
+        # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode
+        AES.new(b'abcdefghijklmnop')
+    except TypeError:
+        _yt_dlp__identifier = 'pycrypto'
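Note (not part of the commit): a sketch of how callers now probe the shim. The submodule exposes the ciphers directly, and `AES` is `None` when neither `pycryptodomex` nor the legacy `Crypto` package imports:

```python
from yt_dlp.dependencies import Cryptodome

if Cryptodome.AES:
    print(Cryptodome._yt_dlp__identifier, Cryptodome.__version__)
else:
    print('no usable Cryptodome/Crypto installation')
```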

View File

@@ -73,7 +73,7 @@ available_dependencies = {k: v for k, v in all_dependencies.items() if v}

# Deprecated
-Cryptodome_AES = Cryptodome.Cipher.AES if Cryptodome else None
+Cryptodome_AES = Cryptodome.AES


__all__ = [
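
A short sketch of the access pattern this deprecation points to: with the rewritten module above, callers test the individual primitive (None when no backend is installed) instead of the package object itself. Names are from the diff; the key is illustrative:

# Illustrative only: a missing pycryptodome(x) leaves Cryptodome.AES as None
# rather than making the whole Cryptodome module falsy.
from yt_dlp.dependencies import Cryptodome

if Cryptodome.AES:
    cipher = Cryptodome.AES.new(b'0123456789abcdef', Cryptodome.AES.MODE_ECB)
else:
    print('pycryptodome(x) is not available')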

View File

@@ -30,7 +30,7 @@ from .hls import HlsFD
from .http import HttpFD
from .ism import IsmFD
from .mhtml import MhtmlFD
-from .niconico import NiconicoDmcFD
+from .niconico import NiconicoDmcFD, NiconicoLiveFD
from .rtmp import RtmpFD
from .rtsp import RtspFD
from .websocket import WebSocketFragmentFD
@@ -50,6 +50,7 @@ PROTOCOL_MAP = {
    'ism': IsmFD,
    'mhtml': MhtmlFD,
    'niconico_dmc': NiconicoDmcFD,
+    'niconico_live': NiconicoLiveFD,
    'fc2_live': FC2LiveFD,
    'websocket_frag': WebSocketFragmentFD,
    'youtube_live_chat': YoutubeLiveChatFD,
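
A quick sketch of how this registry is consumed (assuming PROTOCOL_MAP keeps its module-level location): the downloader for an info dict is looked up by its protocol key, so the new entry routes 'niconico_live' to NiconicoLiveFD:

# Hedged sketch, not part of the diff: protocol-keyed downloader lookup.
from yt_dlp.downloader import PROTOCOL_MAP

fd_cls = PROTOCOL_MAP['niconico_live']
print(fd_cls.__name__)  # -> NiconicoLiveFD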

View File

@@ -49,10 +49,10 @@ class FileDownloader:
    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    ratelimit:          Download speed limit, in bytes/sec.
-    continuedl:         Attempt to continue downloads if possible
    throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
-    retries:            Number of times to retry for HTTP error 5xx
-    file_access_retries:   Number of times to retry on file access error
+    retries:            Number of times to retry for expected network errors.
+                        Default is 0 for API, but 10 for CLI
+    file_access_retries:   Number of times to retry on file access error (default: 3)
    buffersize:         Size of download buffer in bytes.
    noresizebuffer:     Do not automatically resize the download buffer.
    continuedl:         Try to continue downloads if possible.
@@ -138,17 +138,21 @@ class FileDownloader:
    def format_percent(percent):
        return ' N/A%' if percent is None else f'{percent:>5.1f}%'

-    @staticmethod
-    def calc_eta(start, now, total, current):
+    @classmethod
+    def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT):
+        if total is NO_DEFAULT:
+            rate, remaining = start_or_rate, now_or_remaining
+            if None in (rate, remaining):
+                return None
+            return int(float(remaining) / rate)
+
+        start, now = start_or_rate, now_or_remaining
        if total is None:
            return None
        if now is None:
            now = time.time()
-        dif = now - start
-        if current == 0 or dif < 0.001:  # One millisecond
-            return None
-        rate = float(current) / dif
-        return int((float(total) - float(current)) / rate)
+        rate = cls.calc_speed(start, now, current)
+        return rate and int((float(total) - float(current)) / rate)

    @staticmethod
    def calc_speed(start, now, bytes):
@@ -165,6 +169,12 @@ class FileDownloader:
    def format_retries(retries):
        return 'inf' if retries == float('inf') else int(retries)

+    @staticmethod
+    def filesize_or_none(unencoded_filename):
+        if os.path.isfile(unencoded_filename):
+            return os.path.getsize(unencoded_filename)
+        return 0
+
    @staticmethod
    def best_block_size(elapsed_time, bytes):
        new_min = max(bytes / 2.0, 1.0)
@@ -225,7 +235,7 @@ class FileDownloader:
                sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))

        def wrapper(self, func, *args, **kwargs):
-            for retry in RetryManager(self.params.get('file_access_retries'), error_callback, fd=self):
+            for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self):
                try:
                    return func(self, *args, **kwargs)
                except OSError as err:
@@ -245,7 +255,8 @@ class FileDownloader:

    @wrap_file_access('remove')
    def try_remove(self, filename):
-        os.remove(filename)
+        if os.path.isfile(filename):
+            os.remove(filename)

    @wrap_file_access('rename')
    def try_rename(self, old_filename, new_filename):
@@ -285,7 +296,8 @@ class FileDownloader:
            self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
        else:
            self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
-        self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')
+        self._multiline.allow_colors = self.ydl._allow_colors.out and self.ydl._allow_colors.out != 'no_color'
+        self._multiline._HAVE_FULLCAP = self.ydl._allow_colors.out

    def _finish_multiline_status(self):
        self._multiline.end()
@@ -407,7 +419,6 @@ class FileDownloader:
        """Download to a filename using the info from info_dict
        Return True on success and False otherwise
        """
-
        nooverwrites_and_exists = (
            not self.params.get('overwrites', True)
            and os.path.exists(encodeFilename(filename))
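
Since calc_eta now accepts two calling conventions, a small sketch of both (the values are made up; the legacy form derives the rate via calc_speed, the new form takes a precomputed rate and the remaining byte count):

# Hedged sketch of the two calc_eta signatures introduced above.
from yt_dlp.downloader.common import FileDownloader

# Legacy form: (start_time, now, total_bytes, downloaded_bytes)
FileDownloader.calc_eta(1000.0, 1010.0, 50_000_000, 10_000_000)  # -> 40 (seconds)

# New form: (rate_in_bytes_per_sec, remaining_bytes)
FileDownloader.calc_eta(1_000_000, 40_000_000)  # -> 40 (seconds)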

View File

@@ -1,14 +1,16 @@
import enum
import json
-import os.path
+import os
import re
import subprocess
import sys
+import tempfile
import time
import uuid

from .fragment import FragmentFD
from ..compat import functools
+from ..networking import Request
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
from ..utils import (
    Popen,
@@ -23,9 +25,7 @@ from ..utils import (
    encodeArgument,
    encodeFilename,
    find_available_port,
-    handle_youtubedl_headers,
    remove_end,
-    sanitized_Request,
    traverse_obj,
)

@@ -43,6 +43,7 @@ class ExternalFD(FragmentFD):
    def real_download(self, filename, info_dict):
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)
+        self._cookies_tempfile = None

        try:
            started = time.time()
@@ -55,6 +56,9 @@ class ExternalFD(FragmentFD):
            # should take place
            retval = 0
            self.to_screen('[%s] Interrupted by user' % self.get_basename())
+        finally:
+            if self._cookies_tempfile:
+                self.try_remove(self._cookies_tempfile)

        if retval == 0:
            status = {
@@ -126,6 +130,16 @@ class ExternalFD(FragmentFD):
            self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
            keys, *args, **kwargs)

+    def _write_cookies(self):
+        if not self.ydl.cookiejar.filename:
+            tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
+            tmp_cookies.close()
+            self._cookies_tempfile = tmp_cookies.name
+            self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"')
+        # real_download resets _cookies_tempfile; if it's None then save() will write to cookiejar.filename
+        self.ydl.cookiejar.save(self._cookies_tempfile)
+        return self.ydl.cookiejar.filename or self._cookies_tempfile
+
    def _call_downloader(self, tmpfilename, info_dict):
        """ Either overwrite this or implement _make_cmd """
        cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
@@ -176,7 +190,7 @@ class ExternalFD(FragmentFD):
        return 0

    def _call_process(self, cmd, info_dict):
-        return Popen.run(cmd, text=True, stderr=subprocess.PIPE)
+        return Popen.run(cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None)


class CurlFD(ExternalFD):
@@ -185,6 +199,9 @@ class CurlFD(ExternalFD):

    def _make_cmd(self, tmpfilename, info_dict):
        cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
+        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+        if cookie_header:
+            cmd += ['--cookie', cookie_header]
        if info_dict.get('http_headers') is not None:
            for key, val in info_dict['http_headers'].items():
                cmd += ['--header', f'{key}: {val}']
@@ -215,6 +232,9 @@ class AxelFD(ExternalFD):
        if info_dict.get('http_headers') is not None:
            for key, val in info_dict['http_headers'].items():
                cmd += ['-H', f'{key}: {val}']
+        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+        if cookie_header:
+            cmd += ['-H', f'Cookie: {cookie_header}', '--max-redirect=0']
        cmd += self._configuration_args()
        cmd += ['--', info_dict['url']]
        return cmd
@@ -224,7 +244,9 @@ class WgetFD(ExternalFD):
    AVAILABLE_OPT = '--version'

    def _make_cmd(self, tmpfilename, info_dict):
-        cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto']
+        cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
+        if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+            cmd += ['--load-cookies', self._write_cookies()]
        if info_dict.get('http_headers') is not None:
            for key, val in info_dict['http_headers'].items():
                cmd += ['--header', f'{key}: {val}']
@@ -272,7 +294,7 @@ class Aria2cFD(ExternalFD):
        return super()._call_downloader(tmpfilename, info_dict)

    def _make_cmd(self, tmpfilename, info_dict):
-        cmd = [self.exe, '-c',
+        cmd = [self.exe, '-c', '--no-conf',
               '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
               '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
        if 'fragments' in info_dict:
@@ -280,6 +302,8 @@ class Aria2cFD(ExternalFD):
        else:
            cmd += ['--min-split-size', '1M']

+        if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+            cmd += [f'--load-cookies={self._write_cookies()}']
        if info_dict.get('http_headers') is not None:
            for key, val in info_dict['http_headers'].items():
                cmd += ['--header', f'{key}: {val}']
@@ -334,13 +358,12 @@ class Aria2cFD(ExternalFD):
            'method': method,
            'params': [f'token:{rpc_secret}', *params],
        }).encode('utf-8')
-        request = sanitized_Request(
+        request = Request(
            f'http://localhost:{rpc_port}/jsonrpc',
            data=d, headers={
                'Content-Type': 'application/json',
                'Content-Length': f'{len(d)}',
-                'Ytdl-request-proxy': '__noproxy__',
-            })
+            }, proxies={'all': None})
        with self.ydl.urlopen(request) as r:
            resp = json.load(r)
        assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server'
@@ -418,6 +441,14 @@ class HttpieFD(ExternalFD):
        if info_dict.get('http_headers') is not None:
            for key, val in info_dict['http_headers'].items():
                cmd += [f'{key}:{val}']
+
+        # httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
+        # If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
+        # 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
+        # 2: https://httpie.io/docs/cli/sessions
+        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+        if cookie_header:
+            cmd += [f'Cookie:{cookie_header}']
        return cmd
@@ -528,11 +559,16 @@ class FFmpegFD(ExternalFD):

        selected_formats = info_dict.get('requested_formats') or [info_dict]
        for i, fmt in enumerate(selected_formats):
-            if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']):
-                headers_dict = handle_youtubedl_headers(fmt['http_headers'])
+            is_http = re.match(r'^https?://', fmt['url'])
+            cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
+            if cookies:
+                args.extend(['-cookies', ''.join(
+                    f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n'
+                    for cookie in cookies)])
+            if fmt.get('http_headers') and is_http:
                # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
                # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
-                args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in headers_dict.items())])
+                args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())])

            if start_time:
                args += ['-ss', str(start_time)]
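
For reference, a sketch of the value the FFmpegFD branch above builds for ffmpeg's -cookies option; the Cookie object here is constructed by hand purely for illustration:

# Illustrative reproduction of the '-cookies' formatting used above.
import http.cookiejar

cookie = http.cookiejar.Cookie(
    0, 'sid', 'abc123', None, False, '.example.com', True, True,
    '/', True, True, None, False, None, None, {})

arg = ''.join(
    f'{c.name}={c.value}; path={c.path}; domain={c.domain};\r\n'
    for c in [cookie])
# arg == 'sid=abc123; path=/; domain=.example.com;\r\n'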

View File

@@ -3,11 +3,11 @@ import io
import itertools
import struct
import time
-import urllib.error
import urllib.parse

from .fragment import FragmentFD
from ..compat import compat_etree_fromstring
+from ..networking.exceptions import HTTPError
from ..utils import fix_xml_ampersands, xpath_text

@@ -312,7 +312,7 @@ class F4mFD(FragmentFD):
        self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
-        man_url = urlh.geturl()
+        man_url = urlh.url
        # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
        # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
        # and https://github.com/ytdl-org/youtube-dl/issues/7823)
@@ -407,8 +407,8 @@ class F4mFD(FragmentFD):
                    if box_type == b'mdat':
                        self._append_fragment(ctx, box_data)
                        break
-            except urllib.error.HTTPError as err:
-                if live and (err.code == 404 or err.code == 410):
+            except HTTPError as err:
+                if live and (err.status == 404 or err.status == 410):
                    # We didn't keep up with the live window. Continue
                    # with the next available fragment.
                    msg = 'Fragment %d unavailable' % frag_i
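
The err.code to err.status rename recurs throughout this commit; a tiny helper-style sketch of the live-window check above (the helper itself is hypothetical, not part of yt-dlp):

from yt_dlp.networking.exceptions import HTTPError

def is_live_window_miss(err):
    # 404/410 on a live fragment means we fell behind the live window
    return isinstance(err, HTTPError) and err.status in (404, 410)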

View File

@@ -1,24 +1,19 @@
import concurrent.futures
import contextlib
-import http.client
import json
import math
import os
import struct
import time
-import urllib.error

from .common import FileDownloader
from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_os_name
-from ..utils import (
-    DownloadError,
-    RetryManager,
-    encodeFilename,
-    sanitized_Request,
-    traverse_obj,
-)
+from ..networking import Request
+from ..networking.exceptions import HTTPError, IncompleteRead
+from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj
+from ..utils.networking import HTTPHeaderDict


class HttpQuietDownloader(HttpFD):
@@ -34,8 +29,8 @@ class FragmentFD(FileDownloader):

    Available options:

-    fragment_retries:   Number of times to retry a fragment for HTTP error (DASH
-                        and hlsnative only)
+    fragment_retries:   Number of times to retry a fragment for HTTP error
+                        (DASH and hlsnative only). Default is 0 for API, but 10 for CLI
    skip_unavailable_fragments:
                        Skip unavailable fragments (DASH and hlsnative only)
    keep_fragments:     Keep downloaded fragments on disk after downloading is
@@ -75,7 +70,7 @@ class FragmentFD(FileDownloader):

    def _prepare_url(self, info_dict, url):
        headers = info_dict.get('http_headers')
-        return sanitized_Request(url, None, headers) if headers else url
+        return Request(url, None, headers) if headers else url

    def _prepare_and_start_frag_download(self, ctx, info_dict):
        self._prepare_frag_download(ctx)
@@ -121,6 +116,11 @@ class FragmentFD(FileDownloader):
            'request_data': request_data,
            'ctx_id': ctx.get('ctx_id'),
        }
+        frag_resume_len = 0
+        if ctx['dl'].params.get('continuedl', True):
+            frag_resume_len = self.filesize_or_none(self.temp_name(fragment_filename))
+        fragment_info_dict['frag_resume_len'] = ctx['frag_resume_len'] = frag_resume_len
+
        success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict)
        if not success:
            return False
@@ -155,9 +155,7 @@ class FragmentFD(FileDownloader):
        del ctx['fragment_filename_sanitized']

    def _prepare_frag_download(self, ctx):
-        if 'live' not in ctx:
-            ctx['live'] = False
-        if not ctx['live']:
+        if not ctx.setdefault('live', False):
            total_frags_str = '%d' % ctx['total_frags']
            ad_frags = ctx.get('ad_frags', 0)
            if ad_frags:
@@ -170,15 +168,17 @@ class FragmentFD(FileDownloader):
            **self.params,
            'noprogress': True,
            'test': False,
+            'sleep_interval': 0,
+            'max_sleep_interval': 0,
+            'sleep_interval_subtitles': 0,
        })
        tmpfilename = self.temp_name(ctx['filename'])
        open_mode = 'wb'
-        resume_len = 0

        # Establish possible resume length
-        if os.path.isfile(encodeFilename(tmpfilename)):
+        resume_len = self.filesize_or_none(tmpfilename)
+        if resume_len > 0:
            open_mode = 'ab'
-            resume_len = os.path.getsize(encodeFilename(tmpfilename))

        # Should be initialized before ytdl file check
        ctx.update({
@@ -187,7 +187,9 @@ class FragmentFD(FileDownloader):
        })

        if self.__do_ytdl_file(ctx):
-            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
+            ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
+            continuedl = self.params.get('continuedl', True)
+            if continuedl and ytdl_file_exists:
                self._read_ytdl_file(ctx)
                is_corrupt = ctx.get('ytdl_corrupt') is True
                is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
@@ -201,7 +203,12 @@ class FragmentFD(FileDownloader):
                    if 'ytdl_corrupt' in ctx:
                        del ctx['ytdl_corrupt']
                    self._write_ytdl_file(ctx)
+
            else:
+                if not continuedl:
+                    if ytdl_file_exists:
+                        self._read_ytdl_file(ctx)
+                    ctx['fragment_index'] = resume_len = 0
                self._write_ytdl_file(ctx)
                assert ctx['fragment_index'] == 0
@@ -274,12 +281,10 @@ class FragmentFD(FileDownloader):
            else:
                frag_downloaded_bytes = s['downloaded_bytes']
                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
-                if not ctx['live']:
-                    state['eta'] = self.calc_eta(
-                        start, time_now, estimated_size - resume_len,
-                        state['downloaded_bytes'] - resume_len)
                ctx['speed'] = state['speed'] = self.calc_speed(
-                    ctx['fragment_started'], time_now, frag_downloaded_bytes)
+                    ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx.get('frag_resume_len', 0))
+                if not ctx['live']:
+                    state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
            self._hook_progress(state, info_dict)
@@ -290,14 +295,12 @@ class FragmentFD(FileDownloader):

    def _finish_frag_download(self, ctx, info_dict):
        ctx['dest_stream'].close()
        if self.__do_ytdl_file(ctx):
-            ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
-            if os.path.isfile(ytdl_filename):
-                self.try_remove(ytdl_filename)
+            self.try_remove(self.ytdl_filename(ctx['filename']))
        elapsed = time.time() - ctx['started']

        to_file = ctx['tmpfilename'] != '-'
        if to_file:
-            downloaded_bytes = os.path.getsize(encodeFilename(ctx['tmpfilename']))
+            downloaded_bytes = self.filesize_or_none(ctx['tmpfilename'])
        else:
            downloaded_bytes = ctx['complete_frags_downloaded_bytes']
@@ -449,7 +452,7 @@ class FragmentFD(FileDownloader):
            frag_index = ctx['fragment_index'] = fragment['frag_index']
            ctx['last_error'] = None
-            headers = info_dict.get('http_headers', {}).copy()
+            headers = HTTPHeaderDict(info_dict.get('http_headers'))
            byte_range = fragment.get('byte_range')
            if byte_range:
                headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
@@ -466,9 +469,10 @@ class FragmentFD(FileDownloader):
            for retry in RetryManager(self.params.get('fragment_retries'), error_callback):
                try:
                    ctx['fragment_count'] = fragment.get('fragment_count')
-                    if not self._download_fragment(ctx, fragment['url'], info_dict, headers):
+                    if not self._download_fragment(
+                            ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')):
                        return
-                except (urllib.error.HTTPError, http.client.IncompleteRead) as err:
+                except (HTTPError, IncompleteRead) as err:
                    retry.error = err
                    continue
                except DownloadError:  # has own retry settings
@@ -496,7 +500,7 @@ class FragmentFD(FileDownloader):
                    download_fragment(fragment, ctx_copy)
                    return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')

-        self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
+        self.report_warning('The download speed shown is only of one thread. This is a known issue')
        with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
            try:
                for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
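
A sketch of the HTTPHeaderDict semantics the fragment code now relies on (behaviour as shown in this diff: later mappings win, None arguments are skipped, and header names are matched case-insensitively):

from yt_dlp.utils.networking import HTTPHeaderDict

base = {'User-Agent': 'yt-dlp', 'Range': 'bytes=0-'}
headers = HTTPHeaderDict(base, None, {'range': 'bytes=100-199'})
assert headers['Range'] == 'bytes=100-199'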

View File

@@ -28,7 +28,16 @@ class HlsFD(FragmentFD):
    FD_NAME = 'hlsnative'

    @staticmethod
-    def can_download(manifest, info_dict, allow_unplayable_formats=False):
+    def _has_drm(manifest):  # TODO: https://github.com/yt-dlp/yt-dlp/pull/5039
+        return bool(re.search('|'.join((
+            r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://',  # Apple FairPlay
+            r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.apple\.streamingkeydelivery"',  # Apple FairPlay
+            r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.microsoft\.playready"',  # Microsoft PlayReady
+            r'#EXT-X-FAXS-CM:',  # Adobe Flash Access
+        )), manifest))
+
+    @classmethod
+    def can_download(cls, manifest, info_dict, allow_unplayable_formats=False):
        UNSUPPORTED_FEATURES = [
            # r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]
@@ -50,13 +59,15 @@ class HlsFD(FragmentFD):
        ]
        if not allow_unplayable_formats:
            UNSUPPORTED_FEATURES += [
-                r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
+                r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1], but not necessarily DRM
            ]

        def check_results():
            yield not info_dict.get('is_live')
            for feature in UNSUPPORTED_FEATURES:
                yield not re.search(feature, manifest)
+            if not allow_unplayable_formats:
+                yield not cls._has_drm(manifest)
        return all(check_results())

    def real_download(self, filename, info_dict):
@@ -64,13 +75,13 @@ class HlsFD(FragmentFD):
        self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
-        man_url = urlh.geturl()
+        man_url = urlh.url
        s = urlh.read().decode('utf-8', 'ignore')

        can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
        if can_download:
            has_ffmpeg = FFmpegFD.available()
-            no_crypto = not Cryptodome and '#EXT-X-KEY:METHOD=AES-128' in s
+            no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s
            if no_crypto and has_ffmpeg:
                can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available'
            elif no_crypto:
@@ -81,14 +92,13 @@ class HlsFD(FragmentFD):
                message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
                           f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
        if not can_download:
-            has_drm = re.search('|'.join([
-                r'#EXT-X-FAXS-CM:',  # Adobe Flash Access
-                r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://',  # Apple FairPlay
-            ]), s)
-            if has_drm and not self.params.get('allow_unplayable_formats'):
-                self.report_error(
-                    'This video is DRM protected; Try selecting another format with --format or '
-                    'add --check-formats to automatically fallback to the next best format')
+            if self._has_drm(s) and not self.params.get('allow_unplayable_formats'):
+                if info_dict.get('has_drm') and self.params.get('test'):
+                    self.to_screen(f'[{self.FD_NAME}] This format is DRM protected', skip_eol=True)
+                else:
+                    self.report_error(
+                        'This format is DRM protected; Try selecting another format with --format or '
+                        'add --check-formats to automatically fallback to the next best format', tb=False)
                return False
            message = message or 'Unsupported features have been detected'
            fd = FFmpegFD(self.ydl, self.params)
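
Since _has_drm is a plain static check over the manifest text, it is easy to exercise in isolation; a minimal sketch with a fabricated FairPlay key line:

# Illustrative check against a fabricated FairPlay manifest line.
from yt_dlp.downloader.hls import HlsFD

manifest = '#EXTM3U\n#EXT-X-KEY:METHOD=SAMPLE-AES,URI="skd://key-id",IV=0x0\n'
print(HlsFD._has_drm(manifest))  # -> True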

View File

@@ -1,12 +1,14 @@
-import http.client
import os
import random
-import socket
-import ssl
import time
-import urllib.error

from .common import FileDownloader
+from ..networking import Request
+from ..networking.exceptions import (
+    CertificateVerifyError,
+    HTTPError,
+    TransportError,
+)
from ..utils import (
    ContentTooShortError,
    RetryManager,
@@ -16,18 +18,10 @@ from ..utils import (
    encodeFilename,
    int_or_none,
    parse_http_range,
-    sanitized_Request,
    try_call,
    write_xattr,
)
+from ..utils.networking import HTTPHeaderDict

-
-RESPONSE_READ_EXCEPTIONS = (
-    TimeoutError,
-    socket.timeout,  # compat: py < 3.10
-    ConnectionError,
-    ssl.SSLError,
-    http.client.HTTPException
-)

class HttpFD(FileDownloader):
@@ -45,11 +39,8 @@ class HttpFD(FileDownloader):
        ctx.tmpfilename = self.temp_name(filename)
        ctx.stream = None

-        # Do not include the Accept-Encoding header
-        headers = {'Youtubedl-no-compression': 'True'}
-        add_headers = info_dict.get('http_headers')
-        if add_headers:
-            headers.update(add_headers)
+        # Disable compression
+        headers = HTTPHeaderDict({'Accept-Encoding': 'identity'}, info_dict.get('http_headers'))

        is_test = self.params.get('test', False)
        chunk_size = self._TEST_FILE_SIZE if is_test else (
@@ -120,10 +111,10 @@ class HttpFD(FileDownloader):
                if try_call(lambda: range_end >= ctx.content_len):
                    range_end = ctx.content_len - 1

-            request = sanitized_Request(url, request_data, headers)
+            request = Request(url, request_data, headers)
            has_range = range_start is not None
            if has_range:
-                request.add_header('Range', f'bytes={int(range_start)}-{int_or_none(range_end) or ""}')
+                request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}'
            # Establish connection
            try:
                ctx.data = self.ydl.urlopen(request)
@@ -150,20 +141,21 @@ class HttpFD(FileDownloader):
                    # Content-Range is either not present or invalid. Assuming remote webserver is
                    # trying to send the whole file, resume is not possible, so wiping the local file
                    # and performing entire redownload
-                    self.report_unable_to_resume()
+                    elif range_start > 0:
+                        self.report_unable_to_resume()
                    ctx.resume_len = 0
                    ctx.open_mode = 'wb'
-                ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
+                ctx.data_len = ctx.content_len = int_or_none(ctx.data.headers.get('Content-length', None))
-            except urllib.error.HTTPError as err:
-                if err.code == 416:
+            except HTTPError as err:
+                if err.status == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        ctx.data = self.ydl.urlopen(
-                            sanitized_Request(url, request_data, headers))
-                        content_length = ctx.data.info()['Content-Length']
-                    except urllib.error.HTTPError as err:
-                        if err.code < 500 or err.code >= 600:
+                            Request(url, request_data, headers))
+                        content_length = ctx.data.headers['Content-Length']
+                    except HTTPError as err:
+                        if err.status < 500 or err.status >= 600:
                            raise
                    else:
                        # Examine the reported length
@@ -191,17 +183,13 @@ class HttpFD(FileDownloader):
                        ctx.resume_len = 0
                        ctx.open_mode = 'wb'
                        return
-                elif err.code < 500 or err.code >= 600:
+                elif err.status < 500 or err.status >= 600:
                    # Unexpected HTTP error
                    raise
                raise RetryDownload(err)
-            except urllib.error.URLError as err:
-                if isinstance(err.reason, ssl.CertificateError):
-                    raise
-                raise RetryDownload(err)
-            # In urllib.request.AbstractHTTPHandler, the response is partially read on request.
-            # Any errors that occur during this will not be wrapped by URLError
-            except RESPONSE_READ_EXCEPTIONS as err:
+            except CertificateVerifyError:
+                raise
+            except TransportError as err:
                raise RetryDownload(err)

        def close_stream():
@@ -211,7 +199,12 @@ class HttpFD(FileDownloader):
            ctx.stream = None

        def download():
-            data_len = ctx.data.info().get('Content-length', None)
+            data_len = ctx.data.headers.get('Content-length')
+
+            if ctx.data.headers.get('Content-encoding'):
+                # Content-encoding is present, Content-length is not reliable anymore as we are
+                # doing auto decompression. (See: https://github.com/yt-dlp/yt-dlp/pull/6176)
+                data_len = None

            # Range HTTP header may be ignored/unsupported by a webserver
            # (e.g. extractor/scivee.py, extractor/bambuser.py).
@@ -252,7 +245,7 @@ class HttpFD(FileDownloader):
                try:
                    # Download and write
                    data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
-                except RESPONSE_READ_EXCEPTIONS as err:
+                except TransportError as err:
                    retry(err)

                byte_counter += len(data_block)
@@ -333,15 +326,15 @@ class HttpFD(FileDownloader):
                elif speed:
                    ctx.throttle_start = None

-            if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
-                ctx.resume_len = byte_counter
-                # ctx.block_size = block_size
-                raise NextFragment()
-
            if ctx.stream is None:
                self.to_stderr('\n')
                self.report_error('Did not get any data blocks')
                return False
+
+            if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
+                ctx.resume_len = byte_counter
+                raise NextFragment()

            if ctx.tmpfilename != '-':
                ctx.stream.close()
@@ -353,7 +346,7 @@ class HttpFD(FileDownloader):
            # Update file modification time
            if self.params.get('updatetime', True):
-                info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
+                info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None))

            self._hook_progress({
                'downloaded_bytes': byte_counter,
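
The rewritten error handling collapses the old RESPONSE_READ_EXCEPTIONS tuple into TransportError and keeps certificate failures fatal. A hedged sketch of the same policy as a standalone helper (fetch_with_retries is hypothetical, not part of yt-dlp):

from yt_dlp.networking.exceptions import CertificateVerifyError, HTTPError, TransportError

def fetch_with_retries(ydl, request, attempts=3):
    for _ in range(attempts):
        try:
            return ydl.urlopen(request)
        except CertificateVerifyError:
            raise                       # invalid certificates are never retried
        except HTTPError as err:
            if not 500 <= err.status < 600:
                raise                   # only server errors are retried
        except TransportError:
            pass                        # timeouts/resets are retried
    raise RuntimeError('all attempts failed')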

View File

@@ -2,9 +2,9 @@ import binascii
import io
import struct
import time
-import urllib.error

from .fragment import FragmentFD
+from ..networking.exceptions import HTTPError
from ..utils import RetryManager

u8 = struct.Struct('>B')
@@ -271,7 +271,7 @@ class IsmFD(FragmentFD):
                        write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
                        extra_state['ism_track_written'] = True
                    self._append_fragment(ctx, frag_content)
-                except urllib.error.HTTPError as err:
+                except HTTPError as err:
                    retry.error = err
                    continue

View File

@@ -1,8 +1,12 @@
+import json
import threading
+import time

from . import get_suitable_downloader
from .common import FileDownloader
-from ..utils import sanitized_Request
+from .external import FFmpegFD
+from ..networking import Request
+from ..utils import DownloadError, WebSocketsWrapper, str_or_none, try_get


class NiconicoDmcFD(FileDownloader):
@@ -24,7 +28,7 @@ class NiconicoDmcFD(FileDownloader):
        heartbeat_data = heartbeat_info_dict['data'].encode()
        heartbeat_interval = heartbeat_info_dict.get('interval', 30)

-        request = sanitized_Request(heartbeat_url, heartbeat_data)
+        request = Request(heartbeat_url, heartbeat_data)

        def heartbeat():
            try:
@@ -50,3 +54,93 @@ class NiconicoDmcFD(FileDownloader):
            timer[0].cancel()
            download_complete = True
            return success
class NiconicoLiveFD(FileDownloader):
""" Downloads niconico live without being stopped """
def real_download(self, filename, info_dict):
video_id = info_dict['video_id']
ws_url = info_dict['url']
ws_extractor = info_dict['ws']
ws_origin_host = info_dict['origin']
cookies = info_dict.get('cookies')
live_quality = info_dict.get('live_quality', 'high')
live_latency = info_dict.get('live_latency', 'high')
dl = FFmpegFD(self.ydl, self.params or {})
new_info_dict = info_dict.copy()
new_info_dict.update({
'protocol': 'm3u8',
})
def communicate_ws(reconnect):
if reconnect:
ws = WebSocketsWrapper(ws_url, {
'Cookies': str_or_none(cookies) or '',
'Origin': f'https://{ws_origin_host}',
'Accept': '*/*',
'User-Agent': self.params['http_headers']['User-Agent'],
})
if self.ydl.params.get('verbose', False):
self.to_screen('[debug] Sending startWatching request')
ws.send(json.dumps({
'type': 'startWatching',
'data': {
'stream': {
'quality': live_quality,
'protocol': 'hls+fmp4',
'latency': live_latency,
'chasePlay': False
},
'room': {
'protocol': 'webSocket',
'commentable': True
},
'reconnect': True,
}
}))
else:
ws = ws_extractor
with ws:
while True:
recv = ws.recv()
if not recv:
continue
data = json.loads(recv)
if not data or not isinstance(data, dict):
continue
if data.get('type') == 'ping':
# pong back
ws.send(r'{"type":"pong"}')
ws.send(r'{"type":"keepSeat"}')
elif data.get('type') == 'disconnect':
self.write_debug(data)
return True
elif data.get('type') == 'error':
self.write_debug(data)
message = try_get(data, lambda x: x['body']['code'], str) or recv
return DownloadError(message)
elif self.ydl.params.get('verbose', False):
if len(recv) > 100:
recv = recv[:100] + '...'
self.to_screen('[debug] Server said: %s' % recv)
def ws_main():
reconnect = False
while True:
try:
ret = communicate_ws(reconnect)
if ret is True:
return
except BaseException as e:
                    self.to_screen('[%s] %s: Connection error occurred, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e)))
time.sleep(10)
continue
finally:
reconnect = True
thread = threading.Thread(target=ws_main, daemon=True)
thread.start()
return dl.download(filename, new_info_dict)
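
The WebSocket loop above boils down to a small keep-alive protocol; a minimal sketch of the ping handling (handle_message is illustrative only):

import json

def handle_message(ws, raw):
    data = json.loads(raw)
    if data.get('type') == 'ping':
        ws.send(json.dumps({'type': 'pong'}))      # answer the server's ping
        ws.send(json.dumps({'type': 'keepSeat'}))  # keep the viewing slot alive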

View File

@@ -1,8 +1,8 @@
import json
import time
-import urllib.error

from .fragment import FragmentFD
+from ..networking.exceptions import HTTPError
from ..utils import (
    RegexNotFoundError,
    RetryManager,
@@ -10,6 +10,7 @@ from ..utils import (
    int_or_none,
    try_get,
)
+from ..utils.networking import HTTPHeaderDict


class YoutubeLiveChatFD(FragmentFD):
@@ -37,10 +38,7 @@ class YoutubeLiveChatFD(FragmentFD):
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
-            http_headers = info_dict.get('http_headers', {})
-            if headers:
-                http_headers = http_headers.copy()
-                http_headers.update(headers)
+            http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
@@ -129,7 +127,7 @@ class YoutubeLiveChatFD(FragmentFD):
                    or frag_index == 1 and try_refresh_replay_beginning
                    or parse_actions_replay)
                return (True, *func(live_chat_continuation))
-            except urllib.error.HTTPError as err:
+            except HTTPError as err:
                retry.error = err
                continue
        return False, None, None, None

View File

@@ -15,7 +15,6 @@ from .youtube import (  # Youtube is moved to the top to improve performance
    YoutubeSearchURLIE,
    YoutubeMusicSearchURLIE,
    YoutubeSubscriptionsIE,
-    YoutubeStoriesIE,
    YoutubeTruncatedIDIE,
    YoutubeTruncatedURLIE,
    YoutubeYtBeIE,
@@ -102,6 +101,7 @@ from .americastestkitchen import (
    AmericasTestKitchenIE,
    AmericasTestKitchenSeasonIE,
)
+from .anchorfm import AnchorFMEpisodeIE
from .angel import AngelIE
from .anvato import AnvatoIE
from .aol import AolIE
@@ -203,13 +203,18 @@ from .bfmtv import (
    BFMTVLiveIE,
    BFMTVArticleIE,
)
-from .bibeltv import BibelTVIE
+from .bibeltv import (
+    BibelTVLiveIE,
+    BibelTVSeriesIE,
+    BibelTVVideoIE,
+)
from .bigflix import BigflixIE
from .bigo import BigoIE
from .bild import BildIE
from .bilibili import (
    BiliBiliIE,
    BiliBiliBangumiIE,
+    BiliBiliBangumiSeasonIE,
    BiliBiliBangumiMediaIE,
    BiliBiliSearchIE,
    BilibiliCategoryIE,
@@ -238,19 +243,28 @@ from .bleacherreport import (
    BleacherReportIE,
    BleacherReportCMSIE,
)
+from .blerp import BlerpIE
from .blogger import BloggerIE
from .bloomberg import BloombergIE
from .bokecc import BokeCCIE
from .bongacams import BongaCamsIE
from .bostonglobe import BostonGlobeIE
from .box import BoxIE
-from .booyah import BooyahClipsIE
+from .boxcast import BoxCastVideoIE
from .bpb import BpbIE
from .br import (
    BRIE,
    BRMediathekIE,
)
from .bravotv import BravoTVIE
+from .brainpop import (
+    BrainPOPIE,
+    BrainPOPJrIE,
+    BrainPOPELLIE,
+    BrainPOPEspIE,
+    BrainPOPFrIE,
+    BrainPOPIlIE,
+)
from .breakcom import BreakIE
from .breitbart import BreitBartIE
from .brightcove import (
@@ -270,6 +284,10 @@ from .camdemy import (
    CamdemyIE,
    CamdemyFolderIE
)
+from .camfm import (
+    CamFMEpisodeIE,
+    CamFMShowIE
+)
from .cammodels import CamModelsIE
from .camsoda import CamsodaIE
from .camtasia import CamtasiaEmbedIE
@@ -277,12 +295,6 @@ from .camwithher import CamWithHerIE
from .canalalpha import CanalAlphaIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
-from .canvas import (
-    CanvasIE,
-    CanvasEenIE,
-    VrtNUIE,
-    DagelijkseKostIE,
-)
from .carambatv import (
    CarambaTVIE,
    CarambaTVPageIE,
@@ -295,15 +307,18 @@ from .cbc import (
    CBCGemPlaylistIE,
    CBCGemLiveIE,
)
-from .cbs import CBSIE
-from .cbslocal import (
-    CBSLocalIE,
-    CBSLocalArticleIE,
+from .cbs import (
+    CBSIE,
+    ParamountPressExpressIE,
)
from .cbsinteractive import CBSInteractiveIE
from .cbsnews import (
    CBSNewsEmbedIE,
    CBSNewsIE,
+    CBSLocalIE,
+    CBSLocalArticleIE,
+    CBSLocalLiveIE,
+    CBSNewsLiveIE,
    CBSNewsLiveVideoIE,
)
from .cbssports import (
@@ -342,6 +357,7 @@ from .ciscolive import (
)
from .ciscowebex import CiscoWebexIE
from .cjsw import CJSWIE
+from .clipchamp import ClipchampIE
from .cliphunter import CliphunterIE
from .clippit import ClippitIE
from .cliprs import ClipRsIE
@@ -389,9 +405,12 @@ from .crowdbunker import (
    CrowdBunkerIE,
    CrowdBunkerChannelIE,
)
+from .crtvg import CrtvgIE
from .crunchyroll import (
    CrunchyrollBetaIE,
    CrunchyrollBetaShowIE,
+    CrunchyrollMusicIE,
+    CrunchyrollArtistIE,
)
from .cspan import CSpanIE, CSpanCongressIE
from .ctsnews import CtsNewsIE
@@ -408,6 +427,10 @@ from .cybrary import (
    CybraryIE,
    CybraryCourseIE
)
+from .dacast import (
+    DacastVODIE,
+    DacastPlaylistIE,
+)
from .daftsex import DaftsexIE
from .dailymail import DailyMailIE
from .dailymotion import (
@@ -438,6 +461,10 @@ from .deezer import (
)
from .democracynow import DemocracynowIE
from .detik import DetikEmbedIE
+from .dlf import (
+    DLFIE,
+    DLFCorpusIE,
+)
from .dfb import DFBIE
from .dhm import DHMIE
from .digg import DiggIE
@@ -470,6 +497,7 @@ from .dplay import (
    DiscoveryPlusItalyIE,
    DiscoveryPlusItalyShowIE,
    DiscoveryPlusIndiaShowIE,
+    GlobalCyclingNetworkPlusIE,
)
from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
@@ -493,6 +521,7 @@ from .deuxm import (
    DeuxMNewsIE
)
from .digitalconcerthall import DigitalConcertHallIE
+from .discogs import DiscogsReleasePlaylistIE
from .discovery import DiscoveryIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
@@ -507,6 +536,7 @@ from .dw import (
)
from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE
from .ebaumsworld import EbaumsWorldIE
+from .ebay import EbayIE
from .echomsk import EchoMskIE
from .egghead import (
    EggheadCourseIE,
@@ -516,6 +546,7 @@ from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .einthusan import EinthusanIE
from .eitb import EitbIE
+from .elevensports import ElevenSportsIE
from .ellentube import (
    EllenTubeIE,
    EllenTubeVideoIE,
@@ -549,6 +580,7 @@ from .espn import (
    ESPNCricInfoIE,
)
from .esri import EsriVideoIE
+from .ettutv import EttuTvIE
from .europa import EuropaIE, EuroParlWebstreamIE
from .europeantour import EuropeanTourIE
from .eurosport import EurosportIE
@@ -635,6 +667,7 @@ from .funimation import (
    FunimationShowIE,
)
from .funk import FunkIE
+from .funker530 import Funker530IE
from .fusion import FusionIE
from .fuyintv import FuyinTVIE
from .gab import (
@@ -670,10 +703,18 @@ from .gfycat import GfycatIE
from .giantbomb import GiantBombIE
from .giga import GigaIE
from .glide import GlideIE
+from .globalplayer import (
+    GlobalPlayerLiveIE,
+    GlobalPlayerLivePlaylistIE,
+    GlobalPlayerAudioIE,
+    GlobalPlayerAudioEpisodeIE,
+    GlobalPlayerVideoIE
+)
from .globo import (
    GloboIE,
    GloboArticleIE,
)
+from .gmanetwork import GMANetworkVideoIE
from .go import GoIE
from .godtube import GodTubeIE
from .gofile import GofileIE
@@ -705,13 +746,16 @@ from .hearthisat import HearThisAtIE
from .heise import HeiseIE
from .hellporno import HellPornoIE
from .helsinki import HelsinkiIE
-from .hentaistigma import HentaiStigmaIE
from .hgtv import HGTVComShowIE
from .hketv import HKETVIE
from .hidive import HiDiveIE
from .historicfilms import HistoricFilmsIE
from .hitbox import HitboxIE, HitboxLiveIE
from .hitrecord import HitRecordIE
+from .hollywoodreporter import (
+    HollywoodReporterIE,
+    HollywoodReporterPlaylistIE,
+)
from .holodex import HolodexIE
from .hotnewhiphop import HotNewHipHopIE
from .hotstar import (
@@ -723,6 +767,7 @@ from .hotstar import (
)
from .howcast import HowcastIE
from .howstuffworks import HowStuffWorksIE
+from .hrefli import HrefLiRedirectIE
from .hrfensehen import HRFernsehenIE
from .hrti import (
    HRTiIE,
@@ -745,12 +790,14 @@ from .hungama import (
    HungamaAlbumPlaylistIE,
)
from .hypem import HypemIE
+from .hypergryph import MonsterSirenHypergryphMusicIE
from .hytale import HytaleIE
from .icareus import IcareusIE
from .ichinanalive import (
    IchinanaLiveIE,
    IchinanaLiveClipIE,
)
+from .idolplus import IdolPlusIE
from .ign import (
    IGNIE,
    IGNVideoIE,
@@ -835,6 +882,7 @@ from .japandiet import (
from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .joj import JojIE
+from .jstream import JStreamIE
from .jwplatform import JWPlatformIE
from .kakao import KakaoIE
from .kaltura import KalturaIE
@@ -844,7 +892,6 @@ from .karaoketv import KaraoketvIE
from .karrierevideos import KarriereVideosIE
from .keezmovies import KeezMoviesIE
from .kelbyone import KelbyOneIE
-from .ketnet import KetnetIE
from .khanacademy import (
    KhanAcademyIE,
    KhanAcademyUnitIE,
@@ -857,6 +904,7 @@ from .kicker import KickerIE
from .kickstarter import KickStarterIE
from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE
+from .kommunetv import KommunetvIE
from .kompas import KompasVideoIE
from .konserthusetplay import KonserthusetPlayIE
from .koo import KooIE
@@ -908,6 +956,10 @@ from .leeco import (
    LePlaylistIE,
    LetvCloudIE,
)
+from .lefigaro import (
+    LeFigaroVideoEmbedIE,
+    LeFigaroVideoSectionIE,
+)
from .lego import LEGOIE
from .lemonde import LemondeIE
from .lenta import LentaIE
@@ -926,10 +978,6 @@ from .limelight import (
    LimelightChannelIE,
    LimelightChannelListIE,
)
-from .line import (
-    LineLiveIE,
-    LineLiveChannelIE,
-)
from .linkedin import (
    LinkedInIE,
    LinkedInLearningIE,
@@ -956,11 +1004,15 @@ from .lrt import (
    LRTVODIE,
    LRTStreamIE
)
+from .lumni import (
+    LumniIE
+)
from .lynda import (
    LyndaIE,
    LyndaCourseIE
)
from .m6 import M6IE
+from .magellantv import MagellanTVIE
from .magentamusik360 import MagentaMusik360IE
from .mailru import (
    MailRuIE,
@@ -1069,7 +1121,8 @@ from .mojvideo import MojvideoIE
from .morningstar import MorningstarIE
from .motherless import (
    MotherlessIE,
-    MotherlessGroupIE
+    MotherlessGroupIE,
+    MotherlessGalleryIE,
)
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
@@ -1089,6 +1142,7 @@ from .mtv import (
)
from .muenchentv import MuenchenTVIE
from .murrtube import MurrtubeIE, MurrtubeUserIE
+from .museai import MuseAIIE
from .musescore import MuseScoreIE
from .musicdex import (
    MusicdexSongIE,
@@ -1110,6 +1164,7 @@ from .myvi import (
)
from .myvideoge import MyVideoGeIE
from .myvidster import MyVidsterIE
+from .mzaalo import MzaaloIE
from .n1 import (
    N1InfoAssetIE,
    N1InfoIIE,
@@ -1158,6 +1213,7 @@ from .nebula import (
    NebulaSubscriptionsIE,
    NebulaChannelIE,
)
+from .nekohacker import NekoHackerIE
from .nerdcubed import NerdCubedFeedIE
from .netzkino import NetzkinoIE
from .neteasemusic import (
@@ -1206,6 +1262,9 @@ from .nhk import (
    NhkForSchoolBangumiIE,
    NhkForSchoolSubjectIE,
    NhkForSchoolProgramListIE,
+    NhkRadioNewsPageIE,
+    NhkRadiruIE,
+    NhkRadiruLiveIE,
)
from .nhl import NHLIE
from .nick import (
@@ -1225,6 +1284,7 @@ from .niconico import (
    NicovideoSearchIE,
    NicovideoSearchURLIE,
    NicovideoTagURLIE,
+    NiconicoLiveIE,
)
from .ninecninemedia import (
    NineCNineMediaIE,
@@ -1282,6 +1342,7 @@ from .nrl import NRLTVIE
from .ntvcojp import NTVCoJpCUIE
from .ntvde import NTVDeIE
from .ntvru import NTVRuIE
+from .nubilesporn import NubilesPornIE
from .nytimes import (
    NYTimesIE,
    NYTimesArticleIE,
@@ -1292,6 +1353,7 @@ from .nzherald import NZHeraldIE
from .nzonscreen import NZOnScreenIE
from .nzz import NZZIE
from .odatv import OdaTVIE
+from .odkmedia import OnDemandChinaEpisodeIE
from .odnoklassniki import OdnoklassnikiIE
from .oftv import (
    OfTVIE,
@@ -1332,6 +1394,7 @@ from .orf import (
    ORFIPTVIE,
)
from .outsidetv import OutsideTVIE
+from .owncloud import OwnCloudIE
from .packtpub import (
    PacktPubIE,
    PacktPubCourseIE,
@@ -1357,7 +1420,7 @@ from .patreon import (
    PatreonIE,
    PatreonCampaignIE
)
-from .pbs import PBSIE
+from .pbs import PBSIE, PBSKidsIE
from .pearvideo import PearVideoIE
from .peekvids import PeekVidsIE, PlayVidsIE
from .peertube import (
@@ -1375,6 +1438,7 @@ from .periscope import (
    PeriscopeIE,
    PeriscopeUserIE,
)
+from .pgatour import PGATourIE
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
@@ -1432,7 +1496,6 @@ from .polskieradio import (
    PolskieRadioPlayerIE,
    PolskieRadioPodcastIE,
    PolskieRadioPodcastListIE,
-    PolskieRadioRadioKierowcowIE,
)
from .popcorntimes import PopcorntimesIE
from .popcorntv import PopcornTVIE
@@ -1455,6 +1518,7 @@ from .puhutv import (
    PuhuTVIE,
PuhuTVSerieIE, PuhuTVSerieIE,
) )
from .pr0gramm import Pr0grammStaticIE, Pr0grammIE
from .prankcast import PrankCastIE from .prankcast import PrankCastIE
from .premiershiprugby import PremiershipRugbyIE from .premiershiprugby import PremiershipRugbyIE
from .presstv import PressTVIE from .presstv import PressTVIE
@ -1469,6 +1533,7 @@ from .prx import (
) )
from .puls4 import Puls4IE from .puls4 import Puls4IE
from .pyvideo import PyvideoIE from .pyvideo import PyvideoIE
from .qdance import QDanceIE
from .qingting import QingTingIE from .qingting import QingTingIE
from .qqmusic import ( from .qqmusic import (
QQMusicIE, QQMusicIE,
@ -1501,6 +1566,8 @@ from .radlive import (
RadLiveSeasonIE, RadLiveSeasonIE,
) )
from .rai import ( from .rai import (
RaiIE,
RaiCulturaIE,
RaiPlayIE, RaiPlayIE,
RaiPlayLiveIE, RaiPlayLiveIE,
RaiPlayPlaylistIE, RaiPlayPlaylistIE,
@ -1509,13 +1576,16 @@ from .rai import (
RaiPlaySoundPlaylistIE, RaiPlaySoundPlaylistIE,
RaiNewsIE, RaiNewsIE,
RaiSudtirolIE, RaiSudtirolIE,
RaiIE,
) )
from .raywenderlich import ( from .raywenderlich import (
RayWenderlichIE, RayWenderlichIE,
RayWenderlichCourseIE, RayWenderlichCourseIE,
) )
from .rbmaradio import RBMARadioIE from .rbmaradio import RBMARadioIE
from .rbgtum import (
RbgTumIE,
RbgTumCourseIE,
)
from .rcs import ( from .rcs import (
RCSIE, RCSIE,
RCSEmbedsIE, RCSEmbedsIE,
@ -1527,6 +1597,7 @@ from .rcti import (
RCTIPlusTVIE, RCTIPlusTVIE,
) )
from .rds import RDSIE from .rds import RDSIE
from .recurbate import RecurbateIE
from .redbee import ParliamentLiveUKIE, RTBFIE from .redbee import ParliamentLiveUKIE, RTBFIE
from .redbulltv import ( from .redbulltv import (
RedBullTVIE, RedBullTVIE,
@ -1549,6 +1620,7 @@ from .rentv import (
from .restudy import RestudyIE from .restudy import RestudyIE
from .reuters import ReutersIE from .reuters import ReutersIE
from .reverbnation import ReverbNationIE from .reverbnation import ReverbNationIE
from .rheinmaintv import RheinMainTVIE
from .rice import RICEIE from .rice import RICEIE
from .rmcdecouverte import RMCDecouverteIE from .rmcdecouverte import RMCDecouverteIE
from .rockstargames import RockstarGamesIE from .rockstargames import RockstarGamesIE
@ -1563,6 +1635,7 @@ from .rottentomatoes import RottenTomatoesIE
from .rozhlas import ( from .rozhlas import (
RozhlasIE, RozhlasIE,
RozhlasVltavaIE, RozhlasVltavaIE,
MujRozhlasIE,
) )
from .rte import RteIE, RteRadioIE from .rte import RteIE, RteRadioIE
from .rtlnl import ( from .rtlnl import (
@ -1586,6 +1659,11 @@ from .rtnews import (
from .rtp import RTPIE from .rtp import RTPIE
from .rtrfm import RTRFMIE from .rtrfm import RTRFMIE
from .rts import RTSIE from .rts import RTSIE
from .rtvcplay import (
RTVCPlayIE,
RTVCPlayEmbedIE,
RTVCKalturaIE,
)
from .rtve import ( from .rtve import (
RTVEALaCartaIE, RTVEALaCartaIE,
RTVEAudioIE, RTVEAudioIE,
@ -1631,6 +1709,7 @@ from .ruv import (
RuvIE, RuvIE,
RuvSpilaIE RuvSpilaIE
) )
from .s4c import S4CIE
from .safari import ( from .safari import (
SafariIE, SafariIE,
SafariApiIE, SafariApiIE,
@ -1655,6 +1734,7 @@ from .scte import (
) )
from .scrolller import ScrolllerIE from .scrolller import ScrolllerIE
from .seeker import SeekerIE from .seeker import SeekerIE
from .senalcolombia import SenalColombiaLiveIE
from .senategov import SenateISVPIE, SenateGovIE from .senategov import SenateISVPIE, SenateGovIE
from .sendtonews import SendtoNewsIE from .sendtonews import SendtoNewsIE
from .servus import ServusIE from .servus import ServusIE
@ -1752,6 +1832,7 @@ from .spike import (
BellatorIE, BellatorIE,
ParamountNetworkIE, ParamountNetworkIE,
) )
from .stageplus import StagePlusVODConcertIE
from .startrek import StarTrekIE from .startrek import StarTrekIE
from .stitcher import ( from .stitcher import (
StitcherIE, StitcherIE,
@ -1777,6 +1858,10 @@ from .srgssr import (
SRGSSRPlayIE, SRGSSRPlayIE,
) )
from .srmediathek import SRMediathekIE from .srmediathek import SRMediathekIE
from .stacommu import (
StacommuLiveIE,
StacommuVODIE,
)
from .stanfordoc import StanfordOpenClassroomIE from .stanfordoc import StanfordOpenClassroomIE
from .startv import StarTVIE from .startv import StarTVIE
from .steam import ( from .steam import (
@ -1789,7 +1874,6 @@ from .storyfire import (
StoryFireSeriesIE, StoryFireSeriesIE,
) )
from .streamable import StreamableIE from .streamable import StreamableIE
from .streamanity import StreamanityIE
from .streamcloud import StreamcloudIE from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE from .streamcz import StreamCZIE
from .streamff import StreamFFIE from .streamff import StreamFFIE
@ -1827,7 +1911,10 @@ from .teachertube import (
TeacherTubeUserIE, TeacherTubeUserIE,
) )
from .teachingchannel import TeachingChannelIE from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE from .teamcoco import (
TeamcocoIE,
ConanClassicIE,
)
from .teamtreehouse import TeamTreeHouseIE from .teamtreehouse import TeamTreeHouseIE
from .techtalks import TechTalksIE from .techtalks import TechTalksIE
from .ted import ( from .ted import (
@ -1839,6 +1926,7 @@ from .ted import (
from .tele5 import Tele5IE from .tele5 import Tele5IE
from .tele13 import Tele13IE from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE from .telebruxelles import TeleBruxellesIE
from .telecaribe import TelecaribePlayIE
from .telecinco import TelecincoIE from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE from .telegraaf import TelegraafIE
from .telegram import TelegramEmbedIE from .telegram import TelegramEmbedIE
@ -1853,7 +1941,7 @@ from .telequebec import (
) )
from .teletask import TeleTaskIE from .teletask import TeleTaskIE
from .telewebion import TelewebionIE from .telewebion import TelewebionIE
from .tempo import TempoIE from .tempo import TempoIE, IVXPlayerIE
from .tencent import ( from .tencent import (
IflixEpisodeIE, IflixEpisodeIE,
IflixSeriesIE, IflixSeriesIE,
@ -1930,6 +2018,7 @@ from .traileraddict import TrailerAddictIE
from .triller import ( from .triller import (
TrillerIE, TrillerIE,
TrillerUserIE, TrillerUserIE,
TrillerShortIE,
) )
from .trilulilu import TriluliluIE from .trilulilu import TriluliluIE
from .trovo import ( from .trovo import (
@ -1951,10 +2040,9 @@ from .tubitv import (
) )
from .tumblr import TumblrIE from .tumblr import TumblrIE
from .tunein import ( from .tunein import (
TuneInClipIE,
TuneInStationIE, TuneInStationIE,
TuneInProgramIE, TuneInPodcastIE,
TuneInTopicIE, TuneInPodcastEpisodeIE,
TuneInShortenerIE, TuneInShortenerIE,
) )
from .tunepk import TunePkIE from .tunepk import TunePkIE
@ -2022,7 +2110,6 @@ from .tvp import (
) )
from .tvplay import ( from .tvplay import (
TVPlayIE, TVPlayIE,
ViafreeIE,
TVPlayHomeIE, TVPlayHomeIE,
) )
from .tvplayer import TVPlayerIE from .tvplayer import TVPlayerIE
@ -2181,12 +2268,16 @@ from .viu import (
ViuIE, ViuIE,
ViuPlaylistIE, ViuPlaylistIE,
ViuOTTIE, ViuOTTIE,
ViuOTTIndonesiaIE,
) )
from .vk import ( from .vk import (
VKIE, VKIE,
VKUserVideosIE, VKUserVideosIE,
VKWallPostIE, VKWallPostIE,
VKPlayIE,
VKPlayLiveIE,
) )
from .vocaroo import VocarooIE
from .vodlocker import VodlockerIE from .vodlocker import VodlockerIE
from .vodpl import VODPlIE from .vodpl import VODPlIE
from .vodplatform import VODPlatformIE from .vodplatform import VODPlatformIE
@ -2204,7 +2295,12 @@ from .voxmedia import (
VoxMediaVolumeIE, VoxMediaVolumeIE,
VoxMediaIE, VoxMediaIE,
) )
from .vrt import VRTIE from .vrt import (
VRTIE,
VrtNUIE,
KetnetIE,
DagelijkseKostIE,
)
from .vrak import VrakIE from .vrak import VrakIE
from .vrv import ( from .vrv import (
VRVIE, VRVIE,
@ -2255,8 +2351,20 @@ from .weibo import (
WeiboMobileIE WeiboMobileIE
) )
from .weiqitv import WeiqiTVIE from .weiqitv import WeiqiTVIE
from .weverse import (
WeverseIE,
WeverseMediaIE,
WeverseMomentIE,
WeverseLiveTabIE,
WeverseMediaTabIE,
WeverseLiveIE,
)
from .wevidi import WeVidiIE
from .weyyak import WeyyakIE
from .whyp import WhypIE
from .wikimedia import WikimediaIE from .wikimedia import WikimediaIE
from .willow import WillowIE from .willow import WillowIE
from .wimbledon import WimbledonIE
from .wimtv import WimTVIE from .wimtv import WimTVIE
from .whowatch import WhoWatchIE from .whowatch import WhoWatchIE
from .wistia import ( from .wistia import (
@ -2282,6 +2390,12 @@ from .wsj import (
WSJArticleIE, WSJArticleIE,
) )
from .wwe import WWEIE from .wwe import WWEIE
from .wykop import (
WykopDigIE,
WykopDigCommentIE,
WykopPostIE,
WykopPostCommentIE,
)
from .xanimu import XanimuIE from .xanimu import XanimuIE
from .xbef import XBefIE from .xbef import XBefIE
from .xboxclips import XboxClipsIE from .xboxclips import XboxClipsIE
@ -2301,13 +2415,14 @@ from .xnxx import XNXXIE
from .xstream import XstreamIE from .xstream import XstreamIE
from .xtube import XTubeUserIE, XTubeIE from .xtube import XTubeUserIE, XTubeIE
from .xuite import XuiteIE from .xuite import XuiteIE
from .xvideos import XVideosIE from .xvideos import (
XVideosIE,
XVideosQuickiesIE
)
from .xxxymovies import XXXYMoviesIE from .xxxymovies import XXXYMoviesIE
from .yahoo import ( from .yahoo import (
YahooIE, YahooIE,
YahooSearchIE, YahooSearchIE,
YahooGyaOPlayerIE,
YahooGyaOIE,
YahooJapanNewsIE, YahooJapanNewsIE,
) )
from .yandexdisk import YandexDiskIE from .yandexdisk import YandexDiskIE
@ -2325,6 +2440,10 @@ from .yandexvideo import (
ZenYandexChannelIE, ZenYandexChannelIE,
) )
from .yapfiles import YapFilesIE from .yapfiles import YapFilesIE
from .yappy import (
YappyIE,
YappyProfileIE,
)
from .yesjapan import YesJapanIE from .yesjapan import YesJapanIE
from .yinyuetai import YinYueTaiIE from .yinyuetai import YinYueTaiIE
from .yle_areena import YleAreenaIE from .yle_areena import YleAreenaIE
@ -2342,6 +2461,10 @@ from .younow import (
from .youporn import YouPornIE from .youporn import YouPornIE
from .yourporn import YourPornIE from .yourporn import YourPornIE
from .yourupload import YourUploadIE from .yourupload import YourUploadIE
from .zaiko import (
ZaikoIE,
ZaikoETicketIE,
)
from .zapiks import ZapiksIE from .zapiks import ZapiksIE
from .zattoo import ( from .zattoo import (
BBVTVIE, BBVTVIE,
@ -2399,6 +2522,7 @@ from .zingmp3 import (
ZingMp3WeekChartIE, ZingMp3WeekChartIE,
ZingMp3ChartMusicVideoIE, ZingMp3ChartMusicVideoIE,
ZingMp3UserIE, ZingMp3UserIE,
ZingMp3HubIE,
) )
from .zoom import ZoomIE from .zoom import ZoomIE
from .zype import ZypeIE from .zype import ZypeIE

View File

@@ -12,6 +12,7 @@ from ..utils import (
     int_or_none,
     parse_iso8601,
     str_or_none,
+    traverse_obj,
     try_get,
     unescapeHTML,
     update_url_query,
@@ -85,6 +86,15 @@ class ABCIE(InfoExtractor):
             'uploader': 'Behind the News',
             'uploader_id': 'behindthenews',
         }
+    }, {
+        'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
+        'info_dict': {
+            'id': '102520540',
+            'title': 'Wagner Group retreating from Russia, leader Prigozhin to move to Belarus',
+            'ext': 'mp4',
+            'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
+            'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
+        }
     }]
 
     def _real_extract(self, url):
@@ -107,7 +117,7 @@ class ABCIE(InfoExtractor):
             video = True
         if mobj is None:
-            mobj = re.search(r'(?P<type>)"sources": (?P<json_data>\[[^\]]+\]),', webpage)
+            mobj = re.search(r'(?P<type>)"(?:sources|files|renditions)":\s*(?P<json_data>\[[^\]]+\])', webpage)
         if mobj is None:
             mobj = re.search(
                 r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
@@ -121,7 +131,8 @@ class ABCIE(InfoExtractor):
         urls_info = self._parse_json(
             mobj.group('json_data'), video_id, transform_source=js_to_json)
         youtube = mobj.group('type') == 'YouTube'
-        video = mobj.group('type') == 'Video' or urls_info[0]['contentType'] == 'video/mp4'
+        video = mobj.group('type') == 'Video' or traverse_obj(
+            urls_info, (0, ('contentType', 'MIMEType')), get_all=False) == 'video/mp4'
 
         if not isinstance(urls_info, list):
             urls_info = [urls_info]

View File

@@ -22,80 +22,23 @@ from ..utils import (
     int_or_none,
     intlist_to_bytes,
     OnDemandPagedList,
-    request_to_url,
     time_seconds,
     traverse_obj,
     update_url_query,
 )
 
-# NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862)
-def add_opener(ydl, handler):
-    ''' Add a handler for opening URLs, like _download_webpage '''
+def add_opener(ydl, handler):  # FIXME: Create proper API in .networking
+    """Add a handler for opening URLs, like _download_webpage"""
     # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
     # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
-    assert isinstance(ydl._opener, urllib.request.OpenerDirector)
-    ydl._opener.add_handler(handler)
-
-
-def remove_opener(ydl, handler):
-    '''
-    Remove handler(s) for opening URLs
-    @param handler Either handler object itself or handler type.
-    Specifying handler type will remove all handler which isinstance returns True.
-    '''
-    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
-    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
-    opener = ydl._opener
-    assert isinstance(ydl._opener, urllib.request.OpenerDirector)
-    if isinstance(handler, (type, tuple)):
-        find_cp = lambda x: isinstance(x, handler)
-    else:
-        find_cp = lambda x: x is handler
-    removed = []
-    for meth in dir(handler):
-        if meth in ["redirect_request", "do_open", "proxy_open"]:
-            # oops, coincidental match
-            continue
-        i = meth.find("_")
-        protocol = meth[:i]
-        condition = meth[i + 1:]
-        if condition.startswith("error"):
-            j = condition.find("_") + i + 1
-            kind = meth[j + 1:]
-            try:
-                kind = int(kind)
-            except ValueError:
-                pass
-            lookup = opener.handle_error.get(protocol, {})
-            opener.handle_error[protocol] = lookup
-        elif condition == "open":
-            kind = protocol
-            lookup = opener.handle_open
-        elif condition == "response":
-            kind = protocol
-            lookup = opener.process_response
-        elif condition == "request":
-            kind = protocol
-            lookup = opener.process_request
-        else:
-            continue
-        handlers = lookup.setdefault(kind, [])
-        if handlers:
-            handlers[:] = [x for x in handlers if not find_cp(x)]
-            removed.append(x for x in handlers if find_cp(x))
-    if removed:
-        for x in opener.handlers:
-            if find_cp(x):
-                x.add_parent(None)
-        opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
+    rh = ydl._request_director.handlers['Urllib']
+    if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
+        return
+    opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies)
+    assert isinstance(opener, urllib.request.OpenerDirector)
+    opener.add_handler(handler)
+    rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
 
 
 class AbemaLicenseHandler(urllib.request.BaseHandler):
@@ -137,11 +80,11 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
         return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
 
     def abematv_license_open(self, url):
-        url = request_to_url(url)
+        url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
         ticket = urllib.parse.urlparse(url).netloc
         response_data = self._get_videokey_from_ticket(ticket)
         return urllib.response.addinfourl(io.BytesIO(response_data), headers={
-            'Content-Length': len(response_data),
+            'Content-Length': str(len(response_data)),
         }, url=url, code=200)
@@ -213,10 +156,7 @@ class AbemaTVBaseIE(InfoExtractor):
         })
         AbemaTVBaseIE._USERTOKEN = user_data['token']
 
-        # don't allow adding it 2 times or more, though it's guarded
-        remove_opener(self._downloader, AbemaLicenseHandler)
         add_opener(self._downloader, AbemaLicenseHandler(self))
-
         return self._USERTOKEN
 
     def _get_media_token(self, invalidate=False, to_show=True):
@@ -436,6 +376,16 @@ class AbemaTVIE(AbemaTVBaseIE):
             if 3 not in ondemand_types:
                 # cannot acquire decryption key for these streams
                 self.report_warning('This is a premium-only stream')
+            info.update(traverse_obj(api_response, {
+                'series': ('series', 'title'),
+                'season': ('season', 'title'),
+                'season_number': ('season', 'sequence'),
+                'episode_number': ('episode', 'number'),
+            }))
+            if not title:
+                title = traverse_obj(api_response, ('episode', 'title'))
+            if not description:
+                description = traverse_obj(api_response, ('episode', 'content'))
 
             m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
         elif video_type == 'slots':

View File

@@ -40,28 +40,33 @@ class ACastBaseIE(InfoExtractor):
 
 class ACastIE(ACastBaseIE):
     IE_NAME = 'acast'
-    _VALID_URL = r'''(?x)
+    _VALID_URL = r'''(?x:
                     https?://
                         (?:
                             (?:(?:embed|www)\.)?acast\.com/|
                             play\.acast\.com/s/
                         )
-                        (?P<channel>[^/]+)/(?P<id>[^/#?]+)
-                    '''
+                        (?P<channel>[^/]+)/(?P<id>[^/#?"]+)
+                    )'''
+    _EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
     _TESTS = [{
         'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
-        'md5': 'f5598f3ad1e4776fed12ec1407153e4b',
         'info_dict': {
             'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
             'ext': 'mp3',
             'title': '2. Raggarmordet - Röster ur det förflutna',
-            'description': 'md5:a992ae67f4d98f1c0141598f7bebbf67',
+            'description': 'md5:013959207e05011ad14a222cf22278cc',
             'timestamp': 1477346700,
             'upload_date': '20161024',
             'duration': 2766,
-            'creator': 'Anton Berg & Martin Johnson',
+            'creator': 'Third Ear Studio',
             'series': 'Spår',
             'episode': '2. Raggarmordet - Röster ur det förflutna',
+            'thumbnail': 'https://assets.pippa.io/shows/616ebe1886d7b1398620b943/616ebe33c7e6e70013cae7da.jpg',
+            'episode_number': 2,
+            'display_id': '2.raggarmordet-rosterurdetforflutna',
+            'season_number': 4,
+            'season': 'Season 4',
         }
     }, {
         'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
@@ -73,6 +78,23 @@ class ACastIE(ACastBaseIE):
         'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
         'only_matching': True,
     }]
+    _WEBPAGE_TESTS = [{
+        'url': 'https://ausi.anu.edu.au/news/democracy-sausage-episode-can-labor-be-long-form-government',
+        'info_dict': {
+            'id': '646c68fb21fbf20011e9c651',
+            'ext': 'mp3',
+            'creator': 'The Australian National University',
+            'display_id': 'can-labor-be-a-long-form-government',
+            'duration': 2618,
+            'thumbnail': 'https://assets.pippa.io/shows/6113e8578b4903809f16f7e5/1684821529295-515b9520db9ce53275b995eb302f941c.jpeg',
+            'title': 'Can Labor be a long-form government?',
+            'episode': 'Can Labor be a long-form government?',
+            'upload_date': '20230523',
+            'series': 'Democracy Sausage with Mark Kenny',
+            'timestamp': 1684826362,
+            'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16',
+        }
+    }]
 
     def _real_extract(self, url):
         channel, display_id = self._match_valid_url(url).groups()

View File

@@ -6,10 +6,8 @@ import random
 
 from .common import InfoExtractor
 from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
-from ..compat import (
-    compat_HTTPError,
-    compat_b64decode,
-)
+from ..compat import compat_b64decode
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ass_subtitles_timecode,
     bytes_to_intlist,
@@ -142,9 +140,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
             self._HEADERS = {'authorization': 'Bearer ' + access_token}
         except ExtractorError as e:
             message = None
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                 resp = self._parse_json(
-                    e.cause.read().decode(), None, fatal=False) or {}
+                    e.cause.response.read().decode(), None, fatal=False) or {}
                 message = resp.get('message') or resp.get('code')
             self.report_warning(message or self._LOGIN_ERR_MESSAGE)
@@ -195,14 +193,14 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
                 })
                 break
             except ExtractorError as e:
-                if not isinstance(e.cause, compat_HTTPError):
+                if not isinstance(e.cause, HTTPError):
                     raise e
-                if e.cause.code == 401:
+                if e.cause.status == 401:
                     # This usually goes away with a different random pkcs1pad, so retry
                     continue
-                error = self._parse_json(e.cause.read(), video_id)
+                error = self._parse_json(e.cause.response.read(), video_id)
                 message = error.get('message')
                 if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
                     self.raise_geo_restricted(msg=message)

View File

@@ -2,11 +2,11 @@ import getpass
 import json
 import re
 import time
-import urllib.error
 import xml.etree.ElementTree as etree
 
 from .common import InfoExtractor
 from ..compat import compat_urlparse
+from ..networking.exceptions import HTTPError
 from ..utils import (
     NO_DEFAULT,
     ExtractorError,
@@ -1394,7 +1394,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
         form_page, urlh = form_page_res
         post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
         if not re.match(r'https?://', post_url):
-            post_url = compat_urlparse.urljoin(urlh.geturl(), post_url)
+            post_url = compat_urlparse.urljoin(urlh.url, post_url)
         form_data = self._hidden_inputs(form_page)
         form_data.update(data)
         return self._download_webpage_handle(
@@ -1473,7 +1473,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
         elif 'automatically signed in with' in provider_redirect_page:
             # Seems like comcast is rolling up new way of automatically signing customers
             oauth_redirect_url = self._html_search_regex(
-                r'continue:\s*"(https://oauth.xfinity.com/oauth/authorize\?.+)"', provider_redirect_page,
+                r'continue:\s*"(https://oauth\.xfinity\.com/oauth/authorize\?.+)"', provider_redirect_page,
                 'oauth redirect (signed)')
             # Just need to process the request. No useful data comes back
             self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login')
@@ -1573,7 +1573,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
             }), headers={
                 'Content-Type': 'application/x-www-form-urlencoded'
             })
-        elif mso_id == 'Spectrum':
+        elif mso_id in ('Spectrum', 'Charter_Direct'):
             # Spectrum's login for is dynamically loaded via JS so we need to hardcode the flow
             # as a one-off implementation.
             provider_redirect_page, urlh = provider_redirect_page_res
@@ -1619,7 +1619,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
             hidden_data['history'] = 1
 
             provider_login_page_res = self._download_webpage_handle(
-                urlh.geturl(), video_id, 'Sending first bookend',
+                urlh.url, video_id, 'Sending first bookend',
                 query=hidden_data)
 
             provider_association_redirect, urlh = post_form(
@@ -1629,7 +1629,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
                 })
 
             provider_refresh_redirect_url = extract_redirect_url(
-                provider_association_redirect, url=urlh.geturl())
+                provider_association_redirect, url=urlh.url)
 
             last_bookend_page, urlh = self._download_webpage_handle(
                 provider_refresh_redirect_url, video_id,
@@ -1638,7 +1638,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
             hidden_data['history'] = 3
 
             mvpd_confirm_page_res = self._download_webpage_handle(
-                urlh.geturl(), video_id, 'Sending final bookend',
+                urlh.url, video_id, 'Sending final bookend',
                 query=hidden_data)
 
             post_form(mvpd_confirm_page_res, 'Confirming Login')
@@ -1652,7 +1652,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
             hidden_data['history_val'] = 1
 
             provider_login_redirect_page_res = self._download_webpage_handle(
-                urlh.geturl(), video_id, 'Sending First Bookend',
+                urlh.url, video_id, 'Sending First Bookend',
                 query=hidden_data)
 
             provider_login_redirect_page, urlh = provider_login_redirect_page_res
@@ -1680,7 +1680,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
             })
 
             provider_refresh_redirect_url = extract_redirect_url(
-                provider_association_redirect, url=urlh.geturl())
+                provider_association_redirect, url=urlh.url)
 
             last_bookend_page, urlh = self._download_webpage_handle(
                 provider_refresh_redirect_url, video_id,
@@ -1690,7 +1690,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
             hidden_data['history_val'] = 3
 
             mvpd_confirm_page_res = self._download_webpage_handle(
-                urlh.geturl(), video_id, 'Sending Final Bookend',
+                urlh.url, video_id, 'Sending Final Bookend',
                 query=hidden_data)
 
             post_form(mvpd_confirm_page_res, 'Confirming Login')
@@ -1699,7 +1699,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
             # based redirect that should be followed.
             provider_redirect_page, urlh = provider_redirect_page_res
             provider_refresh_redirect_url = extract_redirect_url(
-                provider_redirect_page, url=urlh.geturl())
+                provider_redirect_page, url=urlh.url)
             if provider_refresh_redirect_url:
                 provider_redirect_page_res = self._download_webpage_handle(
                     provider_refresh_redirect_url, video_id,
@@ -1724,7 +1724,7 @@ class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
                 'requestor_id': requestor_id,
             }), headers=mvpd_headers)
         except ExtractorError as e:
-            if not mso_id and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
+            if not mso_id and isinstance(e.cause, HTTPError) and e.cause.status == 401:
                 raise_mvpd_required()
             raise
         if '<pendingLogout' in session:

View File

@@ -170,8 +170,10 @@ class AdultSwimIE(TurnerBaseIE):
                     continue
                 ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
                 if ext == 'm3u8':
-                    info['formats'].extend(self._extract_m3u8_formats(
-                        asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                        asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+                    info['formats'].extend(fmts)
+                    self._merge_subtitles(subs, target=info['subtitles'])
                 elif ext == 'f4m':
                     continue
                     # info['formats'].extend(self._extract_f4m_formats(

View File

@@ -3,6 +3,8 @@ from ..utils import (
     ExtractorError,
     GeoRestrictedError,
     int_or_none,
+    remove_start,
+    traverse_obj,
     update_url_query,
     urlencode_postdata,
 )
@@ -72,7 +74,14 @@ class AENetworksBaseIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
         requestor_id, brand = self._DOMAIN_MAP[domain]
         result = self._download_json(
             'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
-            filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
+            filter_value, query={'filter[%s]' % filter_key: filter_value})
+        result = traverse_obj(
+            result, ('results',
+                     lambda k, v: k == 0 and v[filter_key] == filter_value),
+            get_all=False)
+        if not result:
+            raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
+                                 video_id=remove_start(filter_value, '/'))
         title = result['title']
         video_id = result['id']
         media_url = result['publicUrl']
@@ -123,7 +132,7 @@ class AENetworksIE(AENetworksBaseIE):
             'skip_download': True,
         },
         'add_ie': ['ThePlatform'],
-        'skip': 'This video is only available for users of participating TV providers.',
+        'skip': 'Geo-restricted - This content is not available in your location.'
    }, {
         'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
         'info_dict': {
@@ -140,6 +149,7 @@ class AENetworksIE(AENetworksBaseIE):
             'skip_download': True,
         },
         'add_ie': ['ThePlatform'],
+        'skip': 'This video is only available for users of participating TV providers.',
     }, {
         'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
         'only_matching': True
@@ -303,6 +313,7 @@ class HistoryTopicIE(AENetworksBaseIE):
 class HistoryPlayerIE(AENetworksBaseIE):
     IE_NAME = 'history:player'
     _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
+    _TESTS = []
 
     def _real_extract(self, url):
         domain, video_id = self._match_valid_url(url).groups()

View File

@@ -1,5 +1,6 @@
 from .common import InfoExtractor
 from .vimeo import VimeoIE
+from ..utils import ExtractorError, traverse_obj, url_or_none
 
 
 class AeonCoIE(InfoExtractor):
@@ -19,22 +20,55 @@ class AeonCoIE(InfoExtractor):
         }
     }, {
         'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
-        'md5': '4e5f3dad9dbda0dbfa2da41a851e631e',
+        'md5': '03582d795382e49f2fd0b427b55de409',
        'info_dict': {
-            'id': '728595228',
+            'id': '759576926',
             'ext': 'mp4',
             'title': 'Wrought',
-            'thumbnail': 'https://i.vimeocdn.com/video/1484618528-c91452611f9a4e4497735a533da60d45b2fe472deb0c880f0afaab0cd2efb22a-d_1280',
-            'uploader': 'Biofilm Productions',
-            'uploader_id': 'user140352216',
-            'uploader_url': 'https://vimeo.com/user140352216',
+            'thumbnail': 'https://i.vimeocdn.com/video/1525599692-84614af88e446612f49ca966cf8f80eab2c73376bedd80555741c521c26f9a3e-d_1280',
+            'uploader': 'Aeon Video',
+            'uploader_id': 'aeonvideo',
+            'uploader_url': 'https://vimeo.com/aeonvideo',
             'duration': 1344
         }
+    }, {
+        'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out',
+        'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b',
+        'info_dict': {
+            'id': 'emyi4z-O0ls',
+            'ext': 'mp4',
+            'title': 'How to outsmart the Prisoner's Dilemma - Lucas Husted',
+            'thumbnail': 'https://i.ytimg.com/vi_webp/emyi4z-O0ls/maxresdefault.webp',
+            'uploader': 'TED-Ed',
+            'uploader_id': '@TEDEd',
+            'uploader_url': 'https://www.youtube.com/@TEDEd',
+            'duration': 344,
+            'upload_date': '20200827',
+            'channel_id': 'UCsooa4yRKGN_zEE8iknghZA',
+            'playable_in_embed': True,
+            'description': 'md5:c0959524f08cb60f96fd010f3dfb17f3',
+            'categories': ['Education'],
+            'like_count': int,
+            'channel': 'TED-Ed',
+            'chapters': 'count:7',
+            'channel_url': 'https://www.youtube.com/channel/UCsooa4yRKGN_zEE8iknghZA',
+            'tags': 'count:26',
+            'availability': 'public',
+            'channel_follower_count': int,
+            'view_count': int,
+            'age_limit': 0,
+            'live_status': 'not_live',
+            'comment_count': int,
+        },
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        vimeo_id = self._search_regex(r'hosterId":\s*"(?P<id>[0-9]+)', webpage, 'vimeo id')
-        vimeo_url = VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{vimeo_id}', 'https://aeon.co')
-        return self.url_result(vimeo_url, VimeoIE)
+        embed_url = traverse_obj(self._yield_json_ld(webpage, video_id), (
+            lambda _, v: v['@type'] == 'VideoObject', 'embedUrl', {url_or_none}), get_all=False)
+        if not embed_url:
+            raise ExtractorError('No embed URL found in webpage')
+        if 'player.vimeo.com' in embed_url:
+            embed_url = VimeoIE._smuggle_referrer(embed_url, 'https://aeon.co/')
+        return self.url_result(embed_url)

View File

@@ -76,59 +76,6 @@ class AfreecaTVIE(InfoExtractor):
             },
         }],
         'skip': 'Video is gone',
-    }, {
-        'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
-        'info_dict': {
-            'id': '18650793',
-            'ext': 'mp4',
-            'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'uploader': '윈아디',
-            'uploader_id': 'badkids',
-            'duration': 107,
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }, {
-        'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
-        'info_dict': {
-            'id': '10481652',
-            'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
-            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
-            'uploader': 'dailyapril',
-            'uploader_id': 'dailyapril',
-            'duration': 6492,
-        },
-        'playlist_count': 2,
-        'playlist': [{
-            'md5': 'd8b7c174568da61d774ef0203159bf97',
-            'info_dict': {
-                'id': '20160502_c4c62b9d_174361386_1',
-                'ext': 'mp4',
-                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
-                'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
-                'uploader': 'dailyapril',
-                'uploader_id': 'dailyapril',
-                'upload_date': '20160502',
-                'duration': 3601,
-            },
-        }, {
-            'md5': '58f2ce7f6044e34439ab2d50612ab02b',
-            'info_dict': {
-                'id': '20160502_39e739bb_174361386_2',
-                'ext': 'mp4',
-                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
-                'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
-                'uploader': 'dailyapril',
-                'uploader_id': 'dailyapril',
-                'upload_date': '20160502',
-                'duration': 2891,
-            },
-        }],
-        'params': {
-            'skip_download': True,
-        },
     }, {
         # non standard key
         'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
@@ -146,8 +93,8 @@ class AfreecaTVIE(InfoExtractor):
             'skip_download': True,
         },
     }, {
-        # PARTIAL_ADULT
-        'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439',
+        # adult content
+        'url': 'https://vod.afreecatv.com/player/97267690',
         'info_dict': {
             'id': '20180327_27901457_202289533_1',
             'ext': 'mp4',
@@ -161,16 +108,25 @@ class AfreecaTVIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
-        'expected_warnings': ['adult content'],
+        'skip': 'The VOD does not exist',
     }, {
         'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
         'only_matching': True,
     }, {
-        'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
-        'only_matching': True,
-    }, {
-        'url': 'http://vod.afreecatv.com/player/15055030',
-        'only_matching': True,
+        'url': 'https://vod.afreecatv.com/player/96753363',
+        'info_dict': {
+            'id': '20230108_9FF5BEE1_244432674_1',
+            'ext': 'mp4',
+            'uploader_id': 'rlantnghks',
+            'uploader': '페이즈으',
+            'duration': 10840,
+            'thumbnail': 'http://videoimg.afreecatv.com/php/SnapshotLoad.php?rowKey=20230108_9FF5BEE1_244432674_1_r',
+            'upload_date': '20230108',
+            'title': '젠지 페이즈',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     @staticmethod
@@ -223,26 +179,21 @@ class AfreecaTVIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        if re.search(r'alert\(["\']This video has been deleted', webpage):
-            raise ExtractorError(
-                'Video %s has been deleted' % video_id, expected=True)
-
-        station_id = self._search_regex(
-            r'nStationNo\s*=\s*(\d+)', webpage, 'station')
-        bbs_id = self._search_regex(
-            r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
-        video_id = self._search_regex(
-            r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
 
         partial_view = False
         adult_view = False
         for _ in range(2):
+            data = self._download_json(
+                'https://api.m.afreecatv.com/station/video/a/view',
+                video_id, headers={'Referer': url}, data=urlencode_postdata({
+                    'nTitleNo': video_id,
+                    'nApiLevel': 10,
+                }))['data']
+            if traverse_obj(data, ('code', {int})) == -6221:
+                raise ExtractorError('The VOD does not exist', expected=True)
             query = {
                 'nTitleNo': video_id,
-                'nStationNo': station_id,
-                'nBbsNo': bbs_id,
+                'nStationNo': data['station_no'],
+                'nBbsNo': data['bbs_no'],
             }
             if partial_view:
                 query['partialView'] = 'SKIP_ADULT'

View File

@@ -191,7 +191,7 @@ query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!,
 class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE):
     IE_NAME = 'amazonminitv:season'
     _VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
-    IE_DESC = 'Amazon MiniTV Series, "minitv:season:" prefix'
+    IE_DESC = 'Amazon MiniTV Season, "minitv:season:" prefix'
     _TESTS = [{
         'url': 'amazonminitv:season:amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
         'playlist_mincount': 6,
@@ -250,6 +250,7 @@ query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonI
 class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE):
     IE_NAME = 'amazonminitv:series'
     _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
+    IE_DESC = 'Amazon MiniTV Series, "minitv:series:" prefix'
     _TESTS = [{
         'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
         'playlist_mincount': 3,

View File

@@ -11,7 +11,7 @@ from ..utils import (
 class AmericasTestKitchenIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
         'md5': 'b861c3e365ac38ad319cfd509c30577f',
@@ -72,6 +72,12 @@ class AmericasTestKitchenIE(InfoExtractor):
     }, {
         'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
         'only_matching': True,
+    }, {
+        'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -100,7 +106,7 @@ class AmericasTestKitchenIE(InfoExtractor):
 class AmericasTestKitchenSeasonIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com(?P<show>/cookscountry)?/episodes/browse/season_(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|(?P<cooks>cooks(?:country|illustrated)))\.com(?:(?:/(?P<show2>cooks(?:country|illustrated)))?(?:/?$|(?<!ated)(?<!ated\.com)/episodes/browse/season_(?P<season>\d+)))'
     _TESTS = [{
         # ATK Season
         'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
@@ -117,29 +123,73 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
             'title': 'Season 12',
         },
         'playlist_count': 13,
+    }, {
+        # America's Test Kitchen Series
+        'url': 'https://www.americastestkitchen.com/',
+        'info_dict': {
+            'id': 'americastestkitchen',
+            'title': 'America\'s Test Kitchen',
+        },
+        'playlist_count': 558,
+    }, {
+        # Cooks Country Series
+        'url': 'https://www.americastestkitchen.com/cookscountry',
+        'info_dict': {
+            'id': 'cookscountry',
+            'title': 'Cook\'s Country',
+        },
+        'playlist_count': 199,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cookscountry/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cookscountry.com',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cooksillustrated/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cooksillustrated.com',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        show_path, season_number = self._match_valid_url(url).group('show', 'id')
-        season_number = int(season_number)
+        season_number, show1, show = self._match_valid_url(url).group('season', 'show', 'show2')
+        show_path = ('/' + show) if show else ''
+        show = show or show1
+        season_number = int_or_none(season_number)
 
-        slug = 'cco' if show_path == '/cookscountry' else 'atk'
+        slug, title = {
+            'americastestkitchen': ('atk', 'America\'s Test Kitchen'),
+            'cookscountry': ('cco', 'Cook\'s Country'),
+            'cooksillustrated': ('cio', 'Cook\'s Illustrated'),
+        }[show]
 
-        season = 'Season %d' % season_number
+        facet_filters = [
+            'search_document_klass:episode',
+            'search_show_slug:' + slug,
+        ]
+
+        if season_number:
+            playlist_id = 'season_%d' % season_number
+            playlist_title = 'Season %d' % season_number
+            facet_filters.append('search_season_list:' + playlist_title)
+        else:
+            playlist_id = show
+            playlist_title = title
 
         season_search = self._download_json(
             'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
-            season, headers={
+            playlist_id, headers={
                 'Origin': 'https://www.americastestkitchen.com',
                 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
                 'X-Algolia-Application-Id': 'Y1FNZXUI30',
             }, query={
-                'facetFilters': json.dumps([
-                    'search_season_list:' + season,
-                    'search_document_klass:episode',
-                    'search_show_slug:' + slug,
-                ]),
-                'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
+                'facetFilters': json.dumps(facet_filters),
+                'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
                'attributesToHighlight': '',
                 'hitsPerPage': 1000,
             })
@@ -162,4 +212,4 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
             }
 
         return self.playlist_result(
-            entries(), 'season_%d' % season_number, season)
+            entries(), playlist_id, playlist_title)

View File

@@ -5,6 +5,7 @@ from ..utils import (
     int_or_none,
     mimetype2ext,
     parse_iso8601,
+    strip_jsonp,
     unified_timestamp,
     url_or_none,
 )
@@ -15,7 +16,7 @@ class AMPIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
     def _extract_feed_info(self, url):
         feed = self._download_json(
             url, None, 'Downloading Akamai AMP feed',
-            'Unable to download Akamai AMP feed')
+            'Unable to download Akamai AMP feed', transform_source=strip_jsonp)
         item = feed.get('channel', {}).get('item')
         if not item:
             raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
@@ -73,8 +74,10 @@ class AMPIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
                     media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
                     video_id, f4m_id='hds', fatal=False))
             elif ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                    media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
             else:
                 formats.append({
                     'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),

View File

@@ -0,0 +1,98 @@
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    float_or_none,
+    int_or_none,
+    str_or_none,
+    traverse_obj,
+    unified_timestamp
+)
+
+
+class AnchorFMEpisodeIE(InfoExtractor):
+    _VALID_URL = r'https?://anchor\.fm/(?P<channel_name>\w+)/(?:embed/)?episodes/[\w-]+-(?P<episode_id>\w+)'
+    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
+    _TESTS = [{
+        'url': 'https://anchor.fm/lovelyti/episodes/Chrisean-Rock-takes-to-twitter-to-announce-shes-pregnant--Blueface-denies-he-is-the-father-e1tpt3d',
+        'info_dict': {
+            'id': 'e1tpt3d',
+            'ext': 'mp3',
+            'title': ' Chrisean Rock takes to twitter to announce she\'s pregnant, Blueface denies he is the father!',
+            'description': 'md5:207d167de3e28ceb4ddc1ebf5a30044c',
+            'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_nologo/1034827/1034827-1658438968460-5f3bfdf3601e8.jpg',
+            'duration': 624.718,
+            'uploader': 'Lovelyti ',
+            'uploader_id': '991541',
+            'channel': 'lovelyti',
+            'modified_date': '20230121',
+            'modified_timestamp': 1674285178,
+            'release_date': '20230121',
+            'release_timestamp': 1674285179,
+            'episode_id': 'e1tpt3d',
+        }
+    }, {
+        # embed url
+        'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd',
+        'info_dict': {
+            'id': 'e1shjqd',
+            'ext': 'mp3',
+            'title': 'S2E75 Perang Bintang di Balik Kasus Ferdy Sambo dan Ismail Bolong',
+            'description': 'md5:9e95ad9293bf00178bf8d33e9cb92c41',
+            'duration': 1042.008,
+            'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
+            'release_date': '20221221',
+            'release_timestamp': 1671595916,
+            'modified_date': '20221221',
+            'modified_timestamp': 1671590834,
+            'channel': 'apakatatempo',
+            'uploader': 'Podcast Tempo',
+            'uploader_id': '2585461',
+            'season': 'Season 2',
+            'season_number': 2,
+            'episode_id': 'e1shjqd',
+        }
+    }]
+
+    _WEBPAGE_TESTS = [{
+        'url': 'https://podcast.tempo.co/podcast/192/perang-bintang-di-balik-kasus-ferdy-sambo-dan-ismail-bolong',
+        'info_dict': {
+            'id': 'e1shjqd',
+            'ext': 'mp3',
+            'release_date': '20221221',
+            'duration': 1042.008,
+            'season': 'Season 2',
+            'modified_timestamp': 1671590834,
+            'uploader_id': '2585461',
+            'modified_date': '20221221',
+            'description': 'md5:9e95ad9293bf00178bf8d33e9cb92c41',
+            'season_number': 2,
+            'title': 'S2E75 Perang Bintang di Balik Kasus Ferdy Sambo dan Ismail Bolong',
+            'release_timestamp': 1671595916,
+            'episode_id': 'e1shjqd',
+            'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
+            'uploader': 'Podcast Tempo',
+            'channel': 'apakatatempo',
+        }
+    }]
+
+    def _real_extract(self, url):
+        channel_name, episode_id = self._match_valid_url(url).group('channel_name', 'episode_id')
+        api_data = self._download_json(f'https://anchor.fm/api/v3/episodes/{episode_id}', episode_id)
+
+        return {
+            'id': episode_id,
+            'title': traverse_obj(api_data, ('episode', 'title')),
+            'url': traverse_obj(api_data, ('episode', 'episodeEnclosureUrl'), ('episodeAudios', 0, 'url')),
+            'ext': 'mp3',
+            'vcodec': 'none',
+            'thumbnail': traverse_obj(api_data, ('episode', 'episodeImage')),
+            'description': clean_html(traverse_obj(api_data, ('episode', ('description', 'descriptionPreview')), get_all=False)),
+            'duration': float_or_none(traverse_obj(api_data, ('episode', 'duration')), 1000),
+            'modified_timestamp': unified_timestamp(traverse_obj(api_data, ('episode', 'modified'))),
+            'release_timestamp': int_or_none(traverse_obj(api_data, ('episode', 'publishOnUnixTimestamp'))),
+            'episode_id': episode_id,
+            'uploader': traverse_obj(api_data, ('creator', 'name')),
+            'uploader_id': str_or_none(traverse_obj(api_data, ('creator', 'userId'))),
+            'season_number': int_or_none(traverse_obj(api_data, ('episode', 'podcastSeasonNumber'))),
+            'channel': channel_name or traverse_obj(api_data, ('creator', 'vanitySlug')),
+        }

View File

@@ -1,8 +1,8 @@
 import urllib.parse

 from .common import InfoExtractor
+from ..networking import HEADRequest
 from ..utils import (
-    HEADRequest,
     ExtractorError,
     determine_ext,
     scale_thumbnails_to_max_format_width,
@@ -121,7 +121,7 @@ class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
         canonical_url = self._request_webpage(
             HEADRequest(url), video_id,
             note='Resolve canonical player URL',
-            errnote='Could not resolve canonical player URL').geturl()
+            errnote='Could not resolve canonical player URL').url

         _, netloc, _, _, query, _ = urllib.parse.urlparse(canonical_url)
         cid = urllib.parse.parse_qs(query)['cid'][0]

View File

@@ -336,7 +336,7 @@ class AnvatoIE(InfoExtractor):
         elif media_format == 'm3u8-variant' or ext == 'm3u8':
             # For some videos the initial m3u8 URL returns JSON instead
             manifest_json = self._download_json(
-                video_url, video_id, note='Downloading manifest JSON', errnote=False)
+                video_url, video_id, note='Downloading manifest JSON', fatal=False)
             if manifest_json:
                 video_url = manifest_json.get('master_m3u8')
                 if not video_url:
@@ -392,14 +392,6 @@ class AnvatoIE(InfoExtractor):
             url = smuggle_url(url, {'token': anvplayer_data['token']})
             yield cls.url_result(url, AnvatoIE, video_id)

-    def _extract_anvato_videos(self, webpage, video_id):
-        anvplayer_data = self._parse_json(
-            self._html_search_regex(
-                self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
-            video_id)
-        return self._get_anvato_videos(
-            anvplayer_data['accessKey'], anvplayer_data['video'], 'default')  # cbslocal token = 'default'
-
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
         self._initialize_geo_bypass({

View File

@@ -1,16 +1,16 @@
 import json
 import re
-import urllib.error
 import urllib.parse

 from .common import InfoExtractor
 from .naver import NaverBaseIE
 from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
-from ..compat import compat_HTTPError, compat_urllib_parse_unquote
+from ..compat import compat_urllib_parse_unquote
+from ..networking import HEADRequest
+from ..networking.exceptions import HTTPError
 from ..utils import (
     KNOWN_EXTENSIONS,
     ExtractorError,
-    HEADRequest,
     bug_reports_message,
     clean_html,
     dict_get,
@@ -899,7 +899,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
                 video_id, note='Fetching archived video file url', expected_status=True)
         except ExtractorError as e:
             # HTTP Error 404 is expected if the video is not saved.
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 404:
                 self.raise_no_formats(
                     'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True)
             else:
@@ -926,7 +926,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
         info['thumbnails'] = self._extract_thumbnails(video_id)

         if urlh:
-            url = compat_urllib_parse_unquote(urlh.geturl())
+            url = compat_urllib_parse_unquote(urlh.url)
             video_file_url_qs = parse_qs(url)
             # Attempt to recover any ext & format info from playback url & response headers
             format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
@@ -1052,7 +1052,7 @@ class VLiveWebArchiveIE(InfoExtractor):
             try:
                 return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs)
             except ExtractorError as e:
-                if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
+                if isinstance(e.cause, HTTPError) and e.cause.status == 404:
                     raise ExtractorError('Page was not archived', expected=True)
                 retry.error = e
                 continue

View File

@@ -13,6 +13,7 @@ from ..utils import (
     try_get,
     unified_strdate,
     unified_timestamp,
+    update_url,
     update_url_query,
     url_or_none,
     xpath_text,
@@ -408,6 +409,23 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
                     (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''

     _TESTS = [{
+        'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
+        'md5': '3fd5fead7a370a819341129c8d713136',
+        'info_dict': {
+            'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
+            'id': '12172961',
+            'title': 'Wolfsland - Die traurigen Schwestern',
+            'description': r're:^Als der Polizeiobermeister Raaben',
+            'duration': 5241,
+            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
+            'timestamp': 1670710500,
+            'upload_date': '20221210',
+            'ext': 'mp4',
+            'age_limit': 12,
+            'episode': 'Wolfsland - Die traurigen Schwestern',
+            'series': 'Filme im MDR'
+        },
+    }, {
         'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
         'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
         'info_dict': {
@@ -424,7 +442,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
         'skip': 'Error',
     }, {
         'url': 'https://www.ardmediathek.de/video/tagesschau-oder-tagesschau-20-00-uhr/das-erste/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
-        'md5': 'f1837e563323b8a642a8ddeff0131f51',
+        'md5': '1e73ded21cb79bac065117e80c81dc88',
         'info_dict': {
             'id': '10049223',
             'ext': 'mp4',
@@ -432,13 +450,11 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
             'timestamp': 1636398000,
             'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
             'upload_date': '20211108',
-        },
-    }, {
-        'url': 'https://www.ardmediathek.de/sendung/beforeigners/beforeigners/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw/1',
-        'playlist_count': 6,
-        'info_dict': {
-            'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw',
-            'title': 'beforeigners/beforeigners/staffel-1',
+            'display_id': 'tagesschau-oder-tagesschau-20-00-uhr/das-erste',
+            'duration': 915,
+            'episode': 'tagesschau, 20:00 Uhr',
+            'series': 'tagesschau',
+            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
         },
     }, {
         'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
@@ -602,6 +618,9 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
                 show {
                     title
                 }
+                image {
+                    src
+                }
                 synopsis
                 title
                 tracking {
@@ -640,6 +659,15 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
             'description': description,
             'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
             'series': try_get(player_page, lambda x: x['show']['title']),
+            'thumbnail': (media_collection.get('_previewImage')
+                          or try_get(player_page, lambda x: update_url(x['image']['src'], query=None, fragment=None))
+                          or self.get_thumbnail_from_html(display_id, url)),
         })
         info.update(self._ARD_extract_episode_info(info['title']))
         return info
+
+    def get_thumbnail_from_html(self, display_id, url):
+        webpage = self._download_webpage(url, display_id, fatal=False) or ''
+        return (
+            self._og_search_thumbnail(webpage, default=None)
+            or self._html_search_meta('thumbnailUrl', webpage, default=None))

View File

@@ -1,5 +1,5 @@
 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -34,8 +34,8 @@ class AtresPlayerIE(InfoExtractor):
     _API_BASE = 'https://api.atresplayer.com/'

     def _handle_error(self, e, code):
-        if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
-            error = self._parse_json(e.cause.read(), None)
+        if isinstance(e.cause, HTTPError) and e.cause.status == code:
+            error = self._parse_json(e.cause.response.read(), None)
             if error.get('error') == 'required_registered':
                 self.raise_login_required()
             raise ExtractorError(error['error_description'], expected=True)

View File

@@ -2,11 +2,11 @@ import functools
 import itertools
 import json
 import re
-import urllib.error
 import xml.etree.ElementTree

 from .common import InfoExtractor
-from ..compat import compat_HTTPError, compat_str, compat_urlparse
+from ..compat import compat_str, compat_urlparse
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     OnDemandPagedList,
@@ -277,7 +277,7 @@ class BBCCoUkIE(InfoExtractor):
             post_url, None, 'Logging in', data=urlencode_postdata(login_form),
             headers={'Referer': self._LOGIN_URL})

-        if self._LOGIN_URL in urlh.geturl():
+        if self._LOGIN_URL in urlh.url:
             error = clean_html(get_element_by_class('form-message', response))
             if error:
                 raise ExtractorError(
@@ -388,8 +388,8 @@ class BBCCoUkIE(InfoExtractor):
                         href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                         m3u8_id=format_id, fatal=False)
                 except ExtractorError as e:
-                    if not (isinstance(e.exc_info[1], urllib.error.HTTPError)
-                            and e.exc_info[1].code in (403, 404)):
+                    if not (isinstance(e.exc_info[1], HTTPError)
+                            and e.exc_info[1].status in (403, 404)):
                         raise
                     fmts = []
                 formats.extend(fmts)
@@ -472,7 +472,7 @@ class BBCCoUkIE(InfoExtractor):
             return programme_id, title, description, duration, formats, subtitles
         except ExtractorError as ee:
-            if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
+            if not (isinstance(ee.cause, HTTPError) and ee.cause.status == 404):
                 raise

         # fallback to legacy playlist
@@ -983,7 +983,7 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
                 # Some playlist URL may fail with 500, at the same time
                 # the other one may work fine (e.g.
                 # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
-                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
+                if isinstance(e.cause, HTTPError) and e.cause.status == 500:
                     continue
                 raise
             if entry:

View File

@@ -1,27 +1,197 @@
+from functools import partial
+
 from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    clean_html,
+    determine_ext,
+    format_field,
+    int_or_none,
+    js_to_json,
+    orderedSet,
+    parse_iso8601,
+    traverse_obj,
+    url_or_none,
+)


-class BibelTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?:crn/)?(?P<id>\d+)'
-    _TESTS = [{
-        'url': 'https://www.bibeltv.de/mediathek/videos/329703-sprachkurs-in-malaiisch',
-        'md5': '252f908192d611de038b8504b08bf97f',
-        'info_dict': {
-            'id': 'ref:329703',
-            'ext': 'mp4',
-            'title': 'Sprachkurs in Malaiisch',
-            'description': 'md5:3e9f197d29ee164714e67351cf737dfe',
-            'timestamp': 1608316701,
-            'uploader_id': '5840105145001',
-            'upload_date': '20201218',
-        }
-    }, {
-        'url': 'https://www.bibeltv.de/mediathek/videos/crn/326374',
-        'only_matching': True,
-    }]
+class BibelTVBaseIE(InfoExtractor):
+    _GEO_COUNTRIES = ['AT', 'CH', 'DE']
+    _GEO_BYPASS = False
+
+    API_URL = 'https://www.bibeltv.de/mediathek/api'
+    AUTH_TOKEN = 'j88bRXY8DsEqJ9xmTdWhrByVi5Hm'
+
+    def _extract_formats_and_subtitles(self, data, crn_id, *, is_live=False):
+        formats = []
+        subtitles = {}
+        for media_url in traverse_obj(data, (..., 'src', {url_or_none})):
+            media_ext = determine_ext(media_url)
+            if media_ext == 'm3u8':
+                m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+                    media_url, crn_id, live=is_live)
+                formats.extend(m3u8_formats)
+                subtitles.update(m3u8_subs)
+            elif media_ext == 'mpd':
+                mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(media_url, crn_id)
+                formats.extend(mpd_formats)
+                subtitles.update(mpd_subs)
+            elif media_ext == 'mp4':
+                formats.append({'url': media_url})
+            else:
+                self.report_warning(f'Unknown format {media_ext!r}')
+
+        return formats, subtitles
+
+    @staticmethod
+    def _extract_base_info(data):
+        return {
+            'id': data['crn'],
+            **traverse_obj(data, {
+                'title': 'title',
+                'description': 'description',
+                'duration': ('duration', {partial(int_or_none, scale=1000)}),
+                'timestamp': ('schedulingStart', {parse_iso8601}),
+                'season_number': 'seasonNumber',
+                'episode_number': 'episodeNumber',
+                'view_count': 'viewCount',
+                'like_count': 'likeCount',
+            }),
+            'thumbnails': orderedSet(traverse_obj(data, ('images', ..., {
+                'url': ('url', {url_or_none}),
+            }))),
+        }
+
+    def _extract_url_info(self, data):
+        return {
+            '_type': 'url',
+            'url': format_field(data, 'slug', 'https://www.bibeltv.de/mediathek/videos/%s'),
+            **self._extract_base_info(data),
+        }
+
+    def _extract_video_info(self, data):
+        crn_id = data['crn']
+
+        if data.get('drm'):
+            self.report_drm(crn_id)
+
+        json_data = self._download_json(
+            format_field(data, 'id', f'{self.API_URL}/video/%s'), crn_id,
+            headers={'Authorization': self.AUTH_TOKEN}, fatal=False,
+            errnote='No formats available') or {}
+
+        formats, subtitles = self._extract_formats_and_subtitles(
+            traverse_obj(json_data, ('video', 'videoUrls', ...)), crn_id)
+
+        return {
+            '_type': 'video',
+            **self._extract_base_info(data),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+
+class BibelTVVideoIE(BibelTVBaseIE):
+    IE_DESC = 'BibelTV single video'
+    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?P<id>\d+)[\w-]+'
+    IE_NAME = 'bibeltv:video'
+
+    _TESTS = [{
+        'url': 'https://www.bibeltv.de/mediathek/videos/344436-alte-wege',
+        'md5': 'ec1c07efe54353780512e8a4103b612e',
+        'info_dict': {
+            'id': '344436',
+            'ext': 'mp4',
+            'title': 'Alte Wege',
+            'description': 'md5:2f4eb7294c9797a47b8fd13cccca22e9',
+            'timestamp': 1677877071,
+            'duration': 150.0,
+            'upload_date': '20230303',
+            'thumbnail': r're:https://bibeltv\.imgix\.net/[\w-]+\.jpg',
+            'episode': 'Episode 1',
+            'episode_number': 1,
+            'view_count': int,
+            'like_count': int,
+        },
+        'params': {
+            'format': '6',
+        },
+    }]

-    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5840105145001/default_default/index.html?videoId=ref:%s'
-
     def _real_extract(self, url):
         crn_id = self._match_id(url)
-        return self.url_result(
-            self.BRIGHTCOVE_URL_TEMPLATE % crn_id, 'BrightcoveNew')
+        video_data = traverse_obj(
+            self._search_nextjs_data(self._download_webpage(url, crn_id), crn_id),
+            ('props', 'pageProps', 'videoPageData', 'videos', 0, {dict}))
+        if not video_data:
+            raise ExtractorError('Missing video data.')
+
+        return self._extract_video_info(video_data)
+
+
+class BibelTVSeriesIE(BibelTVBaseIE):
+    IE_DESC = 'BibelTV series playlist'
+    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/serien/(?P<id>\d+)[\w-]+'
+    IE_NAME = 'bibeltv:series'
+
+    _TESTS = [{
+        'url': 'https://www.bibeltv.de/mediathek/serien/333485-ein-wunder-fuer-jeden-tag',
+        'playlist_mincount': 400,
+        'info_dict': {
+            'id': '333485',
+            'title': 'Ein Wunder für jeden Tag',
+            'description': 'Tägliche Kurzandacht mit Déborah Rosenkranz.',
+        },
+    }]
+
+    def _real_extract(self, url):
+        crn_id = self._match_id(url)
+        webpage = self._download_webpage(url, crn_id)
+        nextjs_data = self._search_nextjs_data(webpage, crn_id)
+        series_data = traverse_obj(nextjs_data, ('props', 'pageProps', 'seriePageData', {dict}))
+        if not series_data:
+            raise ExtractorError('Missing series data.')
+
+        return self.playlist_result(
+            traverse_obj(series_data, ('videos', ..., {dict}, {self._extract_url_info})),
+            crn_id, series_data.get('title'), clean_html(series_data.get('description')))
+
+
+class BibelTVLiveIE(BibelTVBaseIE):
+    IE_DESC = 'BibelTV live program'
+    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/livestreams/(?P<id>[\w-]+)'
+    IE_NAME = 'bibeltv:live'
+
+    _TESTS = [{
+        'url': 'https://www.bibeltv.de/livestreams/bibeltv/',
+        'info_dict': {
+            'id': 'bibeltv',
+            'ext': 'mp4',
+            'title': 're:Bibel TV',
+            'live_status': 'is_live',
+            'thumbnail': 'https://streampreview.bibeltv.de/bibeltv.webp',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.bibeltv.de/livestreams/impuls/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        stream_id = self._match_id(url)
+        webpage = self._download_webpage(url, stream_id)
+        stream_data = self._search_json(
+            r'\\"video\\":', webpage, 'bibeltvData', stream_id,
+            transform_source=lambda jstring: js_to_json(jstring.replace('\\"', '"')))
+
+        formats, subtitles = self._extract_formats_and_subtitles(
+            traverse_obj(stream_data, ('src', ...)), stream_id, is_live=True)
+
+        return {
+            'id': stream_id,
+            'title': stream_data.get('title'),
+            'thumbnail': stream_data.get('poster'),
+            'is_live': True,
+            'formats': formats,
+            'subtitles': subtitles,
+        }

View File

@@ -1,12 +1,14 @@
 import base64
 import functools
+import hashlib
 import itertools
 import math
-import urllib.error
+import time
 import urllib.parse

 from .common import InfoExtractor, SearchInfoExtractor
 from ..dependencies import Cryptodome
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     GeoRestrictedError,
@@ -16,6 +18,7 @@ from ..utils import (
     float_or_none,
     format_field,
     int_or_none,
+    join_nonempty,
     make_archive_id,
     merge_dicts,
     mimetype2ext,
@@ -26,6 +29,8 @@ from ..utils import (
     srt_subtitles_timecode,
     str_or_none,
     traverse_obj,
+    try_call,
+    unified_timestamp,
     unsmuggle_url,
     url_or_none,
     urlencode_postdata,
@@ -81,7 +86,7 @@ class BilibiliBaseIE(InfoExtractor):
                 f'{line["content"]}\n\n')
         return srt_data

-    def _get_subtitles(self, video_id, initial_state, cid):
+    def _get_subtitles(self, video_id, aid, cid):
         subtitles = {
             'danmaku': [{
                 'ext': 'xml',
@@ -89,7 +94,8 @@ class BilibiliBaseIE(InfoExtractor):
             }]
         }

-        for s in traverse_obj(initial_state, ('videoData', 'subtitle', 'list')) or []:
+        video_info_json = self._download_json(f'https://api.bilibili.com/x/player/v2?aid={aid}&cid={cid}', video_id)
+        for s in traverse_obj(video_info_json, ('data', 'subtitle', 'subtitles', ...)):
             subtitles.setdefault(s['lan'], []).append({
                 'ext': 'srt',
                 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
@@ -130,9 +136,20 @@ class BilibiliBaseIE(InfoExtractor):
             for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
                 yield from children

+    def _get_episodes_from_season(self, ss_id, url):
+        season_info = self._download_json(
+            'https://api.bilibili.com/pgc/web/season/section', ss_id,
+            note='Downloading season info', query={'season_id': ss_id},
+            headers={'Referer': url, **self.geo_verification_headers()})
+
+        for entry in traverse_obj(season_info, (
+                'result', 'main_section', 'episodes',
+                lambda _, v: url_or_none(v['share_url']) and v['id'])):
+            yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}')
+

 class BiliBiliIE(BilibiliBaseIE):
-    _VALID_URL = r'https?://www\.bilibili\.com/video/[aAbB][vV](?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'

     _TESTS = [{
         'url': 'https://www.bilibili.com/video/BV13x41117TL',
@@ -280,19 +297,60 @@ class BiliBiliIE(BilibiliBaseIE):
             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
         },
         'params': {'skip_download': True},
+    }, {
+        'note': 'video redirects to festival page',
+        'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
+        'info_dict': {
+            'id': 'BV1wP4y1P72h',
+            'ext': 'mp4',
+            'title': '牛虎年相交之际一首传统民族打击乐《牛斗虎》祝大家新春快乐虎年大吉【bilibili音乐虎闹新春】',
+            'timestamp': 1643947497,
+            'upload_date': '20220204',
+            'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
+            'uploader': '叨叨冯聊音乐',
+            'duration': 246.719,
+            'uploader_id': '528182630',
+            'view_count': int,
+            'like_count': int,
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+        },
+        'params': {'skip_download': True},
+    }, {
+        'note': 'newer festival video',
+        'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
+        'info_dict': {
+            'id': 'BV1ay4y1d77f',
+            'ext': 'mp4',
+            'title': '【崩坏3新春剧场】为特别的你送上祝福',
+            'timestamp': 1674273600,
+            'upload_date': '20230121',
+            'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
+            'uploader': '果蝇轰',
+            'duration': 1111.722,
+            'uploader_id': '8469526',
+            'view_count': int,
+            'like_count': int,
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+        },
+        'params': {'skip_download': True},
     }]

     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
         initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
-        play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']

-        video_data = initial_state['videoData']
+        is_festival = 'videoData' not in initial_state
+        if is_festival:
+            video_data = initial_state['videoInfo']
+        else:
+            play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
+            video_data = initial_state['videoData']
+
         video_id, title = video_data['bvid'], video_data.get('title')

         # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
-        page_list_json = traverse_obj(
+        page_list_json = not is_festival and traverse_obj(
             self._download_json(
                 'https://api.bilibili.com/x/player/pagelist', video_id,
                 fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
@@ -315,99 +373,135 @@ class BiliBiliIE(BilibiliBaseIE):
         cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')

+        festival_info = {}
+        if is_festival:
+            play_info = self._download_json(
+                'https://api.bilibili.com/x/player/playurl', video_id,
+                query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
+                note='Extracting festival video formats')['data']
+
+            festival_info = traverse_obj(initial_state, {
+                'uploader': ('videoInfo', 'upName'),
+                'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
+                'like_count': ('videoStatus', 'like', {int_or_none}),
+                'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
+            }, get_all=False)
+
         return {
+            **traverse_obj(initial_state, {
+                'uploader': ('upData', 'name'),
+                'uploader_id': ('upData', 'mid', {str_or_none}),
+                'like_count': ('videoData', 'stat', 'like', {int_or_none}),
+                'tags': ('tags', ..., 'tag_name'),
+                'thumbnail': ('videoData', 'pic', {url_or_none}),
+            }),
+            **festival_info,
+            **traverse_obj(video_data, {
+                'description': 'desc',
+                'timestamp': ('pubdate', {int_or_none}),
+                'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
+                'comment_count': ('stat', 'reply', {int_or_none}),
+            }, get_all=False),
             'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
             'formats': self.extract_formats(play_info),
             '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
             'title': title,
-            'description': traverse_obj(initial_state, ('videoData', 'desc')),
-            'view_count': traverse_obj(initial_state, ('videoData', 'stat', 'view')),
-            'uploader': traverse_obj(initial_state, ('upData', 'name')),
-            'uploader_id': traverse_obj(initial_state, ('upData', 'mid')),
-            'like_count': traverse_obj(initial_state, ('videoData', 'stat', 'like')),
-            'comment_count': traverse_obj(initial_state, ('videoData', 'stat', 'reply')),
-            'tags': traverse_obj(initial_state, ('tags', ..., 'tag_name')),
-            'thumbnail': traverse_obj(initial_state, ('videoData', 'pic')),
-            'timestamp': traverse_obj(initial_state, ('videoData', 'pubdate')),
             'duration': float_or_none(play_info.get('timelength'), scale=1000),
             'chapters': self._get_chapters(aid, cid),
-            'subtitles': self.extract_subtitles(video_id, initial_state, cid),
+            'subtitles': self.extract_subtitles(video_id, aid, cid),
             '__post_extractor': self.extract_comments(aid),
             'http_headers': {'Referer': url},
         }


 class BiliBiliBangumiIE(BilibiliBaseIE):
-    _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P<id>(?:ss|ep)\d+)'
+    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'

     _TESTS = [{
-        'url': 'https://www.bilibili.com/bangumi/play/ss897',
+        'url': 'https://www.bilibili.com/bangumi/play/ep267851',
         'info_dict': {
-            'id': 'ss897',
+            'id': '267851',
             'ext': 'mp4',
-            'series': '神的记事本',
-            'season': '神的记事本',
-            'season_id': 897,
+            'series': '鬼灭之刃',
+            'series_id': '4358',
+            'season': '鬼灭之刃',
+            'season_id': '26801',
             'season_number': 1,
-            'episode': '你与旅行包',
-            'episode_number': 2,
-            'title': '神的记事本第2话 你与旅行包',
-            'duration': 1428.487,
-            'timestamp': 1310809380,
-            'upload_date': '20110716',
-            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+            'episode': '残酷',
+            'episode_id': '267851',
+            'episode_number': 1,
+            'title': '1 残酷',
+            'duration': 1425.256,
+            'timestamp': 1554566400,
+            'upload_date': '20190406',
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
         },
-    }, {
-        'url': 'https://www.bilibili.com/bangumi/play/ep508406',
-        'only_matching': True,
+        'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
     }]

     def _real_extract(self, url):
         video_id = self._match_id(url)
+        episode_id = video_id[2:]
         webpage = self._download_webpage(url, video_id)

         if '您所在的地区无法观看本片' in webpage:
             raise GeoRestrictedError('This video is restricted')
-        elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage
-                or '正在观看预览,大会员免费看全片' in webpage):
+        elif '正在观看预览,大会员免费看全片' in webpage:
             self.raise_login_required('This video is for premium members only')

-        play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
+        headers = {'Referer': url, **self.geo_verification_headers()}
+        play_info = self._download_json(
+            'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id,
+            'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
+            headers=headers)
+        premium_only = play_info.get('code') == -10403
+        play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
+
         formats = self.extract_formats(play_info)
-        if (not formats and '成为大会员抢先看' in webpage
-                and play_info.get('durl') and not play_info.get('dash')):
+        if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
             self.raise_login_required('This video is for premium members only')

-        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
+        bangumi_info = self._download_json(
+            'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details',
+            query={'ep_id': episode_id}, headers=headers)['result']

-        season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id'))
+        episode_number, episode_info = next((
+            (idx, ep) for idx, ep in enumerate(traverse_obj(
+                bangumi_info, ('episodes', ..., {dict})), 1)
+            if str_or_none(ep.get('id')) == episode_id), (1, {}))
+
+        season_id = bangumi_info.get('season_id')
         season_number = season_id and next((
             idx + 1 for idx, e in enumerate(
-                traverse_obj(initial_state, ('mediaInfo', 'seasons', ...)))
+                traverse_obj(bangumi_info, ('seasons', ...)))
             if e.get('season_id') == season_id
         ), None)

+        aid = episode_info.get('aid')
+
         return {
             'id': video_id,
             'formats': formats,
-            'title': traverse_obj(initial_state, 'h1Title'),
-            'episode': traverse_obj(initial_state, ('epInfo', 'long_title')),
-            'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))),
-            'series': traverse_obj(initial_state, ('mediaInfo', 'series')),
-            'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')),
-            'season_id': season_id,
+            **traverse_obj(bangumi_info, {
+                'series': ('series', 'series_title', {str}),
+                'series_id': ('series', 'series_id', {str_or_none}),
+                'thumbnail': ('square_cover', {url_or_none}),
+            }),
+            'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info),
+            'episode': episode_info.get('long_title'),
+            'episode_id': episode_id,
+            'episode_number': int_or_none(episode_info.get('title')) or episode_number,
+            'season_id': str_or_none(season_id),
             'season_number': season_number,
-            'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')),
-            'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')),
+            'timestamp': int_or_none(episode_info.get('pub_time')),
             'duration': float_or_none(play_info.get('timelength'), scale=1000),
-            'subtitles': self.extract_subtitles(
-                video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))),
-            '__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))),
-            'http_headers': {'Referer': url, **self.geo_verification_headers()},
+            'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')),
+            '__post_extractor': self.extract_comments(aid),
+            'http_headers': headers,
         }


-class BiliBiliBangumiMediaIE(InfoExtractor):
+class BiliBiliBangumiMediaIE(BilibiliBaseIE):
     _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://www.bilibili.com/bangumi/media/md24097891',
@@ -420,16 +514,26 @@ class BiliBiliBangumiMediaIE(InfoExtractor):
     def _real_extract(self, url):
         media_id = self._match_id(url)
         webpage = self._download_webpage(url, media_id)
+        ss_id = self._search_json(
+            r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)['mediaInfo']['season_id']

-        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
-        episode_list = self._download_json(
-            'https://api.bilibili.com/pgc/web/season/section', media_id,
-            query={'season_id': initial_state['mediaInfo']['season_id']},
-            note='Downloading season info')['result']['main_section']['episodes']
+        return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id)

-        return self.playlist_result((
-            self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid'])
-            for entry in episode_list), media_id)
+
+class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
+    _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
+        'info_dict': {
+            'id': '26801'
+        },
+        'playlist_mincount': 26
+    }]
+
+    def _real_extract(self, url):
+        ss_id = self._match_id(url)
+        return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id)


 class BilibiliSpaceBaseIE(InfoExtractor):
@@ -452,21 +556,65 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
             'id': '3985676',
         },
         'playlist_mincount': 178,
+    }, {
+        'url': 'https://space.bilibili.com/313580179/video',
+        'info_dict': {
+            'id': '313580179',
+        },
+        'playlist_mincount': 92,
     }]

+    def _extract_signature(self, playlist_id):
+        session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
+
+        key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
+        img_key = traverse_obj(
+            session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
+        sub_key = traverse_obj(
+            session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
+
+        session_key = img_key + sub_key
+
+        signature_values = []
+        for position in (
+            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
+            12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
+            57, 62, 11, 36, 20, 34, 44, 52
+        ):
+            char_at_position = try_call(lambda: session_key[position])
+            if char_at_position:
+                signature_values.append(char_at_position)
+
+        return ''.join(signature_values)[:32]
+
     def _real_extract(self, url):
         playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
         if not is_video_url:
             self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                            'To download audios, add a "/audio" to the URL')
+        signature = self._extract_signature(playlist_id)

         def fetch_page(page_idx):
+            query = {
+                'keyword': '',
+                'mid': playlist_id,
+                'order': 'pubdate',
+                'order_avoided': 'true',
+                'platform': 'web',
+                'pn': page_idx + 1,
+                'ps': 30,
+                'tid': 0,
+                'web_location': 1550101,
+                'wts': int(time.time()),
+            }
+            query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
+
             try:
-                response = self._download_json('https://api.bilibili.com/x/space/arc/search',
-                                               playlist_id, note=f'Downloading page {page_idx}',
-                                               query={'mid': playlist_id, 'pn': page_idx + 1, 'jsonp': 'jsonp'})
+                response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
+                                               playlist_id, note=f'Downloading page {page_idx}', query=query)
             except ExtractorError as e:
-                if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 412:
+                if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                     raise ExtractorError(
                         'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
                 raise
@@ -494,9 +642,9 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
 class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
     _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
     _TESTS = [{
-        'url': 'https://space.bilibili.com/3985676/audio',
+        'url': 'https://space.bilibili.com/313580179/audio',
         'info_dict': {
-            'id': '3985676',
+            'id': '313580179',
         },
         'playlist_mincount': 1,
     }]
@@ -894,15 +1042,15 @@ class BiliIntlBaseIE(InfoExtractor):
         }

     def _perform_login(self, username, password):
-        if not Cryptodome:
+        if not Cryptodome.RSA:
             raise ExtractorError('pycryptodomex not found. Please install', expected=True)

         key_data = self._download_json(
             'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
             note='Downloading login key', errnote='Unable to download login key')['data']

-        public_key = Cryptodome.PublicKey.RSA.importKey(key_data['key'])
-        password_hash = Cryptodome.Cipher.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
+        public_key = Cryptodome.RSA.importKey(key_data['key'])
+        password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
         login_post = self._download_json(
             'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
                 'username': username,
@@ -995,6 +1143,53 @@ class BiliIntlIE(BiliIntlBaseIE):
             'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
             'upload_date': '20221212',
             'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
         },
+    }, {
+        # episode comment extraction
+        'url': 'https://www.bilibili.tv/en/play/34580/340317',
+        'info_dict': {
+            'id': '340317',
+            'ext': 'mp4',
+            'timestamp': 1604057820,
+            'upload_date': '20201030',
+            'episode_number': 5,
+            'title': 'E5 - My Own Steel',
+            'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
+            'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
+            'episode': 'Episode 5',
+            'comment_count': int,
+            'chapters': [{
+                'start_time': 0,
+                'end_time': 61.0,
+                'title': '<Untitled Chapter 1>'
+            }, {
+                'start_time': 61.0,
+                'end_time': 134.0,
+                'title': 'Intro'
+            }, {
+                'start_time': 1290.0,
+                'end_time': 1379.0,
+                'title': 'Outro'
+            }],
+        },
+        'params': {
+            'getcomments': True
+        }
+    }, {
+        # user generated content comment extraction
+        'url': 'https://www.bilibili.tv/en/video/2045730385',
+        'info_dict': {
+            'id': '2045730385',
+            'ext': 'mp4',
+            'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
+            'timestamp': 1667891924,
+            'upload_date': '20221108',
+            'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
+            'comment_count': int,
+            'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
+        },
+        'params': {
+            'getcomments': True
+        }
     }, {
         # episode id without intro and outro
@@ -1054,11 +1249,69 @@ class BiliIntlIE(BiliIntlBaseIE):
         # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
         return merge_dicts(
-            self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id), {
+            self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
                 'title': self._html_search_meta('og:title', webpage),
                 'description': self._html_search_meta('og:description', webpage)
             })

+    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
+        comment_api_raw_data = self._download_json(
+            'https://api.bilibili.tv/reply/web/detail', display_id,
+            note=f'Downloading reply comment of {root_id} - {next_id}',
+            query={
+                'platform': 'web',
+                'ps': 20,  # comment's reply per page (default: 3)
+                'root': root_id,
+                'next': next_id,
+            })
+
+        for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
+            yield {
+                'author': traverse_obj(replies, ('member', 'name')),
+                'author_id': traverse_obj(replies, ('member', 'mid')),
+                'author_thumbnail': traverse_obj(replies, ('member', 'face')),
+                'text': traverse_obj(replies, ('content', 'message')),
+                'id': replies.get('rpid'),
+                'like_count': int_or_none(replies.get('like_count')),
+                'parent': replies.get('parent'),
+                'timestamp': unified_timestamp(replies.get('ctime_text'))
+            }
+
+        if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
+            yield from self._get_comments_reply(
+                root_id, comment_api_raw_data['data']['cursor']['next'], display_id)
+
+    def _get_comments(self, video_id, ep_id):
+        for i in itertools.count(0):
+            comment_api_raw_data = self._download_json(
+                'https://api.bilibili.tv/reply/web/root', video_id,
+                note=f'Downloading comment page {i + 1}',
+                query={
+                    'platform': 'web',
+                    'pn': i,  # page number
+                    'ps': 20,  # comment per page (default: 20)
+                    'oid': video_id,
+                    'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
+                    'sort_type': 1,  # 1: best, 2: recent
+                })
+
+            for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
+                yield {
+                    'author': traverse_obj(replies, ('member', 'name')),
+                    'author_id': traverse_obj(replies, ('member', 'mid')),
+                    'author_thumbnail': traverse_obj(replies, ('member', 'face')),
+                    'text': traverse_obj(replies, ('content', 'message')),
+                    'id': replies.get('rpid'),
+                    'like_count': int_or_none(replies.get('like_count')),
+                    'timestamp': unified_timestamp(replies.get('ctime_text')),
+                    'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
+                }
+                if replies.get('count'):
+                    yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)
+
+            if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
+                break
+
     def _real_extract(self, url):
         season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
         video_id = ep_id or aid
@@ -1086,7 +1339,8 @@ class BiliIntlIE(BiliIntlBaseIE):
             **self._extract_video_metadata(url, video_id, season_id),
             'formats': self._get_formats(ep_id=ep_id, aid=aid),
             'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
-            'chapters': chapters
+            'chapters': chapters,
+            '__post_extractor': self.extract_comments(video_id, ep_id)
         }

View File

@@ -2,9 +2,9 @@ import functools
 import re

 from .common import InfoExtractor
+from ..networking import HEADRequest
 from ..utils import (
     ExtractorError,
-    HEADRequest,
     OnDemandPagedList,
     clean_html,
     get_element_by_class,
@@ -77,7 +77,10 @@ class BitChuteIE(InfoExtractor):
     def _check_format(self, video_url, video_id):
         urls = orderedSet(
             re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url)
-            for host in (r'\g<2>', 'seed150', 'seed151', 'seed152', 'seed153'))
+            for host in (r'\g<2>', 'seed122', 'seed125', 'seed126', 'seed128',
+                         'seed132', 'seed150', 'seed151', 'seed152', 'seed153',
+                         'seed167', 'seed171', 'seed177', 'seed305', 'seed307',
+                         'seedp29xb', 'zb10-7gsop1v78'))
         for url in urls:
             try:
                 response = self._request_webpage(

View File

@@ -0,0 +1,167 @@
import json
from .common import InfoExtractor
from ..utils import strip_or_none, traverse_obj


class BlerpIE(InfoExtractor):
IE_NAME = 'blerp'
_VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',
'info_dict': {
'id': '6320fe8745636cb4dd677a5a',
'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',
'uploader': 'luminousaj',
'uploader_id': '5fb81e51aa66ae000c395478',
'ext': 'mp3',
'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
}
}, {
'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
'info_dict': {
'id': '5bc94ef4796001000498429f',
'title': 'Yee',
'uploader': '179617322678353920',
'uploader_id': '5ba99cf71386730004552c42',
'ext': 'mp3',
'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
}
}]

    _GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
_GRAPHQL_QUERY = (
'''query webBitePageGetBite($_id: MongoID!) {
web {
biteById(_id: $_id) {
...bitePageFrag
__typename
}
__typename
}
}
fragment bitePageFrag on Bite {
_id
title
userKeywords
keywords
color
visibility
isPremium
owned
price
extraReview
isAudioExists
image {
filename
original {
url
__typename
}
__typename
}
userReactions {
_id
reactions
createdAt
__typename
}
topReactions
totalSaveCount
saved
blerpLibraryType
license
licenseMetaData
playCount
totalShareCount
totalFavoriteCount
totalAddedToBoardCount
userCategory
userAudioQuality
audioCreationState
transcription
userTranscription
description
createdAt
updatedAt
author
listingType
ownerObject {
_id
username
profileImage {
filename
original {
url
__typename
}
__typename
}
__typename
}
transcription
favorited
visibility
isCurated
sourceUrl
audienceRating
strictAudienceRating
ownerId
reportObject {
reportedContentStatus
__typename
}
giphy {
mp4
gif
__typename
}
audio {
filename
original {
url
__typename
}
mp3 {
url
__typename
}
__typename
}
__typename
}
''')

    def _real_extract(self, url):
audio_id = self._match_id(url)
data = {
'operationName': self._GRAPHQL_OPERATIONNAME,
'query': self._GRAPHQL_QUERY,
'variables': {
'_id': audio_id
}
}
headers = {
'Content-Type': 'application/json'
}
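        # A single GraphQL POST (query defined above) returns all soundbite metadata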
json_result = self._download_json('https://api.blerp.com/graphql',
audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)
bite_json = json_result['data']['web']['biteById']
info_dict = {
'id': bite_json['_id'],
'url': bite_json['audio']['mp3']['url'],
'title': bite_json['title'],
'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
'ext': 'mp3',
'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)
}
return info_dict

View File

@@ -1,86 +0,0 @@
from .common import InfoExtractor
from ..utils import int_or_none, str_or_none, traverse_obj


class BooyahBaseIE(InfoExtractor):
_BOOYAH_SESSION_KEY = None

    def _real_initialize(self):
BooyahBaseIE._BOOYAH_SESSION_KEY = self._request_webpage(
'https://booyah.live/api/v3/auths/sessions', None, data=b'').getheader('booyah-session-key')

    def _get_comments(self, video_id):
comment_json = self._download_json(
f'https://booyah.live/api/v3/playbacks/{video_id}/comments/tops', video_id,
headers={'Booyah-Session-Key': self._BOOYAH_SESSION_KEY}, fatal=False) or {}
return [{
'id': comment.get('comment_id'),
'author': comment.get('from_nickname'),
'author_id': comment.get('from_uid'),
'author_thumbnail': comment.get('from_thumbnail'),
'text': comment.get('content'),
'timestamp': comment.get('create_time'),
'like_count': comment.get('like_cnt'),
} for comment in comment_json.get('comment_list') or ()]


class BooyahClipsIE(BooyahBaseIE):
_VALID_URL = r'https?://booyah.live/clips/(?P<id>\d+)'
_TESTS = [{
'url': 'https://booyah.live/clips/13887261322952306617',
'info_dict': {
'id': '13887261322952306617',
'ext': 'mp4',
'view_count': int,
'duration': 30,
'channel_id': 90565760,
'like_count': int,
'title': 'Cayendo con estilo 😎',
'uploader': '♡LɪMER',
'comment_count': int,
'uploader_id': '90565760',
'thumbnail': 'https://resmambet-a.akamaihd.net/mambet-storage/Clip/90565760/90565760-27204374-fba0-409d-9d7b-63a48b5c0e75.jpg',
'upload_date': '20220617',
'timestamp': 1655490556,
'modified_timestamp': 1655490556,
'modified_date': '20220617',
}
}]

    def _real_extract(self, url):
video_id = self._match_id(url)
json_data = self._download_json(
f'https://booyah.live/api/v3/playbacks/{video_id}', video_id,
headers={'Booyah-Session-key': self._BOOYAH_SESSION_KEY})
formats = []
for video_data in json_data['playback']['endpoint_list']:
formats.extend(({
'url': video_data.get('stream_url'),
'ext': 'mp4',
'height': video_data.get('resolution'),
}, {
'url': video_data.get('download_url'),
'ext': 'mp4',
'format_note': 'Watermarked',
'height': video_data.get('resolution'),
'preference': -10,
}))
return {
'id': video_id,
'title': traverse_obj(json_data, ('playback', 'name')),
'thumbnail': traverse_obj(json_data, ('playback', 'thumbnail_url')),
'formats': formats,
'view_count': traverse_obj(json_data, ('playback', 'views')),
'like_count': traverse_obj(json_data, ('playback', 'likes')),
'duration': traverse_obj(json_data, ('playback', 'duration')),
'comment_count': traverse_obj(json_data, ('playback', 'comment_cnt')),
'channel_id': traverse_obj(json_data, ('playback', 'channel_id')),
'uploader': traverse_obj(json_data, ('user', 'nickname')),
'uploader_id': str_or_none(traverse_obj(json_data, ('user', 'uid'))),
'modified_timestamp': int_or_none(traverse_obj(json_data, ('playback', 'update_time_ms')), 1000),
'timestamp': int_or_none(traverse_obj(json_data, ('playback', 'create_time_ms')), 1000),
'__post_extractor': self.extract_comments(video_id, self._get_comments(video_id)),
}

View File

@@ -0,0 +1,102 @@
from .common import InfoExtractor
from ..utils import (
js_to_json,
traverse_obj,
unified_timestamp
)


class BoxCastVideoIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://boxcast\.tv/(?:
view-embed/|
channel/\w+\?(?:[^#]+&)?b=|
video-portal/(?:\w+/){2}
)(?P<id>[\w-]+)'''
_EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://boxcast\.tv/view-embed/[\w-]+)']
_TESTS = [{
'url': 'https://boxcast.tv/view-embed/in-the-midst-of-darkness-light-prevails-an-interdisciplinary-symposium-ozmq5eclj50ujl4bmpwx',
'info_dict': {
'id': 'da1eqqgkacngd5djlqld',
'ext': 'mp4',
'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w+-]+/){3}.+\.png$',
'title': 'In the Midst of Darkness Light Prevails: An Interdisciplinary Symposium',
'release_timestamp': 1670686812,
'release_date': '20221210',
'uploader_id': 're8w0v8hohhvpqtbskpe',
'uploader': 'Children\'s Health Defense',
}
}, {
'url': 'https://boxcast.tv/video-portal/vctwevwntun3o0ikq7af/rvyblnn0fxbfjx5nwxhl/otbpltj2kzkveo2qz3ad',
'info_dict': {
'id': 'otbpltj2kzkveo2qz3ad',
'ext': 'mp4',
'uploader_id': 'vctwevwntun3o0ikq7af',
'uploader': 'Legacy Christian Church',
'title': 'The Quest | 1: Beginner\'s Bay | Jamie Schools',
'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg'
}
}, {
'url': 'https://boxcast.tv/channel/z03fqwaeaby5lnaawox2?b=ssihlw5gvfij2by8tkev',
'info_dict': {
'id': 'ssihlw5gvfij2by8tkev',
'ext': 'mp4',
'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg$',
'release_date': '20230101',
'uploader_id': 'ds25vaazhlu4ygcvffid',
'release_timestamp': 1672543201,
'uploader': 'Lighthouse Ministries International - Beltsville, Maryland',
'description': 'md5:ac23e3d01b0b0be592e8f7fe0ec3a340',
'title': 'New Year\'s Eve CROSSOVER Service at LHMI | December 31, 2022',
}
}]

    _WEBPAGE_TESTS = [{
'url': 'https://childrenshealthdefense.eu/live-stream/',
'info_dict': {
'id': 'da1eqqgkacngd5djlqld',
'ext': 'mp4',
'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w+-]+/){3}.+\.png$',
'title': 'In the Midst of Darkness Light Prevails: An Interdisciplinary Symposium',
'release_timestamp': 1670686812,
'release_date': '20221210',
'uploader_id': 're8w0v8hohhvpqtbskpe',
'uploader': 'Children\'s Health Defense',
}
}]

    def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
webpage_json_data = self._search_json(
r'var\s*BOXCAST_PRELOAD\s*=', webpage, 'broadcast data', display_id,
transform_source=js_to_json, default={})
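        # Prefer the BOXCAST_PRELOAD state embedded in the page; fall back to the public API below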
# Ref: https://support.boxcast.com/en/articles/4235158-build-a-custom-viewer-experience-with-boxcast-api
broadcast_json_data = (
traverse_obj(webpage_json_data, ('broadcast', 'data'))
or self._download_json(f'https://api.boxcast.com/broadcasts/{display_id}', display_id))
view_json_data = (
traverse_obj(webpage_json_data, ('view', 'data'))
or self._download_json(f'https://api.boxcast.com/broadcasts/{display_id}/view',
display_id, fatal=False) or {})
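        # Only finished ('recorded') broadcasts expose an HLS playlist here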
formats, subtitles = [], {}
if view_json_data.get('status') == 'recorded':
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
view_json_data['playlist'], display_id)
return {
'id': str(broadcast_json_data['id']),
'title': (broadcast_json_data.get('name')
or self._html_search_meta(['og:title', 'twitter:title'], webpage)),
'description': (broadcast_json_data.get('description')
or self._html_search_meta(['og:description', 'twitter:description'], webpage)
or None),
'thumbnail': (broadcast_json_data.get('preview')
or self._html_search_meta(['og:image', 'twitter:image'], webpage)),
'formats': formats,
'subtitles': subtitles,
'release_timestamp': unified_timestamp(broadcast_json_data.get('streamed_at')),
'uploader': broadcast_json_data.get('account_name'),
'uploader_id': broadcast_json_data.get('account_id'),
}

View File

@@ -0,0 +1,318 @@
import json
import re
from .common import InfoExtractor
from ..utils import (
classproperty,
int_or_none,
traverse_obj,
urljoin
)


class BrainPOPBaseIE(InfoExtractor):
_NETRC_MACHINE = 'brainpop'
_ORIGIN = '' # So that _VALID_URL doesn't crash
_LOGIN_ERRORS = {
1502: 'The username and password you entered did not match.', # LOGIN_FAILED
1503: 'Payment method is expired.', # LOGIN_FAILED_ACCOUNT_NOT_ACTIVE
1506: 'Your BrainPOP plan has expired.', # LOGIN_FAILED_ACCOUNT_EXPIRED
1507: 'Terms not accepted.', # LOGIN_FAILED_TERMS_NOT_ACCEPTED
1508: 'Account not activated.', # LOGIN_FAILED_SUBSCRIPTION_NOT_ACTIVE
1512: 'The maximum number of devices permitted are logged in with your account right now.', # LOGIN_FAILED_LOGIN_LIMIT_REACHED
1513: 'You are trying to access your account from outside of its allowed IP range.', # LOGIN_FAILED_INVALID_IP
1514: 'Individual accounts are not included in your plan. Try again with your shared username and password.', # LOGIN_FAILED_MBP_DISABLED
1515: 'Account not activated.', # LOGIN_FAILED_TEACHER_NOT_ACTIVE
1523: 'That username and password won\'t work on this BrainPOP site.', # LOGIN_FAILED_NO_ACCESS
1524: 'You\'ll need to join a class before you can login.', # LOGIN_FAILED_STUDENT_NO_PERIOD
1526: 'Your account is locked. Reset your password, or ask a teacher or administrator for help.', # LOGIN_FAILED_ACCOUNT_LOCKED
}
@classproperty
def _VALID_URL(cls):
root = re.escape(cls._ORIGIN).replace(r'https:', r'https?:').replace(r'www\.', r'(?:www\.)?')
return rf'{root}/(?P<slug>[^/]+/[^/]+/(?P<id>[^/?#&]+))'
def _assemble_formats(self, slug, format_id, display_id, token='', extra_fields={}):
formats = self._extract_m3u8_formats(
f'{urljoin(self._HLS_URL, slug)}.m3u8?{token}',
display_id, 'mp4', m3u8_id=f'{format_id}-hls', fatal=False)
formats.append({
'format_id': format_id,
'url': f'{urljoin(self._VIDEO_URL, slug)}?{token}',
})
for f in formats:
f.update(extra_fields)
return formats
def _extract_adaptive_formats(self, data, token, display_id, key_format='%s', extra_fields={}):
formats = []
additional_key_formats = {
'%s': {},
'ad_%s': {
'format_note': 'Audio description',
'source_preference': -2
}
}
for additional_key_format, additional_key_fields in additional_key_formats.items():
for key_quality, key_index in enumerate(('high', 'low')):
full_key_index = additional_key_format % (key_format % key_index)
if data.get(full_key_index):
formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, {
'quality': -1 - key_quality,
**additional_key_fields,
**extra_fields
}))
return formats
def _perform_login(self, username, password):
login_res = self._download_json(
'https://api.brainpop.com/api/login', None,
data=json.dumps({'username': username, 'password': password}).encode(),
headers={
'Content-Type': 'application/json',
'Referer': self._ORIGIN
}, note='Logging in', errnote='Unable to log in', expected_status=400)
status_code = int_or_none(login_res['status_code'])
if status_code != 1505:
self.report_warning('Unable to login: %s' % (
self._LOGIN_ERRORS.get(status_code) or login_res.get('message') or f'Got status code {status_code}'))
class BrainPOPIE(BrainPOPBaseIE):
_ORIGIN = 'https://www.brainpop.com'
_VIDEO_URL = 'https://svideos.brainpop.com'
_HLS_URL = 'https://hls.brainpop.com'
_CDN_URL = 'https://cdn.brainpop.com'
_TESTS = [{
'url': 'https://www.brainpop.com/health/conflictresolution/martinlutherkingjr/movie?ref=null',
'md5': '3ead374233ae74c7f1b0029a01c972f0',
'info_dict': {
'id': '1f3259fa457292b4',
'ext': 'mp4',
'title': 'Martin Luther King, Jr.',
'display_id': 'martinlutherkingjr',
'description': 'md5:f403dbb2bf3ccc7cf4c59d9e43e3c349',
},
}, {
'url': 'https://www.brainpop.com/science/space/bigbang/',
'md5': '9a1ff0e77444dd9e437354eb669c87ec',
'info_dict': {
'id': 'acae52cd48c99acf',
'ext': 'mp4',
'title': 'Big Bang',
'display_id': 'bigbang',
'description': 'md5:3e53b766b0f116f631b13f4cae185d38',
},
'skip': 'Requires login',
}]
def _real_extract(self, url):
slug, display_id = self._match_valid_url(url).group('slug', 'id')
movie_data = self._download_json(
f'https://api.brainpop.com/api/content/published/bp/en/{slug}/movie?full=1', display_id,
'Downloading movie data JSON', 'Unable to download movie data')['data']
topic_data = traverse_obj(self._download_json(
f'https://api.brainpop.com/api/content/published/bp/en/{slug}?full=1', display_id,
'Downloading topic data JSON', 'Unable to download topic data', fatal=False),
('data', 'topic'), expected_type=dict) or movie_data['topic']
if not traverse_obj(movie_data, ('access', 'allow')):
reason = traverse_obj(movie_data, ('access', 'reason'))
if 'logged' in reason:
self.raise_login_required(reason, metadata_available=True)
else:
self.raise_no_formats(reason, video_id=display_id)
movie_feature = movie_data['feature']
movie_feature_data = movie_feature['data']
formats, subtitles = [], {}
formats.extend(self._extract_adaptive_formats(movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', {
'language': movie_feature.get('language') or 'en',
'language_preference': 10
}))
for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items():
formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', {
'language': lang,
'language_preference': -10
}))
# TODO: Do localization fields also have subtitles?
for name, url in movie_feature_data.items():
lang = self._search_regex(
r'^subtitles_(?P<lang>\w+)$', name, 'subtitle metadata', default=None)
if lang and url:
subtitles.setdefault(lang, []).append({
'url': urljoin(self._CDN_URL, url)
})
return {
'id': topic_data['topic_id'],
'display_id': display_id,
'title': topic_data.get('name'),
'description': topic_data.get('synopsis'),
'formats': formats,
'subtitles': subtitles,
}
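
For reference, `_extract_adaptive_formats` above probes a fixed family of keys built from the key template; a small sketch of the expansion for the '%s_v2' template used by BrainPOPIE (plain Python, mirroring the loop above):

# Illustrative sketch, not part of the diff
for prefix in ('%s', 'ad_%s'):  # plain and audio-description variants
    for quality, key in enumerate(('high', 'low')):
        full_key = prefix % ('%s_v2' % key)
        # probes 'high_v2', 'low_v2', 'ad_high_v2', 'ad_low_v2'
        print(full_key, 'quality:', -1 - quality)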
class BrainPOPLegacyBaseIE(BrainPOPBaseIE):
def _parse_js_topic_data(self, topic_data, display_id, token):
movie_data = topic_data['movies']
# TODO: Are there non-burned subtitles?
formats = self._extract_adaptive_formats(movie_data, token, display_id)
return {
'id': topic_data['EntryID'],
'display_id': display_id,
'title': topic_data.get('name'),
'alt_title': topic_data.get('title'),
'description': topic_data.get('synopsis'),
'formats': formats,
}
def _real_extract(self, url):
slug, display_id = self._match_valid_url(url).group('slug', 'id')
webpage = self._download_webpage(url, display_id)
topic_data = self._search_json(
r'var\s+content\s*=\s*', webpage, 'content data',
display_id, end_pattern=';')['category']['unit']['topic']
token = self._search_regex(r'ec_token\s*:\s*[\'"]([^\'"]+)', webpage, 'video token')
return self._parse_js_topic_data(topic_data, display_id, token)
class BrainPOPJrIE(BrainPOPLegacyBaseIE):
_ORIGIN = 'https://jr.brainpop.com'
_VIDEO_URL = 'https://svideos-jr.brainpop.com'
_HLS_URL = 'https://hls-jr.brainpop.com'
_CDN_URL = 'https://cdn-jr.brainpop.com'
_TESTS = [{
'url': 'https://jr.brainpop.com/health/feelingsandsel/emotions/',
'md5': '04e0561bb21770f305a0ce6cf0d869ab',
'info_dict': {
'id': '347',
'ext': 'mp4',
'title': 'Emotions',
'display_id': 'emotions',
},
}, {
'url': 'https://jr.brainpop.com/science/habitats/arctichabitats/',
'md5': 'b0ed063bbd1910df00220ee29340f5d6',
'info_dict': {
'id': '29',
'ext': 'mp4',
'title': 'Arctic Habitats',
'display_id': 'arctichabitats',
},
'skip': 'Requires login',
}]
class BrainPOPELLIE(BrainPOPLegacyBaseIE):
_ORIGIN = 'https://ell.brainpop.com'
_VIDEO_URL = 'https://svideos-esl.brainpop.com'
_HLS_URL = 'https://hls-esl.brainpop.com'
_CDN_URL = 'https://cdn-esl.brainpop.com'
_TESTS = [{
'url': 'https://ell.brainpop.com/level1/unit1/lesson1/',
'md5': 'a2012700cfb774acb7ad2e8834eed0d0',
'info_dict': {
'id': '1',
'ext': 'mp4',
'title': 'Lesson 1',
'display_id': 'lesson1',
'alt_title': 'Personal Pronouns',
},
}, {
'url': 'https://ell.brainpop.com/level3/unit6/lesson5/',
'md5': 'be19c8292c87b24aacfb5fda2f3f8363',
'info_dict': {
'id': '101',
'ext': 'mp4',
'title': 'Lesson 5',
'display_id': 'lesson5',
'alt_title': 'Review: Unit 6',
},
'skip': 'Requires login',
}]
class BrainPOPEspIE(BrainPOPLegacyBaseIE):
IE_DESC = 'BrainPOP Español'
_ORIGIN = 'https://esp.brainpop.com'
_VIDEO_URL = 'https://svideos.brainpop.com'
_HLS_URL = 'https://hls.brainpop.com'
_CDN_URL = 'https://cdn.brainpop.com/mx'
_TESTS = [{
'url': 'https://esp.brainpop.com/ciencia/la_diversidad_de_la_vida/ecosistemas/',
'md5': 'cb3f062db2b3c5240ddfcfde7108f8c9',
'info_dict': {
'id': '3893',
'ext': 'mp4',
'title': 'Ecosistemas',
'display_id': 'ecosistemas',
'description': 'md5:80fc55b07e241f8c8f2aa8d74deaf3c3',
},
}, {
'url': 'https://esp.brainpop.com/espanol/la_escritura/emily_dickinson/',
'md5': '98c1b9559e0e33777209c425cda7dac4',
'info_dict': {
'id': '7146',
'ext': 'mp4',
'title': 'Emily Dickinson',
'display_id': 'emily_dickinson',
'description': 'md5:2795ad87b1d239c9711c1e92ab5a978b',
},
'skip': 'Requires login',
}]
class BrainPOPFrIE(BrainPOPLegacyBaseIE):
IE_DESC = 'BrainPOP Français'
_ORIGIN = 'https://fr.brainpop.com'
_VIDEO_URL = 'https://svideos.brainpop.com'
_HLS_URL = 'https://hls.brainpop.com'
_CDN_URL = 'https://cdn.brainpop.com/fr'
_TESTS = [{
'url': 'https://fr.brainpop.com/sciencesdelaterre/energie/sourcesdenergie/',
'md5': '97e7f48af8af93f8a2be11709f239371',
'info_dict': {
'id': '1651',
'ext': 'mp4',
'title': 'Sources d\'énergie',
'display_id': 'sourcesdenergie',
'description': 'md5:7eece350f019a21ef9f64d4088b2d857',
},
}, {
'url': 'https://fr.brainpop.com/francais/ecrire/plagiat/',
'md5': '0cf2b4f89804d0dd4a360a51310d445a',
'info_dict': {
'id': '5803',
'ext': 'mp4',
'title': 'Plagiat',
'display_id': 'plagiat',
'description': 'md5:4496d87127ace28e8b1eda116e77cd2b',
},
'skip': 'Requires login',
}]
class BrainPOPIlIE(BrainPOPLegacyBaseIE):
IE_DESC = 'BrainPOP Hebrew'
_ORIGIN = 'https://il.brainpop.com'
_VIDEO_URL = 'https://svideos.brainpop.com'
_HLS_URL = 'https://hls.brainpop.com'
_CDN_URL = 'https://cdn.brainpop.com/he'
_TESTS = [{
'url': 'https://il.brainpop.com/category_9/subcategory_150/subjects_3782/',
'md5': '9e4ea9dc60ecd385a6e5ca12ccf31641',
'info_dict': {
'id': '3782',
'ext': 'mp4',
'title': 'md5:e993632fcda0545d9205602ec314ad67',
'display_id': 'subjects_3782',
'description': 'md5:4cc084a8012beb01f037724423a4d4ed',
},
}]

View File

@ -1,117 +1,189 @@
-import re
 from .adobepass import AdobePassIE
+from ..networking import HEADRequest
 from ..utils import (
-    smuggle_url,
-    update_url_query,
-    int_or_none,
+    extract_attributes,
     float_or_none,
-    try_get,
-    dict_get,
+    get_element_html_by_class,
+    int_or_none,
+    merge_dicts,
+    parse_age_limit,
+    remove_end,
+    str_or_none,
+    traverse_obj,
+    unescapeHTML,
+    unified_timestamp,
+    update_url_query,
+    url_or_none,
 )


class BravoTVIE(AdobePassIE):
-    _VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
-        'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
        'info_dict': {
-            'id': 'epL0pmK1kQlT',
+            'id': '3923059',
            'ext': 'mp4',
            'title': 'The Top Chef Season 16 Winner Is...',
            'description': 'Find out who takes the title of Top Chef!',
-            'uploader': 'NBCU-BRAV',
            'upload_date': '20190314',
            'timestamp': 1552591860,
            'season_number': 16,
            'episode_number': 15,
            'series': 'Top Chef',
            'episode': 'The Top Chef Season 16 Winner Is...',
-            'duration': 190.0,
-        }
+            'duration': 190.357,
+            'season': 'Season 16',
+            'thumbnail': r're:^https://.+\.jpg',
+        },
+        'params': {'skip_download': 'm3u8'},
    }, {
-        'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
-        'only_matching': True,
+        'url': 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling',
+        'info_dict': {
+            'id': '9000234570',
+            'ext': 'mp4',
+            'title': 'London Calling',
+            'description': 'md5:5af95a8cbac1856bd10e7562f86bb759',
+            'upload_date': '20230310',
+            'timestamp': 1678410000,
+            'season_number': 20,
+            'episode_number': 1,
+            'series': 'Top Chef',
+            'episode': 'London Calling',
+            'duration': 3266.03,
+            'season': 'Season 20',
+            'chapters': 'count:7',
+            'thumbnail': r're:^https://.+\.jpg',
+            'age_limit': 14,
+        },
+        'params': {'skip_download': 'm3u8'},
+        'skip': 'This video requires AdobePass MSO credentials',
+    }, {
+        'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night',
+        'info_dict': {
+            'id': '3692045',
+            'ext': 'mp4',
+            'title': 'Closing Night',
+            'description': 'md5:3170065c5c2f19548d72a4cbc254af63',
+            'upload_date': '20180401',
+            'timestamp': 1522623600,
+            'season_number': 1,
+            'episode_number': 1,
+            'series': 'In Ice Cold Blood',
+            'episode': 'Closing Night',
+            'duration': 2629.051,
+            'season': 'Season 1',
+            'chapters': 'count:6',
+            'thumbnail': r're:^https://.+\.jpg',
+            'age_limit': 14,
+        },
+        'params': {'skip_download': 'm3u8'},
+        'skip': 'This video requires AdobePass MSO credentials',
    }, {
        'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
+        'info_dict': {
+            'id': '3974019',
+            'ext': 'mp4',
+            'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
+            'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5',
+            'upload_date': '20190617',
+            'timestamp': 1560790800,
+            'season_number': 2,
+            'episode_number': 16,
+            'series': 'In Ice Cold Blood',
+            'episode': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
+            'duration': 68.235,
+            'season': 'Season 2',
+            'thumbnail': r're:^https://.+\.jpg',
+            'age_limit': 14,
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
-        site, display_id = self._match_valid_url(url).groups()
+        site, display_id = self._match_valid_url(url).group('site', 'id')
        webpage = self._download_webpage(url, display_id)
-        settings = self._parse_json(self._search_regex(
-            r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
-            display_id)
-        info = {}
+        settings = self._search_json(
+            r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>', webpage, 'settings', display_id)
+        tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '')
        query = {
-            'mbr': 'true',
+            'manifest': 'm3u',
+            'formats': 'm3u,mpeg4',
        }
-        account_pid, release_pid = [None] * 2
-        tve = settings.get('ls_tve')

        if tve:
-            query['manifest'] = 'm3u'
-            mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage)
-            if mobj:
-                account_pid, tp_path = mobj.groups()
-                release_pid = tp_path.strip('/').split('/')[-1]
-            else:
-                account_pid = 'HNK2IC'
-                tp_path = release_pid = tve['release_pid']
-            if tve.get('entitlement') == 'auth':
-                adobe_pass = settings.get('tve_adobe_auth', {})
-                if site == 'bravotv':
-                    site = 'bravo'
+            account_pid = tve.get('data-mpx-media-account-pid') or 'HNK2IC'
+            account_id = tve['data-mpx-media-account-id']
+            metadata = self._parse_json(
+                tve.get('data-normalized-video', ''), display_id, fatal=False, transform_source=unescapeHTML)
+            video_id = tve.get('data-guid') or metadata['guid']
+            if tve.get('data-entitlement') == 'auth':
+                auth = traverse_obj(settings, ('tve_adobe_auth', {dict})) or {}
+                site = remove_end(site, 'tv')
+                release_pid = tve['data-release-pid']
                resource = self._get_mvpd_resource(
-                    adobe_pass.get('adobePassResourceId') or site,
-                    tve['title'], release_pid, tve.get('rating'))
-                query['auth'] = self._extract_mvpd_auth(
-                    url, release_pid,
-                    adobe_pass.get('adobePassRequestorId') or site, resource)
+                    tve.get('data-adobe-pass-resource-id') or auth.get('adobePassResourceId') or site,
+                    tve['data-title'], release_pid, tve.get('data-rating'))
+                query.update({
+                    'switch': 'HLSServiceSecure',
+                    'auth': self._extract_mvpd_auth(
+                        url, release_pid, auth.get('adobePassRequestorId') or site, resource),
+                })
        else:
-            shared_playlist = settings['ls_playlist']
-            account_pid = shared_playlist['account_pid']
-            metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']]
-            tp_path = release_pid = metadata.get('release_pid')
-            if not release_pid:
-                release_pid = metadata['guid']
-                tp_path = 'media/guid/2140479951/' + release_pid
-            info.update({
-                'title': metadata['title'],
-                'description': metadata.get('description'),
-                'season_number': int_or_none(metadata.get('season_num')),
-                'episode_number': int_or_none(metadata.get('episode_num')),
-            })
-            query['switch'] = 'progressive'
-
-        tp_url = 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path)
+            ls_playlist = traverse_obj(settings, ('ls_playlist', ..., {dict}), get_all=False) or {}
+            account_pid = ls_playlist.get('mpxMediaAccountPid') or 'PHSl-B'
+            account_id = ls_playlist['mpxMediaAccountId']
+            video_id = ls_playlist['defaultGuid']
+            metadata = traverse_obj(
+                ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, {dict}), get_all=False)

+        tp_url = f'https://link.theplatform.com/s/{account_pid}/media/guid/{account_id}/{video_id}'
        tp_metadata = self._download_json(
-            update_url_query(tp_url, {'format': 'preview'}),
-            display_id, fatal=False)
-        if tp_metadata:
-            info.update({
-                'title': tp_metadata.get('title'),
-                'description': tp_metadata.get('description'),
-                'duration': float_or_none(tp_metadata.get('duration'), 1000),
-                'season_number': int_or_none(
-                    dict_get(tp_metadata, ('pl1$seasonNumber', 'nbcu$seasonNumber'))),
-                'episode_number': int_or_none(
-                    dict_get(tp_metadata, ('pl1$episodeNumber', 'nbcu$episodeNumber'))),
-                # For some reason the series is sometimes wrapped into a single element array.
-                'series': try_get(
-                    dict_get(tp_metadata, ('pl1$show', 'nbcu$show')),
-                    lambda x: x[0] if isinstance(x, list) else x,
-                    expected_type=str),
-                'episode': dict_get(
-                    tp_metadata, ('pl1$episodeName', 'nbcu$episodeName', 'title')),
-            })
+            update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)

-        info.update({
-            '_type': 'url_transparent',
-            'id': release_pid,
-            'url': smuggle_url(update_url_query(tp_url, query), {'force_smil_url': True}),
-            'ie_key': 'ThePlatform',
-        })
-        return info
+        seconds_or_none = lambda x: float_or_none(x, 1000)
+        chapters = traverse_obj(tp_metadata, ('chapters', ..., {
+            'start_time': ('startTime', {seconds_or_none}),
+            'end_time': ('endTime', {seconds_or_none}),
+        }))
+        # prune pointless single chapters that span the entire duration from short videos
+        if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
+            chapters = None
+
+        m3u8_url = self._request_webpage(HEADRequest(
+            update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').url
+        if 'mpeg_cenc' in m3u8_url:
+            self.report_drm(video_id)
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            'chapters': chapters,
+            **merge_dicts(traverse_obj(tp_metadata, {
+                'title': 'title',
+                'description': 'description',
+                'duration': ('duration', {seconds_or_none}),
+                'timestamp': ('pubDate', {seconds_or_none}),
+                'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
+                'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
+                'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
+                'episode': (('title', 'pl1$episodeNumber', 'nbcu$episodeNumber'), {str_or_none}),
+                'age_limit': ('ratings', ..., 'rating', {parse_age_limit}),
+            }, get_all=False), traverse_obj(metadata, {
+                'title': 'title',
+                'description': 'description',
+                'duration': ('durationInSeconds', {int_or_none}),
+                'timestamp': ('airDate', {unified_timestamp}),
+                'thumbnail': ('thumbnailUrl', {url_or_none}),
+                'season_number': ('seasonNumber', {int_or_none}),
+                'episode_number': ('episodeNumber', {int_or_none}),
+                'episode': 'episodeTitle',
+                'series': 'show',
+            }))
+        }
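
The rewritten extractor leans on yt-dlp's `traverse_obj` dict templates, where a `{func}` set applies a transform to the matched value. A minimal illustration of the chapter mapping above, run on hypothetical sample data:

# Illustrative sketch, not part of the diff
from yt_dlp.utils import traverse_obj

seconds_or_none = lambda x: x / 1000 if x is not None else None
tp_metadata = {'chapters': [{'startTime': 0, 'endTime': 190357}]}  # hypothetical sample

chapters = traverse_obj(tp_metadata, ('chapters', ..., {
    'start_time': ('startTime', {seconds_or_none}),
    'end_time': ('endTime', {seconds_or_none}),
}))
print(chapters)  # expected: [{'start_time': 0.0, 'end_time': 190.357}]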

View File

@ -7,10 +7,10 @@ from .adobepass import AdobePassIE
from .common import InfoExtractor
from ..compat import (
    compat_etree_fromstring,
-    compat_HTTPError,
    compat_parse_qs,
    compat_urlparse,
)
+from ..networking.exceptions import HTTPError
from ..utils import (
    clean_html,
    dict_get,
@ -575,6 +575,7 @@ class BrightcoveNewBaseIE(AdobePassIE):
            self.raise_no_formats(
                error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)

+        headers.pop('Authorization', None)  # or else http formats will give error 400
        for f in formats:
            f.setdefault('http_headers', {}).update(headers)
@ -895,8 +896,9 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
            store_pk(policy_key)
            return policy_key

-        api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
-        headers = {}
+        token = smuggled_data.get('token')
+        api_url = f'https://{"edge-auth" if token else "edge"}.api.brightcove.com/playback/v1/accounts/{account_id}/{content_type}s/{video_id}'
+        headers = {'Authorization': f'Bearer {token}'} if token else {}
        referrer = smuggled_data.get('referrer')  # XXX: notice the spelling/case of the key
        if referrer:
            headers.update({
@ -913,8 +915,8 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
                json_data = self._download_json(api_url, video_id, headers=headers)
                break
            except ExtractorError as e:
-                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
-                    json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
+                if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
+                    json_data = self._parse_json(e.cause.response.read().decode(), video_id)[0]
                    message = json_data.get('message') or json_data['error_code']
                    if json_data.get('error_subcode') == 'CLIENT_GEO':
                        self.raise_geo_restricted(msg=message)

View File

@ -1,9 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
-    traverse_obj,
-    float_or_none,
-    int_or_none
-)
+from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj


class CallinIE(InfoExtractor):
@ -35,6 +31,54 @@ class CallinIE(InfoExtractor):
            'episode_number': 1,
            'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd'
        }
+    }, {
+        'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
+        'md5': '14ede27ee2c957b7e4db93140fc0745c',
+        'info_dict': {
+            'id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
+            'ext': 'ts',
+            'title': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
+            'description': 'Or, why the government doesn’t like SpaceX',
+            'channel': 'The Pull Request',
+            'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa',
+            'duration': 3182.472,
+            'series_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638',
+            'uploader_url': 'http://thepullrequest.com',
+            'upload_date': '20220902',
+            'episode': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
+            'display_id': 'fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
+            'series': 'The Pull Request',
+            'channel_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638',
+            'view_count': int,
+            'uploader': 'Antonio García Martínez',
+            'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/1ade9142625344045dc17cf523469ced1d93610762f4c886d06aa190a2f979e8.png',
+            'episode_id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
+            'timestamp': 1662100688.005,
+        }
+    }, {
+        'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
+        'md5': '16f704ddbf82a27e3930533b12062f07',
+        'info_dict': {
+            'id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
+            'ext': 'ts',
+            'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
+            'description': 'Let’s talk today’s episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.',
+            'channel': 'The DEBRIEF With Briahna Joy Gray',
+            'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm',
+            'duration': 10043.16,
+            'series_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7',
+            'uploader_url': 'http://patreon.com/badfaithpodcast',
+            'upload_date': '20220826',
+            'episode': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
+            'display_id': 'episode-',
+            'series': 'The DEBRIEF With Briahna Joy Gray',
+            'channel_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7',
+            'view_count': int,
+            'uploader': 'Briahna Gray',
+            'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/461ea0d86172cb6aff7d6c80fd49259cf5e64bdf737a4650f8bc24cf392ca218.png',
+            'episode_id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
+            'timestamp': 1661476708.282,
+        }
    }]

    def try_get_user_name(self, d):
@ -86,6 +130,7 @@ class CallinIE(InfoExtractor):
        return {
            'id': id,
+            '_old_archive_ids': [make_archive_id(self, display_id.rsplit('-', 1)[-1])],
            'display_id': display_id,
            'title': title,
            'formats': formats,

View File

@ -0,0 +1,85 @@
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
get_element_by_class,
get_elements_by_class,
join_nonempty,
traverse_obj,
unified_timestamp,
urljoin,
)
class CamFMShowIE(InfoExtractor):
_VALID_URL = r'https://(?:www\.)?camfm\.co\.uk/shows/(?P<id>[^/]+)'
_TESTS = [{
'playlist_mincount': 5,
'url': 'https://camfm.co.uk/shows/soul-mining/',
'info_dict': {
'id': 'soul-mining',
'thumbnail': 'md5:6a873091f92c936f23bdcce80f75e66a',
'title': 'Soul Mining',
'description': 'Telling the stories of jazz, funk and soul from all corners of the world.',
},
}]
def _real_extract(self, url):
show_id = self._match_id(url)
page = self._download_webpage(url, show_id)
return {
'_type': 'playlist',
'id': show_id,
'entries': [self.url_result(urljoin('https://camfm.co.uk', i), CamFMEpisodeIE)
for i in re.findall(r"javascript:popup\('(/player/[^']+)', 'listen'", page)],
'thumbnail': urljoin('https://camfm.co.uk', self._search_regex(
r'<img[^>]+class="thumb-expand"[^>]+src="([^"]+)"', page, 'thumbnail', fatal=False)),
'title': self._html_search_regex('<h1>([^<]+)</h1>', page, 'title', fatal=False),
'description': clean_html(get_element_by_class('small-12 medium-8 cell', page))
}
class CamFMEpisodeIE(InfoExtractor):
_VALID_URL = r'https://(?:www\.)?camfm\.co\.uk/player/(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://camfm.co.uk/player/43336',
'skip': 'Episode will expire - don\'t actually know when, but it will go eventually',
'info_dict': {
'id': '43336',
'title': 'AITAA: Am I the Agony Aunt? - 19:00 Tue 16/05/2023',
'ext': 'mp3',
'upload_date': '20230516',
'description': 'md5:f165144f94927c0f1bfa2ee6e6ab7bbf',
'timestamp': 1684263600,
'series': 'AITAA: Am I the Agony Aunt?',
'thumbnail': 'md5:5980a831360d0744c3764551be3d09c1',
'categories': ['Entertainment'],
}
}]
def _real_extract(self, url):
episode_id = self._match_id(url)
page = self._download_webpage(url, episode_id)
audios = self._parse_html5_media_entries('https://audio.camfm.co.uk', page, episode_id)
caption = get_element_by_class('caption', page)
series = clean_html(re.sub(r'<span[^<]+<[^<]+>', '', caption))
card_section = get_element_by_class('card-section', page)
date = self._html_search_regex('>Aired at ([^<]+)<', card_section, 'air date', fatal=False)
return {
'id': episode_id,
'title': join_nonempty(series, date, delim=' - '),
'formats': traverse_obj(audios, (..., 'formats', ...)),
'timestamp': unified_timestamp(date), # XXX: Does not account for UK's daylight savings
'series': series,
'description': clean_html(re.sub(r'<b>[^<]+</b><br[^>]+/>', '', card_section)),
'thumbnail': urljoin('https://camfm.co.uk', self._search_regex(
r'<div[^>]+class="cover-art"[^>]+style="[^"]+url\(\'([^\']+)',
page, 'thumbnail', fatal=False)),
'categories': get_elements_by_class('label', caption),
'was_live': True,
}

View File

@ -1,9 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    int_or_none,
-    url_or_none,
-)
+from ..utils import int_or_none, url_or_none


class CamModelsIE(InfoExtractor):
@ -17,32 +13,11 @@ class CamModelsIE(InfoExtractor):
    def _real_extract(self, url):
        user_id = self._match_id(url)

-        webpage = self._download_webpage(
-            url, user_id, headers=self.geo_verification_headers())
-
-        manifest_root = self._html_search_regex(
-            r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
-
-        if not manifest_root:
-            ERRORS = (
-                ("I'm offline, but let's stay connected", 'This user is currently offline'),
-                ('in a private show', 'This user is in a private show'),
-                ('is currently performing LIVE', 'This model is currently performing live'),
-            )
-            for pattern, message in ERRORS:
-                if pattern in webpage:
-                    error = message
-                    expected = True
-                    break
-            else:
-                error = 'Unable to find manifest URL root'
-                expected = False
-            raise ExtractorError(error, expected=expected)
-
        manifest = self._download_json(
-            '%s%s.json' % (manifest_root, user_id), user_id)
+            'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id)

        formats = []
+        thumbnails = []
        for format_id, format_dict in manifest['formats'].items():
            if not isinstance(format_dict, dict):
                continue
@ -82,12 +57,20 @@ class CamModelsIE(InfoExtractor):
                    'quality': -10,
                })
            else:
+                if format_id == 'jpeg':
+                    thumbnails.append({
+                        'url': f['url'],
+                        'width': f['width'],
+                        'height': f['height'],
+                        'format_id': f['format_id'],
+                    })
                continue
            formats.append(f)

        return {
            'id': user_id,
            'title': user_id,
+            'thumbnails': thumbnails,
            'is_live': True,
            'formats': formats,
            'age_limit': 18

View File

@ -64,7 +64,7 @@ class CanalplusIE(InfoExtractor):
        # response = self._request_webpage(
        #     HEADRequest(fmt_url), video_id,
        #     'Checking if the video is georestricted')
-        # if '/blocage' in response.geturl():
+        # if '/blocage' in response.url:
        #     raise ExtractorError(
        #         'The video is not available in your country',
        #         expected=True)

View File

@ -1,383 +0,0 @@
import json
from .common import InfoExtractor
from .gigya import GigyaBaseIE
from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
clean_html,
extract_attributes,
float_or_none,
get_element_by_class,
int_or_none,
merge_dicts,
str_or_none,
strip_or_none,
url_or_none,
urlencode_postdata
)
class CanvasIE(InfoExtractor):
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
'info_dict': {
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'ext': 'mp4',
'title': 'Nachtwacht: De Greystook',
'description': 'Nachtwacht: De Greystook',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1468.02,
},
'expected_warnings': ['is not a supported codec'],
}, {
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
'only_matching': True,
}]
_GEO_BYPASS = False
_HLS_ENTRY_PROTOCOLS_MAP = {
'HLS': 'm3u8_native',
'HLS_AES': 'm3u8_native',
}
_REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2'
def _real_extract(self, url):
mobj = self._match_valid_url(url)
site_id, video_id = mobj.group('site_id'), mobj.group('id')
data = None
if site_id != 'vrtvideo':
# Old API endpoint, serves more formats but may fail for some videos
data = self._download_json(
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
% (site_id, video_id), video_id, 'Downloading asset JSON',
'Unable to download asset JSON', fatal=False)
# New API endpoint
if not data:
vrtnutoken = self._download_json('https://token.vrt.be/refreshtoken',
video_id, note='refreshtoken: Retrieve vrtnutoken',
errnote='refreshtoken failed')['vrtnutoken']
headers = self.geo_verification_headers()
headers.update({'Content-Type': 'application/json; charset=utf-8'})
vrtPlayerToken = self._download_json(
'%s/tokens' % self._REST_API_BASE, video_id,
'Downloading token', headers=headers, data=json.dumps({
'identityToken': vrtnutoken
}).encode('utf-8'))['vrtPlayerToken']
data = self._download_json(
'%s/videos/%s' % (self._REST_API_BASE, video_id),
video_id, 'Downloading video JSON', query={
'vrtPlayerToken': vrtPlayerToken,
'client': 'null',
}, expected_status=400)
if 'title' not in data:
code = data.get('code')
if code == 'AUTHENTICATION_REQUIRED':
self.raise_login_required()
elif code == 'INVALID_LOCATION':
self.raise_geo_restricted(countries=['BE'])
raise ExtractorError(data.get('message') or code, expected=True)
# Note: The title may be an empty string
title = data['title'] or f'{site_id} {video_id}'
description = data.get('description')
formats = []
subtitles = {}
for target in data['targetUrls']:
format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type'))
if not format_url or not format_type:
continue
format_type = format_type.upper()
if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
fmts, subs = self._extract_m3u8_formats_and_subtitles(
format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
m3u8_id=format_type, fatal=False)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats(
format_url, video_id, f4m_id=format_type, fatal=False))
elif format_type == 'MPEG_DASH':
fmts, subs = self._extract_mpd_formats_and_subtitles(
format_url, video_id, mpd_id=format_type, fatal=False)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
elif format_type == 'HSS':
fmts, subs = self._extract_ism_formats_and_subtitles(
format_url, video_id, ism_id='mss', fatal=False)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
else:
formats.append({
'format_id': format_type,
'url': format_url,
})
subtitle_urls = data.get('subtitleUrls')
if isinstance(subtitle_urls, list):
for subtitle in subtitle_urls:
subtitle_url = subtitle.get('url')
if subtitle_url and subtitle.get('type') == 'CLOSED':
subtitles.setdefault('nl', []).append({'url': subtitle_url})
return {
'id': video_id,
'display_id': video_id,
'title': title,
'description': description,
'formats': formats,
'duration': float_or_none(data.get('duration'), 1000),
'thumbnail': data.get('posterImageUrl'),
'subtitles': subtitles,
}
class CanvasEenIE(InfoExtractor):
IE_DESC = 'canvas.be and een.be'
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
'md5': 'ed66976748d12350b118455979cca293',
'info_dict': {
'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
'ext': 'flv',
'title': 'De afspraak veilt voor de Warmste Week',
'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 49.02,
},
'expected_warnings': ['is not a supported codec'],
}, {
# with subtitles
'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
'info_dict': {
'id': 'mz-ast-5240ff21-2d30-4101-bba6-92b5ec67c625',
'display_id': 'pieter-0167',
'ext': 'mp4',
'title': 'Pieter 0167',
'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2553.08,
'subtitles': {
'nl': [{
'ext': 'vtt',
}],
},
},
'params': {
'skip_download': True,
},
'skip': 'Pagina niet gevonden',
}, {
'url': 'https://www.een.be/thuis/emma-pakt-thilly-aan',
'info_dict': {
'id': 'md-ast-3a24ced2-64d7-44fb-b4ed-ed1aafbf90b8',
'display_id': 'emma-pakt-thilly-aan',
'ext': 'mp4',
'title': 'Emma pakt Thilly aan',
'description': 'md5:c5c9b572388a99b2690030afa3f3bad7',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 118.24,
},
'params': {
'skip_download': True,
},
'expected_warnings': ['is not a supported codec'],
}, {
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = self._match_valid_url(url)
site_id, display_id = mobj.group('site_id'), mobj.group('id')
webpage = self._download_webpage(url, display_id)
title = strip_or_none(self._search_regex(
r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
webpage, 'title', default=None) or self._og_search_title(
webpage, default=None))
video_id = self._html_search_regex(
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
group='id')
return {
'_type': 'url_transparent',
'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
'ie_key': CanvasIE.ie_key(),
'id': video_id,
'display_id': display_id,
'title': title,
'description': self._og_search_description(webpage),
}
class VrtNUIE(GigyaBaseIE):
IE_DESC = 'VrtNU.be'
_VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
_TESTS = [{
# Available via old API endpoint
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1989/postbus-x-s1989a1/',
'info_dict': {
'id': 'pbs-pub-e8713dac-899e-41de-9313-81269f4c04ac$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
'ext': 'mp4',
'title': 'Postbus X - Aflevering 1 (Seizoen 1989)',
'description': 'md5:b704f669eb9262da4c55b33d7c6ed4b7',
'duration': 1457.04,
'thumbnail': r're:^https?://.*\.jpg$',
'series': 'Postbus X',
'season': 'Seizoen 1989',
'season_number': 1989,
'episode': 'De zwarte weduwe',
'episode_number': 1,
'timestamp': 1595822400,
'upload_date': '20200727',
},
'skip': 'This video is only available for registered users',
'expected_warnings': ['is not a supported codec'],
}, {
# Only available via new API endpoint
'url': 'https://www.vrt.be/vrtnu/a-z/kamp-waes/1/kamp-waes-s1a5/',
'info_dict': {
'id': 'pbs-pub-0763b56c-64fb-4d38-b95b-af60bf433c71$vid-ad36a73c-4735-4f1f-b2c0-a38e6e6aa7e1',
'ext': 'mp4',
'title': 'Aflevering 5',
'description': 'Wie valt door de mand tijdens een missie?',
'duration': 2967.06,
'season': 'Season 1',
'season_number': 1,
'episode_number': 5,
},
'skip': 'This video is only available for registered users',
'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
}]
_NETRC_MACHINE = 'vrtnu'
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
_CONTEXT_ID = 'R3595707040'
def _perform_login(self, username, password):
auth_info = self._gigya_login({
'APIKey': self._APIKEY,
'targetEnv': 'jssdk',
'loginID': username,
'password': password,
'authMode': 'cookie',
})
if auth_info.get('errorDetails'):
raise ExtractorError('Unable to login: VrtNU said: ' + auth_info.get('errorDetails'), expected=True)
# Sometimes authentication fails for no good reason, retry
login_attempt = 1
while login_attempt <= 3:
try:
self._request_webpage('https://token.vrt.be/vrtnuinitlogin',
None, note='Requesting XSRF Token', errnote='Could not get XSRF Token',
query={'provider': 'site', 'destination': 'https://www.vrt.be/vrtnu/'})
post_data = {
'UID': auth_info['UID'],
'UIDSignature': auth_info['UIDSignature'],
'signatureTimestamp': auth_info['signatureTimestamp'],
'_csrf': self._get_cookies('https://login.vrt.be').get('OIDCXSRF').value,
}
self._request_webpage(
'https://login.vrt.be/perform_login',
None, note='Performing login', errnote='perform login failed',
headers={}, query={
'client_id': 'vrtnu-site'
}, data=urlencode_postdata(post_data))
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
login_attempt += 1
self.report_warning('Authentication failed')
self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
else:
raise e
else:
break
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
attrs = extract_attributes(self._search_regex(
r'(<nui-media[^>]+>)', webpage, 'media element'))
video_id = attrs['videoid']
publication_id = attrs.get('publicationid')
if publication_id:
video_id = publication_id + '$' + video_id
page = (self._parse_json(self._search_regex(
r'digitalData\s*=\s*({.+?});', webpage, 'digial data',
default='{}'), video_id, fatal=False) or {}).get('page') or {}
info = self._search_json_ld(webpage, display_id, default={})
return merge_dicts(info, {
'_type': 'url_transparent',
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
'ie_key': CanvasIE.ie_key(),
'id': video_id,
'display_id': display_id,
'season_number': int_or_none(page.get('episode_season')),
})
class DagelijkseKostIE(InfoExtractor):
IE_DESC = 'dagelijksekost.een.be'
_VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
'md5': '30bfffc323009a3e5f689bef6efa2365',
'info_dict': {
'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
'display_id': 'hachis-parmentier-met-witloof',
'ext': 'mp4',
'title': 'Hachis parmentier met witloof',
'description': 'md5:9960478392d87f63567b5b117688cdc5',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 283.02,
},
'expected_warnings': ['is not a supported codec'],
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
title = strip_or_none(get_element_by_class(
'dish-metadata__title', webpage
) or self._html_search_meta(
'twitter:title', webpage))
description = clean_html(get_element_by_class(
'dish-description', webpage)
) or self._html_search_meta(
('description', 'twitter:description', 'og:description'),
webpage)
video_id = self._html_search_regex(
r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
group='id')
return {
'_type': 'url_transparent',
'url': 'https://mediazone.vrt.be/api/v1/dako/assets/%s' % video_id,
'ie_key': CanvasIE.ie_key(),
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
}

View File

@ -8,14 +8,16 @@ from ..compat import (
    compat_str,
)
from ..utils import (
+    ExtractorError,
    int_or_none,
    join_nonempty,
    js_to_json,
    orderedSet,
+    parse_iso8601,
    smuggle_url,
    strip_or_none,
+    traverse_obj,
    try_get,
-    ExtractorError,
)
@ -159,7 +161,7 @@ class CBCPlayerIE(InfoExtractor):
            'upload_date': '20160210',
            'uploader': 'CBCC-NEW',
        },
-        'skip': 'Geo-restricted to Canada',
+        'skip': 'Geo-restricted to Canada and no longer available',
    }, {
        # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
        'url': 'http://www.cbc.ca/player/play/2657631896',
@ -172,6 +174,9 @@ class CBCPlayerIE(InfoExtractor):
            'timestamp': 1425704400,
            'upload_date': '20150307',
            'uploader': 'CBCC-NEW',
+            'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
+            'chapters': [],
+            'duration': 494.811,
        },
    }, {
        'url': 'http://www.cbc.ca/player/play/2164402062',
@ -184,6 +189,28 @@ class CBCPlayerIE(InfoExtractor):
            'timestamp': 1320410746,
            'upload_date': '20111104',
            'uploader': 'CBCC-NEW',
+            'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
+            'chapters': [],
+            'duration': 186.867,
+        },
+    }, {
+        # Has subtitles
+        # These broadcasts expire after ~1 month, can find new test URL here:
+        # https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
+        'url': 'http://www.cbc.ca/player/play/2249992771553',
+        'md5': '2f2fb675dd4f0f8a5bb7588d1b13bacd',
+        'info_dict': {
+            'id': '2249992771553',
+            'ext': 'mp4',
+            'title': 'The National | Women’s soccer pay, Florida seawater, Swift quake',
+            'description': 'md5:adba28011a56cfa47a080ff198dad27a',
+            'timestamp': 1690596000,
+            'duration': 2716.333,
+            'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
+            'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/481/326/thumbnail.jpeg',
+            'uploader': 'CBCC-NEW',
+            'chapters': 'count:5',
+            'upload_date': '20230729',
        },
    }]
@ -197,12 +224,13 @@ class CBCPlayerIE(InfoExtractor):
                'force_smil_url': True
            }),
            'id': video_id,
+            '_format_sort_fields': ('res', 'proto')  # Prioritize direct http formats over HLS
        }


class CBCGemIE(InfoExtractor):
    IE_NAME = 'gem.cbc.ca'
-    _VALID_URL = r'https?://gem\.cbc\.ca/media/(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'
+    _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'
    _TESTS = [{
        # This is a normal, public, TV show video
        'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
@ -245,6 +273,9 @@ class CBCGemIE(InfoExtractor):
        },
        'params': {'format': 'bv'},
        'skip': 'Geo-restricted to Canada',
+    }, {
+        'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01',
+        'only_matching': True,
    }]

    _GEO_COUNTRIES = ['CA']
@ -346,7 +377,9 @@ class CBCGemIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        video_info = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/assets/' + video_id, video_id)
+        video_info = self._download_json(
+            f'https://services.radio-canada.ca/ott/cbc-api/v2/assets/{video_id}',
+            video_id, expected_status=426)

        email, password = self._get_login_info()
        if email and password:
@ -401,7 +434,7 @@ class CBCGemIE(InfoExtractor):
class CBCGemPlaylistIE(InfoExtractor):
    IE_NAME = 'gem.cbc.ca:playlist'
-    _VALID_URL = r'https?://gem\.cbc\.ca/media/(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
+    _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
    _TESTS = [{
        # TV show playlist, all public videos
        'url': 'https://gem.cbc.ca/media/schitts-creek/s06',
@ -411,6 +444,9 @@ class CBCGemPlaylistIE(InfoExtractor):
        'title': 'Season 6',
        'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
        },
+    }, {
+        'url': 'https://gem.cbc.ca/schitts-creek/s06',
+        'only_matching': True,
    }]

    _API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/'
@ -418,7 +454,7 @@ class CBCGemPlaylistIE(InfoExtractor):
        match = self._match_valid_url(url)
        season_id = match.group('id')
        show = match.group('show')
-        show_info = self._download_json(self._API_BASE + show, season_id)
+        show_info = self._download_json(self._API_BASE + show, season_id, expected_status=426)
        season = int(match.group('season'))

        season_info = next((s for s in show_info['seasons'] if s.get('season') == season), None)
@ -470,49 +506,90 @@ class CBCGemPlaylistIE(InfoExtractor):
class CBCGemLiveIE(InfoExtractor):
    IE_NAME = 'gem.cbc.ca:live'
-    _VALID_URL = r'https?://gem\.cbc\.ca/live/(?P<id>\d+)'
-    _TEST = {
-        'url': 'https://gem.cbc.ca/live/920604739687',
-        'info_dict': {
-            'title': 'Ottawa',
-            'description': 'The live TV channel and local programming from Ottawa',
-            'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
-            'is_live': True,
-            'id': 'AyqZwxRqh8EH',
-            'ext': 'mp4',
-            'timestamp': 1492106160,
-            'upload_date': '20170413',
-            'uploader': 'CBCC-NEW',
-        },
-        'skip': 'Live might have ended',
-    }
-
-    # It's unclear where the chars at the end come from, but they appear to be
-    # constant. Might need updating in the future.
-    # There are two URLs, some livestreams are in one, and some
-    # in the other. The JSON schema is the same for both.
-    _API_URLS = ['https://tpfeed.cbc.ca/f/ExhSPC/t_t3UKJR6MAT', 'https://tpfeed.cbc.ca/f/ExhSPC/FNiv9xQx_BnT']
+    _VALID_URL = r'https?://gem\.cbc\.ca/live(?:-event)?/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'https://gem.cbc.ca/live/920604739687',
+            'info_dict': {
+                'title': 'Ottawa',
+                'description': 'The live TV channel and local programming from Ottawa',
+                'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
+                'is_live': True,
+                'id': 'AyqZwxRqh8EH',
+                'ext': 'mp4',
+                'timestamp': 1492106160,
+                'upload_date': '20170413',
+                'uploader': 'CBCC-NEW',
+            },
+            'skip': 'Live might have ended',
+        },
+        {
+            'url': 'https://gem.cbc.ca/live/44',
+            'info_dict': {
+                'id': '44',
+                'ext': 'mp4',
+                'is_live': True,
+                'title': r're:^Ottawa [0-9\-: ]+',
+                'description': 'The live TV channel and local programming from Ottawa',
+                'live_status': 'is_live',
+                'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*'
+            },
+            'params': {'skip_download': True},
+            'skip': 'Live might have ended',
+        },
+        {
+            'url': 'https://gem.cbc.ca/live-event/10835',
+            'info_dict': {
+                'id': '10835',
+                'ext': 'mp4',
+                'is_live': True,
+                'title': r're:^The National \| Biden’s trip wraps up, Paltrow testifies, Bird flu [0-9\-: ]+',
+                'description': 'March 24, 2023 | President Biden’s Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.',
+                'live_status': 'is_live',
+                'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*',
+                'timestamp': 1679706000,
+                'upload_date': '20230325',
+            },
+            'params': {'skip_download': True},
+            'skip': 'Live might have ended',
+        }
+    ]

    def _real_extract(self, url):
        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data']

-        for api_url in self._API_URLS:
-            video_info = next((
-                stream for stream in self._download_json(api_url, video_id)['entries']
-                if stream.get('guid') == video_id), None)
-            if video_info:
-                break
-        else:
+        # Two types of metadata JSON
+        if not video_info.get('formattedIdMedia'):
+            video_info = traverse_obj(
+                video_info, (('freeTv', ('streams', ...)), 'items', lambda _, v: v['key'] == video_id, {dict}),
+                get_all=False, default={})
+
+        video_stream_id = video_info.get('formattedIdMedia')
+        if not video_stream_id:
            raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)

+        stream_data = self._download_json(
+            'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
+                'appCode': 'mpx',
+                'connectionType': 'hd',
+                'deviceType': 'ipad',
+                'idMedia': video_stream_id,
+                'multibitrate': 'true',
+                'output': 'json',
+                'tech': 'hls',
+                'manifestType': 'desktop',
+            })
+
        return {
-            '_type': 'url_transparent',
-            'ie_key': 'ThePlatform',
-            'url': video_info['content'][0]['url'],
            'id': video_id,
-            'title': video_info.get('title'),
-            'description': video_info.get('description'),
-            'tags': try_get(video_info, lambda x: x['keywords'].split(', ')),
-            'thumbnail': video_info.get('cbc$staticImage'),
+            'formats': self._extract_m3u8_formats(stream_data['url'], video_id, 'mp4', live=True),
            'is_live': True,
+            **traverse_obj(video_info, {
+                'title': 'title',
+                'description': 'description',
+                'thumbnail': ('images', 'card', 'url'),
+                'timestamp': ('airDate', {parse_iso8601}),
+            })
        }
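
The new live flow above resolves playback through Radio-Canada's media validation service; the request can be reproduced standalone. A sketch using the third-party requests library, with the query parameters copied from the code and a hypothetical idMedia:

# Illustrative sketch, not part of the diff (assumes `requests` is installed)
import requests

params = {
    'appCode': 'mpx', 'connectionType': 'hd', 'deviceType': 'ipad',
    'idMedia': '8732908',  # hypothetical formattedIdMedia value
    'multibitrate': 'true', 'output': 'json', 'tech': 'hls', 'manifestType': 'desktop',
}
stream_data = requests.get('https://services.radio-canada.ca/media/validation/v2/', params=params).json()
hls_manifest = stream_data['url']  # fed to _extract_m3u8_formats in the code above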

View File

@ -1,8 +1,14 @@
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
from .theplatform import ThePlatformFeedIE
+from .youtube import YoutubeIE
from ..utils import (
    ExtractorError,
+    extract_attributes,
+    get_element_html_by_id,
    int_or_none,
    find_xpath_attr,
+    smuggle_url,
    xpath_element,
    xpath_text,
    update_url_query,
@ -162,3 +168,110 @@ class CBSIE(CBSBaseIE):
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000), 'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
'thumbnail': url_or_none(xpath_text(video_data, 'previewImageURL')), 'thumbnail': url_or_none(xpath_text(video_data, 'previewImageURL')),
}) })
class ParamountPressExpressIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?paramountpressexpress\.com(?:/[\w-]+)+/(?P<yt>yt-)?video/?\?watch=(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://www.paramountpressexpress.com/cbs-entertainment/shows/survivor/video/?watch=pnzew7e2hx',
'md5': '56631dbcadaab980d1fc47cb7b76cba4',
'info_dict': {
'id': '6322981580112',
'ext': 'mp4',
'title': 'Im Felicia',
'description': 'md5:88fad93f8eede1c9c8f390239e4c6290',
'uploader_id': '6055873637001',
'upload_date': '20230320',
'timestamp': 1679334960,
'duration': 49.557,
'thumbnail': r're:^https://.+\.jpg',
'tags': [],
},
}, {
'url': 'https://www.paramountpressexpress.com/cbs-entertainment/video/?watch=2s5eh8kppc',
'md5': 'edcb03e3210b88a3e56c05aa863e0e5b',
'info_dict': {
'id': '6323036027112',
'ext': 'mp4',
'title': 'Y&R Set Visit: Jerry OConnell Quizzes Cast on Pre-Love Scene Rituals and More',
'description': 'md5:b929867a357aac5544b783d834c78383',
'uploader_id': '6055873637001',
'upload_date': '20230321',
'timestamp': 1679430180,
'duration': 132.032,
'thumbnail': r're:^https://.+\.jpg',
'tags': [],
},
}, {
'url': 'https://www.paramountpressexpress.com/paramount-plus/yt-video/?watch=OX9wJWOcqck',
'info_dict': {
'id': 'OX9wJWOcqck',
'ext': 'mp4',
'title': 'Rugrats | Season 2 Official Trailer | Paramount+',
'description': 'md5:1f7e26f5625a9f0d6564d9ad97a9f7de',
'uploader': 'Paramount Plus',
'uploader_id': '@paramountplus',
'uploader_url': 'http://www.youtube.com/@paramountplus',
'channel': 'Paramount Plus',
'channel_id': 'UCrRttZIypNTA1Mrfwo745Sg',
'channel_url': 'https://www.youtube.com/channel/UCrRttZIypNTA1Mrfwo745Sg',
'upload_date': '20230316',
'duration': 88,
'age_limit': 0,
'availability': 'public',
'live_status': 'not_live',
'playable_in_embed': True,
'view_count': int,
'like_count': int,
'channel_follower_count': int,
'thumbnail': 'https://i.ytimg.com/vi/OX9wJWOcqck/maxresdefault.jpg',
'categories': ['Entertainment'],
'tags': ['Rugrats'],
},
}, {
'url': 'https://www.paramountpressexpress.com/showtime/yt-video/?watch=_ljssSoDLkw',
'info_dict': {
'id': '_ljssSoDLkw',
'ext': 'mp4',
'title': 'Lavell Crawford: THEE Lavell Crawford Comedy Special Official Trailer | SHOWTIME',
'description': 'md5:39581bcc3fd810209b642609f448af70',
'uploader': 'SHOWTIME',
'uploader_id': '@Showtime',
'uploader_url': 'http://www.youtube.com/@Showtime',
'channel': 'SHOWTIME',
'channel_id': 'UCtwMWJr2BFPkuJTnSvCESSQ',
'channel_url': 'https://www.youtube.com/channel/UCtwMWJr2BFPkuJTnSvCESSQ',
'upload_date': '20230209',
'duration': 49,
'age_limit': 0,
'availability': 'public',
'live_status': 'not_live',
'playable_in_embed': True,
'view_count': int,
'like_count': int,
'comment_count': int,
'channel_follower_count': int,
'thumbnail': 'https://i.ytimg.com/vi_webp/_ljssSoDLkw/maxresdefault.webp',
'categories': ['People & Blogs'],
'tags': 'count:27',
},
}]
def _real_extract(self, url):
display_id, is_youtube = self._match_valid_url(url).group('id', 'yt')
if is_youtube:
return self.url_result(display_id, YoutubeIE)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'\bvideo_id\s*=\s*["\'](\d+)["\']\s*,', webpage, 'Brightcove ID')
token = self._search_regex(r'\btoken\s*=\s*["\']([\w.-]+)["\']', webpage, 'token')
player = extract_attributes(get_element_html_by_id('vcbrightcoveplayer', webpage) or '')
account_id = player.get('data-account') or '6055873637001'
player_id = player.get('data-player') or 'OtLKgXlO9F'
embed = player.get('data-embed') or 'default'
return self.url_result(smuggle_url(
f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}',
{'token': token}), BrightcoveNewIE)
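The smuggle_url() call above piggybacks the page's token onto the Brightcove URL so the receiving extractor can recover it. A small round-trip sketch (token value is hypothetical):

from yt_dlp.utils import smuggle_url, unsmuggle_url

url = smuggle_url(
    'https://players.brightcove.net/6055873637001/OtLKgXlO9F_default/index.html?videoId=1',
    {'token': 'hypothetical.jwt.token'})
plain_url, data = unsmuggle_url(url)
assert data == {'token': 'hypothetical.jwt.token'}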

View File

@ -1,116 +0,0 @@
from .anvato import AnvatoIE
from .sendtonews import SendtoNewsIE
from ..compat import compat_urlparse
from ..utils import (
parse_iso8601,
unified_timestamp,
)
class CBSLocalIE(AnvatoIE): # XXX: Do not subclass from concrete IE
_VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
_VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
_TESTS = [{
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
'info_dict': {
'id': '3580809',
'ext': 'mp4',
'title': 'A Very Blue Anniversary',
            'description': 'CBS2’s Cindy Hsu has more.',
'thumbnail': 're:^https?://.*',
'timestamp': int,
'upload_date': r're:^\d{8}$',
'uploader': 'CBS',
'subtitles': {
'en': 'mincount:5',
},
'categories': [
'Stations\\Spoken Word\\WCBSTV',
'Syndication\\AOL',
'Syndication\\MSN',
'Syndication\\NDN',
'Syndication\\Yahoo',
'Content\\News',
'Content\\News\\Local News',
],
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
mcp_id = self._match_id(url)
return self.url_result(
'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
class CBSLocalArticleIE(AnvatoIE): # XXX: Do not subclass from concrete IE
_VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
_TESTS = [{
# Anvato backend
'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
'md5': 'f0ee3081e3843f575fccef901199b212',
'info_dict': {
'id': '3401037',
'ext': 'mp4',
'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
'thumbnail': 're:^https?://.*',
'timestamp': 1463440500,
'upload_date': '20160516',
'uploader': 'CBS',
'subtitles': {
'en': 'mincount:5',
},
'categories': [
'Stations\\Spoken Word\\KCBSTV',
'Syndication\\MSN',
'Syndication\\NDN',
'Syndication\\AOL',
'Syndication\\Yahoo',
'Syndication\\Tribune',
'Syndication\\Curb.tv',
'Content\\News'
],
'tags': ['CBS 2 News Evening'],
},
}, {
# SendtoNews embed
'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
'info_dict': {
'id': 'GxfCe0Zo7D-175909-5588',
},
'playlist_count': 9,
'params': {
# m3u8 download
'skip_download': True,
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
sendtonews_url = SendtoNewsIE._extract_url(webpage)
if sendtonews_url:
return self.url_result(
compat_urlparse.urljoin(url, sendtonews_url),
ie=SendtoNewsIE.ie_key())
info_dict = self._extract_anvato_videos(webpage, display_id)
timestamp = unified_timestamp(self._html_search_regex(
r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage,
'released date', default=None)) or parse_iso8601(
self._html_search_meta('uploadDate', webpage))
info_dict.update({
'display_id': display_id,
'timestamp': timestamp,
})
return info_dict

View File

@ -1,36 +1,153 @@
import base64
import re
import urllib.parse
import zlib

from .anvato import AnvatoIE
from .common import InfoExtractor
from .paramountplus import ParamountPlusIE
from ..networking import HEADRequest
from ..utils import (
    ExtractorError,
    UserNotLive,
    determine_ext,
    float_or_none,
    format_field,
    int_or_none,
    make_archive_id,
    mimetype2ext,
    parse_duration,
    smuggle_url,
    traverse_obj,
    url_or_none,
)


class CBSNewsBaseIE(InfoExtractor):
_LOCALES = {
'atlanta': None,
'baltimore': 'BAL',
'boston': 'BOS',
'chicago': 'CHI',
'colorado': 'DEN',
'detroit': 'DET',
'losangeles': 'LA',
'miami': 'MIA',
'minnesota': 'MIN',
'newyork': 'NY',
'philadelphia': 'PHI',
'pittsburgh': 'PIT',
'sacramento': 'SAC',
'sanfrancisco': 'SF',
'texas': 'DAL',
}
_LOCALE_RE = '|'.join(map(re.escape, _LOCALES))
_ANVACK = '5VD6Eyd6djewbCmNwBFnsJj17YAvGRwl'
def _get_item(self, webpage, display_id):
return traverse_obj(self._search_json(
r'CBSNEWS\.defaultPayload\s*=', webpage, 'payload', display_id,
default={}), ('items', 0, {dict})) or {}
def _get_video_url(self, item):
return traverse_obj(item, 'video', 'video2', expected_type=url_or_none)
def _extract_playlist(self, webpage, playlist_id):
entries = [self.url_result(embed_url, CBSNewsEmbedIE) for embed_url in re.findall(
r'<iframe[^>]+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage)]
if entries:
return self.playlist_result(
entries, playlist_id, self._html_search_meta(['og:title', 'twitter:title'], webpage),
self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage))
def _extract_video(self, item, video_url, video_id):
if mimetype2ext(item.get('format'), default=determine_ext(video_url)) == 'mp4':
formats = [{'url': video_url, 'ext': 'mp4'}]
else:
manifest = self._download_webpage(video_url, video_id, note='Downloading m3u8 information')
anvato_id = self._search_regex(r'anvato-(\d+)', manifest, 'Anvato ID', default=None)
# Prefer Anvato if available; cbsnews.com m3u8 formats are re-encoded from Anvato source
if anvato_id:
return self.url_result(
smuggle_url(f'anvato:{self._ANVACK}:{anvato_id}', {'token': 'default'}),
AnvatoIE, url_transparent=True, _old_archive_ids=[make_archive_id(self, anvato_id)])
formats, _ = self._parse_m3u8_formats_and_subtitles(
manifest, video_url, 'mp4', m3u8_id='hls', video_id=video_id)
def get_subtitles(subs_url):
return {
'en': [{
'url': subs_url,
'ext': 'dfxp', # TTAF1
}],
} if url_or_none(subs_url) else None
episode_meta = traverse_obj(item, {
'season_number': ('season', {int_or_none}),
'episode_number': ('episode', {int_or_none}),
}) if item.get('isFullEpisode') else {}
return {
'id': video_id,
'formats': formats,
**traverse_obj(item, {
'title': (None, ('fulltitle', 'title')),
'description': 'dek',
'timestamp': ('timestamp', {lambda x: float_or_none(x, 1000)}),
'duration': ('duration', {float_or_none}),
'subtitles': ('captions', {get_subtitles}),
'thumbnail': ('images', ('hd', 'sd'), {url_or_none}),
'is_live': ('type', {lambda x: x == 'live'}),
}, get_all=False),
**episode_meta,
}
class CBSNewsEmbedIE(CBSNewsBaseIE):
    IE_NAME = 'cbsnews:embed'
    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P<id>.+)'
    _TESTS = [{
        'url':
'https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi%2F%2FvuJNkNhmHdGxgM2NL57vjd6zt%2B8PngdN%2Fyg79qeGvhzN%2FLGrS%2F%2BuBLB531V28%2B%2BO7Qg7%2Fy97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf%2F89NM9Hi9fXifhpr8sr%2FlP848tn%2BTdXycX25zh4cdX%2FvHl6PmmPqnWQv9w8Ed%2B9GjYRim07bFEqdG%2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k%2By91mjspGsJ%2BwVae4%2BsvdnaO1p73HkXs%2FVisUDTGm7R8IcdnOROeq%2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC%2FdK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd%2FLewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf%2Fhbbpzs0IB3vb8KkyzJQ%2BywOAgCrMpgRrz%2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu%2BBUs9bam%2Fds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4%2B1u867cY5u4344n%2FxXjCqAjucdTHgLKojNKmSfO8KRsOFY%2FzKEYCKEJBzv90QA9nfm9gL%2BHulaFqUkz9ULUYxl62B3U%2FRVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB%2FhKfwCKOzEk%2B4k6rWd4uuT%2FwTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp%2F3GPcXn0VXHdSS029%2Fj99UC%2FwJUvyKQ%2FzKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe%2FQ%2FBT%2BMpf8d5GN02Ox%2FfiGsl7TA7POu1xZ5%2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVviLT8kY9h2CHH6Y%2Bix6609weFtey4ESp60WtyeWMy%2BsmBuhsoKIyuoT%2Bq2R%2FrW5qi3g%2FvzS2j40DoixDP8%2BKP0yUdpXJ4l6Vla%2Bg9vce%2BC4yM5YlUcbA%2F0jLKdpmTwvsdN5z88nAIe08%2F0HgxeG1iv%2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y%2FRa3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk%2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2%2FkOE833sfeG%2Ff5cfHtEFNXgYes0%2FXj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02%2ByXsCTk%2B9zhzswIN04YueNSK%2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690%2F0BJdXtDZ5ITuM4xj3f4oUHGzc5JeJmZKpp%2FjwKh4wMV%2FV1yx3emLoR0MwbG4K%2F%2BZgVep3PnzXGDHZ6a3i%2Fk%2BJrONDN13%2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro%2FE6JfE6d5RydbiR76k%2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL%2BwgboUVtUFRI%2FLemgTpdtF59ii7pab08kuPcfWzs0l%2FRI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl%2FimXI599Ev7fWjoPD78A',
        'info_dict': {
'id': '6ZP4cXvo9FaX3VLH7MF4CgY30JFpY_GA',
'ext': 'mp4',
'title': 'Cops investigate gorilla incident at Cincinnati Zoo',
'description': 'md5:fee7441ab8aaeb3c693482394738102b',
'duration': 350,
'timestamp': 1464719713,
'upload_date': '20160531',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        item = traverse_obj(self._parse_json(zlib.decompress(base64.b64decode(
            urllib.parse.unquote(self._match_id(url))),
            -zlib.MAX_WBITS).decode(), None), ('video', 'items', 0, {dict})) or {}
video_id = item['mpxRefId']
video_url = self._get_video_url(item)
if not video_url:
# Old embeds redirect user to ParamountPlus but most links are 404
pplus_url = f'https://www.paramountplus.com/shows/video/{video_id}'
try:
self._request_webpage(HEADRequest(pplus_url), video_id)
return self.url_result(pplus_url, ParamountPlusIE)
except ExtractorError:
self.raise_no_formats('This video is no longer available', True, video_id)
return self._extract_video(item, video_url, video_id)
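The fragment of the embed URL decoded above is percent-encoded base64 over a raw-DEFLATE-compressed JSON document; the negative wbits value tells zlib to expect no header. A standalone sketch of the same decoding chain:

import base64
import json
import urllib.parse
import zlib

def decode_embed_fragment(fragment):
    # percent-decoding -> base64 -> raw DEFLATE (negative wbits = headerless)
    compressed = base64.b64decode(urllib.parse.unquote(fragment))
    return json.loads(zlib.decompress(compressed, -zlib.MAX_WBITS).decode())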
class CBSNewsIE(CBSNewsBaseIE):
    IE_NAME = 'cbsnews'
    IE_DESC = 'CBS News'
    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P<id>[\w-]+)'
    _TESTS = [
        {
@ -47,10 +164,7 @@ class CBSNewsIE(CBSIE): # XXX: Do not subclass from concrete IE
                'timestamp': 1476046464,
                'upload_date': '20161009',
            },
            'skip': 'This video is no longer available',
        },
        {
            'url': 'https://www.cbsnews.com/video/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
@ -61,48 +175,234 @@ class CBSNewsIE(CBSIE): # XXX: Do not subclass from concrete IE
                'description': 'md5:4a6983e480542d8b333a947bfc64ddc7',
                'upload_date': '20140404',
                'timestamp': 1396650660,
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 205,
                'subtitles': {
                    'en': [{
                        'ext': 'dfxp',
                    }],
                },
            },
            'params': {
                'skip_download': 'm3u8',
            },
        },
        {
            # 48 hours
            'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/',
            'info_dict': {
                'id': 'maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved',
                'title': 'Cold as Ice',
                'description': 'Can a childhood memory solve the 1957 murder of 7-year-old Maria Ridulph?',
            },
            'playlist_mincount': 7,
        },
{
'url': 'https://www.cbsnews.com/video/032823-cbs-evening-news/',
'info_dict': {
'id': '_2wuO7hD9LwtyM_TwSnVwnKp6kxlcXgE',
'ext': 'mp4',
'title': 'CBS Evening News, March 28, 2023',
'description': 'md5:db20615aae54adc1d55a1fd69dc75d13',
'duration': 1189,
'timestamp': 1680042600,
'upload_date': '20230328',
'season': 'Season 2023',
'season_number': 2023,
'episode': 'Episode 83',
'episode_number': 83,
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
'skip_download': 'm3u8',
},
},
    ]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        playlist = self._extract_playlist(webpage, display_id)
        if playlist:
            return playlist

        item = self._get_item(webpage, display_id)
        video_id = item.get('mpxRefId') or display_id
        video_url = self._get_video_url(item)
        if not video_url:
            self.raise_no_formats('No video content was found', expected=True, video_id=video_id)

        return self._extract_video(item, video_url, video_id)
class CBSLocalBaseIE(CBSNewsBaseIE):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
item = self._get_item(webpage, display_id)
video_id = item.get('mpxRefId') or display_id
anvato_id = None
video_url = self._get_video_url(item)
if not video_url:
anv_params = self._search_regex(
r'<iframe[^>]+\bdata-src="https?://w3\.mp\.lura\.live/player/prod/v3/anvload\.html\?key=([^"]+)"',
webpage, 'Anvato URL', default=None)
if not anv_params:
playlist = self._extract_playlist(webpage, display_id)
if playlist:
return playlist
self.raise_no_formats('No video content was found', expected=True, video_id=video_id)
anv_data = self._parse_json(base64.urlsafe_b64decode(f'{anv_params}===').decode(), video_id)
anvato_id = anv_data['v']
return self.url_result(
smuggle_url(f'anvato:{anv_data.get("anvack") or self._ANVACK}:{anvato_id}', {
'token': anv_data.get('token') or 'default',
}), AnvatoIE, url_transparent=True, _old_archive_ids=[make_archive_id(self, anvato_id)])
return self._extract_video(item, video_url, video_id)
class CBSLocalIE(CBSLocalBaseIE):
_VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?:{CBSNewsBaseIE._LOCALE_RE})/(?:live/)?video/(?P<id>[\w-]+)'
_TESTS = [{
# Anvato video via defaultPayload JSON
'url': 'https://www.cbsnews.com/newyork/video/1st-cannabis-dispensary-opens-in-queens/',
'info_dict': {
'id': '6376747',
'ext': 'mp4',
'title': '1st cannabis dispensary opens in Queens',
'description': 'The dispensary is women-owned and located in Jamaica.',
'uploader': 'CBS',
'duration': 20,
'timestamp': 1680193657,
'upload_date': '20230330',
'categories': ['Stations\\Spoken Word\\WCBSTV', 'Content\\Google', 'Content\\News', 'Content\\News\\Local News'],
'tags': 'count:11',
'thumbnail': 're:^https?://.*',
'_old_archive_ids': ['cbslocal 6376747'],
},
'params': {'skip_download': 'm3u8'},
}, {
# cbsnews.com video via defaultPayload JSON
'url': 'https://www.cbsnews.com/newyork/live/video/20230330171655-the-city-is-sounding-the-alarm-on-dangerous-social-media-challenges/',
'info_dict': {
'id': 'sJqfw7YvgSC6ant2zVmzt3y1jYKoL5J3',
'ext': 'mp4',
'title': 'the city is sounding the alarm on dangerous social media challenges',
'description': 'md5:8eccc9b1b73be5138a52e9c4350d2cd6',
'thumbnail': 'https://images-cbsn.cbsnews.com/prod/2023/03/30/story_22509622_1680196925.jpg',
'duration': 41.0,
'timestamp': 1680196615,
'upload_date': '20230330',
},
'params': {'skip_download': 'm3u8'},
}]
class CBSLocalArticleIE(CBSLocalBaseIE):
_VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?:{CBSNewsBaseIE._LOCALE_RE})/news/(?P<id>[\w-]+)'
_TESTS = [{
# Anvato video via iframe embed
'url': 'https://www.cbsnews.com/newyork/news/mta-station-agents-leaving-their-booths-to-provide-more-direct-customer-service/',
'playlist_count': 2,
'info_dict': {
'id': 'mta-station-agents-leaving-their-booths-to-provide-more-direct-customer-service',
'title': 'MTA station agents begin leaving their booths to provide more direct customer service',
'description': 'The more than 2,200 agents will provide face-to-face customer service to passengers.',
},
}, {
'url': 'https://www.cbsnews.com/losangeles/news/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis/',
'md5': 'f0ee3081e3843f575fccef901199b212',
'info_dict': {
'id': '3401037',
'ext': 'mp4',
'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
'thumbnail': 're:^https?://.*',
'timestamp': 1463440500,
'upload_date': '20160516',
},
'skip': 'Video has been removed',
}]
class CBSNewsLiveBaseIE(CBSNewsBaseIE):
def _get_id(self, url):
raise NotImplementedError('This method must be implemented by subclasses')
def _real_extract(self, url):
video_id = self._get_id(url)
if not video_id:
raise ExtractorError('Livestream is not available', expected=True)
data = traverse_obj(self._download_json(
'https://feeds-cbsn.cbsnews.com/2.0/rundown/', video_id, query={
'partner': 'cbsnsite',
'edition': video_id,
'type': 'live',
}), ('navigation', 'data', 0, {dict}))
video_url = traverse_obj(data, (('videoUrlDAI', ('videoUrl', 'base')), {url_or_none}), get_all=False)
if not video_url:
raise UserNotLive(video_id=video_id)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', m3u8_id='hls')
return {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
'is_live': True,
**traverse_obj(data, {
'title': 'headline',
'description': 'rundown_slug',
'thumbnail': ('images', 'thumbnail_url_hd', {url_or_none}),
}),
}
class CBSLocalLiveIE(CBSNewsLiveBaseIE):
_VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?P<id>{CBSNewsBaseIE._LOCALE_RE})/live/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://www.cbsnews.com/losangeles/live/',
'info_dict': {
'id': 'CBSN-LA',
'ext': 'mp4',
'title': str,
'description': r're:KCBS/CBSN_LA.CRISPIN.\w+.RUNDOWN \w+ \w+',
'thumbnail': r're:^https?://.*\.jpg$',
'live_status': 'is_live',
},
'params': {'skip_download': 'm3u8'},
}]
def _get_id(self, url):
return format_field(self._LOCALES, self._match_id(url), 'CBSN-%s')
class CBSNewsLiveIE(CBSNewsLiveBaseIE):
IE_NAME = 'cbsnews:live'
IE_DESC = 'CBS News Livestream'
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://www.cbsnews.com/live/',
'info_dict': {
'id': 'CBSN-US',
'ext': 'mp4',
'title': str,
'description': r're:\w+ \w+ CRISPIN RUNDOWN',
'thumbnail': r're:^https?://.*\.jpg$',
'live_status': 'is_live',
},
'params': {'skip_download': 'm3u8'},
}]
def _get_id(self, url):
return 'CBSN-US'
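Both live classes above resolve an edition ID ('CBSN-US', or 'CBSN-<locale>' via format_field) and fetch its rundown from the same feed endpoint. A rough sketch of that request outside the extractor framework (needs network access, and the feed's continued availability is an assumption):

import json
import urllib.parse
import urllib.request

query = urllib.parse.urlencode({'partner': 'cbsnsite', 'edition': 'CBSN-US', 'type': 'live'})
with urllib.request.urlopen(f'https://feeds-cbsn.cbsnews.com/2.0/rundown/?{query}') as resp:
    data = json.load(resp)['navigation']['data'][0]
# the m3u8 stream URL is then data['videoUrlDAI'], falling back to data['videoUrl']['base']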
class CBSNewsLiveVideoIE(InfoExtractor):
@ -111,7 +411,7 @@ class CBSNewsLiveVideoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[^/?#]+)'

    # Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
    _TESTS = [{
        'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
        'info_dict': {
            'id': 'clinton-sanders-prepare-to-face-off-in-nh',
@ -120,7 +420,7 @@ class CBSNewsLiveVideoIE(InfoExtractor):
            'duration': 334,
        },
        'skip': 'Video gone',
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
@ -131,13 +431,13 @@ class CBSNewsLiveVideoIE(InfoExtractor):
                'dvr_slug': display_id,
            })

        return {
            'id': display_id,
            'display_id': display_id,
            'formats': self._extract_akamai_formats(video_info['url'], display_id),
            **traverse_obj(video_info, {
                'title': 'headline',
                'thumbnail': ('thumbnail_url_hd', {url_or_none}),
                'duration': ('segmentDur', {parse_duration}),
            }),
        }

View File

@ -1,20 +1,20 @@
import re

from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
from ..networking import Request
from ..utils import (
    ExtractorError,
    float_or_none,
    str_or_none,
    traverse_obj,
    urlencode_postdata,
)

USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


class CeskaTelevizeIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
@ -97,7 +97,7 @@ class CeskaTelevizeIE(InfoExtractor):
    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, playlist_id)
        parsed_url = compat_urllib_parse_urlparse(urlh.url)
        site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
        playlist_title = self._og_search_title(webpage, default=None)
        if site_name and playlist_title:
@ -163,16 +163,16 @@ class CeskaTelevizeIE(InfoExtractor):
        entries = []

        for user_agent in (None, USER_AGENTS['Safari']):
            req = Request(
                'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
                data=urlencode_postdata(data))

            req.headers['Content-type'] = 'application/x-www-form-urlencoded'
            req.headers['x-addr'] = '127.0.0.1'
            req.headers['X-Requested-With'] = 'XMLHttpRequest'
            if user_agent:
                req.headers['User-Agent'] = user_agent
            req.headers['Referer'] = url

            playlistpage = self._download_json(req, playlist_id, fatal=False)
@ -183,8 +183,8 @@ class CeskaTelevizeIE(InfoExtractor):
            if playlist_url == 'error_region':
                raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

            req = Request(compat_urllib_parse_unquote(playlist_url))
            req.headers['Referer'] = url

            playlist = self._download_json(req, playlist_id, fatal=False)
            if not playlist:

View File

@ -1,93 +1,123 @@
import base64

from .common import InfoExtractor
from ..utils import (
    clean_html,
    int_or_none,
    traverse_obj,
)


class ChilloutzoneIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w-]+)\.html'
    _TESTS = [{
        'url': 'https://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
        'md5': 'a76f3457e813ea0037e5244f509e66d1',
        'info_dict': {
            'id': 'enemene-meck-alle-katzen-weg',
            'ext': 'mp4',
            'title': 'Enemene Meck - Alle Katzen weg',
            'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
'duration': 24,
        },
    }, {
        'note': 'Video hosted at YouTube',
        'url': 'https://www.chilloutzone.net/video/eine-sekunde-bevor.html',
        'info_dict': {
            'id': '1YVQaAgHyRU',
            'ext': 'mp4',
            'title': '16 Photos Taken 1 Second Before Disaster',
            'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
            'uploader': 'BuzzFeedVideo',
            'uploader_id': '@BuzzFeedVideo',
            'upload_date': '20131105',
'availability': 'public',
'thumbnail': 'https://i.ytimg.com/vi/1YVQaAgHyRU/maxresdefault.jpg',
'tags': 'count:41',
'like_count': int,
'playable_in_embed': True,
'channel_url': 'https://www.youtube.com/channel/UCpko_-a4wgz2u_DgDgd9fqA',
'chapters': 'count:6',
'live_status': 'not_live',
'view_count': int,
'categories': ['Entertainment'],
'age_limit': 0,
'channel_id': 'UCpko_-a4wgz2u_DgDgd9fqA',
'duration': 100,
'uploader_url': 'http://www.youtube.com/@BuzzFeedVideo',
'channel_follower_count': int,
'channel': 'BuzzFeedVideo',
        },
    }, {
        'url': 'https://www.chilloutzone.net/video/icon-blending.html',
        'md5': '2f9d6850ec567b24f0f4fa143b9aa2f9',
        'info_dict': {
            'id': 'LLNkHpSjBfc',
            'ext': 'mp4',
            'title': 'The Sunday Times Making of Icons',
            'description': 'md5:b9259fcf63a1669e42001e5db677f02a',
            'uploader': 'MadFoxUA',
            'uploader_id': '@MadFoxUA',
            'upload_date': '20140204',
'channel_id': 'UCSZa9Y6-Vl7c11kWMcbAfCw',
'channel_url': 'https://www.youtube.com/channel/UCSZa9Y6-Vl7c11kWMcbAfCw',
'comment_count': int,
'uploader_url': 'http://www.youtube.com/@MadFoxUA',
'duration': 66,
'live_status': 'not_live',
'channel_follower_count': int,
'playable_in_embed': True,
'view_count': int,
'like_count': int,
'thumbnail': 'https://i.ytimg.com/vi/LLNkHpSjBfc/maxresdefault.jpg',
'categories': ['Comedy'],
'availability': 'public',
'tags': [],
'channel': 'MadFoxUA',
'age_limit': 0,
},
}, {
'url': 'https://www.chilloutzone.net/video/ordentlich-abgeschuettelt.html',
'info_dict': {
'id': 'ordentlich-abgeschuettelt',
'ext': 'mp4',
'title': 'Ordentlich abgeschüttelt',
'description': 'md5:d41541966b75d3d1e8ea77a94ea0d329',
'duration': 18,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
b64_data = self._html_search_regex(
r'var cozVidData\s*=\s*"([^"]+)"', webpage, 'video data')
info = self._parse_json(base64.b64decode(b64_data).decode(), video_id)
        video_url = info.get('mediaUrl')
        native_platform = info.get('nativePlatform')

        if native_platform and info.get('sourcePriority') == 'native':
            native_video_id = info['nativeVideoId']
            if native_platform == 'youtube':
                return self.url_result(native_video_id, 'Youtube')
            elif native_platform == 'vimeo':
                return self.url_result(f'https://vimeo.com/{native_video_id}', 'Vimeo')

        elif not video_url:
            # Possibly a standard youtube embed?
            # TODO: Investigate if site still does this (there are no tests for it)
            return self.url_result(url, 'Generic')
        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            **traverse_obj(info, {
                'title': 'title',
                'description': ('description', {clean_html}),
                'duration': ('videoLength', {int_or_none}),
                'width': ('videoWidth', {int_or_none}),
                'height': ('videoHeight', {int_or_none}),
            }),
        }

View File

@ -1,6 +1,6 @@
import json

from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    float_or_none,
@ -40,7 +40,7 @@ class CinetecaMilanoIE(InfoExtractor):
                'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or ''
            })
        except ExtractorError as e:
            if ((isinstance(e.cause, HTTPError) and e.cause.status == 500)
                    or isinstance(e.cause, json.JSONDecodeError)):
                self.raise_login_required(method='cookies')
            raise

View File

@ -33,7 +33,7 @@ class CiscoWebexIE(InfoExtractor):
        if rcid:
            webpage = self._download_webpage(url, None, note='Getting video ID')
            url = self._search_regex(self._VALID_URL, webpage, 'redirection url', group='url')
        url = self._request_webpage(url, None, note='Resolving final URL').url
        mobj = self._match_valid_url(url)
        subdomain = mobj.group('subdomain')
        siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2')

View File

@ -0,0 +1,61 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
traverse_obj,
unified_timestamp,
url_or_none,
)
class ClipchampIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU',
'info_dict': {
'id': 'gRXZ4ZhdDaU',
'ext': 'mp4',
'title': 'Untitled video',
'uploader': 'Alexander Schwartz',
'timestamp': 1680805580,
'upload_date': '20230406',
'thumbnail': r're:^https?://.+\.jpg',
},
'params': {'skip_download': 'm3u8'},
}]
_STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
_STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video']
storage_location = data.get('storage_location')
if storage_location != 'cf_stream':
raise ExtractorError(f'Unsupported clip storage location "{storage_location}"')
path = data['download_url']
iframe = self._download_webpage(
f'https://iframe.cloudflarestream.com/{path}', video_id, 'Downloading player iframe')
subdomain = self._search_regex(
r'\bcustomer-domain-prefix=["\']([\w-]+)["\']', iframe,
'subdomain', fatal=False) or 'customer-2ut9yn3y6fta1yxe'
formats = self._extract_mpd_formats(
self._STREAM_URL_TMPL % (subdomain, path, 'mpd'), video_id,
query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash')
formats.extend(self._extract_m3u8_formats(
self._STREAM_URL_TMPL % (subdomain, path, 'm3u8'), video_id, 'mp4',
query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls'))
return {
'id': video_id,
'formats': formats,
'uploader': ' '.join(traverse_obj(data, ('creator', ('first_name', 'last_name'), {str}))) or None,
**traverse_obj(data, {
'title': ('project', 'project_name', {str}),
'timestamp': ('created_at', {unified_timestamp}),
'thumbnail': ('thumbnail_url', {url_or_none}),
}),
}
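The extractor above derives both manifests from one Cloudflare Stream template, so the DASH and HLS formats come from sibling URLs. A sketch of that construction (the subdomain is the extractor's documented fallback; the path value is hypothetical):

STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'

subdomain = 'customer-2ut9yn3y6fta1yxe'  # fallback when the iframe reveals none
path = 'gRXZ4ZhdDaU-example-path'        # hypothetical data['download_url'] value
dash_url = STREAM_URL_TMPL % (subdomain, path, 'mpd')
hls_url = STREAM_URL_TMPL % (subdomain, path, 'm3u8')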

View File

@ -2,7 +2,7 @@ from .mtv import MTVServicesInfoExtractor
class ComedyCentralIE(MTVServicesInfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?|collection-playlist|movies)/(?P<id>[0-9a-z]{6})'
    _FEED_URL = 'http://comedycentral.com/feeds/mrss/'

    _TESTS = [{
@ -25,6 +25,9 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
    }, {
        'url': 'https://www.cc.com/collection-playlist/cosnej/stand-up-specials/t6vtjb',
        'only_matching': True,
}, {
'url': 'https://www.cc.com/movies/tkp406/a-cluesterfuenke-christmas',
'only_matching': True,
    }]

View File

@ -13,6 +13,7 @@ import netrc
import os
import random
import re
import subprocess
import sys
import time
import types
@ -21,9 +22,21 @@ import urllib.request
import xml.etree.ElementTree

from ..compat import functools  # isort: split
from ..compat import (
    compat_etree_fromstring,
    compat_expanduser,
    compat_os_name,
    urllib_req_to_req,
)
from ..cookies import LenientSimpleCookie
from ..downloader.f4m import get_base_url, remove_encrypted_media
from ..downloader.hls import HlsFD
from ..networking import HEADRequest, Request
from ..networking.exceptions import (
HTTPError,
IncompleteRead,
network_exceptions,
)
from ..utils import (
    IDENTITY,
    JSON_LD_RE,
@ -32,8 +45,8 @@ from ..utils import (
    FormatSorter,
    GeoRestrictedError,
    GeoUtils,
    LenientJSONDecoder,
    Popen,
    RegexNotFoundError,
    RetryManager,
    UnsupportedError,
@ -56,7 +69,7 @@ from ..utils import (
    join_nonempty,
    js_to_json,
    mimetype2ext,
    netrc_from_content,
    orderedSet,
    parse_bitrate,
    parse_codecs,
@ -66,7 +79,6 @@ from ..utils import (
    parse_resolution,
    sanitize_filename,
    sanitize_url,
    smuggle_url,
    str_or_none,
    str_to_int,
@ -78,8 +90,6 @@ from ..utils import (
    unescapeHTML,
    unified_strdate,
    unified_timestamp,
    url_basename,
    url_or_none,
    urlhandle_detect_ext,
@ -132,6 +142,7 @@ class InfoExtractor:
                                 is parsed from a string (in case of
                                 fragmented media)
                                 for MSS - URL of the ISM manifest.
                    * request_data  Data to send in POST request to the URL
                    * manifest_url
                                 The URL of the manifest file in case of
                                 fragmented media:
@ -219,7 +230,8 @@ class InfoExtractor:
                                 width : height ratio as float.
                    * no_resume  The server does not support resuming the
                                 (HTTP or RTMP) download. Boolean.
                    * has_drm    True if the format has DRM and cannot be downloaded.
                                 'maybe' if the format may have DRM and has to be tested before download.
                    * extra_param_to_segment_url  A query string to append to each
                                 fragment's URL, or to update each existing query string
                                 with. Only applied by the native HLS/DASH downloaders.
@ -285,6 +297,7 @@ class InfoExtractor:
    channel_id:     Id of the channel.
    channel_url:    Full URL to a channel webpage.
    channel_follower_count: Number of followers of the channel.
    channel_is_verified:  Whether the channel is verified on the platform.
    location:       Physical location where the video was filmed.
    subtitles:      The available subtitles as a dictionary in the format
                    {tag: subformats}. "tag" is usually a language code, and
@ -313,6 +326,11 @@ class InfoExtractor:
                        * "author" - human-readable name of the comment author
                        * "author_id" - user ID of the comment author
                        * "author_thumbnail" - The thumbnail of the comment author
                        * "author_url" - The url to the comment author's page
                        * "author_is_verified" - Whether the author is verified
                          on the platform
                        * "author_is_uploader" - Whether the comment is made by
                          the video uploader
                        * "id" - Comment ID
                        * "html" - Comment as HTML
                        * "text" - Plain text of the comment
@ -324,8 +342,8 @@ class InfoExtractor:
                        * "dislike_count" - Number of negative ratings of the comment
                        * "is_favorited" - Whether the comment is marked as
                          favorite by the video uploader
                        * "is_pinned" - Whether the comment is pinned to
                          the top of the comments
    age_limit:      Age restriction for the video, as an integer (years)
    webpage_url:    The URL to the video webpage, if given to yt-dlp it
                    should allow to get the same result again. (It will be set
@ -349,6 +367,10 @@ class InfoExtractor:
                        * "start_time" - The start time of the chapter in seconds
                        * "end_time" - The end time of the chapter in seconds
                        * "title" (optional, string)
    heatmap:        A list of dictionaries, with the following entries:
                        * "start_time" - The start time of the data point in seconds
                        * "end_time" - The end time of the data point in seconds
                        * "value" - The normalized value of the data point (float between 0 and 1)
    playable_in_embed: Whether this video is allowed to play in embedded
                    players on other sites. Can be True (=always allowed),
                    False (=never allowed), None (=unknown), or a string
@ -460,8 +482,8 @@ class InfoExtractor:
    Subclasses of this should also be added to the list of extractors and
    should define _VALID_URL as a regexp or a Sequence of regexps, and
    re-define the _real_extract() and (optionally) _real_initialize() methods.

    Subclasses may also override suitable() if necessary, but ensure the function
    signature is preserved and that this function imports everything it needs
@ -524,7 +546,7 @@ class InfoExtractor:
    _EMBED_REGEX = []

    def _login_hint(self, method=NO_DEFAULT, netrc=None):
        password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
        return {
            None: '',
            'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
@ -551,8 +573,8 @@ class InfoExtractor:
        # we have cached the regexp for *this* class, whereas getattr would also
        # match the superclass
        if '_VALID_URL_RE' not in cls.__dict__:
            cls._VALID_URL_RE = tuple(map(re.compile, variadic(cls._VALID_URL)))
        return next(filter(None, (regex.match(url) for regex in cls._VALID_URL_RE)), None)

    @classmethod
    def suitable(cls, url):
@ -707,11 +729,11 @@ class InfoExtractor:
        except UnsupportedError:
            raise
        except ExtractorError as e:
            e.video_id = e.video_id or self.get_temp_id(url)
            e.ie = e.ie or self.IE_NAME,
            e.traceback = e.traceback or sys.exc_info()[2]
            raise
        except IncompleteRead as e:
            raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
        except (KeyError, StopIteration) as e:
            raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
@ -770,20 +792,25 @@ class InfoExtractor:
    @staticmethod
    def __can_accept_status_code(err, expected_status):
        assert isinstance(err, HTTPError)
        if expected_status is None:
            return False
        elif callable(expected_status):
            return expected_status(err.status) is True
        else:
            return err.status in variadic(expected_status)

    def _create_request(self, url_or_request, data=None, headers=None, query=None):
        if isinstance(url_or_request, urllib.request.Request):
            self._downloader.deprecation_warning(
                'Passing a urllib.request.Request to _create_request() is deprecated. '
                'Use yt_dlp.networking.common.Request instead.')
            url_or_request = urllib_req_to_req(url_or_request)
        elif not isinstance(url_or_request, Request):
            url_or_request = Request(url_or_request)

        url_or_request.update(data=data, headers=headers, query=query)
        return url_or_request
    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
        """
@ -819,14 +846,9 @@ class InfoExtractor:
        try:
            return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
        except network_exceptions as err:
            if isinstance(err, HTTPError):
                if self.__can_accept_status_code(err, expected_status):
                    return err.response

            if errnote is False:
                return False
@ -958,11 +980,11 @@ class InfoExtractor:
        if prefix is not None:
            webpage_bytes = prefix + webpage_bytes
        if self.get_param('dump_intermediate_pages', False):
            self.to_screen('Dumping request to ' + urlh.url)
            dump = base64.b64encode(webpage_bytes).decode('ascii')
            self._downloader.to_screen(dump)
        if self.get_param('write_pages'):
            filename = self._request_dump_filename(urlh.url, video_id)
            self.to_screen(f'Saving request to {filename}')
            with open(filename, 'wb') as outf:
                outf.write(webpage_bytes)
@ -1020,7 +1042,7 @@ class InfoExtractor:
                          fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
        if self.get_param('load_pages'):
            url_or_request = self._create_request(url_or_request, data, headers, query)
            filename = self._request_dump_filename(url_or_request.url, video_id)
            self.to_screen(f'Loading request from {filename}')
            try:
                with open(filename, 'rb') as dumpf:
@ -1094,7 +1116,7 @@ class InfoExtractor:
        while True:
            try:
                return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
            except IncompleteRead as e:
                try_count += 1
                if try_count >= tries:
                    raise e
@ -1280,45 +1302,48 @@ class InfoExtractor:
        return clean_html(res)

    def _get_netrc_login_info(self, netrc_machine=None):
        netrc_machine = netrc_machine or self._NETRC_MACHINE

        cmd = self.get_param('netrc_cmd')
        if cmd:
            cmd = cmd.replace('{}', netrc_machine)
            self.to_screen(f'Executing command: {cmd}')
            stdout, _, ret = Popen.run(cmd, text=True, shell=True, stdout=subprocess.PIPE)
            if ret != 0:
                raise OSError(f'Command returned error code {ret}')
            info = netrc_from_content(stdout).authenticators(netrc_machine)

        elif self.get_param('usenetrc', False):
            netrc_file = compat_expanduser(self.get_param('netrc_location') or '~')
            if os.path.isdir(netrc_file):
                netrc_file = os.path.join(netrc_file, '.netrc')
            info = netrc.netrc(netrc_file).authenticators(netrc_machine)

        else:
            return None, None
        if not info:
            raise netrc.NetrcParseError(f'No authenticators for {netrc_machine}')
        return info[0], info[2]
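The netrc_cmd branch above treats the command's stdout as netrc-formatted text; netrc_from_content() parses it without touching the filesystem. A sketch with made-up credentials:

from yt_dlp.utils import netrc_from_content

stdout = 'machine example.com login alice password hunter2\n'
info = netrc_from_content(stdout).authenticators('example.com')
assert (info[0], info[2]) == ('alice', 'hunter2')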
    def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
        """
        Get the login info as (username, password)
        First look for the manually specified credentials using username_option
        and password_option as keys in params dictionary. If no such credentials
        are available try the netrc_cmd if it is defined or look in the
        netrc file using the netrc_machine or _NETRC_MACHINE value.
        If there's no info available, return (None, None)
        """

        username = self.get_param(username_option)
        if username is not None:
            password = self.get_param(password_option)
        else:
            try:
                username, password = self._get_netrc_login_info(netrc_machine)
            except (OSError, netrc.NetrcParseError) as err:
                self.report_warning(f'Failed to parse .netrc: {err}')
                return None, None
        return username, password

    def _get_tfa_info(self, note='two-factor verification code'):
@ -1338,7 +1363,7 @@ class InfoExtractor:
# Helper functions for extracting OpenGraph info # Helper functions for extracting OpenGraph info
@staticmethod @staticmethod
def _og_regexes(prop): def _og_regexes(prop):
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))' content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))'
property_re = (r'(?:name|property)=(?:\'og%(sep)s%(prop)s\'|"og%(sep)s%(prop)s"|\s*og%(sep)s%(prop)s\b)' property_re = (r'(?:name|property)=(?:\'og%(sep)s%(prop)s\'|"og%(sep)s%(prop)s"|\s*og%(sep)s%(prop)s\b)'
% {'prop': re.escape(prop), 'sep': '(?:&#x3A;|[:-])'}) % {'prop': re.escape(prop), 'sep': '(?:&#x3A;|[:-])'})
template = r'<meta[^>]+?%s[^>]+?%s' template = r'<meta[^>]+?%s[^>]+?%s'
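# Example of the markup these patterns target (illustrative):
#   <meta property="og:title" content="Some title">
#   <meta name="og:title" content=SomeTitle />
# The new (?=\s|/?>) lookahead forces an unquoted content value to be captured
# in full, up to the next whitespace or the tag's closing '>'/'/>', instead of
# the lazy group stopping after as few characters as possible.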
@ -1788,7 +1813,7 @@ class InfoExtractor:
return [] return []
manifest, urlh = res manifest, urlh = res
manifest_url = urlh.geturl() manifest_url = urlh.url
return self._parse_f4m_formats( return self._parse_f4m_formats(
manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id, manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
@ -1947,7 +1972,7 @@ class InfoExtractor:
return [], {} return [], {}
m3u8_doc, urlh = res m3u8_doc, urlh = res
m3u8_url = urlh.geturl() m3u8_url = urlh.url
return self._parse_m3u8_formats_and_subtitles( return self._parse_m3u8_formats_and_subtitles(
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol, m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
@ -1961,11 +1986,7 @@ class InfoExtractor:
errnote=None, fatal=True, data=None, headers={}, query={}, errnote=None, fatal=True, data=None, headers={}, query={},
video_id=None): video_id=None):
formats, subtitles = [], {} formats, subtitles = [], {}
has_drm = HlsFD._has_drm(m3u8_doc)
has_drm = re.search('|'.join([
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
]), m3u8_doc)
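# The inline pattern above moved into HlsFD._has_drm; it still flags manifest
# lines such as these (illustrative):
#   #EXT-X-FAXS-CM:...                                    <- Adobe Flash Access
#   #EXT-X-SESSION-KEY:METHOD=SAMPLE-AES,URI="skd://..."  <- Apple FairPlay
# and the resulting has_drm flag is attached to every format dict built below.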
def format_url(url): def format_url(url):
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url) return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
@ -2063,6 +2084,7 @@ class InfoExtractor:
'protocol': entry_protocol, 'protocol': entry_protocol,
'preference': preference, 'preference': preference,
'quality': quality, 'quality': quality,
'has_drm': has_drm,
'vcodec': 'none' if media_type == 'AUDIO' else None, 'vcodec': 'none' if media_type == 'AUDIO' else None,
} for idx in _extract_m3u8_playlist_indices(manifest_url)) } for idx in _extract_m3u8_playlist_indices(manifest_url))
@ -2122,6 +2144,7 @@ class InfoExtractor:
'protocol': entry_protocol, 'protocol': entry_protocol,
'preference': preference, 'preference': preference,
'quality': quality, 'quality': quality,
'has_drm': has_drm,
} }
resolution = last_stream_inf.get('RESOLUTION') resolution = last_stream_inf.get('RESOLUTION')
if resolution: if resolution:
@ -2225,18 +2248,10 @@ class InfoExtractor:
if res is False: if res is False:
assert not fatal assert not fatal
return [], {} return [], {}
smil, urlh = res smil, urlh = res
smil_url = urlh.geturl()
namespace = self._parse_smil_namespace(smil) return self._parse_smil_formats_and_subtitles(smil, urlh.url, video_id, f4m_params=f4m_params,
namespace=self._parse_smil_namespace(smil))
fmts = self._parse_smil_formats(
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
subs = self._parse_smil_subtitles(
smil, namespace=namespace)
return fmts, subs
def _extract_smil_formats(self, *args, **kwargs): def _extract_smil_formats(self, *args, **kwargs):
fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs) fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)
@ -2250,7 +2265,7 @@ class InfoExtractor:
return {} return {}
smil, urlh = res smil, urlh = res
smil_url = urlh.geturl() smil_url = urlh.url
return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params) return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
@ -2262,9 +2277,8 @@ class InfoExtractor:
def _parse_smil(self, smil, smil_url, video_id, f4m_params=None): def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
namespace = self._parse_smil_namespace(smil) namespace = self._parse_smil_namespace(smil)
formats = self._parse_smil_formats( formats, subtitles = self._parse_smil_formats_and_subtitles(
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params) smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
subtitles = self._parse_smil_subtitles(smil, namespace=namespace)
video_id = os.path.splitext(url_basename(smil_url))[0] video_id = os.path.splitext(url_basename(smil_url))[0]
title = None title = None
@ -2303,7 +2317,14 @@ class InfoExtractor:
return self._search_regex( return self._search_regex(
r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None) r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): def _parse_smil_formats(self, *args, **kwargs):
fmts, subs = self._parse_smil_formats_and_subtitles(*args, **kwargs)
if subs:
self._report_ignoring_subs('SMIL')
return fmts
def _parse_smil_formats_and_subtitles(
self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
base = smil_url base = smil_url
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)): for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
b = meta.get('base') or meta.get('httpBase') b = meta.get('base') or meta.get('httpBase')
@ -2311,7 +2332,7 @@ class InfoExtractor:
base = b base = b
break break
formats = [] formats, subtitles = [], {}
rtmp_count = 0 rtmp_count = 0
http_count = 0 http_count = 0
m3u8_count = 0 m3u8_count = 0
@ -2359,8 +2380,9 @@ class InfoExtractor:
src_url = src_url.strip() src_url = src_url.strip()
if proto == 'm3u8' or src_ext == 'm3u8': if proto == 'm3u8' or src_ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats( m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False) src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
self._merge_subtitles(m3u8_subs, target=subtitles)
if len(m3u8_formats) == 1: if len(m3u8_formats) == 1:
m3u8_count += 1 m3u8_count += 1
m3u8_formats[0].update({ m3u8_formats[0].update({
@ -2381,11 +2403,15 @@ class InfoExtractor:
f4m_url += urllib.parse.urlencode(f4m_params) f4m_url += urllib.parse.urlencode(f4m_params)
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)) formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
elif src_ext == 'mpd': elif src_ext == 'mpd':
formats.extend(self._extract_mpd_formats( mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(
src_url, video_id, mpd_id='dash', fatal=False)) src_url, video_id, mpd_id='dash', fatal=False)
formats.extend(mpd_formats)
self._merge_subtitles(mpd_subs, target=subtitles)
elif re.search(r'\.ism/[Mm]anifest', src_url): elif re.search(r'\.ism/[Mm]anifest', src_url):
formats.extend(self._extract_ism_formats( ism_formats, ism_subs = self._extract_ism_formats_and_subtitles(
src_url, video_id, ism_id='mss', fatal=False)) src_url, video_id, ism_id='mss', fatal=False)
formats.extend(ism_formats)
self._merge_subtitles(ism_subs, target=subtitles)
elif src_url.startswith('http') and self._is_valid_url(src, video_id): elif src_url.startswith('http') and self._is_valid_url(src, video_id):
http_count += 1 http_count += 1
formats.append({ formats.append({
@ -2416,7 +2442,10 @@ class InfoExtractor:
'format_note': 'SMIL storyboards', 'format_note': 'SMIL storyboards',
}) })
return formats smil_subs = self._parse_smil_subtitles(smil, namespace=namespace)
self._merge_subtitles(smil_subs, target=subtitles)
return formats, subtitles
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
urls = [] urls = []
@ -2442,7 +2471,7 @@ class InfoExtractor:
return [] return []
xspf, urlh = res xspf, urlh = res
xspf_url = urlh.geturl() xspf_url = urlh.url
return self._parse_xspf( return self._parse_xspf(
xspf, playlist_id, xspf_url=xspf_url, xspf, playlist_id, xspf_url=xspf_url,
@ -2513,7 +2542,7 @@ class InfoExtractor:
return [], {} return [], {}
# We could have been redirected to a new url when we retrieved our mpd file. # We could have been redirected to a new url when we retrieved our mpd file.
mpd_url = urlh.geturl() mpd_url = urlh.url
mpd_base_url = base_url(mpd_url) mpd_base_url = base_url(mpd_url)
return self._parse_mpd_formats_and_subtitles( return self._parse_mpd_formats_and_subtitles(
@ -2884,7 +2913,7 @@ class InfoExtractor:
if ism_doc is None: if ism_doc is None:
return [], {} return [], {}
return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id) return self._parse_ism_formats_and_subtitles(ism_doc, urlh.url, ism_id)
def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None): def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
""" """
@ -2980,6 +3009,8 @@ class InfoExtractor:
'protocol': 'ism', 'protocol': 'ism',
'fragments': fragments, 'fragments': fragments,
'has_drm': ism_doc.find('Protection') is not None, 'has_drm': ism_doc.find('Protection') is not None,
'language': stream_language,
'audio_channels': int_or_none(track.get('Channels')),
'_download_params': { '_download_params': {
'stream_type': stream_type, 'stream_type': stream_type,
'duration': duration, 'duration': duration,
@ -3435,7 +3466,7 @@ class InfoExtractor:
def _get_cookies(self, url): def _get_cookies(self, url):
""" Return a http.cookies.SimpleCookie with the cookies for the url """ """ Return a http.cookies.SimpleCookie with the cookies for the url """
return LenientSimpleCookie(self._downloader._calc_cookies(url)) return LenientSimpleCookie(self._downloader.cookiejar.get_cookie_header(url))
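# Call-chain sketch (hypothetical jar contents): for a jar holding
# session=abc123 on example.com, get_cookie_header('https://example.com/')
# returns the string 'session=abc123', which LenientSimpleCookie parses back
# into a SimpleCookie-compatible mapping, preserving this method's return type.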
def _apply_first_set_cookie_header(self, url_handle, cookie): def _apply_first_set_cookie_header(self, url_handle, cookie):
""" """
@ -3510,8 +3541,8 @@ class InfoExtractor:
@classmethod @classmethod
def is_single_video(cls, url): def is_single_video(cls, url):
"""Returns whether the URL is of a single video, None if unknown""" """Returns whether the URL is of a single video, None if unknown"""
assert cls.suitable(url), 'The URL must be suitable for the extractor' if cls.suitable(url):
return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE) return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE)
@classmethod @classmethod
def is_suitable(cls, age_limit): def is_suitable(cls, age_limit):
@ -3524,7 +3555,7 @@ class InfoExtractor:
desc = '' desc = ''
if cls._NETRC_MACHINE: if cls._NETRC_MACHINE:
if markdown: if markdown:
desc += f' [<abbr title="netrc machine"><em>{cls._NETRC_MACHINE}</em></abbr>]' desc += f' [*{cls._NETRC_MACHINE}*](## "netrc machine")'
else: else:
desc += f' [{cls._NETRC_MACHINE}]' desc += f' [{cls._NETRC_MACHINE}]'
if cls.IE_DESC is False: if cls.IE_DESC is False:
@ -3646,6 +3677,42 @@ class InfoExtractor:
or urllib.parse.unquote(os.path.splitext(url_basename(url))[0]) or urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
or default) or default)
def _extract_chapters_helper(self, chapter_list, start_function, title_function, duration, strict=True):
if not duration:
return
chapter_list = [{
'start_time': start_function(chapter),
'title': title_function(chapter),
} for chapter in chapter_list or []]
if strict:
warn = self.report_warning
else:
warn = self.write_debug
chapter_list.sort(key=lambda c: c['start_time'] or 0)
chapters = [{'start_time': 0}]
for idx, chapter in enumerate(chapter_list):
if chapter['start_time'] is None:
warn(f'Incomplete chapter {idx}')
elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
chapters.append(chapter)
elif chapter not in chapters:
issue = (f'{chapter["start_time"]} > {duration}' if chapter['start_time'] > duration
else f'{chapter["start_time"]} < {chapters[-1]["start_time"]}')
warn(f'Invalid start time ({issue}) for chapter "{chapter["title"]}"')
return chapters[1:]
def _extract_chapters_from_description(self, description, duration):
duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
return self._extract_chapters_helper(
re.findall(sep_re % (duration_re, r'.+?'), description or ''),
start_function=lambda x: parse_duration(x[0]), title_function=lambda x: x[1],
duration=duration, strict=False) or self._extract_chapters_helper(
re.findall(sep_re % (r'.+?', duration_re), description or ''),
start_function=lambda x: parse_duration(x[1]), title_function=lambda x: x[0],
duration=duration, strict=False)
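# Worked example for a hypothetical description:
#   00:00 Intro
#   01:23 Main topic
#   12:45 Outro
# With duration=900, the first findall() yields [('00:00', 'Intro'), ...] and
# _extract_chapters_helper() returns:
#   [{'start_time': 0.0, 'title': 'Intro'},
#    {'start_time': 83.0, 'title': 'Main topic'},
#    {'start_time': 765.0, 'title': 'Outro'}]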
@staticmethod @staticmethod
def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None): def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
all_known = all(map( all_known = all(map(
@ -4,7 +4,7 @@ import re
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
float_or_none, float_or_none,
@ -113,7 +113,7 @@ class CrackleIE(InfoExtractor):
errnote='Unable to download media JSON') errnote='Unable to download media JSON')
except ExtractorError as e: except ExtractorError as e:
# 401 means geo restriction, trying next country # 401 means geo restriction, trying next country
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: if isinstance(e.cause, HTTPError) and e.cause.status == 401:
continue continue
raise raise
@ -0,0 +1,34 @@
from .common import InfoExtractor
from ..utils import remove_end
class CrtvgIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?crtvg\.es/tvg/a-carta/[^/#?]+-(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.crtvg.es/tvg/a-carta/os-caimans-do-tea-5839623',
'md5': 'c0958d9ff90e4503a75544358758921d',
'info_dict': {
'id': '5839623',
'title': 'Os caimáns do Tea',
'ext': 'mp4',
'description': 'md5:f71cfba21ae564f0a6f415b31de1f842',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
},
'params': {'skip_download': 'm3u8'}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(r'var\s+url\s*=\s*["\']([^"\']+)', webpage, 'video url')
formats = self._extract_m3u8_formats(video_url + '/playlist.m3u8', video_id, fatal=False)
formats.extend(self._extract_mpd_formats(video_url + '/manifest.mpd', video_id, fatal=False))
return {
'id': video_id,
'formats': formats,
'title': remove_end(self._html_search_meta(
['og:title', 'twitter:title'], webpage, 'title', default=None), ' | CRTVG'),
'description': self._html_search_meta('description', webpage, 'description', default=None),
'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage, 'thumbnail', default=None),
}
@ -1,28 +1,50 @@
import base64 import base64
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
format_field, format_field,
int_or_none,
join_nonempty, join_nonempty,
parse_age_limit,
parse_count,
parse_iso8601, parse_iso8601,
qualities, qualities,
remove_start,
time_seconds,
traverse_obj, traverse_obj,
try_get, url_or_none,
urlencode_postdata,
) )
class CrunchyrollBaseIE(InfoExtractor): class CrunchyrollBaseIE(InfoExtractor):
_LOGIN_URL = 'https://www.crunchyroll.com/welcome/login' _BASE_URL = 'https://www.crunchyroll.com'
_API_BASE = 'https://api.crunchyroll.com' _API_BASE = 'https://api.crunchyroll.com'
_NETRC_MACHINE = 'crunchyroll' _NETRC_MACHINE = 'crunchyroll'
params = None _AUTH_HEADERS = None
_API_ENDPOINT = None
_BASIC_AUTH = None
_CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
_LOCALE_LOOKUP = {
'ar': 'ar-SA',
'de': 'de-DE',
'': 'en-US',
'es': 'es-419',
'es-es': 'es-ES',
'fr': 'fr-FR',
'it': 'it-IT',
'pt-br': 'pt-BR',
'pt-pt': 'pt-PT',
'ru': 'ru-RU',
'hi': 'hi-IN',
}
@property @property
def is_logged_in(self): def is_logged_in(self):
return self._get_cookies(self._LOGIN_URL).get('etp_rt') return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
def _perform_login(self, username, password): def _perform_login(self, username, password):
if self.is_logged_in: if self.is_logged_in:
@ -35,7 +57,7 @@ class CrunchyrollBaseIE(InfoExtractor):
'device_id': 'whatvalueshouldbeforweb', 'device_id': 'whatvalueshouldbeforweb',
'device_type': 'com.crunchyroll.static', 'device_type': 'com.crunchyroll.static',
'access_token': 'giKq5eY27ny3cqz', 'access_token': 'giKq5eY27ny3cqz',
'referer': self._LOGIN_URL 'referer': f'{self._BASE_URL}/welcome/login'
}) })
if upsell_response['code'] != 'ok': if upsell_response['code'] != 'ok':
raise ExtractorError('Could not get session id') raise ExtractorError('Could not get session id')
@ -43,149 +65,89 @@ class CrunchyrollBaseIE(InfoExtractor):
login_response = self._download_json( login_response = self._download_json(
f'{self._API_BASE}/login.1.json', None, 'Logging in', f'{self._API_BASE}/login.1.json', None, 'Logging in',
data=urllib.parse.urlencode({ data=urlencode_postdata({
'account': username, 'account': username,
'password': password, 'password': password,
'session_id': session_id 'session_id': session_id
}).encode('ascii')) }))
if login_response['code'] != 'ok': if login_response['code'] != 'ok':
raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True) raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
if not self.is_logged_in: if not self.is_logged_in:
raise ExtractorError('Login succeeded but did not set etp_rt cookie') raise ExtractorError('Login succeeded but did not set etp_rt cookie')
def _get_embedded_json(self, webpage, display_id): def _update_auth(self):
initial_state = self._parse_json(self._search_regex( if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), display_id) return
app_config = self._parse_json(self._search_regex(
r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), display_id)
return initial_state, app_config
def _get_params(self, lang): if not CrunchyrollBaseIE._BASIC_AUTH:
if not CrunchyrollBaseIE.params: cx_api_param = self._CLIENT_ID[self.is_logged_in]
if self._get_cookies(f'https://www.crunchyroll.com/{lang}').get('etp_rt'): self.write_debug(f'Using cxApiParam={cx_api_param}')
grant_type, key = 'etp_rt_cookie', 'accountAuthClientId' CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
else:
grant_type, key = 'client_id', 'anonClientId'
initial_state, app_config = self._get_embedded_json(self._download_webpage(
f'https://www.crunchyroll.com/{lang}', None, note='Retrieving main page'), None)
api_domain = app_config['cxApiParams']['apiDomain'].replace('beta.crunchyroll.com', 'www.crunchyroll.com')
grant_type = 'etp_rt_cookie' if self.is_logged_in else 'client_id'
try:
auth_response = self._download_json( auth_response = self._download_json(
f'{api_domain}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}', f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
headers={ headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
'Authorization': 'Basic ' + str(base64.b64encode(('%s:' % app_config['cxApiParams'][key]).encode('ascii')), 'ascii') except ExtractorError as error:
}, data=f'grant_type={grant_type}'.encode('ascii')) if isinstance(error.cause, HTTPError) and error.cause.status == 403:
policy_response = self._download_json( raise ExtractorError(
f'{api_domain}/index/v2', None, note='Retrieving signed policy', 'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
headers={ 'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token'] 'and your browser\'s User-Agent (with --user-agent)', expected=True)
}) raise
cms = policy_response.get('cms_web')
bucket = cms['bucket']
params = {
'Policy': cms['policy'],
'Signature': cms['signature'],
'Key-Pair-Id': cms['key_pair_id']
}
locale = traverse_obj(initial_state, ('localization', 'locale'))
if locale:
params['locale'] = locale
CrunchyrollBaseIE.params = (api_domain, bucket, params)
return CrunchyrollBaseIE.params
CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
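# Timing sketch: for an auth response reporting expires_in=3600, _AUTH_REFRESH
# lands ~10 seconds before expiry (time_seconds(seconds=3590)), so the next
# _update_auth() call re-authenticates just before the token lapses; if
# expires_in is missing, a conservative 300s default (minus the same margin)
# is used.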
class CrunchyrollBetaIE(CrunchyrollBaseIE): def _locale_from_language(self, language):
IE_NAME = 'crunchyroll' config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
_VALID_URL = r'''(?x) return config_locale[0] if config_locale else self._LOCALE_LOOKUP.get(language)
https?://(?:beta|www)\.crunchyroll\.com/
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
watch/(?P<id>\w+)
(?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
_TESTS = [{
'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
'info_dict': {
'id': 'GY2P1Q98Y',
'ext': 'mp4',
'duration': 1380.241,
'timestamp': 1459632600,
'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
'title': 'World Trigger Episode 73 To the Future',
'upload_date': '20160402',
'series': 'World Trigger',
'series_id': 'GR757DMKY',
'season': 'World Trigger',
'season_id': 'GR9P39NJ6',
'season_number': 1,
'episode': 'To the Future',
'episode_number': 73,
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
'chapters': 'count:2',
},
'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
}, {
'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
'info_dict': {
'id': 'GYE5WKQGR',
'ext': 'mp4',
'duration': 366.459,
'timestamp': 1476788400,
'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
'title': 'SHELTER Episode Porter Robinson presents Shelter the Animation',
'upload_date': '20161018',
'series': 'SHELTER',
'series_id': 'GYGG09WWY',
'season': 'SHELTER',
'season_id': 'GR09MGK4R',
'season_number': 1,
'episode': 'Porter Robinson presents Shelter the Animation',
'episode_number': 0,
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
'chapters': 'count:0',
},
'params': {'skip_download': True},
'skip': 'Video is Premium only',
}, {
'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
'only_matching': True,
}, {
'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
'only_matching': True,
}]
def _real_extract(self, url): def _call_base_api(self, endpoint, internal_id, lang, note=None, query={}):
lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id') self._update_auth()
api_domain, bucket, params = self._get_params(lang)
episode_response = self._download_json( if not endpoint.startswith('/'):
f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id, endpoint = f'/{endpoint}'
note='Retrieving episode metadata', query=params)
if episode_response.get('is_premium_only') and not bucket.endswith('crunchyroll'):
if self.is_logged_in:
raise ExtractorError('This video is for premium members only', expected=True)
else:
self.raise_login_required('This video is for premium members only')
stream_response = self._download_json( query = query.copy()
f'{api_domain}{episode_response["__links__"]["streams"]["href"]}', display_id, locale = self._locale_from_language(lang)
note='Retrieving stream info', query=params) if locale:
get_streams = lambda name: (traverse_obj(stream_response, name) or {}).items() query['locale'] = locale
requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])] return self._download_json(
hardsub_preference = qualities(requested_hardsubs[::-1]) f'{self._BASE_URL}{endpoint}', internal_id, note or f'Calling API: {endpoint}',
headers=CrunchyrollBaseIE._AUTH_HEADERS, query=query)
def _call_api(self, path, internal_id, lang, note='api', query={}):
if not path.startswith(f'/content/v2/{self._API_ENDPOINT}/'):
path = f'/content/v2/{self._API_ENDPOINT}/{path}'
try:
result = self._call_base_api(
path, internal_id, lang, f'Downloading {note} JSON ({self._API_ENDPOINT})', query=query)
except ExtractorError as error:
if isinstance(error.cause, HTTPError) and error.cause.status == 404:
return None
raise
if not result:
raise ExtractorError(f'Unexpected response when downloading {note} JSON')
return result
def _extract_formats(self, stream_response, display_id=None):
requested_formats = self._configuration_arg('format') or ['adaptive_hls'] requested_formats = self._configuration_arg('format') or ['adaptive_hls']
available_formats = {} available_formats = {}
for stream_type, streams in get_streams('streams'): for stream_type, streams in traverse_obj(
stream_response, (('streams', ('data', 0)), {dict.items}, ...)):
if stream_type not in requested_formats: if stream_type not in requested_formats:
continue continue
for stream in streams.values(): for stream in traverse_obj(streams, lambda _, v: v['url']):
if not stream.get('url'):
continue
hardsub_lang = stream.get('hardsub_locale') or '' hardsub_lang = stream.get('hardsub_locale') or ''
format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s')) format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
available_formats[hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url']) available_formats[hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url'])
requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
if '' in available_formats and 'all' not in requested_hardsubs: if '' in available_formats and 'all' not in requested_hardsubs:
full_format_langs = set(requested_hardsubs) full_format_langs = set(requested_hardsubs)
self.to_screen( self.to_screen(
@ -196,6 +158,8 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE):
else: else:
full_format_langs = set(map(str.lower, available_formats)) full_format_langs = set(map(str.lower, available_formats))
audio_locale = traverse_obj(stream_response, ((None, 'meta'), 'audio_locale'), get_all=False)
hardsub_preference = qualities(requested_hardsubs[::-1])
formats = [] formats = []
for stream_type, format_id, hardsub_lang, stream_url in available_formats.values(): for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
if stream_type.endswith('hls'): if stream_type.endswith('hls'):
@ -214,63 +178,292 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE):
continue continue
for f in adaptive_formats: for f in adaptive_formats:
if f.get('acodec') != 'none': if f.get('acodec') != 'none':
f['language'] = stream_response.get('audio_locale') f['language'] = audio_locale
f['quality'] = hardsub_preference(hardsub_lang.lower()) f['quality'] = hardsub_preference(hardsub_lang.lower())
formats.extend(adaptive_formats) formats.extend(adaptive_formats)
chapters = None return formats
def _extract_subtitles(self, data):
subtitles = {}
for locale, subtitle in traverse_obj(data, ((None, 'meta'), 'subtitles', {dict.items}, ...)):
subtitles[locale] = [traverse_obj(subtitle, {'url': 'url', 'ext': 'format'})]
return subtitles
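# Shape sketch for the traversal above (hypothetical payload):
#   {'meta': {'subtitles': {'en-US': {'url': 'https://...', 'format': 'ass'}}}}
# becomes {'en-US': [{'url': 'https://...', 'ext': 'ass'}]}.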
class CrunchyrollCmsBaseIE(CrunchyrollBaseIE):
_API_ENDPOINT = 'cms'
_CMS_EXPIRY = None
def _call_cms_api_signed(self, path, internal_id, lang, note='api'):
if not CrunchyrollCmsBaseIE._CMS_EXPIRY or CrunchyrollCmsBaseIE._CMS_EXPIRY <= time_seconds():
response = self._call_base_api('index/v2', None, lang, 'Retrieving signed policy')['cms_web']
CrunchyrollCmsBaseIE._CMS_QUERY = {
'Policy': response['policy'],
'Signature': response['signature'],
'Key-Pair-Id': response['key_pair_id'],
}
CrunchyrollCmsBaseIE._CMS_BUCKET = response['bucket']
CrunchyrollCmsBaseIE._CMS_EXPIRY = parse_iso8601(response['expires']) - 10
if not path.startswith('/cms/v2'):
path = f'/cms/v2{CrunchyrollCmsBaseIE._CMS_BUCKET}/{path}'
return self._call_base_api(
path, internal_id, lang, f'Downloading {note} JSON (signed cms)', query=CrunchyrollCmsBaseIE._CMS_QUERY)
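# Resulting request shape (bucket and ids are hypothetical):
#   GET /cms/v2/US/M3/crunchyroll/episodes?season_id=GR9P39NJ6
#       &Policy=...&Signature=...&Key-Pair-Id=...
# The signed query and bucket are cached class-wide and refreshed roughly 10
# seconds before the policy's reported expiry.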
class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
IE_NAME = 'crunchyroll'
_VALID_URL = r'''(?x)
https?://(?:beta\.|www\.)?crunchyroll\.com/
(?:(?P<lang>\w{2}(?:-\w{2})?)/)?
watch/(?!concert|musicvideo)(?P<id>\w+)'''
_TESTS = [{
# Premium only
'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
'info_dict': {
'id': 'GY2P1Q98Y',
'ext': 'mp4',
'duration': 1380.241,
'timestamp': 1459632600,
'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
'title': 'World Trigger Episode 73 To the Future',
'upload_date': '20160402',
'series': 'World Trigger',
'series_id': 'GR757DMKY',
'season': 'World Trigger',
'season_id': 'GR9P39NJ6',
'season_number': 1,
'episode': 'To the Future',
'episode_number': 73,
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'chapters': 'count:2',
'age_limit': 14,
'like_count': int,
'dislike_count': int,
},
'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
}, {
# Premium only
'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
'info_dict': {
'id': 'GYE5WKQGR',
'ext': 'mp4',
'duration': 366.459,
'timestamp': 1476788400,
'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
'title': 'SHELTER Porter Robinson presents Shelter the Animation',
'upload_date': '20161018',
'series': 'SHELTER',
'series_id': 'GYGG09WWY',
'season': 'SHELTER',
'season_id': 'GR09MGK4R',
'season_number': 1,
'episode': 'Porter Robinson presents Shelter the Animation',
'episode_number': 0,
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'age_limit': 14,
'like_count': int,
'dislike_count': int,
},
'params': {'skip_download': True},
}, {
'url': 'https://www.crunchyroll.com/watch/GJWU2VKK3/cherry-blossom-meeting-and-a-coming-blizzard',
'info_dict': {
'id': 'GJWU2VKK3',
'ext': 'mp4',
'duration': 1420.054,
'description': 'md5:2d1c67c0ec6ae514d9c30b0b99a625cd',
'title': 'The Ice Guy and His Cool Female Colleague Episode 1 Cherry Blossom Meeting and a Coming Blizzard',
'series': 'The Ice Guy and His Cool Female Colleague',
'series_id': 'GW4HM75NP',
'season': 'The Ice Guy and His Cool Female Colleague',
'season_id': 'GY9PC21VE',
'season_number': 1,
'episode': 'Cherry Blossom Meeting and a Coming Blizzard',
'episode_number': 1,
'chapters': 'count:2',
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'timestamp': 1672839000,
'upload_date': '20230104',
'age_limit': 14,
'like_count': int,
'dislike_count': int,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.crunchyroll.com/watch/GM8F313NQ',
'info_dict': {
'id': 'GM8F313NQ',
'ext': 'mp4',
'title': 'Garakowa -Restore the World-',
'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608',
'duration': 3996.104,
'age_limit': 13,
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.crunchyroll.com/watch/G62PEZ2E6',
'info_dict': {
'id': 'G62PEZ2E6',
'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608',
'age_limit': 13,
'duration': 65.138,
'title': 'Garakowa -Restore the World-',
},
'playlist_mincount': 5,
}, {
'url': 'https://www.crunchyroll.com/de/watch/GY2P1Q98Y',
'only_matching': True,
}, {
'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
'only_matching': True,
}]
# We want to support lazy playlist filtering, and movie listings cannot be inside a playlist
_RETURN_TYPE = 'video'
def _real_extract(self, url):
lang, internal_id = self._match_valid_url(url).group('lang', 'id')
# We need to use the unsigned API call to allow the ratings query string
response = traverse_obj(self._call_api(
f'objects/{internal_id}', internal_id, lang, 'object info', {'ratings': 'true'}), ('data', 0, {dict}))
if not response:
raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)
object_type = response.get('type')
if object_type == 'episode':
result = self._transform_episode_response(response)
elif object_type == 'movie':
result = self._transform_movie_response(response)
elif object_type == 'movie_listing':
first_movie_id = traverse_obj(response, ('movie_listing_metadata', 'first_movie_id'))
if not self._yes_playlist(internal_id, first_movie_id):
return self.url_result(f'{self._BASE_URL}/{lang}watch/{first_movie_id}', CrunchyrollBetaIE, first_movie_id)
def entries():
movies = self._call_api(f'movie_listings/{internal_id}/movies', internal_id, lang, 'movie list')
for movie_response in traverse_obj(movies, ('data', ...)):
yield self.url_result(
f'{self._BASE_URL}/{lang}watch/{movie_response["id"]}',
CrunchyrollBetaIE, **self._transform_movie_response(movie_response))
return self.playlist_result(entries(), **self._transform_movie_response(response))
else:
raise ExtractorError(f'Unknown object type {object_type}')
# There might be multiple audio languages for one object (`<object>_metadata.versions`),
# so we need to get the id from `streams_link` instead, or we don't know which language to choose
streams_link = response.get('streams_link')
if not streams_link and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
message = f'This {object_type} is for premium members only'
if self.is_logged_in:
raise ExtractorError(message, expected=True)
self.raise_login_required(message)
# We need to go from the unsigned to the signed API to avoid getting soft-banned
stream_response = self._call_cms_api_signed(remove_start(
streams_link, '/content/v2/cms/'), internal_id, lang, 'stream info')
result['formats'] = self._extract_formats(stream_response, internal_id)
result['subtitles'] = self._extract_subtitles(stream_response)
# if no intro chapter is available, a 403 without usable data is returned # if no intro chapter is available, a 403 without usable data is returned
intro_chapter = self._download_json(f'https://static.crunchyroll.com/datalab-intro-v2/{internal_id}.json', intro_chapter = self._download_json(
display_id, fatal=False, errnote=False) f'https://static.crunchyroll.com/datalab-intro-v2/{internal_id}.json',
internal_id, note='Downloading chapter info', fatal=False, errnote=False)
if isinstance(intro_chapter, dict): if isinstance(intro_chapter, dict):
chapters = [{ result['chapters'] = [{
'title': 'Intro', 'title': 'Intro',
'start_time': float_or_none(intro_chapter.get('startTime')), 'start_time': float_or_none(intro_chapter.get('startTime')),
'end_time': float_or_none(intro_chapter.get('endTime')) 'end_time': float_or_none(intro_chapter.get('endTime')),
}] }]
def calculate_count(item):
return parse_count(''.join((item['displayed'], item.get('unit') or '')))
result.update(traverse_obj(response, ('rating', {
'like_count': ('up', {calculate_count}),
'dislike_count': ('down', {calculate_count}),
})))
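# e.g. a rating payload of {'up': {'displayed': '5.2', 'unit': 'K'},
# 'down': {'displayed': '87'}} yields like_count=5200 via parse_count('5.2K')
# and dislike_count=87.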
return result
@staticmethod
def _transform_episode_response(data):
metadata = traverse_obj(data, (('episode_metadata', None), {dict}), get_all=False) or {}
return { return {
'id': internal_id, 'id': data['id'],
'title': '%s Episode %s %s' % ( 'title': ' \u2013 '.join((
episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')), ('%s%s' % (
'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')), format_field(metadata, 'season_title'),
'duration': float_or_none(episode_response.get('duration_ms'), 1000), format_field(metadata, 'episode', ' Episode %s'))),
'timestamp': parse_iso8601(episode_response.get('upload_date')), format_field(data, 'title'))),
'series': episode_response.get('series_title'), **traverse_obj(data, {
'series_id': episode_response.get('series_id'), 'episode': ('title', {str}),
'season': episode_response.get('season_title'), 'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
'season_id': episode_response.get('season_id'), 'thumbnails': ('images', 'thumbnail', ..., ..., {
'season_number': episode_response.get('season_number'), 'url': ('source', {url_or_none}),
'episode': episode_response.get('title'), 'width': ('width', {int_or_none}),
'episode_number': episode_response.get('sequence_number'), 'height': ('height', {int_or_none}),
'formats': formats, }),
'thumbnails': [{ }),
'url': thumb.get('source'), **traverse_obj(metadata, {
'width': thumb.get('width'), 'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
'height': thumb.get('height'), 'timestamp': ('upload_date', {parse_iso8601}),
} for thumb in traverse_obj(episode_response, ('images', 'thumbnail', ..., ...)) or []], 'series': ('series_title', {str}),
'subtitles': { 'series_id': ('series_id', {str}),
lang: [{ 'season': ('season_title', {str}),
'url': subtitle_data.get('url'), 'season_id': ('season_id', {str}),
'ext': subtitle_data.get('format') 'season_number': ('season_number', ({int}, {float_or_none})),
}] for lang, subtitle_data in get_streams('subtitles') 'episode_number': ('sequence_number', ({int}, {float_or_none})),
}, 'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
'chapters': chapters 'language': ('audio_locale', {str}),
}, get_all=False),
}
@staticmethod
def _transform_movie_response(data):
metadata = traverse_obj(data, (('movie_metadata', 'movie_listing_metadata', None), {dict}), get_all=False) or {}
return {
'id': data['id'],
**traverse_obj(data, {
'title': ('title', {str}),
'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
'thumbnails': ('images', 'thumbnail', ..., ..., {
'url': ('source', {url_or_none}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
}),
}),
**traverse_obj(metadata, {
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
}),
} }
class CrunchyrollBetaShowIE(CrunchyrollBaseIE): class CrunchyrollBetaShowIE(CrunchyrollCmsBaseIE):
IE_NAME = 'crunchyroll:playlist' IE_NAME = 'crunchyroll:playlist'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?://(?:beta|www)\.crunchyroll\.com/ https?://(?:beta\.|www\.)?crunchyroll\.com/
(?P<lang>(?:\w{2}(?:-\w{2})?/)?) (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
series/(?P<id>\w+) series/(?P<id>\w+)'''
(?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA', 'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
'info_dict': { 'info_dict': {
'id': 'GY19NQ2QR', 'id': 'GY19NQ2QR',
'title': 'Girl Friend BETA', 'title': 'Girl Friend BETA',
'description': 'md5:99c1b22ee30a74b536a8277ced8eb750',
# XXX: `thumbnail` does not get set from `thumbnails` in playlist
# 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'age_limit': 14,
}, },
'playlist_mincount': 10, 'playlist_mincount': 10,
}, { }, {
@ -279,41 +472,179 @@ class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id') lang, internal_id = self._match_valid_url(url).group('lang', 'id')
api_domain, bucket, params = self._get_params(lang)
series_response = self._download_json(
f'{api_domain}/cms/v2{bucket}/series/{internal_id}', display_id,
note='Retrieving series metadata', query=params)
seasons_response = self._download_json(
f'{api_domain}/cms/v2{bucket}/seasons?series_id={internal_id}', display_id,
note='Retrieving season list', query=params)
def entries(): def entries():
for season in seasons_response['items']: seasons_response = self._call_cms_api_signed(f'seasons?series_id={internal_id}', internal_id, lang, 'seasons')
episodes_response = self._download_json( for season in traverse_obj(seasons_response, ('items', ..., {dict})):
f'{api_domain}/cms/v2{bucket}/episodes?season_id={season["id"]}', display_id, episodes_response = self._call_cms_api_signed(
note=f'Retrieving episode list for {season.get("slug_title")}', query=params) f'episodes?season_id={season["id"]}', season["id"], lang, 'episode list')
for episode in episodes_response['items']: for episode_response in traverse_obj(episodes_response, ('items', ..., {dict})):
episode_id = episode['id'] yield self.url_result(
episode_display_id = episode['slug_title'] f'{self._BASE_URL}/{lang}watch/{episode_response["id"]}',
yield { CrunchyrollBetaIE, **CrunchyrollBetaIE._transform_episode_response(episode_response))
'_type': 'url',
'url': f'https://www.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}',
'ie_key': CrunchyrollBetaIE.ie_key(),
'id': episode_id,
'title': '%s Episode %s %s' % (episode.get('season_title'), episode.get('episode'), episode.get('title')),
'description': try_get(episode, lambda x: x['description'].replace(r'\r\n', '\n')),
'duration': float_or_none(episode.get('duration_ms'), 1000),
'series': episode.get('series_title'),
'series_id': episode.get('series_id'),
'season': episode.get('season_title'),
'season_id': episode.get('season_id'),
'season_number': episode.get('season_number'),
'episode': episode.get('title'),
'episode_number': episode.get('sequence_number'),
'language': episode.get('audio_locale'),
}
return self.playlist_result(entries(), internal_id, series_response.get('title')) return self.playlist_result(
entries(), internal_id,
**traverse_obj(self._call_api(f'series/{internal_id}', internal_id, lang, 'series'), ('data', 0, {
'title': ('title', {str}),
'description': ('description', {lambda x: x.replace(r'\r\n', '\n')}),
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
'thumbnails': ('images', ..., ..., ..., {
'url': ('source', {url_or_none}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
})
})))
class CrunchyrollMusicIE(CrunchyrollBaseIE):
IE_NAME = 'crunchyroll:music'
_VALID_URL = r'''(?x)
https?://(?:www\.)?crunchyroll\.com/
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
watch/(?P<type>concert|musicvideo)/(?P<id>\w+)'''
_TESTS = [{
'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79',
'info_dict': {
'ext': 'mp4',
'id': 'MV5B02C79',
'display_id': 'egaono-hana',
'title': 'Egaono Hana',
'track': 'Egaono Hana',
'artist': 'Goose house',
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'genre': ['J-Pop'],
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C',
'info_dict': {
'ext': 'mp4',
'id': 'MV88BB7F2C',
'display_id': 'crossing-field',
'title': 'Crossing Field',
'track': 'Crossing Field',
'artist': 'LiSA',
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'genre': ['Anime'],
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135',
'info_dict': {
'ext': 'mp4',
'id': 'MC2E2AC135',
'display_id': 'live-is-smile-always-364joker-at-yokohama-arena',
'title': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
'track': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
'artist': 'LiSA',
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'description': 'md5:747444e7e6300907b7a43f0a0503072e',
'genre': ['J-Pop'],
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79/egaono-hana',
'only_matching': True,
}, {
'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena',
'only_matching': True,
}, {
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
'only_matching': True,
}]
_API_ENDPOINT = 'music'
def _real_extract(self, url):
lang, internal_id, object_type = self._match_valid_url(url).group('lang', 'id', 'type')
path, name = {
'concert': ('concerts', 'concert info'),
'musicvideo': ('music_videos', 'music video info'),
}[object_type]
response = traverse_obj(self._call_api(f'{path}/{internal_id}', internal_id, lang, name), ('data', 0, {dict}))
if not response:
raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)
streams_link = response.get('streams_link')
if not streams_link and response.get('isPremiumOnly'):
message = f'This {response.get("type") or "media"} is for premium members only'
if self.is_logged_in:
raise ExtractorError(message, expected=True)
self.raise_login_required(message)
result = self._transform_music_response(response)
stream_response = self._call_api(streams_link, internal_id, lang, 'stream info')
result['formats'] = self._extract_formats(stream_response, internal_id)
return result
@staticmethod
def _transform_music_response(data):
return {
'id': data['id'],
**traverse_obj(data, {
'display_id': 'slug',
'title': 'title',
'track': 'title',
'artist': ('artist', 'name'),
'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n') or None}),
'thumbnails': ('images', ..., ..., {
'url': ('source', {url_or_none}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
}),
'genre': ('genres', ..., 'displayValue'),
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
}),
}
class CrunchyrollArtistIE(CrunchyrollBaseIE):
IE_NAME = 'crunchyroll:artist'
_VALID_URL = r'''(?x)
https?://(?:www\.)?crunchyroll\.com/
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
artist/(?P<id>\w{10})'''
_TESTS = [{
'url': 'https://www.crunchyroll.com/artist/MA179CB50D',
'info_dict': {
'id': 'MA179CB50D',
'title': 'LiSA',
'genre': ['J-Pop', 'Anime', 'Rock'],
'description': 'md5:16d87de61a55c3f7d6c454b73285938e',
},
'playlist_mincount': 83,
}, {
'url': 'https://www.crunchyroll.com/artist/MA179CB50D/lisa',
'only_matching': True,
}]
_API_ENDPOINT = 'music'
def _real_extract(self, url):
lang, internal_id = self._match_valid_url(url).group('lang', 'id')
response = traverse_obj(self._call_api(
f'artists/{internal_id}', internal_id, lang, 'artist info'), ('data', 0))
def entries():
for attribute, path in [('concerts', 'concert'), ('videos', 'musicvideo')]:
for internal_id in traverse_obj(response, (attribute, ...)):
yield self.url_result(f'{self._BASE_URL}/watch/{path}/{internal_id}', CrunchyrollMusicIE, internal_id)
return self.playlist_result(entries(), **self._transform_artist_response(response))
@staticmethod
def _transform_artist_response(data):
return {
'id': data['id'],
**traverse_obj(data, {
'title': 'name',
'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
'thumbnails': ('images', ..., ..., {
'url': ('source', {url_or_none}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
}),
'genre': ('genres', ..., 'displayValue'),
}),
}
@ -1,10 +1,8 @@
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..networking import HEADRequest
int_or_none, from ..utils import int_or_none
HEADRequest,
)
class CultureUnpluggedIE(InfoExtractor): class CultureUnpluggedIE(InfoExtractor):
@ -0,0 +1,158 @@
import hashlib
import re
import time
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
classproperty,
float_or_none,
traverse_obj,
url_or_none,
)
class DacastBaseIE(InfoExtractor):
_URL_TYPE = None
@classproperty
def _VALID_URL(cls):
return fr'https?://iframe\.dacast\.com/{cls._URL_TYPE}/(?P<user_id>[\w-]+)/(?P<id>[\w-]+)'
@classproperty
def _EMBED_REGEX(cls):
return [rf'<iframe[^>]+\bsrc=["\'](?P<url>{cls._VALID_URL})']
_API_INFO_URL = 'https://playback.dacast.com/content/info'
@classmethod
def _get_url_from_id(cls, content_id):
user_id, media_id = content_id.split(f'-{cls._URL_TYPE}-')
return f'https://iframe.dacast.com/{cls._URL_TYPE}/{user_id}/{media_id}'
@classmethod
def _extract_embed_urls(cls, url, webpage):
yield from super()._extract_embed_urls(url, webpage)
for content_id in re.findall(
rf'<script[^>]+\bsrc=["\']https://player\.dacast\.com/js/player\.js\?contentId=([\w-]+-{cls._URL_TYPE}-[\w-]+)["\']', webpage):
yield cls._get_url_from_id(content_id)
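# Example embed the regex above matches (hypothetical ids):
#   <script src="https://player.dacast.com/js/player.js?contentId=user123-vod-media456"></script>
# _get_url_from_id() splits the id on '-vod-' and rebuilds the iframe URL:
#   https://iframe.dacast.com/vod/user123/media456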
class DacastVODIE(DacastBaseIE):
_URL_TYPE = 'vod'
_TESTS = [{
'url': 'https://iframe.dacast.com/vod/acae82153ef4d7a7344ae4eaa86af534/1c6143e3-5a06-371d-8695-19b96ea49090',
'info_dict': {
'id': '1c6143e3-5a06-371d-8695-19b96ea49090',
'ext': 'mp4',
'uploader_id': 'acae82153ef4d7a7344ae4eaa86af534',
'title': '2_4||Adnexal mass characterisation: O-RADS US and MRI||N. Bharwani, London/UK',
'thumbnail': 'https://universe-files.dacast.com/26137208-5858-65c1-5e9a-9d6b6bd2b6c2',
},
'params': {'skip_download': 'm3u8'},
}]
_WEBPAGE_TESTS = [{
'url': 'https://www.dacast.com/support/knowledgebase/how-can-i-embed-a-video-on-my-website/',
'info_dict': {
'id': 'b6674869-f08a-23c5-1d7b-81f5309e1a90',
'ext': 'mp4',
'title': '4-HowToEmbedVideo.mp4',
'uploader_id': '3b67c4a9-3886-4eb1-d0eb-39b23b14bef3',
'thumbnail': 'https://universe-files.dacast.com/d26ab48f-a52a-8783-c42e-a90290ba06b6.png',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://gist.githubusercontent.com/bashonly/4ad249ef2910346fbdf3809b220f11ee/raw/87349778d4af1a80b1fcc3beb9c88108de5858f5/dacast_embeds.html',
'info_dict': {
'id': 'e7df418e-a83b-7a7f-7b5e-1a667981e8fa',
'ext': 'mp4',
'title': 'Evening Service 2-5-23',
'uploader_id': '943bb1ab3c03695ba85330d92d6d226e',
'thumbnail': 'https://universe-files.dacast.com/337472b3-e92c-2ea4-7eb7-5700da477f67',
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'}
info = self._download_json(self._API_INFO_URL, video_id, query=query, fatal=False)
access = self._download_json(
'https://playback.dacast.com/content/access', video_id,
note='Downloading access JSON', query=query, expected_status=403)
error = access.get('error')
if error in ('Broadcaster has been blocked', 'Content is offline'):
raise ExtractorError(error, expected=True)
elif error:
raise ExtractorError(f'Dacast API says "{error}"')
hls_url = access['hls']
hls_aes = {}
if 'DRM_EXT' in hls_url:
self.report_drm(video_id)
elif '/uspaes/' in hls_url:
# From https://player.dacast.com/js/player.js
ts = int(time.time())
signature = hashlib.sha1(
f'{10413792000 - ts}{ts}YfaKtquEEpDeusCKbvYszIEZnWmBcSvw'.encode()).digest().hex()
hls_aes['uri'] = f'https://keys.dacast.com/uspaes/{video_id}.key?s={signature}&ts={ts}'
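# Signature sketch: for ts = 1700000000 the signed key URI becomes
#   https://keys.dacast.com/uspaes/<video_id>.key?s=<sha1 hexdigest>&ts=1700000000
# where the digest covers f'{10413792000 - ts}{ts}' followed by the static
# secret lifted from player.js, as computed above.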
for retry in self.RetryManager():
try:
formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4', m3u8_id='hls')
except ExtractorError as e:
# CDN will randomly respond with 403
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
retry.error = e
continue
raise
return {
'id': video_id,
'uploader_id': user_id,
'formats': formats,
'hls_aes': hls_aes or None,
**traverse_obj(info, ('contentInfo', {
'title': 'title',
'duration': ('duration', {float_or_none}),
'thumbnail': ('thumbnailUrl', {url_or_none}),
})),
}
class DacastPlaylistIE(DacastBaseIE):
_URL_TYPE = 'playlist'
_TESTS = [{
'url': 'https://iframe.dacast.com/playlist/943bb1ab3c03695ba85330d92d6d226e/b632eb053cac17a9c9a02bcfc827f2d8',
'playlist_mincount': 28,
'info_dict': {
'id': 'b632eb053cac17a9c9a02bcfc827f2d8',
'title': 'Archive Sermons',
},
}]
_WEBPAGE_TESTS = [{
'url': 'https://gist.githubusercontent.com/bashonly/7efb606f49f3c6e07ea0327de5a661d1/raw/05a16eac830245ea301fb0a585023bec71e6093c/dacast_playlist_embed.html',
'playlist_mincount': 28,
'info_dict': {
'id': 'b632eb053cac17a9c9a02bcfc827f2d8',
'title': 'Archive Sermons',
},
}]
def _real_extract(self, url):
user_id, playlist_id = self._match_valid_url(url).group('user_id', 'id')
info = self._download_json(
self._API_INFO_URL, playlist_id, note='Downloading playlist JSON', query={
'contentId': f'{user_id}-playlist-{playlist_id}',
'provider': 'universe',
})['contentInfo']
def entries(info):
for video in traverse_obj(info, ('features', 'playlist', 'contents', lambda _, v: v['id'])):
yield self.url_result(
DacastVODIE._get_url_from_id(video['id']), DacastVODIE, video['id'], video.get('title'))
return self.playlist_result(entries(info), playlist_id, info.get('title'))
@ -1,6 +1,7 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_b64decode from ..compat import compat_b64decode
from ..utils import ( from ..utils import (
ExtractorError,
int_or_none, int_or_none,
js_to_json, js_to_json,
parse_count, parse_count,
@ -12,21 +13,24 @@ from ..utils import (
class DaftsexIE(InfoExtractor): class DaftsexIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?daftsex\.com/watch/(?P<id>-?\d+_\d+)' _VALID_URL = r'https?://(?:www\.)?daft\.sex/watch/(?P<id>-?\d+_\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://daftsex.com/watch/-35370899_456246186', 'url': 'https://daft.sex/watch/-35370899_456246186',
'md5': 'd95135e6cea2d905bea20dbe82cda64a', 'md5': '64c04ef7b4c7b04b308f3b0c78efe7cd',
'info_dict': { 'info_dict': {
'id': '-35370899_456246186', 'id': '-35370899_456246186',
'ext': 'mp4', 'ext': 'mp4',
'title': 'just relaxing', 'title': 'just relaxing',
'description': 'just relaxing - Watch video Watch video in high quality', 'description': 'just relaxing Watch video Watch video in high quality',
'upload_date': '20201113', 'upload_date': '20201113',
'timestamp': 1605261911, 'timestamp': 1605261911,
'thumbnail': r're:https://[^/]+/impf/-43BuMDIawmBGr3GLcZ93CYwWf2PBv_tVWoS1A/dnu41DnARU4\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=6af2c26ff4a45e55334189301c867384&type=video_thumb', 'thumbnail': r're:^https?://.*\.jpg$',
'age_limit': 18,
'duration': 15.0,
'view_count': int
}, },
}, { }, {
'url': 'https://daftsex.com/watch/-156601359_456242791', 'url': 'https://daft.sex/watch/-156601359_456242791',
'info_dict': { 'info_dict': {
'id': '-156601359_456242791', 'id': '-156601359_456242791',
'ext': 'mp4', 'ext': 'mp4',
@ -36,6 +40,7 @@ class DaftsexIE(InfoExtractor):
'timestamp': 1600250735, 'timestamp': 1600250735,
'thumbnail': 'https://psv153-1.crazycloud.ru/videos/-156601359/456242791/thumb.jpg?extra=i3D32KaBbBFf9TqDRMAVmQ', 'thumbnail': 'https://psv153-1.crazycloud.ru/videos/-156601359/456242791/thumb.jpg?extra=i3D32KaBbBFf9TqDRMAVmQ',
}, },
'skip': 'deleted / private'
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -60,7 +65,7 @@ class DaftsexIE(InfoExtractor):
webpage, 'player color', fatal=False) or '' webpage, 'player color', fatal=False) or ''
embed_page = self._download_webpage( embed_page = self._download_webpage(
'https://daxab.com/player/%s?color=%s' % (player_hash, player_color), 'https://dxb.to/player/%s?color=%s' % (player_hash, player_color),
video_id, headers={'Referer': url}) video_id, headers={'Referer': url})
video_params = self._parse_json( video_params = self._parse_json(
self._search_regex( self._search_regex(
@ -94,15 +99,19 @@ class DaftsexIE(InfoExtractor):
'age_limit': 18, 'age_limit': 18,
} }
item = self._download_json( items = self._download_json(
f'{server_domain}/method/video.get/{video_id}', video_id, f'{server_domain}/method/video.get/{video_id}', video_id,
headers={'Referer': url}, query={ headers={'Referer': url}, query={
'token': video_params['video']['access_token'], 'token': video_params['video']['access_token'],
'videos': video_id, 'videos': video_id,
'ckey': video_params['c_key'], 'ckey': video_params['c_key'],
'credentials': video_params['video']['credentials'], 'credentials': video_params['video']['credentials'],
})['response']['items'][0] })['response']['items']
if not items:
raise ExtractorError('Video is not available', video_id=video_id, expected=True)
item = items[0]
formats = [] formats = []
for f_id, f_url in item.get('files', {}).items(): for f_id, f_url in item.get('files', {}).items():
if f_id == 'external': if f_id == 'external':
@@ -3,7 +3,7 @@ import json
 import re

 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     OnDemandPagedList,
@@ -68,9 +68,9 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
                 None, 'Downloading Access Token',
                 data=urlencode_postdata(data))['access_token']
         except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
                 raise ExtractorError(self._parse_json(
-                    e.cause.read().decode(), xid)['error_description'], expected=True)
+                    e.cause.response.read().decode(), xid)['error_description'], expected=True)
             raise
         self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
         self._HEADERS['Authorization'] = 'Bearer ' + token
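The compat_HTTPError -> HTTPError change above recurs in most files of this commit. A minimal sketch of the new error-handling shape, assuming a JSON error body (the helper name, endpoint behaviour and 403 status are illustrative; only the attribute renames are taken from the diff itself):

    from yt_dlp.networking.exceptions import HTTPError
    from yt_dlp.utils import ExtractorError

    def _fetch_api(self, api_url, video_id):
        # hypothetical helper on an InfoExtractor subclass
        try:
            return self._download_json(api_url, video_id)
        except ExtractorError as e:
            # HTTPError replaces compat_HTTPError; the HTTP code moved from
            # e.cause.code to e.cause.status, and the body is now read via
            # e.cause.response.read() instead of e.cause.read()
            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                error = self._parse_json(e.cause.response.read().decode(), video_id)
                raise ExtractorError(error.get('message', 'Forbidden'), expected=True)
            raise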
View File
@@ -11,7 +11,7 @@ from ..utils import (

 class DigitalConcertHallIE(InfoExtractor):
     IE_DESC = 'DigitalConcertHall extractor'
-    _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/concert/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert)/(?P<id>[0-9]+)'
     _OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
     _ACCESS_TOKEN = None
     _NETRC_MACHINE = 'digitalconcerthall'
@@ -40,6 +40,19 @@ class DigitalConcertHallIE(InfoExtractor):
         },
         'params': {'skip_download': 'm3u8'},
         'playlist_count': 3,
+    }, {
+        'url': 'https://www.digitalconcerthall.com/en/film/388',
+        'info_dict': {
+            'id': '388',
+            'ext': 'mp4',
+            'title': 'The Berliner Philharmoniker and Frank Peter Zimmermann',
+            'description': 'md5:cfe25a7044fa4be13743e5089b5b5eb2',
+            'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
+            'upload_date': '20220714',
+            'timestamp': 1657785600,
+            'album_artist': 'Frank Peter Zimmermann / Benedikt von Bernstorff / Jakob von Bernstorff',
+        },
+        'params': {'skip_download': 'm3u8'},
     }]

     def _perform_login(self, username, password):
@@ -75,7 +88,7 @@ class DigitalConcertHallIE(InfoExtractor):
         if not self._ACCESS_TOKEN:
             self.raise_login_required(method='password')

-    def _entries(self, items, language, **kwargs):
+    def _entries(self, items, language, type_, **kwargs):
         for item in items:
             video_id = item['id']
             stream_info = self._download_json(
@@ -103,11 +116,11 @@ class DigitalConcertHallIE(InfoExtractor):
                     'start_time': chapter.get('time'),
                     'end_time': try_get(chapter, lambda x: x['time'] + x['duration']),
                     'title': chapter.get('text'),
-                } for chapter in item['cuepoints']] if item.get('cuepoints') else None,
+                } for chapter in item['cuepoints']] if item.get('cuepoints') and type_ == 'concert' else None,
             }

     def _real_extract(self, url):
-        language, video_id = self._match_valid_url(url).group('language', 'id')
+        language, type_, video_id = self._match_valid_url(url).group('language', 'type', 'id')
         if not language:
             language = 'en'
@@ -120,18 +133,18 @@ class DigitalConcertHallIE(InfoExtractor):
         }]

         vid_info = self._download_json(
-            f'https://api.digitalconcerthall.com/v2/concert/{video_id}', video_id, headers={
+            f'https://api.digitalconcerthall.com/v2/{type_}/{video_id}', video_id, headers={
                 'Accept': 'application/json',
                 'Accept-Language': language
             })
         album_artist = ' / '.join(traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) or '')
+        videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))

         return {
             '_type': 'playlist',
             'id': video_id,
             'title': vid_info.get('title'),
-            'entries': self._entries(traverse_obj(vid_info, ('_embedded', ..., ...)), language,
-                                     thumbnails=thumbnails, album_artist=album_artist),
+            'entries': self._entries(videos, language, thumbnails=thumbnails, album_artist=album_artist, type_=type_),
             'thumbnails': thumbnails,
             'album_artist': album_artist,
         }
View File
@@ -0,0 +1,35 @@
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import traverse_obj
+
+
+class DiscogsReleasePlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?discogs\.com/(?P<type>release|master)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.discogs.com/release/1-The-Persuader-Stockholm',
+        'info_dict': {
+            'id': 'release1',
+            'title': 'Stockholm',
+        },
+        'playlist_mincount': 7,
+    }, {
+        'url': 'https://www.discogs.com/master/113-Vince-Watson-Moments-In-Time',
+        'info_dict': {
+            'id': 'master113',
+            'title': 'Moments In Time',
+        },
+        'playlist_mincount': 53,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')
+        display_id = f'{playlist_type}{playlist_id}'
+        response = self._download_json(
+            f'https://api.discogs.com/{playlist_type}s/{playlist_id}', display_id)
+
+        entries = [
+            self.url_result(video['uri'], YoutubeIE, video_title=video.get('title'))
+            for video in traverse_obj(response, ('videos', lambda _, v: YoutubeIE.suitable(v['uri'])))]
+
+        return self.playlist_result(entries, display_id, response.get('title'))
View File
@@ -3,8 +3,8 @@ import string

 from .discoverygo import DiscoveryGoBaseIE
 from ..compat import compat_urllib_parse_unquote
+from ..networking.exceptions import HTTPError
 from ..utils import ExtractorError
-from ..compat import compat_HTTPError


 class DiscoveryIE(DiscoveryGoBaseIE):
@@ -100,9 +100,9 @@ class DiscoveryIE(DiscoveryGoBaseIE):
                 self._API_BASE_URL + 'streaming/video/' + video_id,
                 display_id, 'Downloading streaming JSON metadata', headers=headers)
         except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
+            if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
                 e_description = self._parse_json(
-                    e.cause.read().decode(), display_id)['description']
+                    e.cause.response.read().decode(), display_id)['description']
                 if 'resource not available for country' in e_description:
                     self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
                 if 'Authorized Networks' in e_description:
View File
@@ -0,0 +1,192 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    extract_attributes,
+    int_or_none,
+    traverse_obj,
+    url_or_none,
+)
+
+
+class DLFBaseIE(InfoExtractor):
+    _VALID_URL_BASE = r'https?://(?:www\.)?deutschlandfunk\.de/'
+    _BUTTON_REGEX = r'(<button[^>]+alt="Anhören"[^>]+data-audio-diraid[^>]*>)'
+
+    def _parse_button_attrs(self, button, audio_id=None):
+        attrs = extract_attributes(button)
+        audio_id = audio_id or attrs['data-audio-diraid']
+
+        url = traverse_obj(
+            attrs, 'data-audio-download-src', 'data-audio', 'data-audioreference',
+            'data-audio-src', expected_type=url_or_none)
+        ext = determine_ext(url)
+
+        return {
+            'id': audio_id,
+            'extractor_key': DLFIE.ie_key(),
+            'extractor': DLFIE.IE_NAME,
+            **traverse_obj(attrs, {
+                'title': (('data-audiotitle', 'data-audio-title', 'data-audio-download-tracking-title'), {str}),
+                'duration': (('data-audioduration', 'data-audio-duration'), {int_or_none}),
+                'thumbnail': ('data-audioimage', {url_or_none}),
+                'uploader': 'data-audio-producer',
+                'series': 'data-audio-series',
+                'channel': 'data-audio-origin-site-name',
+                'webpage_url': ('data-audio-download-tracking-path', {url_or_none}),
+            }, get_all=False),
+            'formats': (self._extract_m3u8_formats(url, audio_id, fatal=False)
+                        if ext == 'm3u8' else [{'url': url, 'ext': ext, 'vcodec': 'none'}])
+        }
+
+
+class DLFIE(DLFBaseIE):
+    IE_NAME = 'dlf'
+    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'[\w-]+-dlf-(?P<id>[\da-f]{8})-100\.html'
+    _TESTS = [
+        # Audio as an HLS stream
+        {
+            'url': 'https://www.deutschlandfunk.de/tanz-der-saiteninstrumente-das-wild-strings-trio-aus-slowenien-dlf-03a3eb19-100.html',
+            'info_dict': {
+                'id': '03a3eb19',
+                'title': r're:Tanz der Saiteninstrumente [-/] Das Wild Strings Trio aus Slowenien',
+                'ext': 'm4a',
+                'duration': 3298,
+                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
+                'uploader': 'Deutschlandfunk',
+                'series': 'On Stage',
+                'channel': 'deutschlandfunk'
+            },
+            'params': {
+                'skip_download': 'm3u8'
+            },
+            'skip': 'This webpage no longer exists'
+        }, {
+            'url': 'https://www.deutschlandfunk.de/russische-athleten-kehren-zurueck-auf-die-sportbuehne-ein-gefaehrlicher-tueroeffner-dlf-d9cc1856-100.html',
+            'info_dict': {
+                'id': 'd9cc1856',
+                'title': 'Russische Athleten kehren zurück auf die Sportbühne: Ein gefährlicher Türöffner',
+                'ext': 'mp3',
+                'duration': 291,
+                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
+                'uploader': 'Deutschlandfunk',
+                'series': 'Kommentare und Themen der Woche',
+                'channel': 'deutschlandfunk'
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        audio_id = self._match_id(url)
+        webpage = self._download_webpage(url, audio_id)
+
+        return self._parse_button_attrs(
+            self._search_regex(self._BUTTON_REGEX, webpage, 'button'), audio_id)
+
+
+class DLFCorpusIE(DLFBaseIE):
+    IE_NAME = 'dlf:corpus'
+    IE_DESC = 'DLF Multi-feed Archives'
+    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'(?P<id>(?![\w-]+-dlf-[\da-f]{8})[\w-]+-\d+)\.html'
+    _TESTS = [
+        # Recorded news broadcast with referrals to related broadcasts
+        {
+            'url': 'https://www.deutschlandfunk.de/fechten-russland-belarus-ukraine-protest-100.html',
+            'info_dict': {
+                'id': 'fechten-russland-belarus-ukraine-protest-100',
+                'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
+                'description': 'md5:91340aab29c71aa7518ad5be13d1e8ad'
+            },
+            'playlist_mincount': 5,
+            'playlist': [{
+                'info_dict': {
+                    'id': '1fc5d64a',
+                    'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
+                    'ext': 'mp3',
+                    'duration': 252,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/aad16241-6b76-4a09-958b-96d0ee1d6f57/512x512.jpg?t=1679480020313',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Sport',
+                    'channel': 'deutschlandfunk'
+                }
+            }, {
+                'info_dict': {
+                    'id': '2ada145f',
+                    'title': r're:(?:Sportpolitik / )?Fechtverband votiert für Rückkehr russischer Athleten',
+                    'ext': 'mp3',
+                    'duration': 336,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/FILE_93982766f7317df30409b8a184ac044a/512x512.jpg?t=1678547581005',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Deutschlandfunk Nova',
+                    'channel': 'deutschlandfunk-nova'
+                }
+            }, {
+                'info_dict': {
+                    'id': '5e55e8c9',
+                    'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
+                    'ext': 'mp3',
+                    'duration': 187,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Sport am Samstag',
+                    'channel': 'deutschlandfunk'
+                }
+            }, {
+                'info_dict': {
+                    'id': '47e1a096',
+                    'title': r're:Rückkehr Russlands im Fechten [-/] "Fassungslos, dass es einfach so passiert ist"',
+                    'ext': 'mp3',
+                    'duration': 602,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/da4c494a-21cc-48b4-9cc7-40e09fd442c2/512x512.jpg?t=1678562155770',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Sport am Samstag',
+                    'channel': 'deutschlandfunk'
+                }
+            }, {
+                'info_dict': {
+                    'id': '5e55e8c9',
+                    'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
+                    'ext': 'mp3',
+                    'duration': 187,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Sport am Samstag',
+                    'channel': 'deutschlandfunk'
+                }
+            }]
+        },
+        # Podcast feed with tag buttons, playlist count fluctuates
+        {
+            'url': 'https://www.deutschlandfunk.de/kommentare-und-themen-der-woche-100.html',
+            'info_dict': {
+                'id': 'kommentare-und-themen-der-woche-100',
+                'title': 'Meinung - Kommentare und Themen der Woche',
+                'description': 'md5:2901bbd65cd2d45e116d399a099ce5d5',
+            },
+            'playlist_mincount': 10,
+        },
+        # Podcast feed with no description
+        {
+            'url': 'https://www.deutschlandfunk.de/podcast-tolle-idee-100.html',
+            'info_dict': {
+                'id': 'podcast-tolle-idee-100',
+                'title': 'Wissenschaftspodcast - Tolle Idee! - Was wurde daraus?',
+            },
+            'playlist_mincount': 11,
+        },
+    ]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'description': self._html_search_meta(
+                ['description', 'og:description', 'twitter:description'], webpage, default=None),
+            'title': self._html_search_meta(
+                ['og:title', 'twitter:title'], webpage, default=None),
+            'entries': map(self._parse_button_attrs, re.findall(self._BUTTON_REGEX, webpage)),
+        }
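The _parse_button_attrs helper above leans on traverse_obj's dict-mapping form. A self-contained sketch of how that form behaves (attribute values invented for illustration):

    from yt_dlp.utils import int_or_none, traverse_obj

    attrs = {'data-audiotitle': 'Example title', 'data-audio-duration': '3298'}
    info = traverse_obj(attrs, {
        # a tuple of keys tries alternatives; {func} applies a transform
        'title': (('data-audiotitle', 'data-audio-title'), {str}),
        'duration': (('data-audioduration', 'data-audio-duration'), {int_or_none}),
    }, get_all=False)
    assert info == {'title': 'Example title', 'duration': 3298}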

View File

@@ -2,7 +2,7 @@ import json
 import uuid

 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..networking.exceptions import HTTPError
 from ..utils import (
     determine_ext,
     ExtractorError,
@@ -39,7 +39,7 @@ class DPlayBaseIE(InfoExtractor):
         return f'Bearer {token}'

     def _process_errors(self, e, geo_countries):
-        info = self._parse_json(e.cause.read().decode('utf-8'), None)
+        info = self._parse_json(e.cause.response.read().decode('utf-8'), None)
         error = info['errors'][0]
         error_code = error.get('code')
         if error_code == 'access.denied.geoblocked':
@@ -65,6 +65,7 @@ class DPlayBaseIE(InfoExtractor):
         return streaming_list

     def _get_disco_api_info(self, url, display_id, disco_host, realm, country, domain=''):
+        country = self.get_param('geo_bypass_country') or country
         geo_countries = [country.upper()]
         self._initialize_geo_bypass({
             'countries': geo_countries,
@@ -86,7 +87,7 @@ class DPlayBaseIE(InfoExtractor):
                     'include': 'images,primaryChannel,show,tags'
                 })
         except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
                 self._process_errors(e, geo_countries)
             raise
         video_id = video['data']['id']
@@ -98,7 +99,7 @@ class DPlayBaseIE(InfoExtractor):
             streaming = self._download_video_playback_info(
                 disco_base, video_id, headers)
         except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                 self._process_errors(e, geo_countries)
             raise
         for format_dict in streaming:
@@ -745,7 +746,7 @@ class MotorTrendIE(DiscoveryPlusBaseIE):

 class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?motortrendondemand\.com/detail' + DPlayBaseIE._PATH_REGEX
+    _VALID_URL = r'https?://(?:www\.)?motortrend(?:ondemand\.com|\.com/plus)/detail' + DPlayBaseIE._PATH_REGEX
     _TESTS = [{
         'url': 'https://www.motortrendondemand.com/detail/wheelstanding-dump-truck-stubby-bobs-comeback/37699/784',
         'info_dict': {
@@ -766,6 +767,25 @@ class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
             'upload_date': '20140101',
             'tags': [],
         },
+    }, {
+        'url': 'https://www.motortrend.com/plus/detail/roadworthy-rescues-teaser-trailer/4922860/',
+        'info_dict': {
+            'id': '4922860',
+            'ext': 'mp4',
+            'title': 'Roadworthy Rescues | Teaser Trailer',
+            'description': 'Derek Bieri helps Freiburger and Finnegan with their \'68 big-block Dart.',
+            'display_id': 'roadworthy-rescues-teaser-trailer/4922860',
+            'creator': 'Originals',
+            'series': 'Roadworthy Rescues',
+            'thumbnail': r're:^https?://.+\.jpe?g$',
+            'upload_date': '20220907',
+            'timestamp': 1662523200,
+            'duration': 1066.356,
+            'tags': [],
+        },
+    }, {
+        'url': 'https://www.motortrend.com/plus/detail/ugly-duckling/2450033/12439',
+        'only_matching': True,
     }]
     _PRODUCT = 'MTOD'
@@ -1001,3 +1021,39 @@ class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE):
     _SHOW_STR = 'show'
     _INDEX = 4
     _VIDEO_IE = DiscoveryPlusIndiaIE
+
+
+class GlobalCyclingNetworkPlusIE(DiscoveryPlusBaseIE):
+    _VALID_URL = r'https?://plus\.globalcyclingnetwork\.com/watch/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://plus.globalcyclingnetwork.com/watch/1397691',
+        'info_dict': {
+            'id': '1397691',
+            'ext': 'mp4',
+            'title': 'The Athertons: Mountain Biking\'s Fastest Family',
+            'description': 'md5:75a81937fcd8b989eec6083a709cd837',
+            'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/04/eb9e3026-4849-3001-8281-9356466f0557.png',
+            'series': 'gcn',
+            'creator': 'Gcn',
+            'upload_date': '20210309',
+            'timestamp': 1615248000,
+            'duration': 2531.0,
+            'tags': [],
+        },
+        'skip': 'Subscription required',
+        'params': {'skip_download': 'm3u8'},
+    }]
+    _PRODUCT = 'web'
+    _DISCO_API_PARAMS = {
+        'disco_host': 'disco-api-prod.globalcyclingnetwork.com',
+        'realm': 'gcn',
+        'country': 'us',
+    }
+
+    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+        headers.update({
+            'x-disco-params': f'realm={realm}',
+            'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:27.3.2',
+            'Authorization': self._get_auth(disco_base, display_id, realm),
+        })
View File
@@ -1,13 +1,17 @@
+import functools
+
 from .common import InfoExtractor
 from .vimeo import VHXEmbedIE
 from ..utils import (
     ExtractorError,
+    OnDemandPagedList,
     clean_html,
+    extract_attributes,
     get_element_by_class,
     get_element_by_id,
-    get_elements_by_class,
+    get_elements_html_by_class,
     int_or_none,
-    join_nonempty,
+    traverse_obj,
     unified_strdate,
     urlencode_postdata,
 )
@@ -162,12 +166,13 @@ class DropoutIE(InfoExtractor):

 class DropoutSeasonIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:[0-9]+/?$)'
+    _PAGE_SIZE = 24
+    _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:(?P<season>[0-9]+)/?$)'
     _TESTS = [
         {
             'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1',
             'note': 'Multi-season series with the season in the url',
-            'playlist_count': 17,
+            'playlist_count': 24,
             'info_dict': {
                 'id': 'dimension-20-fantasy-high-season-1',
                 'title': 'Dimension 20 Fantasy High - Season 1'
@@ -176,7 +181,7 @@ class DropoutSeasonIE(InfoExtractor):
         {
             'url': 'https://www.dropout.tv/dimension-20-fantasy-high',
             'note': 'Multi-season series with the season not in the url',
-            'playlist_count': 17,
+            'playlist_count': 24,
             'info_dict': {
                 'id': 'dimension-20-fantasy-high-season-1',
                 'title': 'Dimension 20 Fantasy High - Season 1'
@@ -190,29 +195,30 @@ class DropoutSeasonIE(InfoExtractor):
                 'id': 'dimension-20-shriek-week-season-1',
                 'title': 'Dimension 20 Shriek Week - Season 1'
             }
+        },
+        {
+            'url': 'https://www.dropout.tv/breaking-news-no-laugh-newsroom/season:3',
+            'note': 'Multi-season series with season in the url that requires pagination',
+            'playlist_count': 25,
+            'info_dict': {
+                'id': 'breaking-news-no-laugh-newsroom-season-3',
+                'title': 'Breaking News No Laugh Newsroom - Season 3'
+            }
         }
     ]

+    def _fetch_page(self, url, season_id, page):
+        page += 1
+        webpage = self._download_webpage(
+            f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400})
+        yield from [self.url_result(item_url, DropoutIE) for item_url in traverse_obj(
+            get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))]
+
     def _real_extract(self, url):
         season_id = self._match_id(url)
+        season_num = self._match_valid_url(url).group('season') or 1
         season_title = season_id.replace('-', ' ').title()
-        webpage = self._download_webpage(url, season_id)
-
-        entries = [
-            self.url_result(
-                url=self._search_regex(r'<a href=["\'](.+?)["\'] class=["\']browse-item-link["\']',
-                                       item, 'item_url'),
-                ie=DropoutIE.ie_key()
-            ) for item in get_elements_by_class('js-collection-item', webpage)
-        ]
-
-        seasons = (get_element_by_class('select-dropdown-wrapper', webpage) or '').strip().replace('\n', '')
-        current_season = self._search_regex(r'<option[^>]+selected>([^<]+)</option>',
-                                            seasons, 'current_season', default='').strip()
-
-        return {
-            '_type': 'playlist',
-            'id': join_nonempty(season_id, current_season.lower().replace(' ', '-')),
-            'title': join_nonempty(season_title, current_season, delim=' - '),
-            'entries': entries
-        }
+
+        return self.playlist_result(
+            OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE),
+            f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}')
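The season rewrite above swaps eager scraping for OnDemandPagedList, which only invokes the page fetcher for pages a consumer actually touches. A standalone sketch of that contract, with a fake two-page fetcher standing in for the webpage download:

    import functools

    from yt_dlp.utils import OnDemandPagedList

    PAGE_SIZE = 3

    def fetch_page(season_id, page):
        page += 1  # OnDemandPagedList counts pages from 0; sites usually count from 1
        if page > 2:  # pretend the site only has two pages
            return []
        return [f'{season_id}/page{page}/item{i}' for i in range(PAGE_SIZE)]

    pages = OnDemandPagedList(functools.partial(fetch_page, 'demo-season'), PAGE_SIZE)
    print(pages.getslice(0, 4))  # fetches only the two pages needed, lazily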
View File
@@ -12,7 +12,6 @@ from ..utils import (
     mimetype2ext,
     str_or_none,
     traverse_obj,
-    try_get,
     unified_timestamp,
     update_url_query,
     url_or_none,
@@ -25,7 +24,7 @@ class DRTVIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:
-                            (?:www\.)?dr\.dk/(?:tv/se|nyheder|(?:radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*|
+                            (?:www\.)?dr\.dk/(?:tv/se|nyheder|(?P<radio>radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*|
                             (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
                         )
                         (?P<id>[\da-z_-]+)
@@ -80,7 +79,7 @@ class DRTVIE(InfoExtractor):
             'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
             'timestamp': 1546628400,
             'upload_date': '20190104',
-            'duration': 3504.618,
+            'duration': 3504.619,
             'formats': 'mincount:20',
             'release_year': 2017,
             'season_id': 'urn:dr:mu:bundle:5afc03ad6187a4065ca5fd35',
@@ -101,14 +100,16 @@ class DRTVIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Bonderøven 2019 (1:8)',
             'description': 'md5:b6dcfe9b6f0bea6703e9a0092739a5bd',
-            'timestamp': 1603188600,
-            'upload_date': '20201020',
+            'timestamp': 1654856100,
+            'upload_date': '20220610',
             'duration': 2576.6,
             'season': 'Bonderøven 2019',
             'season_id': 'urn:dr:mu:bundle:5c201667a11fa01ca4528ce5',
             'release_year': 2019,
             'season_number': 2019,
-            'series': 'Frank & Kastaniegaarden'
+            'series': 'Frank & Kastaniegaarden',
+            'episode_number': 1,
+            'episode': 'Episode 1',
         },
         'params': {
             'skip_download': True,
@@ -140,10 +141,26 @@ class DRTVIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+        'skip': 'this video has been removed',
+    }, {
+        'url': 'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/regionale-nyheder-2023-03-14-10-30-9',
+        'info_dict': {
+            'ext': 'mp4',
+            'id': '14802310112',
+            'timestamp': 1678786200,
+            'duration': 120.043,
+            'season_id': 'urn:dr:mu:bundle:63a4f7c87140143504b6710f',
+            'series': 'P4 København regionale nyheder',
+            'upload_date': '20230314',
+            'release_year': 0,
+            'description': 'Hør seneste regionale nyheder fra P4 København.',
+            'season': 'Regionale nyheder',
+            'title': 'Regionale nyheder',
+        },
     }]

     def _real_extract(self, url):
-        raw_video_id = self._match_id(url)
+        raw_video_id, is_radio_url = self._match_valid_url(url).group('id', 'radio')

         webpage = self._download_webpage(url, raw_video_id)
@@ -170,15 +187,17 @@ class DRTVIE(InfoExtractor):
                 programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id)
         else:
             programcard_url = _PROGRAMCARD_BASE
-            page = self._parse_json(
-                self._search_regex(
-                    r'data\s*=\s*({.+?})\s*(?:;|</script)', webpage,
-                    'data'), '1')['cache']['page']
-            page = page[list(page.keys())[0]]
-            item = try_get(
-                page, (lambda x: x['item'], lambda x: x['entries'][0]['item']),
-                dict)
-            video_id = item['customId'].split(':')[-1]
+            if is_radio_url:
+                video_id = self._search_nextjs_data(
+                    webpage, raw_video_id)['props']['pageProps']['episode']['productionNumber']
+            else:
+                json_data = self._search_json(
+                    r'window\.__data\s*=', webpage, 'data', raw_video_id)
+                video_id = traverse_obj(json_data, (
+                    'cache', 'page', ..., (None, ('entries', 0)), 'item', 'customId',
+                    {lambda x: x.split(':')[-1]}), get_all=False)
+            if not video_id:
+                raise ExtractorError('Unable to extract video id')
             query['productionnumber'] = video_id

         data = self._download_json(
@@ -269,10 +288,11 @@ class DRTVIE(InfoExtractor):
                         f['vcodec'] = 'none'
                     formats.extend(f4m_formats)
                 elif target == 'HLS':
-                    formats.extend(self._extract_m3u8_formats(
-                        uri, video_id, 'mp4', entry_protocol='m3u8_native',
-                        quality=preference, m3u8_id=format_id,
-                        fatal=False))
+                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                        uri, video_id, 'mp4', entry_protocol='m3u8_native',
+                        quality=preference, m3u8_id=format_id, fatal=False)
+                    formats.extend(fmts)
+                    self._merge_subtitles(subs, target=subtitles)
                 else:
                     bitrate = link.get('Bitrate')
                     if bitrate:
View File
@@ -1,12 +1,17 @@
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
     int_or_none,
     qualities,
 )


 class DumpertIE(InfoExtractor):
-    _VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'
+    _VALID_URL = r'''(?x)
+        (?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl(?:
+            /(?:mediabase|embed|item)/|
+            (?:/toppers|/latest|/?)\?selectedId=
+        )(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'''
     _TESTS = [{
         'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
         'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
@@ -16,6 +21,9 @@ class DumpertIE(InfoExtractor):
             'title': 'Ik heb nieuws voor je',
             'description': 'Niet schrikken hoor',
             'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 9,
+            'view_count': int,
+            'like_count': int,
         }
     }, {
         'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7',
@@ -26,6 +34,28 @@ class DumpertIE(InfoExtractor):
     }, {
         'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7',
         'only_matching': True,
+    }, {
+        'url': 'https://www.dumpert.nl/item/100031688_b317a185',
+        'info_dict': {
+            'id': '100031688/b317a185',
+            'ext': 'mp4',
+            'title': 'Epic schijnbeweging',
+            'description': '<p>Die zag je niet eh</p>',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+            'duration': 12,
+            'view_count': int,
+            'like_count': int,
+        },
+        'params': {'skip_download': 'm3u8'}
+    }, {
+        'url': 'https://www.dumpert.nl/toppers?selectedId=100031688_b317a185',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.dumpert.nl/latest?selectedId=100031688_b317a185',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.dumpert.nl/?selectedId=100031688_b317a185',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
@@ -36,18 +66,23 @@ class DumpertIE(InfoExtractor):
         title = item['title']
         media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO')

-        quality = qualities(['flv', 'mobile', 'tablet', '720p'])
+        quality = qualities(['flv', 'mobile', 'tablet', '720p', '1080p'])
         formats = []
         for variant in media.get('variants', []):
             uri = variant.get('uri')
             if not uri:
                 continue
             version = variant.get('version')
-            formats.append({
-                'url': uri,
-                'format_id': version,
-                'quality': quality(version),
-            })
+            preference = quality(version)
+            if determine_ext(uri) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    uri, video_id, 'mp4', m3u8_id=version, quality=preference))
+            else:
+                formats.append({
+                    'url': uri,
+                    'format_id': version,
+                    'quality': preference,
+                })

         thumbnails = []
         stills = item.get('stills') or {}
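The widened list above feeds yt-dlp's qualities() helper, which maps a format label to its index in the preferred order, with unknown labels sorting lowest. A quick sketch:

    from yt_dlp.utils import qualities

    quality = qualities(['flv', 'mobile', 'tablet', '720p', '1080p'])
    assert quality('tablet') == 2   # position in the ordered list
    assert quality('1080p') == 4    # newly appended, now the highest preference
    assert quality('4k') == -1      # labels outside the list rank below all others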
View File
@@ -2,7 +2,7 @@ import functools
 import re

 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -111,8 +111,8 @@ class EaglePlatformIE(InfoExtractor):
             response = super(EaglePlatformIE, self)._download_json(
                 url_or_request, video_id, *args, **kwargs)
         except ExtractorError as ee:
-            if isinstance(ee.cause, compat_HTTPError):
-                response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
+            if isinstance(ee.cause, HTTPError):
+                response = self._parse_json(ee.cause.response.read().decode('utf-8'), video_id)
                 self._handle_error(response)
             raise
         return response
View File
@@ -0,0 +1,36 @@
+from .common import InfoExtractor
+from ..utils import remove_end
+
+
+class EbayIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?ebay\.com/itm/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.ebay.com/itm/194509326719',
+        'info_dict': {
+            'id': '194509326719',
+            'ext': 'mp4',
+            'title': 'WiFi internal antenna adhesive for wifi 2.4GHz wifi 5 wifi 6 wifi 6E full bands',
+        },
+        'params': {'skip_download': 'm3u8'}
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        video_json = self._search_json(r'"video":', webpage, 'video json', video_id)
+
+        formats = []
+        for key, url in video_json['playlistMap'].items():
+            if key == 'HLS':
+                formats.extend(self._extract_m3u8_formats(url, video_id, fatal=False))
+            elif key == 'DASH':
+                formats.extend(self._extract_mpd_formats(url, video_id, fatal=False))
+            else:
+                self.report_warning(f'Unsupported format {key}', video_id)
+
+        return {
+            'id': video_id,
+            'title': remove_end(self._html_extract_title(webpage), ' | eBay'),
+            'formats': formats
+        }
View File
@@ -1,10 +1,6 @@
 from .common import InfoExtractor
-from ..utils import (
-    float_or_none,
-    int_or_none,
-    parse_iso8601,
-    sanitized_Request,
-)
+from ..networking import Request
+from ..utils import float_or_none, int_or_none, parse_iso8601


 class EitbIE(InfoExtractor):
@@ -54,7 +50,7 @@ class EitbIE(InfoExtractor):
         hls_url = media.get('HLS_SURL')
         if hls_url:
-            request = sanitized_Request(
+            request = Request(
                 'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
                 headers={'Referer': url})
             token_data = self._download_json(
View File
@@ -0,0 +1,59 @@
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    traverse_obj,
+    url_or_none,
+)
+
+
+class ElevenSportsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?elevensports\.com/view/event/(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://elevensports.com/view/event/clf46yr3kenn80jgrqsjmwefk',
+        'md5': 'c0958d9ff90e4503a75544358758921d',
+        'info_dict': {
+            'id': 'clf46yr3kenn80jgrqsjmwefk',
+            'title': 'Cleveland SC vs Lionsbridge FC',
+            'ext': 'mp4',
+            'description': 'md5:03b5238d6549f4ea1fddadf69b5e0b58',
+            'upload_date': '20230323',
+            'timestamp': 1679612400,
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+        },
+        'params': {'skip_download': 'm3u8'}
+    }, {
+        'url': 'https://elevensports.com/view/event/clhpyd53b06160jez74qhgkmf',
+        'md5': 'c0958d9ff90e4503a75544358758921d',
+        'info_dict': {
+            'id': 'clhpyd53b06160jez74qhgkmf',
+            'title': 'AJNLF vs ARRAF',
+            'ext': 'mp4',
+            'description': 'md5:c8c5e75c78f37c6d15cd6c475e43a8c1',
+            'upload_date': '20230521',
+            'timestamp': 1684684800,
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+        },
+        'params': {'skip_download': 'm3u8'}
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        event_id = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['event']['mclsEventId']
+        event_data = self._download_json(
+            f'https://mcls-api.mycujoo.tv/bff/events/v1beta1/{event_id}', video_id,
+            headers={'Authorization': 'Bearer FBVKACGN37JQC5SFA0OVK8KKSIOP153G'})
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            event_data['streams'][0]['full_url'], video_id, 'mp4', m3u8_id='hls')
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            **traverse_obj(event_data, {
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+                'timestamp': ('start_time', {parse_iso8601}),
+                'thumbnail': ('thumbnail_url', {url_or_none}),
+            }),
+        }
View File

@ -61,6 +61,35 @@ class EmbedlyIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
'url': 'http://www.permacultureetc.com/2022/12/comment-greffer-facilement-les-arbres-fruitiers.html',
'info_dict': {
'id': 'pfUK_ADTvgY',
'ext': 'mp4',
'title': 'Comment greffer facilement les arbres fruitiers ? (mois par mois)',
'description': 'md5:d3a876995e522f138aabb48e040bfb4c',
'view_count': int,
'upload_date': '20221210',
'comment_count': int,
'live_status': 'not_live',
'channel_id': 'UCsM4_jihNFYe4CtSkXvDR-Q',
'channel_follower_count': int,
'tags': ['permaculture', 'jardinage', 'dekarz', 'autonomie', 'greffe', 'fruitiers', 'arbres', 'jardin forêt', 'forêt comestible', 'damien'],
'playable_in_embed': True,
'uploader': 'permaculture agroécologie etc...',
'channel': 'permaculture agroécologie etc...',
'thumbnail': 'https://i.ytimg.com/vi/pfUK_ADTvgY/sddefault.jpg',
'duration': 1526,
'channel_url': 'https://www.youtube.com/channel/UCsM4_jihNFYe4CtSkXvDR-Q',
'age_limit': 0,
'uploader_id': 'permacultureetc',
'like_count': int,
'uploader_url': 'http://www.youtube.com/user/permacultureetc',
'categories': ['Education'],
'availability': 'public',
},
}]
@classmethod @classmethod
def _extract_from_webpage(cls, url, webpage): def _extract_from_webpage(cls, url, webpage):
# Bypass "ie=cls" and suitable check # Bypass "ie=cls" and suitable check
View File
@@ -52,7 +52,7 @@ class EpornerIE(InfoExtractor):

         webpage, urlh = self._download_webpage_handle(url, display_id)
-        video_id = self._match_id(urlh.geturl())
+        video_id = self._match_id(urlh.url)

         hash = self._search_regex(
             r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash')
View File
@@ -240,7 +240,7 @@ class FiveThirtyEightIE(InfoExtractor):

 class ESPNCricInfoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?espncricinfo\.com/video/[^#$&?/]+-(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?espncricinfo\.com/(?:cricket-)?videos?/[^#$&?/]+-(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://www.espncricinfo.com/video/finch-chasing-comes-with-risks-despite-world-cup-trend-1289135',
         'info_dict': {
@@ -252,6 +252,17 @@ class ESPNCricInfoIE(InfoExtractor):
             'duration': 96,
         },
         'params': {'skip_download': True}
+    }, {
+        'url': 'https://www.espncricinfo.com/cricket-videos/daryl-mitchell-mitchell-santner-is-one-of-the-best-white-ball-spinners-india-vs-new-zealand-1356225',
+        'info_dict': {
+            'id': '1356225',
+            'ext': 'mp4',
+            'description': '"Santner has done it for a long time for New Zealand - we\'re lucky to have him"',
+            'upload_date': '20230128',
+            'title': 'Mitchell: \'Santner is one of the best white-ball spinners at the moment\'',
+            'duration': 87,
+        },
+        'params': {'skip_download': 'm3u8'},
     }]

     def _real_extract(self, url):
View File
@@ -0,0 +1,60 @@
+from .common import InfoExtractor
+from ..utils import bool_or_none, traverse_obj, unified_timestamp, url_or_none
+
+
+class EttuTvIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?ettu\.tv/[^?#]+/playerpage/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://www.ettu.tv/en-int/playerpage/1573849',
+        'md5': '5874b7639a2aa866d1f6c3a4037c7c09',
+        'info_dict': {
+            'id': '1573849',
+            'title': 'Ni Xia Lian - Shao Jieni',
+            'description': 'ITTF Europe Top 16 Cup',
+            'timestamp': 1677348600,
+            'upload_date': '20230225',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+            'ext': 'mp4',
+        },
+    }, {
+        'url': 'https://www.ettu.tv/en-int/playerpage/1573753',
+        'md5': '1fc094bf96cf2d5ec0f434d3a6dec9aa',
+        'info_dict': {
+            'id': '1573753',
+            'title': 'Qiu Dang - Jorgic Darko',
+            'description': 'ITTF Europe Top 16 Cup',
+            'timestamp': 1677423600,
+            'upload_date': '20230226',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+            'ext': 'mp4',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        player_settings = self._download_json(
+            f'https://www.ettu.tv/api/v3/contents/{video_id}/player-settings', video_id, query={
+                'language': 'en',
+                'showTitle': 'true',
+                'device': 'desktop',
+            })
+
+        stream_response = self._download_json(player_settings['streamAccess'], video_id, data=b'')
+
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            stream_response['data']['stream'], video_id, 'mp4')
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            **traverse_obj(player_settings, {
+                'title': 'title',
+                'description': ('metaInformation', 'competition'),
+                'thumbnail': ('image', {url_or_none}),
+                'timestamp': ('date', {unified_timestamp}),
+                'is_live': ('isLivestream', {bool_or_none}),
+            })
+        }
View File
@@ -6,6 +6,7 @@ from ..utils import (
     parse_iso8601,
     parse_qs,
     qualities,
+    traverse_obj,
     unified_strdate,
     xpath_text
 )
@@ -92,42 +93,17 @@ class EuropaIE(InfoExtractor):

 class EuroParlWebstreamIE(InfoExtractor):
     _VALID_URL = r'''(?x)
-        https?://(?:multimedia|webstreaming)\.europarl\.europa\.eu/[^/#?]+/
-        (?:embed/embed\.html\?event=|(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
+        https?://multimedia\.europarl\.europa\.eu/[^/#?]+/
+        (?:(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
     '''
     _TESTS = [{
         'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
         'info_dict': {
-            'id': 'bcaa1db4-76ef-7e06-8da7-839bd0ad1dbe',
-            'ext': 'mp4',
-            'release_timestamp': 1663137900,
-            'title': 'Plenary session',
-            'release_date': '20220914',
-        },
-        'params': {
-            'skip_download': True,
-        }
-    }, {
-        'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/eu-cop27-un-climate-change-conference-in-sharm-el-sheikh-egypt-ep-delegation-meets-with-ngo-represen_20221114-1600-SPECIAL-OTHER',
-        'info_dict': {
-            'id': 'a8428de8-b9cd-6a2e-11e4-3805d9c9ff5c',
-            'ext': 'mp4',
-            'release_timestamp': 1668434400,
-            'release_date': '20221114',
-            'title': 'md5:d3550280c33cc70e0678652e3d52c028',
-        },
-        'params': {
-            'skip_download': True,
-        }
-    }, {
-        # embed webpage
-        'url': 'https://webstreaming.europarl.europa.eu/ep/embed/embed.html?event=20220914-0900-PLENARY&language=en&autoplay=true&logo=true',
-        'info_dict': {
-            'id': 'bcaa1db4-76ef-7e06-8da7-839bd0ad1dbe',
+            'id': '62388b15-d85b-4add-99aa-ba12ccf64f0d',
             'ext': 'mp4',
             'title': 'Plenary session',
-            'release_timestamp': 1663139069,
             'release_date': '20220914',
+            'release_timestamp': 1663137900,
         },
         'params': {
             'skip_download': True,
@@ -144,30 +120,54 @@ class EuroParlWebstreamIE(InfoExtractor):
             'live_status': 'is_live',
         },
         'skip': 'not live anymore'
+    }, {
+        'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT',
+        'info_dict': {
+            'id': '7355662c-8eac-445e-4bb9-08db14b0ddd7',
+            'ext': 'mp4',
+            'release_date': '20230301',
+            'title': 'Committee on Culture and Education',
+            'release_timestamp': 1677666641,
+        }
+    }, {
+        # live stream
+        'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-environment-public-health-and-food-safety_20230524-0900-COMMITTEE-ENVI',
+        'info_dict': {
+            'id': 'e4255f56-10aa-4b3c-6530-08db56d5b0d9',
+            'ext': 'mp4',
+            'release_date': '20230524',
+            'title': r're:Committee on Environment, Public Health and Food Safety \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}',
+            'release_timestamp': 1684911541,
+            'live_status': 'is_live',
+        },
+        'skip': 'Not live anymore'
     }]

     def _real_extract(self, url):
         display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        webpage_nextjs = self._search_nextjs_data(webpage, display_id)['props']['pageProps']

         json_info = self._download_json(
-            'https://vis-api.vuplay.co.uk/event/external', display_id,
+            'https://acs-api.europarl.connectedviews.eu/api/FullMeeting', display_id,
             query={
-                'player_key': 'europarl|718f822c-a48c-4841-9947-c9cb9bb1743c',
-                'external_id': display_id,
+                'api-version': 1.0,
+                'tenantId': 'bae646ca-1fc8-4363-80ba-2c04f06b4968',
+                'externalReference': display_id
             })

-        formats, subtitles = self._extract_mpd_formats_and_subtitles(json_info['streaming_url'], display_id)
-        fmts, subs = self._extract_m3u8_formats_and_subtitles(
-            json_info['streaming_url'].replace('.mpd', '.m3u8'), display_id)
-
-        formats.extend(fmts)
-        self._merge_subtitles(subs, target=subtitles)
+        formats, subtitles = [], {}
+        for hls_url in traverse_obj(json_info, ((('meetingVideo'), ('meetingVideos', ...)), 'hlsUrl')):
+            fmt, subs = self._extract_m3u8_formats_and_subtitles(hls_url, display_id)
+            formats.extend(fmt)
+            self._merge_subtitles(subs, target=subtitles)

         return {
             'id': json_info['id'],
-            'title': json_info.get('title'),
+            'title': traverse_obj(webpage_nextjs, (('mediaItem', 'title'), ('title', )), get_all=False),
             'formats': formats,
             'subtitles': subtitles,
-            'release_timestamp': parse_iso8601(json_info.get('published_start')),
-            'is_live': 'LIVE' in json_info.get('state', '')
+            'release_timestamp': parse_iso8601(json_info.get('startDateTime')),
+            'is_live': traverse_obj(webpage_nextjs, ('mediaItem', 'mediaSubType')) == 'Live'
         }
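Both this loop and the DRTV hunk above fold per-manifest subtitle dicts together with InfoExtractor._merge_subtitles. A small sketch of the merge semantics, using the (private) helper directly with invented URLs; tracks sharing a language key are concatenated and exact duplicate URLs dropped:

    from yt_dlp.extractor.common import InfoExtractor

    merged = {}
    InfoExtractor._merge_subtitles(
        {'en': [{'url': 'https://example.com/a.vtt'}]},
        {'en': [{'url': 'https://example.com/b.vtt'}],
         'da': [{'url': 'https://example.com/c.vtt'}]},
        target=merged)
    assert sorted(merged) == ['da', 'en'] and len(merged['en']) == 2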
View File
@@ -3,7 +3,7 @@ from ..utils import traverse_obj

 class EurosportIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.eurosport\.com/\w+/[\w-]+/\d+/[\w-]+_(?P<id>vid\d+)'
+    _VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)'
     _TESTS = [{
         'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
         'info_dict': {
@@ -44,6 +44,32 @@ class EurosportIE(InfoExtractor):
             'description': 'md5:32bbe3a773ac132c57fb1e8cca4b7c71',
             'upload_date': '20220727',
         }
+    }, {
+        'url': 'https://www.eurosport.com/football/champions-league/2022-2023/pep-guardiola-emotionally-destroyed-after-manchester-city-win-over-bayern-munich-in-champions-league_vid1896254/video.shtml',
+        'info_dict': {
+            'id': '3096477',
+            'ext': 'mp4',
+            'title': 'md5:82edc17370124c7a19b3cf518517583b',
+            'duration': 84.0,
+            'description': 'md5:b3f44ef7f5b5b95b24a273b163083feb',
+            'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2023/04/12/3682873-74947393-2560-1440.jpg',
+            'timestamp': 1681292028,
+            'upload_date': '20230412',
+            'display_id': 'vid1896254',
+        }
+    }, {
+        'url': 'https://www.eurosport.com/football/last-year-s-semi-final-pain-was-still-there-pep-guardiola-after-man-city-reach-cl-final_vid1914115/video.shtml',
+        'info_dict': {
+            'id': '3149108',
+            'ext': 'mp4',
+            'title': '\'Last year\'s semi-final pain was still there\' - Pep Guardiola after Man City reach CL final',
+            'description': 'md5:89ef142fe0170a66abab77fac2955d8e',
+            'display_id': 'vid1914115',
+            'timestamp': 1684403618,
+            'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2023/05/18/3707254-75435008-2560-1440.jpg',
+            'duration': 105.0,
+            'upload_date': '20230518',
+        }
     }]

     _TOKEN = None
View File
@ -8,6 +8,8 @@ from ..compat import (
compat_str, compat_str,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
) )
from ..networking import Request
from ..networking.exceptions import network_exceptions
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
clean_html, clean_html,
@ -19,11 +21,10 @@ from ..utils import (
int_or_none, int_or_none,
js_to_json, js_to_json,
merge_dicts, merge_dicts,
network_exceptions,
parse_count, parse_count,
parse_qs, parse_qs,
qualities, qualities,
sanitized_Request, str_or_none,
traverse_obj, traverse_obj,
try_get, try_get,
url_or_none, url_or_none,
@ -90,16 +91,16 @@ class FacebookIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '274175099429670', 'id': '274175099429670',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Asif Nawab Butt', 'title': 'Asif',
'description': 'Asif Nawab Butt', 'description': '',
'uploader': 'Asif Nawab Butt', 'uploader': 'Asif Nawab Butt',
'upload_date': '20140506', 'upload_date': '20140506',
'timestamp': 1399398998, 'timestamp': 1399398998,
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'uploader_id': 'pfbid04scW44U4P9iTyLZAGy8y8W3pR3i2VugvHCimiRudUAVbN3MPp9eXBaYFcgVworZwl',
'duration': 131.03,
'concurrent_view_count': int,
}, },
'expected_warnings': [
'title'
]
}, { }, {
'note': 'Video with DASH manifest', 'note': 'Video with DASH manifest',
'url': 'https://www.facebook.com/video.php?v=957955867617029', 'url': 'https://www.facebook.com/video.php?v=957955867617029',
@ -151,7 +152,7 @@ class FacebookIE(InfoExtractor):
# have 1080P, but only up to 720p in swf params # have 1080P, but only up to 720p in swf params
# data.video.story.attachments[].media # data.video.story.attachments[].media
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
'md5': '3f3798adb2b73423263e59376f1f5eb7', 'md5': 'ca63897a90c9452efee5f8c40d080e25',
'info_dict': { 'info_dict': {
'id': '10155529876156509', 'id': '10155529876156509',
'ext': 'mp4', 'ext': 'mp4',
@ -162,6 +163,9 @@ class FacebookIE(InfoExtractor):
'uploader': 'CNN', 'uploader': 'CNN',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'view_count': int, 'view_count': int,
'uploader_id': '100059479812265',
'concurrent_view_count': int,
'duration': 44.478,
}, },
}, { }, {
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
@ -170,12 +174,16 @@ class FacebookIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '1417995061575415', 'id': '1417995061575415',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ukrainian Scientists Worldwide | Довгоочікуване відео', 'title': 'Довгоочікуване відео | By Yaroslav - Facebook',
'description': 'Довгоочікуване відео', 'description': 'Довгоочікуване відео',
'timestamp': 1486648771, 'timestamp': 1486648217,
'upload_date': '20170209', 'upload_date': '20170209',
'uploader': 'Yaroslav Korpan', 'uploader': 'Yaroslav Korpan',
'uploader_id': '100000948048708', 'uploader_id': 'pfbid029y8j22EwH3ikeqgH3SEP9G3CAi9kmWKgXJJG9s5geV7mo3J2bvURqHCdgucRgAyhl',
'concurrent_view_count': int,
'thumbnail': r're:^https?://.*',
'view_count': int,
'duration': 11736.446,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -192,9 +200,7 @@ class FacebookIE(InfoExtractor):
'uploader': 'La Guía Del Varón', 'uploader': 'La Guía Del Varón',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
}, },
'params': { 'skip': 'Requires logging in',
'skip_download': True,
},
}, { }, {
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/', 'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
@ -208,9 +214,7 @@ class FacebookIE(InfoExtractor):
'uploader': 'Elisabeth Ahtn', 'uploader': 'Elisabeth Ahtn',
'uploader_id': '100013949973717', 'uploader_id': '100013949973717',
}, },
'params': { 'skip': 'Requires logging in',
'skip_download': True,
},
}, { }, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True, 'only_matching': True,
@ -252,7 +256,11 @@ class FacebookIE(InfoExtractor):
'timestamp': 1527084179, 'timestamp': 1527084179,
'upload_date': '20180523', 'upload_date': '20180523',
'uploader': 'ESL One Dota 2', 'uploader': 'ESL One Dota 2',
'uploader_id': '234218833769558', 'uploader_id': '100066514874195',
'duration': 4524.212,
'view_count': int,
'thumbnail': r're:^https?://.*',
'concurrent_view_count': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -262,8 +270,17 @@ class FacebookIE(InfoExtractor):
'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/', 'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/',
'info_dict': { 'info_dict': {
'id': '106560053808006', 'id': '106560053808006',
'ext': 'mp4',
'title': 'Josef',
'thumbnail': r're:^https?://.*',
'concurrent_view_count': int,
'uploader_id': 'pfbid02gXHbDwxumkaKJQaTGUf3znYfYzTuidGEWawiramNx4YamSj2afwYSRkpcjtHtMRJl',
'timestamp': 1549275572,
'duration': 3.413,
'uploader': 'Josef Novak',
'description': '',
'upload_date': '20190204',
}, },
'playlist_count': 2,
}, { }, {
# data.video.story.attachments[].media # data.video.story.attachments[].media
'url': 'https://www.facebook.com/watch/?v=647537299265662', 'url': 'https://www.facebook.com/watch/?v=647537299265662',
@ -276,6 +293,7 @@ class FacebookIE(InfoExtractor):
'id': '10157667649866271', 'id': '10157667649866271',
}, },
'playlist_count': 3, 'playlist_count': 3,
'skip': 'Requires logging in',
}, { }, {
# data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media # data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media
'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330', 'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330',
@ -319,7 +337,7 @@ class FacebookIE(InfoExtractor):
} }
def _perform_login(self, username, password): def _perform_login(self, username, password):
login_page_req = sanitized_Request(self._LOGIN_URL) login_page_req = Request(self._LOGIN_URL)
self._set_cookie('facebook.com', 'locale', 'en_US') self._set_cookie('facebook.com', 'locale', 'en_US')
login_page = self._download_webpage(login_page_req, None, login_page = self._download_webpage(login_page_req, None,
note='Downloading login page', note='Downloading login page',
@ -340,8 +358,8 @@ class FacebookIE(InfoExtractor):
'timezone': '-60', 'timezone': '-60',
'trynum': '1', 'trynum': '1',
} }
request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form)) request = Request(self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
try: try:
login_results = self._download_webpage(request, None, login_results = self._download_webpage(request, None,
note='Logging in', errnote='unable to fetch login page') note='Logging in', errnote='unable to fetch login page')
@ -367,8 +385,8 @@ class FacebookIE(InfoExtractor):
'h': h, 'h': h,
'name_action_selected': 'dont_save', 'name_action_selected': 'dont_save',
} }
check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) check_req = Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') check_req.headers['Content-Type'] = 'application/x-www-form-urlencoded'
check_response = self._download_webpage(check_req, None, check_response = self._download_webpage(check_req, None,
note='Confirming login') note='Confirming login')
if re.search(r'id="checkpointSubmitButton"', check_response) is not None: if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
@ -390,7 +408,10 @@ class FacebookIE(InfoExtractor):
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict) k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
title = get_first(media, ('title', 'text')) title = get_first(media, ('title', 'text'))
description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text')) description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {} uploader_data = (
get_first(media, ('owner', {dict}))
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
or get_first(post, ('node', 'actors', ..., {dict})) or {})
page_title = title or self._html_search_regex(( page_title = title or self._html_search_regex((
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>', r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
@@ -415,16 +436,17 @@ class FacebookIE(InfoExtractor):
         # in https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/
         if thumbnail and not re.search(r'\.(?:jpg|png)', thumbnail):
             thumbnail = None
-        view_count = parse_count(self._search_regex(
-            r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
-            default=None))
         info_dict = {
             'description': description,
             'uploader': uploader,
             'uploader_id': uploader_data.get('id'),
             'timestamp': timestamp,
             'thumbnail': thumbnail,
-            'view_count': view_count,
+            'view_count': parse_count(self._search_regex(
+                (r'\bviewCount\s*:\s*["\']([\d,.]+)', r'video_view_count["\']\s*:\s*(\d+)',),
+                webpage, 'view count', default=None)),
+            'concurrent_view_count': get_first(post, (
+                ('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
         }

         info_json_ld = self._search_json_ld(webpage, video_id, default={})
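The traverse_obj/get_first paths in this hunk pack a lot in: `...` branches over every element at that level, a set containing a type (e.g. {dict}) filters by type, and a set containing a callable (e.g. {int_or_none}) applies it to the matched value. A small self-contained sketch against a toy payload (the dict shape is illustrative, not Facebook's actual schema):

from yt_dlp.utils import int_or_none, traverse_obj

# Toy payload, loosely shaped like a GraphQL post (illustrative only)
post = {'video': {'attachments': [{'media': {'liveViewerCount': '42'}}]}}

# `...` branches into each list element; `{int_or_none}` coerces the result;
# missing keys yield None instead of raising
count = traverse_obj(
    post, ('video', 'attachments', ..., 'media', 'liveViewerCount', {int_or_none}),
    get_all=False)
assert count == 42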
@@ -459,7 +481,8 @@ class FacebookIE(InfoExtractor):
             dash_manifest = video.get('dash_manifest')
             if dash_manifest:
                 formats.extend(self._parse_mpd_formats(
-                    compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest))))
+                    compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
+                    mpd_url=video.get('dash_manifest_url')))

         def process_formats(info):
             # Downloads with browser's User-Agent are rate limited. Working around
@@ -493,6 +516,13 @@ class FacebookIE(InfoExtractor):
         entries = []

         def parse_graphql_video(video):
+            v_id = video.get('videoId') or video.get('id') or video_id
+            reel_info = traverse_obj(
+                video, ('creation_story', 'short_form_video_context', 'playback_video', {dict}))
+            if reel_info:
+                video = video['creation_story']
+                video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
+                video.update(reel_info)
             formats = []
             q = qualities(['sd', 'hd'])
             for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
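The lines added at the top of parse_graphql_video unwrap Facebook's reel payloads, where the playable video sits one level down. Roughly how that reshaping behaves on a toy dict (field names copied from the diff; the values are made up):

from yt_dlp.utils import traverse_obj

video = {'creation_story': {'short_form_video_context': {
    'playback_video': {'playable_url': 'https://example.com/v.mp4'},
    'video_owner': {'id': '123'},
}}}

reel_info = traverse_obj(
    video, ('creation_story', 'short_form_video_context', 'playback_video', {dict}))
if reel_info:
    video = video['creation_story']
    video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
    video.update(reel_info)  # hoist the playable fields to the top level

assert video['owner']['id'] == '123' and 'playable_url' in video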
@@ -509,15 +539,15 @@ class FacebookIE(InfoExtractor):
                         'url': playable_url,
                     })
             extract_dash_manifest(video, formats)
-            v_id = video.get('videoId') or video.get('id') or video_id
             info = {
                 'id': v_id,
                 'formats': formats,
                 'thumbnail': traverse_obj(
                     video, ('thumbnailImage', 'uri'), ('preferred_thumbnail', 'image', 'uri')),
-                'uploader_id': try_get(video, lambda x: x['owner']['id']),
-                'timestamp': int_or_none(video.get('publish_time')),
-                'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
+                'uploader_id': traverse_obj(video, ('owner', 'id', {str_or_none})),
+                'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
+                'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
+                             or float_or_none(video.get('length_in_second'))),
             }
             process_formats(info)
             description = try_get(video, lambda x: x['savable_description']['text'])
@@ -778,18 +808,18 @@ class FacebookReelIE(InfoExtractor):
     _TESTS = [{
         'url': 'https://www.facebook.com/reel/1195289147628387',
-        'md5': 'c4ff9a7182ff9ff7d6f7a83603bae831',
+        'md5': 'f13dd37f2633595982db5ed8765474d3',
         'info_dict': {
             'id': '1195289147628387',
             'ext': 'mp4',
-            'title': 'md5:9f5b142921b2dc57004fa13f76005f87',
-            'description': 'md5:24ea7ef062215d295bdde64e778f5474',
-            'uploader': 'Beast Camp Training',
-            'uploader_id': '1738535909799870',
-            'duration': 9.536,
-            'thumbnail': r're:^https?://.*',
-            'upload_date': '20211121',
-            'timestamp': 1637502604,
+            'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
+            'description': 'md5:22f03309b216ac84720183961441d8db',
+            'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
+            'uploader_id': '100040874179269',
+            'duration': 9.579,
+            'timestamp': 1637502609,
+            'upload_date': '20211121',
+            'thumbnail': r're:^https?://.*',
         }
     }]

View File

@@ -3,11 +3,11 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_parse_qs
 from ..dependencies import websockets
+from ..networking import Request
 from ..utils import (
     ExtractorError,
     WebSocketsWrapper,
     js_to_json,
-    sanitized_Request,
     traverse_obj,
     update_url_query,
     urlencode_postdata,
@@ -57,7 +57,7 @@ class FC2IE(InfoExtractor):
        }

        login_data = urlencode_postdata(login_form_strs)
-        request = sanitized_Request(
+        request = Request(
            'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)

        login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
@@ -66,7 +66,7 @@ class FC2IE(InfoExtractor):
            return False

        # this is also needed
-        login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
+        login_redir = Request('http://id.fc2.com/?mode=redirect&login=done')
        self._download_webpage(
            login_redir, None, note='Login redirect', errnote='Login redirect failed')

View File

@@ -1,8 +1,6 @@
 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_HTTPError,
-)
+from ..compat import compat_str
+from ..networking.exceptions import HTTPError
 from ..utils import (
     qualities,
     strip_or_none,
@@ -40,8 +38,8 @@ class FilmOnIE(InfoExtractor):
                'https://www.filmon.com/api/vod/movie?id=%s' % video_id,
                video_id)['response']
        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError):
-                errmsg = self._parse_json(e.cause.read().decode(), video_id)['reason']
+            if isinstance(e.cause, HTTPError):
+                errmsg = self._parse_json(e.cause.response.read().decode(), video_id)['reason']
                raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
            raise
@@ -124,8 +122,8 @@ class FilmOnChannelIE(InfoExtractor):
            channel_data = self._download_json(
                'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data']
        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError):
-                errmsg = self._parse_json(e.cause.read().decode(), channel_id)['message']
+            if isinstance(e.cause, HTTPError):
+                errmsg = self._parse_json(e.cause.response.read().decode(), channel_id)['message']
                raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
            raise
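The same compat_HTTPError to HTTPError migration recurs across these extractors: the exception now comes from yt-dlp's own networking layer, the status code is .status rather than .code, and the response body is read from .response rather than from the exception itself. A sketch of the idiom inside a hypothetical extractor (ExampleIE, its API URL, and the 'reason' field are made up for illustration):

from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.networking.exceptions import HTTPError
from yt_dlp.utils import ExtractorError

class ExampleIE(InfoExtractor):  # hypothetical, for illustration only
    def _call_api(self, url, video_id):
        try:
            return self._download_json(url, video_id)
        except ExtractorError as e:
            # e.cause is yt-dlp's HTTPError now, not urllib's: the status code
            # moved from .code to .status, and the body is read via .response
            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                errmsg = self._parse_json(
                    e.cause.response.read().decode(), video_id)['reason']
                raise ExtractorError(f'{self.IE_NAME} said: {errmsg}', expected=True)
            raise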

View File

@@ -3,10 +3,10 @@ import uuid
 from .common import InfoExtractor
 from ..compat import (
-    compat_HTTPError,
     compat_str,
     compat_urllib_parse_unquote,
 )
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -20,7 +20,7 @@ from ..utils import (
 class FOXIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
+    _VALID_URL = r'https?://(?:www\.)?fox(?:sports)?\.com/(?:watch|replay)/(?P<id>[\da-fA-F]+)'
     _TESTS = [{
         # clip
         'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
@@ -50,6 +50,10 @@ class FOXIE(InfoExtractor):
         # sports event, geo-restricted
         'url': 'https://www.fox.com/watch/b057484dade738d1f373b3e46216fa2c/',
         'only_matching': True,
+    }, {
+        # fox sports replay, geo-restricted
+        'url': 'https://www.foxsports.com/replay/561f3e071347a24e5e877abc56b22e89',
+        'only_matching': True,
     }]
     _GEO_BYPASS = False
     _HOME_PAGE_URL = 'https://www.fox.com/'
@@ -68,9 +72,9 @@ class FOXIE(InfoExtractor):
                'https://api3.fox.com/v2.0/' + path,
                video_id, data=data, headers=headers)
        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                entitlement_issues = self._parse_json(
-                    e.cause.read().decode(), video_id)['entitlementIssues']
+                    e.cause.response.read().decode(), video_id)['entitlementIssues']
                for e in entitlement_issues:
                    if e.get('errorCode') == 1005:
                        raise ExtractorError(
@@ -123,8 +127,8 @@ class FOXIE(InfoExtractor):
        try:
            m3u8_url = self._download_json(release_url, video_id)['playURL']
        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-                error = self._parse_json(e.cause.read().decode(), video_id)
+            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+                error = self._parse_json(e.cause.response.read().decode(), video_id)
                if error.get('exception') == 'GeoLocationBlocked':
                    self.raise_geo_restricted(countries=['US'])
                raise ExtractorError(error['description'], expected=True)
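The widened _VALID_URL above now also accepts foxsports.com replay links (and, since the host and path groups are independent, the other host/path combinations as well). A quick standalone check of the pattern against the two URL shapes from the tests:

import re

_VALID_URL = r'https?://(?:www\.)?fox(?:sports)?\.com/(?:watch|replay)/(?P<id>[\da-fA-F]+)'

for url in (
        'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
        'https://www.foxsports.com/replay/561f3e071347a24e5e877abc56b22e89'):
    mobj = re.match(_VALID_URL, url)
    assert mobj is not None
    print(mobj.group('id'))  # hex video ID captured by the named group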

Some files were not shown because too many files have changed in this diff.