Upgrade yt_dlp and download script

This commit is contained in:
itdominator 2025-05-02 16:11:08 -05:00
parent 3a2e8eeb08
commit d68d9ce4f9
1194 changed files with 60099 additions and 44436 deletions

View File

@ -8,12 +8,29 @@
function main() { function main() {
cd "$(dirname "")" _STARGET="${1}"
echo "Working Dir: " $(pwd) _SPATH="${HOME}/.config/solarfm/plugins/youtube_download"
LINK=`xclip -selection clipboard -o` LINK=`xclip -selection clipboard -o`
python "${HOME}/.config/solarfm/plugins/youtube_download/yt_dlp/__main__.py" \ cd "${_SPATH}"
--write-sub --embed-sub --sub-langs en \ echo "Working Dir: " $(pwd)
-o "${1}/%(title)s.%(ext)s" "${LINK}"
rm "${_SPATH}/../../cookies.txt"
# Note: Export cookies to file
python "${_SPATH}/yt_dlp/__main__.py" \
--cookies-from-browser firefox --cookies "${_SPATH}/../../cookies.txt"
# Note: Use cookies from browser directly
# python "${_SPATH}/yt_dlp/__main__.py" \
# --cookies-from-browser firefox --write-sub --embed-sub --sub-langs en \
# -o "${_STARGET}/%(title)s.%(ext)s" "${LINK}"
# Note: Download video
python "${_SPATH}/yt_dlp/__main__.py" \
-f "bestvideo[height<=1080][ext=mp4][vcodec^=avc]+bestaudio[ext=m4a]/best[ext=mp4]/best" \
--cookies "${_SPATH}/../../cookies.txt" --write-sub --embed-sub --sub-langs en \
-o "${_STARGET}/%(title)s.%(ext)s" "${LINK}"
} }
main "$@"; main "$@";

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,10 @@
try: import sys
import contextvars # noqa: F401
except Exception:
raise Exception(
f'You are using an unsupported version of Python. Only Python versions 3.7 and above are supported by yt-dlp') # noqa: F541
__license__ = 'Public Domain' if sys.version_info < (3, 9):
raise ImportError(
f'You are using an unsupported version of Python. Only Python versions 3.9 and above are supported by yt-dlp') # noqa: F541
__license__ = 'The Unlicense'
import collections import collections
import getpass import getpass
@ -12,15 +12,16 @@ import itertools
import optparse import optparse
import os import os
import re import re
import sys
import traceback import traceback
from .compat import compat_shlex_quote from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .downloader.external import get_external_downloader from .downloader.external import get_external_downloader
from .extractor import list_extractor_classes from .extractor import list_extractor_classes
from .extractor.adobepass import MSO_INFO from .extractor.adobepass import MSO_INFO
from .networking.impersonate import ImpersonateTarget
from .globals import IN_CLI, plugin_dirs
from .options import parseOpts from .options import parseOpts
from .plugins import load_all_plugins as _load_all_plugins
from .postprocessor import ( from .postprocessor import (
FFmpegExtractAudioPP, FFmpegExtractAudioPP,
FFmpegMergerPP, FFmpegMergerPP,
@ -43,12 +44,12 @@ from .utils import (
GeoUtils, GeoUtils,
PlaylistEntries, PlaylistEntries,
SameFileError, SameFileError,
decodeOption,
download_range_func, download_range_func,
expand_path, expand_path,
float_or_none, float_or_none,
format_field, format_field,
int_or_none, int_or_none,
join_nonempty,
match_filter_func, match_filter_func,
parse_bytes, parse_bytes,
parse_duration, parse_duration,
@ -57,15 +58,15 @@ from .utils import (
read_stdin, read_stdin,
render_table, render_table,
setproctitle, setproctitle,
shell_quote,
traverse_obj, traverse_obj,
variadic, variadic,
write_string, write_string,
) )
from .utils.networking import std_headers from .utils.networking import std_headers
from .utils._utils import _UnsafeExtensionError
from .YoutubeDL import YoutubeDL from .YoutubeDL import YoutubeDL
_IN_CLI = False
def _exit(status=0, *args): def _exit(status=0, *args):
for msg in args: for msg in args:
@ -74,14 +75,16 @@ def _exit(status=0, *args):
def get_urls(urls, batchfile, verbose): def get_urls(urls, batchfile, verbose):
# Batch file verification """
@param verbose -1: quiet, 0: normal, 1: verbose
"""
batch_urls = [] batch_urls = []
if batchfile is not None: if batchfile is not None:
try: try:
batch_urls = read_batch_urls( batch_urls = read_batch_urls(
read_stdin('URLs') if batchfile == '-' read_stdin(None if verbose == -1 else 'URLs') if batchfile == '-'
else open(expand_path(batchfile), encoding='utf-8', errors='ignore')) else open(expand_path(batchfile), encoding='utf-8', errors='ignore'))
if verbose: if verbose == 1:
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
except OSError: except OSError:
_exit(f'ERROR: batch file {batchfile} could not be read') _exit(f'ERROR: batch file {batchfile} could not be read')
@ -112,9 +115,9 @@ def print_extractor_information(opts, urls):
ie.description(markdown=False, search_examples=_SEARCHES) ie.description(markdown=False, search_examples=_SEARCHES)
for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False) for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False)
elif opts.ap_list_mso: elif opts.ap_list_mso:
out = 'Supported TV Providers:\n%s\n' % render_table( out = 'Supported TV Providers:\n{}\n'.format(render_table(
['mso', 'mso name'], ['mso', 'mso name'],
[[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]) [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]))
else: else:
return False return False
write_string(out, out=sys.stdout) write_string(out, out=sys.stdout)
@ -126,7 +129,7 @@ def set_compat_opts(opts):
if name not in opts.compat_opts: if name not in opts.compat_opts:
return False return False
opts.compat_opts.discard(name) opts.compat_opts.discard(name)
opts.compat_opts.update(['*%s' % name]) opts.compat_opts.update([f'*{name}'])
return True return True
def set_default_compat(compat_name, opt_name, default=True, remove_compat=True): def set_default_compat(compat_name, opt_name, default=True, remove_compat=True):
@ -153,6 +156,9 @@ def set_compat_opts(opts):
opts.embed_infojson = False opts.embed_infojson = False
if 'format-sort' in opts.compat_opts: if 'format-sort' in opts.compat_opts:
opts.format_sort.extend(FormatSorter.ytdl_default) opts.format_sort.extend(FormatSorter.ytdl_default)
elif 'prefer-vp9-sort' in opts.compat_opts:
opts.format_sort.extend(FormatSorter._prefer_vp9_sort)
_video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
_audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False) _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False)
if _video_multistreams_set is False and _audio_multistreams_set is False: if _video_multistreams_set is False and _audio_multistreams_set is False:
@ -219,7 +225,7 @@ def validate_options(opts):
validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval') validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval')
if opts.wait_for_video is not None: if opts.wait_for_video is not None:
min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None]) min_wait, max_wait, *_ = map(parse_duration, [*opts.wait_for_video.split('-', 1), None])
validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video), validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video),
'time range to wait for video', opts.wait_for_video) 'time range to wait for video', opts.wait_for_video)
validate_minmax(min_wait, max_wait, 'time range to wait for video') validate_minmax(min_wait, max_wait, 'time range to wait for video')
@ -230,6 +236,11 @@ def validate_options(opts):
validate_regex('format sorting', f, FormatSorter.regex) validate_regex('format sorting', f, FormatSorter.regex)
# Postprocessor formats # Postprocessor formats
if opts.convertsubtitles == 'none':
opts.convertsubtitles = None
if opts.convertthumbnails == 'none':
opts.convertthumbnails = None
validate_regex('merge output format', opts.merge_output_format, validate_regex('merge output format', opts.merge_output_format,
r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS)))) r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS))))
validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE) validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)
@ -249,9 +260,11 @@ def validate_options(opts):
elif value in ('inf', 'infinite'): elif value in ('inf', 'infinite'):
return float('inf') return float('inf')
try: try:
return int(value) int_value = int(value)
except (TypeError, ValueError): except (TypeError, ValueError):
validate(False, f'{name} retry count', value) validate(False, f'{name} retry count', value)
validate_positive(f'{name} retry count', int_value)
return int_value
opts.retries = parse_retries('download', opts.retries) opts.retries = parse_retries('download', opts.retries)
opts.fragment_retries = parse_retries('fragment', opts.fragment_retries) opts.fragment_retries = parse_retries('fragment', opts.fragment_retries)
@ -261,9 +274,9 @@ def validate_options(opts):
# Retry sleep function # Retry sleep function
def parse_sleep_func(expr): def parse_sleep_func(expr):
NUMBER_RE = r'\d+(?:\.\d+)?' NUMBER_RE = r'\d+(?:\.\d+)?'
op, start, limit, step, *_ = tuple(re.fullmatch( op, start, limit, step, *_ = (*tuple(re.fullmatch(
rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?', rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?',
expr.strip()).groups()) + (None, None) expr.strip()).groups()), None, None)
if op == 'exp': if op == 'exp':
return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf')) return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf'))
@ -281,18 +294,20 @@ def validate_options(opts):
raise ValueError(f'invalid {key} retry sleep expression {expr!r}') raise ValueError(f'invalid {key} retry sleep expression {expr!r}')
# Bytes # Bytes
def validate_bytes(name, value): def validate_bytes(name, value, strict_positive=False):
if value is None: if value is None:
return None return None
numeric_limit = parse_bytes(value) numeric_limit = parse_bytes(value)
validate(numeric_limit is not None, 'rate limit', value) validate(numeric_limit is not None, name, value)
if strict_positive:
validate_positive(name, numeric_limit, True)
return numeric_limit return numeric_limit
opts.ratelimit = validate_bytes('rate limit', opts.ratelimit) opts.ratelimit = validate_bytes('rate limit', opts.ratelimit, True)
opts.throttledratelimit = validate_bytes('throttled rate limit', opts.throttledratelimit) opts.throttledratelimit = validate_bytes('throttled rate limit', opts.throttledratelimit)
opts.min_filesize = validate_bytes('min filesize', opts.min_filesize) opts.min_filesize = validate_bytes('min filesize', opts.min_filesize)
opts.max_filesize = validate_bytes('max filesize', opts.max_filesize) opts.max_filesize = validate_bytes('max filesize', opts.max_filesize)
opts.buffersize = validate_bytes('buffer size', opts.buffersize) opts.buffersize = validate_bytes('buffer size', opts.buffersize, True)
opts.http_chunk_size = validate_bytes('http chunk size', opts.http_chunk_size) opts.http_chunk_size = validate_bytes('http chunk size', opts.http_chunk_size)
# Output templates # Output templates
@ -387,16 +402,19 @@ def validate_options(opts):
f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}') f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}')
opts.cookiesfrombrowser = (browser_name, profile, keyring, container) opts.cookiesfrombrowser = (browser_name, profile, keyring, container)
if opts.impersonate is not None:
opts.impersonate = ImpersonateTarget.from_str(opts.impersonate.lower())
# MetadataParser # MetadataParser
def metadataparser_actions(f): def metadataparser_actions(f):
if isinstance(f, str): if isinstance(f, str):
cmd = '--parse-metadata %s' % compat_shlex_quote(f) cmd = f'--parse-metadata {shell_quote(f)}'
try: try:
actions = [MetadataFromFieldPP.to_action(f)] actions = [MetadataFromFieldPP.to_action(f)]
except Exception as err: except Exception as err:
raise ValueError(f'{cmd} is invalid; {err}') raise ValueError(f'{cmd} is invalid; {err}')
else: else:
cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f)) cmd = f'--replace-in-metadata {shell_quote(f)}'
actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(',')) actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(','))
for action in actions: for action in actions:
@ -407,13 +425,17 @@ def validate_options(opts):
yield action yield action
if opts.metafromtitle is not None: if opts.metafromtitle is not None:
opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle) opts.parse_metadata.setdefault('pre_process', []).append(f'title:{opts.metafromtitle}')
opts.parse_metadata = { opts.parse_metadata = {
k: list(itertools.chain(*map(metadataparser_actions, v))) k: list(itertools.chain(*map(metadataparser_actions, v)))
for k, v in opts.parse_metadata.items() for k, v in opts.parse_metadata.items()
} }
# Other options # Other options
opts.plugin_dirs = opts.plugin_dirs
if opts.plugin_dirs is None:
opts.plugin_dirs = ['default']
if opts.playlist_items is not None: if opts.playlist_items is not None:
try: try:
tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items)) tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items))
@ -460,7 +482,7 @@ def validate_options(opts):
default_downloader = ed.get_basename() default_downloader = ed.get_basename()
for policy in opts.color.values(): for policy in opts.color.values():
if policy not in ('always', 'auto', 'no_color', 'never'): if policy not in ('always', 'auto', 'auto-tty', 'no_color', 'no_color-tty', 'never'):
raise ValueError(f'"{policy}" is not a valid color policy') raise ValueError(f'"{policy}" is not a valid color policy')
warnings, deprecation_warnings = [], [] warnings, deprecation_warnings = [], []
@ -586,6 +608,13 @@ def validate_options(opts):
if opts.ap_username is not None and opts.ap_password is None: if opts.ap_username is not None and opts.ap_password is None:
opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ') opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ')
# compat option changes global state destructively; only allow from cli
if 'allow-unsafe-ext' in opts.compat_opts:
warnings.append(
'Using allow-unsafe-ext opens you up to potential attacks. '
'Use with great care!')
_UnsafeExtensionError.sanitize_extension = lambda x, prepend=False: x
return warnings, deprecation_warnings return warnings, deprecation_warnings
@ -596,7 +625,7 @@ def get_postprocessors(opts):
yield { yield {
'key': 'MetadataParser', 'key': 'MetadataParser',
'actions': actions, 'actions': actions,
'when': when 'when': when,
} }
sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
if sponsorblock_query: if sponsorblock_query:
@ -604,19 +633,19 @@ def get_postprocessors(opts):
'key': 'SponsorBlock', 'key': 'SponsorBlock',
'categories': sponsorblock_query, 'categories': sponsorblock_query,
'api': opts.sponsorblock_api, 'api': opts.sponsorblock_api,
'when': 'after_filter' 'when': 'after_filter',
} }
if opts.convertsubtitles: if opts.convertsubtitles:
yield { yield {
'key': 'FFmpegSubtitlesConvertor', 'key': 'FFmpegSubtitlesConvertor',
'format': opts.convertsubtitles, 'format': opts.convertsubtitles,
'when': 'before_dl' 'when': 'before_dl',
} }
if opts.convertthumbnails: if opts.convertthumbnails:
yield { yield {
'key': 'FFmpegThumbnailsConvertor', 'key': 'FFmpegThumbnailsConvertor',
'format': opts.convertthumbnails, 'format': opts.convertthumbnails,
'when': 'before_dl' 'when': 'before_dl',
} }
if opts.extractaudio: if opts.extractaudio:
yield { yield {
@ -641,7 +670,7 @@ def get_postprocessors(opts):
yield { yield {
'key': 'FFmpegEmbedSubtitle', 'key': 'FFmpegEmbedSubtitle',
# already_have_subtitle = True prevents the file from being deleted after embedding # already_have_subtitle = True prevents the file from being deleted after embedding
'already_have_subtitle': opts.writesubtitles and keep_subs 'already_have_subtitle': opts.writesubtitles and keep_subs,
} }
if not opts.writeautomaticsub and keep_subs: if not opts.writeautomaticsub and keep_subs:
opts.writesubtitles = True opts.writesubtitles = True
@ -654,7 +683,7 @@ def get_postprocessors(opts):
'remove_sponsor_segments': opts.sponsorblock_remove, 'remove_sponsor_segments': opts.sponsorblock_remove,
'remove_ranges': opts.remove_ranges, 'remove_ranges': opts.remove_ranges,
'sponsorblock_chapter_title': opts.sponsorblock_chapter_title, 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title,
'force_keyframes': opts.force_keyframes_at_cuts 'force_keyframes': opts.force_keyframes_at_cuts,
} }
# FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
# FFmpegExtractAudioPP as containers before conversion may not support # FFmpegExtractAudioPP as containers before conversion may not support
@ -688,7 +717,7 @@ def get_postprocessors(opts):
yield { yield {
'key': 'EmbedThumbnail', 'key': 'EmbedThumbnail',
# already_have_thumbnail = True prevents the file from being deleted after embedding # already_have_thumbnail = True prevents the file from being deleted after embedding
'already_have_thumbnail': opts.writethumbnail 'already_have_thumbnail': opts.writethumbnail,
} }
if not opts.writethumbnail: if not opts.writethumbnail:
opts.writethumbnail = True opts.writethumbnail = True
@ -722,7 +751,7 @@ ParsedOptions = collections.namedtuple('ParsedOptions', ('parser', 'options', 'u
def parse_options(argv=None): def parse_options(argv=None):
"""@returns ParsedOptions(parser, opts, urls, ydl_opts)""" """@returns ParsedOptions(parser, opts, urls, ydl_opts)"""
parser, opts, urls = parseOpts(argv) parser, opts, urls = parseOpts(argv)
urls = get_urls(urls, opts.batchfile, opts.verbose) urls = get_urls(urls, opts.batchfile, -1 if opts.quiet and not opts.verbose else opts.verbose)
set_compat_opts(opts) set_compat_opts(opts)
try: try:
@ -735,7 +764,7 @@ def parse_options(argv=None):
print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:]) print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:])
any_getting = any(getattr(opts, k) for k in ( any_getting = any(getattr(opts, k) for k in (
'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename',
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl',
)) ))
if opts.quiet is None: if opts.quiet is None:
opts.quiet = any_getting or opts.print_json or bool(opts.forceprint) opts.quiet = any_getting or opts.print_json or bool(opts.forceprint)
@ -830,6 +859,7 @@ def parse_options(argv=None):
'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress, 'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress,
'progress_with_newline': opts.progress_with_newline, 'progress_with_newline': opts.progress_with_newline,
'progress_template': opts.progress_template, 'progress_template': opts.progress_template,
'progress_delta': opts.progress_delta,
'playliststart': opts.playliststart, 'playliststart': opts.playliststart,
'playlistend': opts.playlistend, 'playlistend': opts.playlistend,
'playlistreverse': opts.playlist_reverse, 'playlistreverse': opts.playlist_reverse,
@ -858,8 +888,8 @@ def parse_options(argv=None):
'listsubtitles': opts.listsubtitles, 'listsubtitles': opts.listsubtitles,
'subtitlesformat': opts.subtitlesformat, 'subtitlesformat': opts.subtitlesformat,
'subtitleslangs': opts.subtitleslangs, 'subtitleslangs': opts.subtitleslangs,
'matchtitle': decodeOption(opts.matchtitle), 'matchtitle': opts.matchtitle,
'rejecttitle': decodeOption(opts.rejecttitle), 'rejecttitle': opts.rejecttitle,
'max_downloads': opts.max_downloads, 'max_downloads': opts.max_downloads,
'prefer_free_formats': opts.prefer_free_formats, 'prefer_free_formats': opts.prefer_free_formats,
'trim_file_name': opts.trim_file_name, 'trim_file_name': opts.trim_file_name,
@ -910,6 +940,7 @@ def parse_options(argv=None):
'postprocessors': postprocessors, 'postprocessors': postprocessors,
'fixup': opts.fixup, 'fixup': opts.fixup,
'source_address': opts.source_address, 'source_address': opts.source_address,
'impersonate': opts.impersonate,
'call_home': opts.call_home, 'call_home': opts.call_home,
'sleep_interval_requests': opts.sleep_interval_requests, 'sleep_interval_requests': opts.sleep_interval_requests,
'sleep_interval': opts.sleep_interval, 'sleep_interval': opts.sleep_interval,
@ -959,6 +990,11 @@ def _real_main(argv=None):
if opts.ffmpeg_location: if opts.ffmpeg_location:
FFmpegPostProcessor._ffmpeg_location.set(opts.ffmpeg_location) FFmpegPostProcessor._ffmpeg_location.set(opts.ffmpeg_location)
# load all plugins into the global lookup
plugin_dirs.value = opts.plugin_dirs
if plugin_dirs.value:
_load_all_plugins()
with YoutubeDL(ydl_opts) as ydl: with YoutubeDL(ydl_opts) as ydl:
pre_process = opts.update_self or opts.rm_cachedir pre_process = opts.update_self or opts.rm_cachedir
actual_use = all_urls or opts.load_info_filename actual_use = all_urls or opts.load_info_filename
@ -979,11 +1015,68 @@ def _real_main(argv=None):
traceback.print_exc() traceback.print_exc()
ydl._download_retcode = 100 ydl._download_retcode = 100
if opts.list_impersonate_targets:
known_targets = [
# List of simplified targets we know are supported,
# to help users know what dependencies may be required.
(ImpersonateTarget('chrome'), 'curl_cffi'),
(ImpersonateTarget('safari'), 'curl_cffi'),
(ImpersonateTarget('firefox'), 'curl_cffi>=0.10'),
(ImpersonateTarget('edge'), 'curl_cffi'),
]
available_targets = ydl._get_available_impersonate_targets()
def make_row(target, handler):
return [
join_nonempty(target.client.title(), target.version, delim='-') or '-',
join_nonempty((target.os or '').title(), target.os_version, delim='-') or '-',
handler,
]
rows = [make_row(target, handler) for target, handler in available_targets]
for known_target, known_handler in known_targets:
if not any(
known_target in target and known_handler.startswith(handler)
for target, handler in available_targets
):
rows.insert(0, [
ydl._format_out(text, ydl.Styles.SUPPRESS)
for text in make_row(known_target, f'{known_handler} (unavailable)')
])
ydl.to_screen('[info] Available impersonate targets')
ydl.to_stdout(render_table(['Client', 'OS', 'Source'], rows, extra_gap=2, delim='-'))
return
if not actual_use: if not actual_use:
if pre_process: if pre_process:
return ydl._download_retcode return ydl._download_retcode
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv) args = sys.argv[1:] if argv is None else argv
ydl.warn_if_short_id(args)
# Show a useful error message and wait for keypress if not launched from shell on Windows
if not args and os.name == 'nt' and getattr(sys, 'frozen', False):
import ctypes.wintypes
import msvcrt
kernel32 = ctypes.WinDLL('Kernel32')
buffer = (1 * ctypes.wintypes.DWORD)()
attached_processes = kernel32.GetConsoleProcessList(buffer, 1)
# If we only have a single process attached, then the executable was double clicked
# When using `pyinstaller` with `--onefile`, two processes get attached
is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
if attached_processes == 1 or (is_onefile and attached_processes == 2):
print(parser._generate_error_message(
'Do not double-click the executable, instead call it from a command line.\n'
'Please read the README for further information on how to use yt-dlp: '
'https://github.com/yt-dlp/yt-dlp#readme'))
msvcrt.getch()
_exit(2)
parser.error( parser.error(
'You must provide at least one URL.\n' 'You must provide at least one URL.\n'
'Type yt-dlp --help to see a list of all options.') 'Type yt-dlp --help to see a list of all options.')
@ -1002,11 +1095,10 @@ def _real_main(argv=None):
def main(argv=None): def main(argv=None):
global _IN_CLI IN_CLI.value = True
_IN_CLI = True
try: try:
_exit(*variadic(_real_main(argv))) _exit(*variadic(_real_main(argv)))
except DownloadError: except (CookieLoadError, DownloadError):
_exit(1) _exit(1)
except SameFileError as e: except SameFileError as e:
_exit(f'ERROR: {e}') _exit(f'ERROR: {e}')
@ -1024,9 +1116,9 @@ def main(argv=None):
from .extractor import gen_extractors, list_extractors from .extractor import gen_extractors, list_extractors
__all__ = [ __all__ = [
'main',
'YoutubeDL', 'YoutubeDL',
'parse_options',
'gen_extractors', 'gen_extractors',
'list_extractors', 'list_extractors',
'main',
'parse_options',
] ]

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# Execute with # Execute with
# $ python -m yt_dlp # $ python3 -m yt_dlp
import sys import sys

View File

@ -1,6 +1,6 @@
import sys import sys
from PyInstaller.utils.hooks import collect_submodules from PyInstaller.utils.hooks import collect_submodules, collect_data_files
def pycryptodome_module(): def pycryptodome_module():
@ -10,7 +10,7 @@ def pycryptodome_module():
try: try:
import Crypto # noqa: F401 import Crypto # noqa: F401
print('WARNING: Using Crypto since Cryptodome is not available. ' print('WARNING: Using Crypto since Cryptodome is not available. '
'Install with: pip install pycryptodomex', file=sys.stderr) 'Install with: python3 -m pip install pycryptodomex', file=sys.stderr)
return 'Crypto' return 'Crypto'
except ImportError: except ImportError:
pass pass
@ -21,12 +21,16 @@ def get_hidden_imports():
yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated') yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated')
yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated') yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated')
yield pycryptodome_module() yield pycryptodome_module()
yield from collect_submodules('websockets') # Only `websockets` is required, others are collected just in case
for module in ('websockets', 'requests', 'urllib3'):
yield from collect_submodules(module)
# These are auto-detected, but explicitly add them just in case # These are auto-detected, but explicitly add them just in case
yield from ('mutagen', 'brotli', 'certifi') yield from ('mutagen', 'brotli', 'certifi', 'secretstorage', 'curl_cffi')
hiddenimports = list(get_hidden_imports()) hiddenimports = list(get_hidden_imports())
print(f'Adding imports: {hiddenimports}') print(f'Adding imports: {hiddenimports}')
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts'] excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle']
datas = collect_data_files('curl_cffi', includes=['cacert.pem'])

View File

@ -3,7 +3,6 @@ from math import ceil
from .compat import compat_ord from .compat import compat_ord
from .dependencies import Cryptodome from .dependencies import Cryptodome
from .utils import bytes_to_intlist, intlist_to_bytes
if Cryptodome.AES: if Cryptodome.AES:
def aes_cbc_decrypt_bytes(data, key, iv): def aes_cbc_decrypt_bytes(data, key, iv):
@ -17,15 +16,15 @@ if Cryptodome.AES:
else: else:
def aes_cbc_decrypt_bytes(data, key, iv): def aes_cbc_decrypt_bytes(data, key, iv):
""" Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """ """ Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """
return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv)))) return bytes(aes_cbc_decrypt(*map(list, (data, key, iv))))
def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
""" Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """ """ Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """
return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce)))) return bytes(aes_gcm_decrypt_and_verify(*map(list, (data, key, tag, nonce))))
def aes_cbc_encrypt_bytes(data, key, iv, **kwargs): def aes_cbc_encrypt_bytes(data, key, iv, **kwargs):
return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs)) return bytes(aes_cbc_encrypt(*map(list, (data, key, iv)), **kwargs))
BLOCK_SIZE_BYTES = 16 BLOCK_SIZE_BYTES = 16
@ -68,7 +67,7 @@ def pad_block(block, padding_mode):
raise NotImplementedError(f'Padding mode {padding_mode} is not implemented') raise NotImplementedError(f'Padding mode {padding_mode} is not implemented')
if padding_mode == 'iso7816' and padding_size: if padding_mode == 'iso7816' and padding_size:
block = block + [0x80] # NB: += mutates list block = [*block, 0x80] # NB: += mutates list
padding_size -= 1 padding_size -= 1
return block + [PADDING_BYTE[padding_mode]] * padding_size return block + [PADDING_BYTE[padding_mode]] * padding_size
@ -84,7 +83,7 @@ def aes_ecb_encrypt(data, key, iv=None):
@returns {int[]} encrypted data @returns {int[]} encrypted data
""" """
expanded_key = key_expansion(key) expanded_key = key_expansion(key)
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
encrypted_data = [] encrypted_data = []
for i in range(block_count): for i in range(block_count):
@ -104,15 +103,13 @@ def aes_ecb_decrypt(data, key, iv=None):
@returns {int[]} decrypted data @returns {int[]} decrypted data
""" """
expanded_key = key_expansion(key) expanded_key = key_expansion(key)
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
encrypted_data = [] encrypted_data = []
for i in range(block_count): for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
encrypted_data += aes_decrypt(block, expanded_key) encrypted_data += aes_decrypt(block, expanded_key)
encrypted_data = encrypted_data[:len(data)] return encrypted_data[:len(data)]
return encrypted_data
def aes_ctr_decrypt(data, key, iv): def aes_ctr_decrypt(data, key, iv):
@ -137,7 +134,7 @@ def aes_ctr_encrypt(data, key, iv):
@returns {int[]} encrypted data @returns {int[]} encrypted data
""" """
expanded_key = key_expansion(key) expanded_key = key_expansion(key)
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
counter = iter_vector(iv) counter = iter_vector(iv)
encrypted_data = [] encrypted_data = []
@ -148,9 +145,7 @@ def aes_ctr_encrypt(data, key, iv):
cipher_counter_block = aes_encrypt(counter_block, expanded_key) cipher_counter_block = aes_encrypt(counter_block, expanded_key)
encrypted_data += xor(block, cipher_counter_block) encrypted_data += xor(block, cipher_counter_block)
encrypted_data = encrypted_data[:len(data)] return encrypted_data[:len(data)]
return encrypted_data
def aes_cbc_decrypt(data, key, iv): def aes_cbc_decrypt(data, key, iv):
@ -163,7 +158,7 @@ def aes_cbc_decrypt(data, key, iv):
@returns {int[]} decrypted data @returns {int[]} decrypted data
""" """
expanded_key = key_expansion(key) expanded_key = key_expansion(key)
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
decrypted_data = [] decrypted_data = []
previous_cipher_block = iv previous_cipher_block = iv
@ -174,9 +169,7 @@ def aes_cbc_decrypt(data, key, iv):
decrypted_block = aes_decrypt(block, expanded_key) decrypted_block = aes_decrypt(block, expanded_key)
decrypted_data += xor(decrypted_block, previous_cipher_block) decrypted_data += xor(decrypted_block, previous_cipher_block)
previous_cipher_block = block previous_cipher_block = block
decrypted_data = decrypted_data[:len(data)] return decrypted_data[:len(data)]
return decrypted_data
def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'): def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'):
@ -190,7 +183,7 @@ def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'):
@returns {int[]} encrypted data @returns {int[]} encrypted data
""" """
expanded_key = key_expansion(key) expanded_key = key_expansion(key)
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
encrypted_data = [] encrypted_data = []
previous_cipher_block = iv previous_cipher_block = iv
@ -224,10 +217,10 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key)) hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key))
if len(nonce) == 12: if len(nonce) == 12:
j0 = nonce + [0, 0, 0, 1] j0 = [*nonce, 0, 0, 0, 1]
else: else:
fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8 fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8
ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big')) ghash_in = nonce + [0] * fill + list((8 * len(nonce)).to_bytes(8, 'big'))
j0 = ghash(hash_subkey, ghash_in) j0 = ghash(hash_subkey, ghash_in)
# TODO: add nonce support to aes_ctr_decrypt # TODO: add nonce support to aes_ctr_decrypt
@ -236,17 +229,17 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
iv_ctr = inc(j0) iv_ctr = inc(j0)
decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr))) decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr)))
pad_len = len(data) // 16 * 16 pad_len = (BLOCK_SIZE_BYTES - (len(data) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES
s_tag = ghash( s_tag = ghash(
hash_subkey, hash_subkey,
data data
+ [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad + [0] * pad_len # pad
+ bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data + list((0 * 8).to_bytes(8, 'big') # length of associated data
+ ((len(data) * 8).to_bytes(8, 'big'))) # length of data + ((len(data) * 8).to_bytes(8, 'big'))), # length of data
) )
if tag != aes_ctr_encrypt(s_tag, key, j0): if tag != aes_ctr_encrypt(s_tag, key, j0):
raise ValueError("Mismatching authentication tag") raise ValueError('Mismatching authentication tag')
return decrypted_data return decrypted_data
@ -288,9 +281,7 @@ def aes_decrypt(data, expanded_key):
data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV)) data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV))
data = shift_rows_inv(data) data = shift_rows_inv(data)
data = sub_bytes_inv(data) data = sub_bytes_inv(data)
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) return xor(data, expanded_key[:BLOCK_SIZE_BYTES])
return data
def aes_decrypt_text(data, password, key_size_bytes): def aes_decrypt_text(data, password, key_size_bytes):
@ -308,8 +299,8 @@ def aes_decrypt_text(data, password, key_size_bytes):
""" """
NONCE_LENGTH_BYTES = 8 NONCE_LENGTH_BYTES = 8
data = bytes_to_intlist(base64.b64decode(data)) data = list(base64.b64decode(data))
password = bytes_to_intlist(password.encode()) password = list(password.encode())
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
@ -318,9 +309,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
cipher = data[NONCE_LENGTH_BYTES:] cipher = data[NONCE_LENGTH_BYTES:]
decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)) decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES))
plaintext = intlist_to_bytes(decrypted_data) return bytes(decrypted_data)
return plaintext
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
@ -428,9 +417,7 @@ def key_expansion(data):
for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0): for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
temp = data[-4:] temp = data[-4:]
data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
data = data[:expanded_key_size_bytes] return data[:expanded_key_size_bytes]
return data
def iter_vector(iv): def iter_vector(iv):
@ -511,7 +498,7 @@ def block_product(block_x, block_y):
# NIST SP 800-38D, Algorithm 1 # NIST SP 800-38D, Algorithm 1
if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES: if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES:
raise ValueError("Length of blocks need to be %d bytes" % BLOCK_SIZE_BYTES) raise ValueError(f'Length of blocks need to be {BLOCK_SIZE_BYTES} bytes')
block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1) block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1)
block_v = block_y[:] block_v = block_y[:]
@ -534,7 +521,7 @@ def ghash(subkey, data):
# NIST SP 800-38D, Algorithm 2 # NIST SP 800-38D, Algorithm 2
if len(data) % BLOCK_SIZE_BYTES: if len(data) % BLOCK_SIZE_BYTES:
raise ValueError("Length of data should be %d bytes" % BLOCK_SIZE_BYTES) raise ValueError(f'Length of data should be {BLOCK_SIZE_BYTES} bytes')
last_y = [0] * BLOCK_SIZE_BYTES last_y = [0] * BLOCK_SIZE_BYTES
for i in range(0, len(data), BLOCK_SIZE_BYTES): for i in range(0, len(data), BLOCK_SIZE_BYTES):
@ -547,19 +534,17 @@ def ghash(subkey, data):
__all__ = [ __all__ = [
'aes_cbc_decrypt', 'aes_cbc_decrypt',
'aes_cbc_decrypt_bytes', 'aes_cbc_decrypt_bytes',
'aes_ctr_decrypt',
'aes_decrypt_text',
'aes_decrypt',
'aes_ecb_decrypt',
'aes_gcm_decrypt_and_verify',
'aes_gcm_decrypt_and_verify_bytes',
'aes_cbc_encrypt', 'aes_cbc_encrypt',
'aes_cbc_encrypt_bytes', 'aes_cbc_encrypt_bytes',
'aes_ctr_decrypt',
'aes_ctr_encrypt', 'aes_ctr_encrypt',
'aes_decrypt',
'aes_decrypt_text',
'aes_ecb_decrypt',
'aes_ecb_encrypt', 'aes_ecb_encrypt',
'aes_encrypt', 'aes_encrypt',
'aes_gcm_decrypt_and_verify',
'aes_gcm_decrypt_and_verify_bytes',
'key_expansion', 'key_expansion',
'pad_block', 'pad_block',
'pkcs7_padding', 'pkcs7_padding',

View File

@ -81,10 +81,10 @@ class Cache:
cachedir = self._get_root_dir() cachedir = self._get_root_dir()
if not any((term in cachedir) for term in ('cache', 'tmp')): if not any((term in cachedir) for term in ('cache', 'tmp')):
raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir) raise Exception(f'Not removing directory {cachedir} - this does not look like a cache dir')
self._ydl.to_screen( self._ydl.to_screen(
'Removing cache dir %s .' % cachedir, skip_eol=True) f'Removing cache dir {cachedir} .', skip_eol=True)
if os.path.exists(cachedir): if os.path.exists(cachedir):
self._ydl.to_screen('.', skip_eol=True) self._ydl.to_screen('.', skip_eol=True)
shutil.rmtree(cachedir) shutil.rmtree(cachedir)

View File

@ -1,5 +0,0 @@
import warnings
warnings.warn(DeprecationWarning(f'{__name__} is deprecated'))
casefold = str.casefold

View File

@ -1,5 +1,4 @@
import os import os
import sys
import xml.etree.ElementTree as etree import xml.etree.ElementTree as etree
from .compat_utils import passthrough_module from .compat_utils import passthrough_module
@ -24,36 +23,14 @@ def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
compat_os_name = os._name if os.name == 'java' else os.name
if compat_os_name == 'nt':
def compat_shlex_quote(s):
import re
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
else:
from shlex import quote as compat_shlex_quote # noqa: F401
def compat_ord(c): def compat_ord(c):
return c if isinstance(c, int) else ord(c) return c if isinstance(c, int) else ord(c)
if compat_os_name == 'nt' and sys.version_info < (3, 8):
# os.path.realpath on Windows does not follow symbolic links
# prior to Python 3.8 (see https://bugs.python.org/issue9949)
def compat_realpath(path):
while os.path.islink(path):
path = os.path.abspath(os.readlink(path))
return os.path.realpath(path)
else:
compat_realpath = os.path.realpath
# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl # Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl
# See https://github.com/yt-dlp/yt-dlp/issues/792 # See https://github.com/yt-dlp/yt-dlp/issues/792
# https://docs.python.org/3/library/os.path.html#os.path.expanduser # https://docs.python.org/3/library/os.path.html#os.path.expanduser
if compat_os_name in ('nt', 'ce'): if os.name in ('nt', 'ce'):
def compat_expanduser(path): def compat_expanduser(path):
HOME = os.environ.get('HOME') HOME = os.environ.get('HOME')
if not HOME: if not HOME:

View File

@ -8,16 +8,14 @@ passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn(
DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6)) DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
del passthrough_module del passthrough_module
import base64 import functools # noqa: F401
import urllib.error import os
import urllib.parse
compat_str = str
compat_b64decode = base64.b64decode compat_os_name = os.name
compat_realpath = os.path.realpath
compat_urlparse = urllib.parse
compat_parse_qs = urllib.parse.parse_qs def compat_shlex_quote(s):
compat_urllib_parse_unquote = urllib.parse.unquote from ..utils import shell_quote
compat_urllib_parse_urlencode = urllib.parse.urlencode return shell_quote(s)
compat_urllib_parse_urlparse = urllib.parse.urlparse

View File

@ -30,11 +30,12 @@ from asyncio import run as compat_asyncio_run # noqa: F401
from re import Pattern as compat_Pattern # noqa: F401 from re import Pattern as compat_Pattern # noqa: F401
from re import match as compat_Match # noqa: F401 from re import match as compat_Match # noqa: F401
from . import compat_expanduser, compat_HTMLParseError, compat_realpath from . import compat_expanduser, compat_HTMLParseError
from .compat_utils import passthrough_module from .compat_utils import passthrough_module
from ..dependencies import brotli as compat_brotli # noqa: F401 from ..dependencies import brotli as compat_brotli # noqa: F401
from ..dependencies import websockets as compat_websockets # noqa: F401 from ..dependencies import websockets as compat_websockets # noqa: F401
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
from ..networking.exceptions import HTTPError as compat_HTTPError
passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
@ -70,7 +71,6 @@ compat_html_parser_HTMLParseError = compat_HTMLParseError
compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
compat_http_client = http.client compat_http_client = http.client
compat_http_server = http.server compat_http_server = http.server
compat_HTTPError = urllib.error.HTTPError
compat_input = input compat_input = input
compat_integer_types = (int, ) compat_integer_types = (int, )
compat_itertools_count = itertools.count compat_itertools_count = itertools.count
@ -78,7 +78,7 @@ compat_kwargs = lambda kwargs: kwargs
compat_map = map compat_map = map
compat_numeric_types = (int, float, complex) compat_numeric_types = (int, float, complex)
compat_os_path_expanduser = compat_expanduser compat_os_path_expanduser = compat_expanduser
compat_os_path_realpath = compat_realpath compat_os_path_realpath = os.path.realpath
compat_print = print compat_print = print
compat_shlex_split = shlex.split compat_shlex_split = shlex.split
compat_socket_create_connection = socket.create_connection compat_socket_create_connection = socket.create_connection
@ -88,7 +88,7 @@ compat_struct_unpack = struct.unpack
compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
compat_tokenize_tokenize = tokenize.tokenize compat_tokenize_tokenize = tokenize.tokenize
compat_urllib_error = urllib.error compat_urllib_error = urllib.error
compat_urllib_HTTPError = urllib.error.HTTPError compat_urllib_HTTPError = compat_HTTPError
compat_urllib_parse = urllib.parse compat_urllib_parse = urllib.parse
compat_urllib_parse_parse_qs = urllib.parse.parse_qs compat_urllib_parse_parse_qs = urllib.parse.parse_qs
compat_urllib_parse_quote = urllib.parse.quote compat_urllib_parse_quote = urllib.parse.quote
@ -104,5 +104,12 @@ compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseEr
compat_xpath = lambda xpath: xpath compat_xpath = lambda xpath: xpath
compat_zip = zip compat_zip = zip
workaround_optparse_bug9161 = lambda: None workaround_optparse_bug9161 = lambda: None
compat_str = str
compat_b64decode = base64.b64decode
compat_urlparse = urllib.parse
compat_parse_qs = urllib.parse.parse_qs
compat_urllib_parse_unquote = urllib.parse.unquote
compat_urllib_parse_urlencode = urllib.parse.urlencode
compat_urllib_parse_urlparse = urllib.parse.urlparse
legacy = [] legacy = []

View File

@ -15,7 +15,7 @@ def get_package_info(module):
name=getattr(module, '_yt_dlp__identifier', module.__name__), name=getattr(module, '_yt_dlp__identifier', module.__name__),
version=str(next(filter(None, ( version=str(next(filter(None, (
getattr(module, attr, None) getattr(module, attr, None)
for attr in ('__version__', 'version_string', 'version') for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version')
)), None))) )), None)))
@ -57,7 +57,7 @@ def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=la
callback(attr) callback(attr)
return ret return ret
@functools.lru_cache(maxsize=None) @functools.cache
def from_child(attr): def from_child(attr):
nonlocal child nonlocal child
if attr not in allowed_attributes: if attr not in allowed_attributes:

View File

@ -1,26 +0,0 @@
# flake8: noqa: F405
from functools import * # noqa: F403
from .compat_utils import passthrough_module
passthrough_module(__name__, 'functools')
del passthrough_module
try:
cache # >= 3.9
except NameError:
cache = lru_cache(maxsize=None)
try:
cached_property # >= 3.8
except NameError:
class cached_property:
def __init__(self, func):
update_wrapper(self, func)
self.func = func
def __get__(self, instance, _):
if instance is None:
return self
setattr(instance, self.func.__name__, self.func(instance))
return getattr(instance, self.func.__name__)

View File

@ -1,16 +1,22 @@
tests = {
'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP',
'png': lambda h: h[:8] == b'\211PNG\r\n\032\n',
'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'),
'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'),
}
def what(file=None, h=None): def what(file=None, h=None):
"""Detect format of image (Currently supports jpeg, png, webp, gif only) """Detect format of image (Currently supports jpeg, png, webp, gif only)
Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py Ref: https://github.com/python/cpython/blob/3.11/Lib/imghdr.py
Ref: https://www.w3.org/Graphics/JPEG/itu-t81.pdf
""" """
if h is None: if h is None:
with open(file, 'rb') as f: with open(file, 'rb') as f:
h = f.read(12) h = f.read(12)
return next((type_ for type_, test in tests.items() if test(h)), None)
if h.startswith(b'RIFF') and h.startswith(b'WEBP', 8):
return 'webp'
if h.startswith(b'\x89PNG'):
return 'png'
if h.startswith(b'\xFF\xD8\xFF'):
return 'jpeg'
if h.startswith(b'GIF'):
return 'gif'
return None

View File

@ -1,7 +1,7 @@
# flake8: noqa: F405 # flake8: noqa: F405
from urllib import * # noqa: F403 from urllib import * # noqa: F403
del request del request # noqa: F821
from . import request # noqa: F401 from . import request # noqa: F401
from ..compat_utils import passthrough_module from ..compat_utils import passthrough_module

View File

@ -7,13 +7,13 @@ passthrough_module(__name__, 'urllib.request')
del passthrough_module del passthrough_module
from .. import compat_os_name import os
if compat_os_name == 'nt': if os.name == 'nt':
# On older python versions, proxies are extracted from Windows registry erroneously. [1] # On older Python versions, proxies are extracted from Windows registry erroneously. [1]
# If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2] # If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2]
# It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade # It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade
# it to http on these older python versions to avoid issues # it to http on these older Python versions to avoid issues
# This also applies for ftp proxy type, as ftp:// proxy scheme is not supported. # This also applies for ftp proxy type, as ftp:// proxy scheme is not supported.
# 1: https://github.com/python/cpython/issues/86793 # 1: https://github.com/python/cpython/issues/86793
# 2: https://github.com/python/cpython/blob/51f1ae5ceb0673316c4e4b0175384e892e33cc6e/Lib/urllib/request.py#L2683-L2698 # 2: https://github.com/python/cpython/blob/51f1ae5ceb0673316c4e4b0175384e892e33cc6e/Lib/urllib/request.py#L2683-L2698
@ -37,4 +37,4 @@ if compat_os_name == 'nt':
def getproxies(): def getproxies():
return getproxies_environment() or getproxies_registry_patched() return getproxies_environment() or getproxies_registry_patched()
del compat_os_name del os

View File

@ -1,6 +1,10 @@
import base64 import base64
import collections import collections
import contextlib import contextlib
import datetime as dt
import functools
import glob
import hashlib
import http.cookiejar import http.cookiejar
import http.cookies import http.cookies
import io import io
@ -14,16 +18,13 @@ import sys
import tempfile import tempfile
import time import time
import urllib.request import urllib.request
from datetime import datetime, timedelta, timezone
from enum import Enum, auto from enum import Enum, auto
from hashlib import pbkdf2_hmac
from .aes import ( from .aes import (
aes_cbc_decrypt_bytes, aes_cbc_decrypt_bytes,
aes_gcm_decrypt_and_verify_bytes, aes_gcm_decrypt_and_verify_bytes,
unpad_pkcs7, unpad_pkcs7,
) )
from .compat import functools
from .dependencies import ( from .dependencies import (
_SECRETSTORAGE_UNAVAILABLE_REASON, _SECRETSTORAGE_UNAVAILABLE_REASON,
secretstorage, secretstorage,
@ -31,6 +32,8 @@ from .dependencies import (
) )
from .minicurses import MultilinePrinter, QuietMultilinePrinter from .minicurses import MultilinePrinter, QuietMultilinePrinter
from .utils import ( from .utils import (
DownloadError,
YoutubeDLError,
Popen, Popen,
error_to_str, error_to_str,
expand_path, expand_path,
@ -43,7 +46,7 @@ from .utils import (
from .utils._utils import _YDLLogger from .utils._utils import _YDLLogger
from .utils.networking import normalize_url from .utils.networking import normalize_url
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'} CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'} SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
@ -83,24 +86,31 @@ def _create_progress_bar(logger):
return printer return printer
class CookieLoadError(YoutubeDLError):
pass
def load_cookies(cookie_file, browser_specification, ydl): def load_cookies(cookie_file, browser_specification, ydl):
cookie_jars = [] try:
if browser_specification is not None: cookie_jars = []
browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification) if browser_specification is not None:
cookie_jars.append( browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)) cookie_jars.append(
extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container))
if cookie_file is not None: if cookie_file is not None:
is_filename = is_path_like(cookie_file) is_filename = is_path_like(cookie_file)
if is_filename: if is_filename:
cookie_file = expand_path(cookie_file) cookie_file = expand_path(cookie_file)
jar = YoutubeDLCookieJar(cookie_file) jar = YoutubeDLCookieJar(cookie_file)
if not is_filename or os.access(cookie_file, os.R_OK): if not is_filename or os.access(cookie_file, os.R_OK):
jar.load() jar.load()
cookie_jars.append(jar) cookie_jars.append(jar)
return _merge_cookie_jars(cookie_jars) return _merge_cookie_jars(cookie_jars)
except Exception:
raise CookieLoadError('failed to load cookies')
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None): def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
@ -118,17 +128,18 @@ def _extract_firefox_cookies(profile, container, logger):
logger.info('Extracting cookies from firefox') logger.info('Extracting cookies from firefox')
if not sqlite3: if not sqlite3:
logger.warning('Cannot extract cookies from firefox without sqlite3 support. ' logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
'Please use a python interpreter compiled with sqlite3 support') 'Please use a Python interpreter compiled with sqlite3 support')
return YoutubeDLCookieJar() return YoutubeDLCookieJar()
if profile is None: if profile is None:
search_root = _firefox_browser_dir() search_roots = list(_firefox_browser_dirs())
elif _is_path(profile): elif _is_path(profile):
search_root = profile search_roots = [profile]
else: else:
search_root = os.path.join(_firefox_browser_dir(), profile) search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
search_root = ', '.join(map(repr, search_roots))
cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger) cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
if cookie_database_path is None: if cookie_database_path is None:
raise FileNotFoundError(f'could not find firefox cookies database in {search_root}') raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
logger.debug(f'Extracting cookies from: "{cookie_database_path}"') logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
@ -142,7 +153,7 @@ def _extract_firefox_cookies(profile, container, logger):
identities = json.load(containers).get('identities', []) identities = json.load(containers).get('identities', [])
container_id = next((context.get('userContextId') for context in identities if container in ( container_id = next((context.get('userContextId') for context in identities if container in (
context.get('name'), context.get('name'),
try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()) try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()),
)), None) )), None)
if not isinstance(container_id, int): if not isinstance(container_id, int):
raise ValueError(f'could not find firefox container "{container}" in containers.json') raise ValueError(f'could not find firefox container "{container}" in containers.json')
@ -182,12 +193,28 @@ def _extract_firefox_cookies(profile, container, logger):
cursor.connection.close() cursor.connection.close()
def _firefox_browser_dir(): def _firefox_browser_dirs():
if sys.platform in ('cygwin', 'win32'): if sys.platform in ('cygwin', 'win32'):
return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles') yield from map(os.path.expandvars, (
R'%APPDATA%\Mozilla\Firefox\Profiles',
R'%LOCALAPPDATA%\Packages\Mozilla.Firefox_n80bbvh6b1yt2\LocalCache\Roaming\Mozilla\Firefox\Profiles',
))
elif sys.platform == 'darwin': elif sys.platform == 'darwin':
return os.path.expanduser('~/Library/Application Support/Firefox') yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
return os.path.expanduser('~/.mozilla/firefox')
else:
yield from map(os.path.expanduser, (
'~/.mozilla/firefox',
'~/snap/firefox/common/.mozilla/firefox',
'~/.var/app/org.mozilla.firefox/.mozilla/firefox',
))
def _firefox_cookie_dbs(roots):
for root in map(os.path.abspath, roots):
for pattern in ('', '*/', 'Profiles/*/'):
yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite'))
def _get_chromium_based_browser_settings(browser_name): def _get_chromium_based_browser_settings(browser_name):
@ -202,6 +229,7 @@ def _get_chromium_based_browser_settings(browser_name):
'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'), 'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'), 'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'), 'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'),
}[browser_name] }[browser_name]
elif sys.platform == 'darwin': elif sys.platform == 'darwin':
@ -213,6 +241,7 @@ def _get_chromium_based_browser_settings(browser_name):
'edge': os.path.join(appdata, 'Microsoft Edge'), 'edge': os.path.join(appdata, 'Microsoft Edge'),
'opera': os.path.join(appdata, 'com.operasoftware.Opera'), 'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
'vivaldi': os.path.join(appdata, 'Vivaldi'), 'vivaldi': os.path.join(appdata, 'Vivaldi'),
'whale': os.path.join(appdata, 'Naver/Whale'),
}[browser_name] }[browser_name]
else: else:
@ -224,6 +253,7 @@ def _get_chromium_based_browser_settings(browser_name):
'edge': os.path.join(config, 'microsoft-edge'), 'edge': os.path.join(config, 'microsoft-edge'),
'opera': os.path.join(config, 'opera'), 'opera': os.path.join(config, 'opera'),
'vivaldi': os.path.join(config, 'vivaldi'), 'vivaldi': os.path.join(config, 'vivaldi'),
'whale': os.path.join(config, 'naver-whale'),
}[browser_name] }[browser_name]
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE: # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
@ -235,6 +265,7 @@ def _get_chromium_based_browser_settings(browser_name):
'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium', 'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium', 'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome', 'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
'whale': 'Whale',
}[browser_name] }[browser_name]
browsers_without_profiles = {'opera'} browsers_without_profiles = {'opera'}
@ -242,7 +273,7 @@ def _get_chromium_based_browser_settings(browser_name):
return { return {
'browser_dir': browser_dir, 'browser_dir': browser_dir,
'keyring_name': keyring_name, 'keyring_name': keyring_name,
'supports_profiles': browser_name not in browsers_without_profiles 'supports_profiles': browser_name not in browsers_without_profiles,
} }
@ -251,7 +282,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
if not sqlite3: if not sqlite3:
logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. ' logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
'Please use a python interpreter compiled with sqlite3 support') 'Please use a Python interpreter compiled with sqlite3 support')
return YoutubeDLCookieJar() return YoutubeDLCookieJar()
config = _get_chromium_based_browser_settings(browser_name) config = _get_chromium_based_browser_settings(browser_name)
@ -268,17 +299,23 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
logger.error(f'{browser_name} does not support profiles') logger.error(f'{browser_name} does not support profiles')
search_root = config['browser_dir'] search_root = config['browser_dir']
cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger) cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
if cookie_database_path is None: if cookie_database_path is None:
raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"') raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
logger.debug(f'Extracting cookies from: "{cookie_database_path}"') logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)
with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
cursor = None cursor = None
try: try:
cursor = _open_database_copy(cookie_database_path, tmpdir) cursor = _open_database_copy(cookie_database_path, tmpdir)
# meta_version is necessary to determine if we need to trim the hash prefix from the cookies
# Ref: https://chromium.googlesource.com/chromium/src/+/b02dcebd7cafab92770734dc2bc317bd07f1d891/net/extras/sqlite/sqlite_persistent_cookie_store.cc#223
meta_version = int(cursor.execute('SELECT value FROM meta WHERE key = "version"').fetchone()[0])
decryptor = get_cookie_decryptor(
config['browser_dir'], config['keyring_name'], logger,
keyring=keyring, meta_version=meta_version)
cursor.connection.text_factory = bytes cursor.connection.text_factory = bytes
column_names = _get_column_names(cursor, 'cookies') column_names = _get_column_names(cursor, 'cookies')
secure_column = 'is_secure' if 'is_secure' in column_names else 'secure' secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
@ -307,6 +344,12 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
counts['unencrypted'] = unencrypted_cookies counts['unencrypted'] = unencrypted_cookies
logger.debug(f'cookie version breakdown: {counts}') logger.debug(f'cookie version breakdown: {counts}')
return jar return jar
except PermissionError as error:
if os.name == 'nt' and error.errno == 13:
message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
logger.error(message)
raise DownloadError(message) # force exit
raise
finally: finally:
if cursor is not None: if cursor is not None:
cursor.connection.close() cursor.connection.close()
@ -324,6 +367,11 @@ def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, pa
if value is None: if value is None:
return is_encrypted, None return is_encrypted, None
# In chrome, session cookies have expires_utc set to 0
# In our cookie-store, cookies that do not expire should have expires set to None
if not expires_utc:
expires_utc = None
return is_encrypted, http.cookiejar.Cookie( return is_encrypted, http.cookiejar.Cookie(
version=0, name=name, value=value, port=None, port_specified=False, version=0, name=name, value=value, port=None, port_specified=False,
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'), domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
@ -365,22 +413,23 @@ class ChromeCookieDecryptor:
raise NotImplementedError('Must be implemented by sub classes') raise NotImplementedError('Must be implemented by sub classes')
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None, meta_version=None):
    """
    Create the Chrome cookie decryptor that matches the running platform

    sys.platform selects the implementation:
      - 'darwin'           -> MacChromeCookieDecryptor(browser_keyring_name, logger)
      - 'win32' / 'cygwin' -> WindowsChromeCookieDecryptor(browser_root, logger)
      - anything else      -> LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=...)

    `keyring` is only forwarded to the Linux decryptor; `meta_version` is
    forwarded to all three.
    """
    if sys.platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger, meta_version=meta_version)
    elif sys.platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger, meta_version=meta_version)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring, meta_version=meta_version)
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
def __init__(self, browser_keyring_name, logger, *, keyring=None): def __init__(self, browser_keyring_name, logger, *, keyring=None, meta_version=None):
self._logger = logger self._logger = logger
self._v10_key = self.derive_key(b'peanuts') self._v10_key = self.derive_key(b'peanuts')
self._empty_key = self.derive_key(b'') self._empty_key = self.derive_key(b'')
self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0} self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
self._browser_keyring_name = browser_keyring_name self._browser_keyring_name = browser_keyring_name
self._keyring = keyring self._keyring = keyring
self._meta_version = meta_version or 0
@functools.cached_property @functools.cached_property
def _v11_key(self): def _v11_key(self):
@ -409,14 +458,18 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
if version == b'v10': if version == b'v10':
self._cookie_counts['v10'] += 1 self._cookie_counts['v10'] += 1
return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger) return _decrypt_aes_cbc_multi(
ciphertext, (self._v10_key, self._empty_key), self._logger,
hash_prefix=self._meta_version >= 24)
elif version == b'v11': elif version == b'v11':
self._cookie_counts['v11'] += 1 self._cookie_counts['v11'] += 1
if self._v11_key is None: if self._v11_key is None:
self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True) self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
return None return None
return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger) return _decrypt_aes_cbc_multi(
ciphertext, (self._v11_key, self._empty_key), self._logger,
hash_prefix=self._meta_version >= 24)
else: else:
self._logger.warning(f'unknown cookie version: "{version}"', only_once=True) self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
@ -425,11 +478,12 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
class MacChromeCookieDecryptor(ChromeCookieDecryptor): class MacChromeCookieDecryptor(ChromeCookieDecryptor):
def __init__(self, browser_keyring_name, logger): def __init__(self, browser_keyring_name, logger, meta_version=None):
self._logger = logger self._logger = logger
password = _get_mac_keyring_password(browser_keyring_name, logger) password = _get_mac_keyring_password(browser_keyring_name, logger)
self._v10_key = None if password is None else self.derive_key(password) self._v10_key = None if password is None else self.derive_key(password)
self._cookie_counts = {'v10': 0, 'other': 0} self._cookie_counts = {'v10': 0, 'other': 0}
self._meta_version = meta_version or 0
@staticmethod @staticmethod
def derive_key(password): def derive_key(password):
@ -447,7 +501,8 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
return None return None
return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger) return _decrypt_aes_cbc_multi(
ciphertext, (self._v10_key,), self._logger, hash_prefix=self._meta_version >= 24)
else: else:
self._cookie_counts['other'] += 1 self._cookie_counts['other'] += 1
@ -457,10 +512,11 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
def __init__(self, browser_root, logger): def __init__(self, browser_root, logger, meta_version=None):
self._logger = logger self._logger = logger
self._v10_key = _get_windows_v10_key(browser_root, logger) self._v10_key = _get_windows_v10_key(browser_root, logger)
self._cookie_counts = {'v10': 0, 'other': 0} self._cookie_counts = {'v10': 0, 'other': 0}
self._meta_version = meta_version or 0
def decrypt(self, encrypted_value): def decrypt(self, encrypted_value):
version = encrypted_value[:3] version = encrypted_value[:3]
@ -484,7 +540,9 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length] ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
authentication_tag = raw_ciphertext[-authentication_tag_length:] authentication_tag = raw_ciphertext[-authentication_tag_length:]
return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger) return _decrypt_aes_gcm(
ciphertext, self._v10_key, nonce, authentication_tag, self._logger,
hash_prefix=self._meta_version >= 24)
else: else:
self._cookie_counts['other'] += 1 self._cookie_counts['other'] += 1
@ -575,7 +633,7 @@ class DataParser:
def _mac_absolute_time_to_posix(timestamp):
    """
    Convert a timestamp counted in seconds from 2001-01-01 00:00 UTC to POSIX time

    The result is truncated to an int.
    """
    return int((dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc) + dt.timedelta(seconds=timestamp)).timestamp())
def _parse_safari_cookies_header(data, logger): def _parse_safari_cookies_header(data, logger):
@ -708,40 +766,38 @@ def _get_linux_desktop_environment(env, logger):
xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None) xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
desktop_session = env.get('DESKTOP_SESSION', None) desktop_session = env.get('DESKTOP_SESSION', None)
if xdg_current_desktop is not None: if xdg_current_desktop is not None:
xdg_current_desktop = xdg_current_desktop.split(':')[0].strip() for part in map(str.strip, xdg_current_desktop.split(':')):
if part == 'Unity':
if xdg_current_desktop == 'Unity': if desktop_session is not None and 'gnome-fallback' in desktop_session:
if desktop_session is not None and 'gnome-fallback' in desktop_session: return _LinuxDesktopEnvironment.GNOME
else:
return _LinuxDesktopEnvironment.UNITY
elif part == 'Deepin':
return _LinuxDesktopEnvironment.DEEPIN
elif part == 'GNOME':
return _LinuxDesktopEnvironment.GNOME return _LinuxDesktopEnvironment.GNOME
else: elif part == 'X-Cinnamon':
return _LinuxDesktopEnvironment.UNITY return _LinuxDesktopEnvironment.CINNAMON
elif xdg_current_desktop == 'Deepin': elif part == 'KDE':
return _LinuxDesktopEnvironment.DEEPIN kde_version = env.get('KDE_SESSION_VERSION', None)
elif xdg_current_desktop == 'GNOME': if kde_version == '5':
return _LinuxDesktopEnvironment.GNOME return _LinuxDesktopEnvironment.KDE5
elif xdg_current_desktop == 'X-Cinnamon': elif kde_version == '6':
return _LinuxDesktopEnvironment.CINNAMON return _LinuxDesktopEnvironment.KDE6
elif xdg_current_desktop == 'KDE': elif kde_version == '4':
kde_version = env.get('KDE_SESSION_VERSION', None) return _LinuxDesktopEnvironment.KDE4
if kde_version == '5': else:
return _LinuxDesktopEnvironment.KDE5 logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
elif kde_version == '6': return _LinuxDesktopEnvironment.KDE4
return _LinuxDesktopEnvironment.KDE6 elif part == 'Pantheon':
elif kde_version == '4': return _LinuxDesktopEnvironment.PANTHEON
return _LinuxDesktopEnvironment.KDE4 elif part == 'XFCE':
else: return _LinuxDesktopEnvironment.XFCE
logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4') elif part == 'UKUI':
return _LinuxDesktopEnvironment.KDE4 return _LinuxDesktopEnvironment.UKUI
elif xdg_current_desktop == 'Pantheon': elif part == 'LXQt':
return _LinuxDesktopEnvironment.PANTHEON return _LinuxDesktopEnvironment.LXQT
elif xdg_current_desktop == 'XFCE': logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
return _LinuxDesktopEnvironment.XFCE
elif xdg_current_desktop == 'UKUI':
return _LinuxDesktopEnvironment.UKUI
elif xdg_current_desktop == 'LXQt':
return _LinuxDesktopEnvironment.LXQT
else:
logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
elif desktop_session is not None: elif desktop_session is not None:
if desktop_session == 'deepin': if desktop_session == 'deepin':
@ -794,7 +850,7 @@ def _choose_linux_keyring(logger):
elif desktop_environment == _LinuxDesktopEnvironment.KDE6: elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
linux_keyring = _LinuxKeyring.KWALLET6 linux_keyring = _LinuxKeyring.KWALLET6
elif desktop_environment in ( elif desktop_environment in (
_LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER,
): ):
linux_keyring = _LinuxKeyring.BASICTEXT linux_keyring = _LinuxKeyring.BASICTEXT
else: else:
@ -829,7 +885,7 @@ def _get_kwallet_network_wallet(keyring, logger):
'dbus-send', '--session', '--print-reply=literal', 'dbus-send', '--session', '--print-reply=literal',
f'--dest={service_name}', f'--dest={service_name}',
wallet_path, wallet_path,
'org.kde.KWallet.networkWallet' 'org.kde.KWallet.networkWallet',
], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
if returncode: if returncode:
@ -859,7 +915,7 @@ def _get_kwallet_password(browser_keyring_name, keyring, logger):
'kwallet-query', 'kwallet-query',
'--read-password', f'{browser_keyring_name} Safe Storage', '--read-password', f'{browser_keyring_name} Safe Storage',
'--folder', f'{browser_keyring_name} Keys', '--folder', f'{browser_keyring_name} Keys',
network_wallet network_wallet,
], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
if returncode: if returncode:
@ -899,9 +955,8 @@ def _get_gnome_keyring_password(browser_keyring_name, logger):
for item in col.get_all_items(): for item in col.get_all_items():
if item.get_label() == f'{browser_keyring_name} Safe Storage': if item.get_label() == f'{browser_keyring_name} Safe Storage':
return item.get_secret() return item.get_secret()
else: logger.error('failed to read from keyring')
logger.error('failed to read from keyring') return b''
return b''
def _get_linux_keyring_password(browser_keyring_name, keyring, logger): def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
@ -947,7 +1002,7 @@ def _get_windows_v10_key(browser_root, logger):
References: References:
- [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
""" """
path = _find_most_recently_used_file(browser_root, 'Local State', logger) path = _newest(_find_files(browser_root, 'Local State', logger))
if path is None: if path is None:
logger.error('could not find local state file') logger.error('could not find local state file')
return None return None
@ -970,13 +1025,15 @@ def _get_windows_v10_key(browser_root, logger):
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive `key_length` bytes from `password` and `salt` using PBKDF2 with HMAC-SHA1"""
    return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length)
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16): def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16, hash_prefix=False):
for key in keys: for key in keys:
plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)) plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
try: try:
if hash_prefix:
return plaintext[32:].decode()
return plaintext.decode() return plaintext.decode()
except UnicodeDecodeError: except UnicodeDecodeError:
pass pass
@ -984,7 +1041,7 @@ def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' '
return None return None
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger, hash_prefix=False):
try: try:
plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce) plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
except ValueError: except ValueError:
@ -992,6 +1049,8 @@ def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
return None return None
try: try:
if hash_prefix:
return plaintext[32:].decode()
return plaintext.decode() return plaintext.decode()
except UnicodeDecodeError: except UnicodeDecodeError:
logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
@ -1021,11 +1080,12 @@ def _decrypt_windows_dpapi(ciphertext, logger):
None, # pvReserved: must be NULL None, # pvReserved: must be NULL
None, # pPromptStruct: information about prompts to display None, # pPromptStruct: information about prompts to display
0, # dwFlags 0, # dwFlags
ctypes.byref(blob_out) # pDataOut ctypes.byref(blob_out), # pDataOut
) )
if not ret: if not ret:
logger.warning('failed to decrypt with DPAPI', only_once=True) message = 'Failed to decrypt with DPAPI. See https://github.com/yt-dlp/yt-dlp/issues/10927 for more info'
return None logger.error(message)
raise DownloadError(message) # force exit
result = ctypes.string_at(blob_out.pbData, blob_out.cbData) result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
ctypes.windll.kernel32.LocalFree(blob_out.pbData) ctypes.windll.kernel32.LocalFree(blob_out.pbData)
@ -1049,17 +1109,20 @@ def _get_column_names(cursor, table_name):
return [row[1].decode() for row in table_info] return [row[1].decode() for row in table_info]
def _newest(files):
    """
    Return the path with the latest modification time, or None if `files` is empty

    `files` may be any iterable of paths; mtime is read with os.lstat().
    """
    return max(files, key=lambda path: os.lstat(path).st_mtime, default=None)
def _find_files(root, filename, logger):
    """
    Walk `root` and yield the full path of every file named exactly `filename`

    This is a generator: all matches are yielded in os.walk() order, and
    choosing among them (e.g. the most recently modified) is left to the caller.
    A running count of files examined is printed through the progress bar
    obtained from _create_progress_bar(logger).
    """
    # there may be multiple browser profiles, so more than one match is possible
    i = 0
    with _create_progress_bar(logger) as progress_bar:
        for curr_root, _, files in os.walk(root):
            for file in files:
                i += 1
                progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
                if file == filename:
                    yield os.path.join(curr_root, file)
def _merge_cookie_jars(jars): def _merge_cookie_jars(jars):
@ -1073,7 +1136,7 @@ def _merge_cookie_jars(jars):
def _is_path(value):
    """
    Return True if `value` contains a path separator

    Both os.path.sep and os.path.altsep are checked; altsep is skipped on
    platforms where it is None.
    """
    return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None): def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
@ -1094,24 +1157,24 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
_LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}') _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
_RESERVED = { _RESERVED = {
"expires", 'expires',
"path", 'path',
"comment", 'comment',
"domain", 'domain',
"max-age", 'max-age',
"secure", 'secure',
"httponly", 'httponly',
"version", 'version',
"samesite", 'samesite',
} }
_FLAGS = {"secure", "httponly"} _FLAGS = {'secure', 'httponly'}
# Added 'bad' group to catch the remaining value # Added 'bad' group to catch the remaining value
_COOKIE_PATTERN = re.compile(r""" _COOKIE_PATTERN = re.compile(r'''
\s* # Optional whitespace at start of cookie \s* # Optional whitespace at start of cookie
(?P<key> # Start of group 'key' (?P<key> # Start of group 'key'
[""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter [''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter
) # End of group 'key' ) # End of group 'key'
( # Optional group: there may not be a value. ( # Optional group: there may not be a value.
\s*=\s* # Equal Sign \s*=\s* # Equal Sign
@ -1121,7 +1184,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
| # or | # or
\w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
| # or | # or
[""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string [''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string
) # End of group 'val' ) # End of group 'val'
| # or | # or
(?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values (?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
@ -1129,7 +1192,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
)? # End of optional value group )? # End of optional value group
\s* # Any number of spaces. \s* # Any number of spaces.
(\s+|;|$) # Ending either at space, semicolon, or EOS. (\s+|;|$) # Ending either at space, semicolon, or EOS.
""", re.ASCII | re.VERBOSE) ''', re.ASCII | re.VERBOSE)
def load(self, data): def load(self, data):
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776 # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
@ -1216,8 +1279,8 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
def _really_save(self, f, ignore_discard, ignore_expires): def _really_save(self, f, ignore_discard, ignore_expires):
now = time.time() now = time.time()
for cookie in self: for cookie in self:
if (not ignore_discard and cookie.discard if ((not ignore_discard and cookie.discard)
or not ignore_expires and cookie.is_expired(now)): or (not ignore_expires and cookie.is_expired(now))):
continue continue
name, value = cookie.name, cookie.value name, value = cookie.name, cookie.value
if value is None: if value is None:
@ -1225,14 +1288,14 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
# with no name, whereas http.cookiejar regards it as a # with no name, whereas http.cookiejar regards it as a
# cookie with no value. # cookie with no value.
name, value = '', name name, value = '', name
f.write('%s\n' % '\t'.join(( f.write('{}\n'.format('\t'.join((
cookie.domain, cookie.domain,
self._true_or_false(cookie.domain.startswith('.')), self._true_or_false(cookie.domain.startswith('.')),
cookie.path, cookie.path,
self._true_or_false(cookie.secure), self._true_or_false(cookie.secure),
str_or_none(cookie.expires, default=''), str_or_none(cookie.expires, default=''),
name, value name, value,
))) ))))
def save(self, filename=None, ignore_discard=True, ignore_expires=True): def save(self, filename=None, ignore_discard=True, ignore_expires=True):
""" """
@ -1271,10 +1334,10 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
return line return line
cookie_list = line.split('\t') cookie_list = line.split('\t')
if len(cookie_list) != self._ENTRY_LEN: if len(cookie_list) != self._ENTRY_LEN:
raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list)) raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}')
cookie = self._CookieFileEntry(*cookie_list) cookie = self._CookieFileEntry(*cookie_list)
if cookie.expires_at and not cookie.expires_at.isdigit(): if cookie.expires_at and not cookie.expires_at.isdigit():
raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}')
return line return line
cf = io.StringIO() cf = io.StringIO()

View File

@ -24,7 +24,7 @@ try:
from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401 from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401
from Crypto.Hash import CMAC, SHA1 # noqa: F401 from Crypto.Hash import CMAC, SHA1 # noqa: F401
from Crypto.PublicKey import RSA # noqa: F401 from Crypto.PublicKey import RSA # noqa: F401
except ImportError: except (ImportError, OSError):
__version__ = f'broken {__version__}'.strip() __version__ = f'broken {__version__}'.strip()

View File

@ -43,19 +43,28 @@ except Exception as _err:
try: try:
import sqlite3 import sqlite3
# We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152
sqlite3._yt_dlp__version = sqlite3.sqlite_version
except ImportError: except ImportError:
# although sqlite3 is part of the standard library, it is possible to compile python without # although sqlite3 is part of the standard library, it is possible to compile Python without
# sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544 # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
sqlite3 = None sqlite3 = None
try: try:
import websockets import websockets
except (ImportError, SyntaxError): except ImportError:
# websockets 3.10 on python 3.6 causes SyntaxError
# See https://github.com/yt-dlp/yt-dlp/issues/2633
websockets = None websockets = None
try:
import urllib3
except ImportError:
urllib3 = None
try:
import requests
except ImportError:
requests = None
try: try:
import xattr # xattr or pyxattr import xattr # xattr or pyxattr
@ -65,6 +74,10 @@ else:
if hasattr(xattr, 'set'): # pyxattr if hasattr(xattr, 'set'): # pyxattr
xattr._yt_dlp__identifier = 'pyxattr' xattr._yt_dlp__identifier = 'pyxattr'
try:
import curl_cffi
except ImportError:
curl_cffi = None
from . import Cryptodome from . import Cryptodome

View File

@ -30,11 +30,12 @@ from .hls import HlsFD
from .http import HttpFD from .http import HttpFD
from .ism import IsmFD from .ism import IsmFD
from .mhtml import MhtmlFD from .mhtml import MhtmlFD
from .niconico import NiconicoDmcFD, NiconicoLiveFD from .niconico import NiconicoLiveFD
from .rtmp import RtmpFD from .rtmp import RtmpFD
from .rtsp import RtspFD from .rtsp import RtspFD
from .websocket import WebSocketFragmentFD from .websocket import WebSocketFragmentFD
from .youtube_live_chat import YoutubeLiveChatFD from .youtube_live_chat import YoutubeLiveChatFD
from .bunnycdn import BunnyCdnFD
PROTOCOL_MAP = { PROTOCOL_MAP = {
'rtmp': RtmpFD, 'rtmp': RtmpFD,
@ -49,12 +50,12 @@ PROTOCOL_MAP = {
'http_dash_segments_generator': DashSegmentsFD, 'http_dash_segments_generator': DashSegmentsFD,
'ism': IsmFD, 'ism': IsmFD,
'mhtml': MhtmlFD, 'mhtml': MhtmlFD,
'niconico_dmc': NiconicoDmcFD,
'niconico_live': NiconicoLiveFD, 'niconico_live': NiconicoLiveFD,
'fc2_live': FC2LiveFD, 'fc2_live': FC2LiveFD,
'websocket_frag': WebSocketFragmentFD, 'websocket_frag': WebSocketFragmentFD,
'youtube_live_chat': YoutubeLiveChatFD, 'youtube_live_chat': YoutubeLiveChatFD,
'youtube_live_chat_replay': YoutubeLiveChatFD, 'youtube_live_chat_replay': YoutubeLiveChatFD,
'bunnycdn': BunnyCdnFD,
} }
@ -65,7 +66,6 @@ def shorten_protocol_name(proto, simplify=False):
'rtmp_ffmpeg': 'rtmpF', 'rtmp_ffmpeg': 'rtmpF',
'http_dash_segments': 'dash', 'http_dash_segments': 'dash',
'http_dash_segments_generator': 'dashG', 'http_dash_segments_generator': 'dashG',
'niconico_dmc': 'dmc',
'websocket_frag': 'WSfrag', 'websocket_frag': 'WSfrag',
} }
if simplify: if simplify:

View File

@ -0,0 +1,50 @@
import hashlib
import random
import threading
from .common import FileDownloader
from . import HlsFD
from ..networking import Request
from ..networking.exceptions import network_exceptions
class BunnyCdnFD(FileDownloader):
    """
    Downloads from BunnyCDN with required pings

    The media itself is fetched by HlsFD; while that runs, a helper thread
    keeps requesting the ping endpoint described by
    info_dict['_bunnycdn_ping_data'].

    Note, this is not a part of public API, and will be removed without notice.
    DO NOT USE
    """

    def real_download(self, filename, info_dict):
        """Run the HLS download with the ping thread active, and return HlsFD's result"""
        self.to_screen(f'[{self.FD_NAME}] Downloading from BunnyCDN')

        hls_downloader = HlsFD(self.ydl, self.params)

        stop_event = threading.Event()
        pinger = threading.Thread(
            target=self.ping_thread,
            args=(stop_event,),
            kwargs=info_dict['_bunnycdn_ping_data'],
        )
        pinger.start()

        try:
            return hls_downloader.real_download(filename, info_dict)
        finally:
            # Signal the ping loop to exit whether the download returned or raised
            stop_event.set()

    def ping_thread(self, stop_event, url, headers, secret, context_id):
        """Request the ping URL at a fixed interval until `stop_event` is set"""
        # Site sends ping every 4 seconds, but this throttles the download. Pinging every 2 seconds seems to work.
        ping_interval = 2
        # Hard coded resolution as it doesn't seem to matter
        res = 1080
        paused = 'false'
        elapsed = 0

        while True:
            # wait() doubles as the sleep between pings and returns True once the event is set
            if stop_event.wait(ping_interval):
                break

            elapsed += ping_interval
            time = elapsed + round(random.random(), 6)
            hash_input = f'{secret}_{context_id}_{time}_{paused}_{res}'
            md5_hash = hashlib.md5(hash_input.encode()).hexdigest()
            ping_url = f'{url}?hash={md5_hash}&time={time}&paused={paused}&resolution={res}'

            try:
                self.ydl.urlopen(Request(ping_url, headers=headers)).read()
            except network_exceptions as e:
                # A failed ping is reported but does not stop the loop
                self.to_screen(f'[{self.FD_NAME}] Ping failed: {e}')

View File

@ -4,6 +4,7 @@ import functools
import os import os
import random import random
import re import re
import threading
import time import time
from ..minicurses import ( from ..minicurses import (
@ -19,9 +20,7 @@ from ..utils import (
Namespace, Namespace,
RetryManager, RetryManager,
classproperty, classproperty,
decodeArgument,
deprecation_warning, deprecation_warning,
encodeFilename,
format_bytes, format_bytes,
join_nonempty, join_nonempty,
parse_bytes, parse_bytes,
@ -32,6 +31,7 @@ from ..utils import (
timetuple_from_msec, timetuple_from_msec,
try_call, try_call,
) )
from ..utils._utils import _ProgressState
class FileDownloader: class FileDownloader:
@ -63,6 +63,7 @@ class FileDownloader:
min_filesize: Skip files smaller than this size min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size max_filesize: Skip files larger than this size
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
progress_delta: The minimum time between progress output, in seconds
external_downloader_args: A dictionary of downloader keys (in lower case) external_downloader_args: A dictionary of downloader keys (in lower case)
and a list of additional command-line arguments for the and a list of additional command-line arguments for the
executable. Use 'default' as the name for arguments to be executable. Use 'default' as the name for arguments to be
@ -88,6 +89,9 @@ class FileDownloader:
self.params = params self.params = params
self._prepare_multiline_status() self._prepare_multiline_status()
self.add_progress_hook(self.report_progress) self.add_progress_hook(self.report_progress)
if self.params.get('progress_delta'):
self._progress_delta_lock = threading.Lock()
self._progress_delta_time = time.monotonic()
def _set_ydl(self, ydl): def _set_ydl(self, ydl):
self.ydl = ydl self.ydl = ydl
@ -214,7 +218,7 @@ class FileDownloader:
def temp_name(self, filename): def temp_name(self, filename):
"""Returns a temporary filename for the given filename.""" """Returns a temporary filename for the given filename."""
if self.params.get('nopart', False) or filename == '-' or \ if self.params.get('nopart', False) or filename == '-' or \
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): (os.path.exists(filename) and not os.path.isfile(filename)):
return filename return filename
return filename + '.part' return filename + '.part'
@ -268,7 +272,7 @@ class FileDownloader:
"""Try to set the last-modified time of the given file.""" """Try to set the last-modified time of the given file."""
if last_modified_hdr is None: if last_modified_hdr is None:
return return
if not os.path.isfile(encodeFilename(filename)): if not os.path.isfile(filename):
return return
timestr = last_modified_hdr timestr = last_modified_hdr
if timestr is None: if timestr is None:
@ -330,7 +334,7 @@ class FileDownloader:
progress_dict), s.get('progress_idx') or 0) progress_dict), s.get('progress_idx') or 0)
self.to_console_title(self.ydl.evaluate_outtmpl( self.to_console_title(self.ydl.evaluate_outtmpl(
progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s', progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
progress_dict)) progress_dict), _ProgressState.from_dict(s), s.get('_percent'))
def _format_progress(self, *args, **kwargs): def _format_progress(self, *args, **kwargs):
return self.ydl._format_text( return self.ydl._format_text(
@ -354,6 +358,7 @@ class FileDownloader:
'_speed_str': self.format_speed(speed).strip(), '_speed_str': self.format_speed(speed).strip(),
'_total_bytes_str': _format_bytes('total_bytes'), '_total_bytes_str': _format_bytes('total_bytes'),
'_elapsed_str': self.format_seconds(s.get('elapsed')), '_elapsed_str': self.format_seconds(s.get('elapsed')),
'_percent': 100.0,
'_percent_str': self.format_percent(100), '_percent_str': self.format_percent(100),
}) })
self._report_progress_status(s, join_nonempty( self._report_progress_status(s, join_nonempty(
@ -366,13 +371,21 @@ class FileDownloader:
if s['status'] != 'downloading': if s['status'] != 'downloading':
return return
if update_delta := self.params.get('progress_delta'):
with self._progress_delta_lock:
if time.monotonic() < self._progress_delta_time:
return
self._progress_delta_time += update_delta
progress = try_call(
lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
lambda: s['downloaded_bytes'] == 0 and 0)
s.update({ s.update({
'_eta_str': self.format_eta(s.get('eta')).strip(), '_eta_str': self.format_eta(s.get('eta')).strip(),
'_speed_str': self.format_speed(s.get('speed')), '_speed_str': self.format_speed(s.get('speed')),
'_percent_str': self.format_percent(try_call( '_percent': progress,
lambda: 100 * s['downloaded_bytes'] / s['total_bytes'], '_percent_str': self.format_percent(progress),
lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
lambda: s['downloaded_bytes'] == 0 and 0)),
'_total_bytes_str': _format_bytes('total_bytes'), '_total_bytes_str': _format_bytes('total_bytes'),
'_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'), '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
'_downloaded_bytes_str': _format_bytes('downloaded_bytes'), '_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
@ -393,7 +406,7 @@ class FileDownloader:
def report_resuming_byte(self, resume_len): def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte.""" """Report attempt to resume at given byte."""
self.to_screen('[download] Resuming download at byte %s' % resume_len) self.to_screen(f'[download] Resuming download at byte {resume_len}')
def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True): def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
"""Report retry""" """Report retry"""
@ -421,13 +434,13 @@ class FileDownloader:
""" """
nooverwrites_and_exists = ( nooverwrites_and_exists = (
not self.params.get('overwrites', True) not self.params.get('overwrites', True)
and os.path.exists(encodeFilename(filename)) and os.path.exists(filename)
) )
if not hasattr(filename, 'write'): if not hasattr(filename, 'write'):
continuedl_and_exists = ( continuedl_and_exists = (
self.params.get('continuedl', True) self.params.get('continuedl', True)
and os.path.isfile(encodeFilename(filename)) and os.path.isfile(filename)
and not self.params.get('nopart', False) and not self.params.get('nopart', False)
) )
@ -437,7 +450,7 @@ class FileDownloader:
self._hook_progress({ self._hook_progress({
'filename': filename, 'filename': filename,
'status': 'finished', 'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)), 'total_bytes': os.path.getsize(filename),
}, info_dict) }, info_dict)
self._finish_multiline_status() self._finish_multiline_status()
return True, False return True, False
@ -478,9 +491,7 @@ class FileDownloader:
if not self.params.get('verbose', False): if not self.params.get('verbose', False):
return return
str_args = [decodeArgument(a) for a in args]
if exe is None: if exe is None:
exe = os.path.basename(str_args[0]) exe = os.path.basename(args[0])
self.write_debug(f'{exe} command line: {shell_quote(str_args)}') self.write_debug(f'{exe} command line: {shell_quote(args)}')

View File

@ -15,12 +15,15 @@ class DashSegmentsFD(FragmentFD):
FD_NAME = 'dashsegments' FD_NAME = 'dashsegments'
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
if info_dict.get('is_live') and set(info_dict['protocol'].split('+')) != {'http_dash_segments_generator'}: if 'http_dash_segments_generator' in info_dict['protocol'].split('+'):
self.report_error('Live DASH videos are not supported') real_downloader = None # No external FD can support --live-from-start
else:
if info_dict.get('is_live'):
self.report_error('Live DASH videos are not supported')
real_downloader = get_suitable_downloader(
info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-'))
real_start = time.time() real_start = time.time()
real_downloader = get_suitable_downloader(
info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-'))
requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])] requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])]
args = [] args = []

View File

@ -1,4 +1,5 @@
import enum import enum
import functools
import json import json
import os import os
import re import re
@ -9,7 +10,6 @@ import time
import uuid import uuid
from .fragment import FragmentFD from .fragment import FragmentFD
from ..compat import functools
from ..networking import Request from ..networking import Request
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
from ..utils import ( from ..utils import (
@ -23,7 +23,6 @@ from ..utils import (
cli_valueless_option, cli_valueless_option,
determine_ext, determine_ext,
encodeArgument, encodeArgument,
encodeFilename,
find_available_port, find_available_port,
remove_end, remove_end,
traverse_obj, traverse_obj,
@ -55,7 +54,7 @@ class ExternalFD(FragmentFD):
# correct and expected termination thus all postprocessing # correct and expected termination thus all postprocessing
# should take place # should take place
retval = 0 retval = 0
self.to_screen('[%s] Interrupted by user' % self.get_basename()) self.to_screen(f'[{self.get_basename()}] Interrupted by user')
finally: finally:
if self._cookies_tempfile: if self._cookies_tempfile:
self.try_remove(self._cookies_tempfile) self.try_remove(self._cookies_tempfile)
@ -67,7 +66,7 @@ class ExternalFD(FragmentFD):
'elapsed': time.time() - started, 'elapsed': time.time() - started,
} }
if filename != '-': if filename != '-':
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(tmpfilename)
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
status.update({ status.update({
'downloaded_bytes': fsize, 'downloaded_bytes': fsize,
@ -108,7 +107,7 @@ class ExternalFD(FragmentFD):
return all(( return all((
not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES, not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES,
'+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES, '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES,
not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'), not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url', 'extra_param_to_key_url'),
all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')), all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')),
)) ))
@ -172,7 +171,7 @@ class ExternalFD(FragmentFD):
decrypt_fragment = self.decrypter(info_dict) decrypt_fragment = self.decrypter(info_dict)
dest, _ = self.sanitize_open(tmpfilename, 'wb') dest, _ = self.sanitize_open(tmpfilename, 'wb')
for frag_index, fragment in enumerate(info_dict['fragments']): for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) fragment_filename = f'{tmpfilename}-Frag{frag_index}'
try: try:
src, _ = self.sanitize_open(fragment_filename, 'rb') src, _ = self.sanitize_open(fragment_filename, 'rb')
except OSError as err: except OSError as err:
@ -184,9 +183,9 @@ class ExternalFD(FragmentFD):
dest.write(decrypt_fragment(fragment, src.read())) dest.write(decrypt_fragment(fragment, src.read()))
src.close() src.close()
if not self.params.get('keep_fragments', False): if not self.params.get('keep_fragments', False):
self.try_remove(encodeFilename(fragment_filename)) self.try_remove(fragment_filename)
dest.close() dest.close()
self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) self.try_remove(f'{tmpfilename}.frag.urls')
return 0 return 0
def _call_process(self, cmd, info_dict): def _call_process(self, cmd, info_dict):
@ -335,12 +334,12 @@ class Aria2cFD(ExternalFD):
cmd += ['--auto-file-renaming=false'] cmd += ['--auto-file-renaming=false']
if 'fragments' in info_dict: if 'fragments' in info_dict:
cmd += ['--file-allocation=none', '--uri-selector=inorder'] cmd += ['--uri-selector=inorder']
url_list_file = '%s.frag.urls' % tmpfilename url_list_file = f'{tmpfilename}.frag.urls'
url_list = [] url_list = []
for frag_index, fragment in enumerate(info_dict['fragments']): for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) fragment_filename = f'{os.path.basename(tmpfilename)}-Frag{frag_index}'
url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename))) url_list.append('{}\n\tout={}'.format(fragment['url'], self._aria2c_filename(fragment_filename)))
stream, _ = self.sanitize_open(url_list_file, 'wb') stream, _ = self.sanitize_open(url_list_file, 'wb')
stream.write('\n'.join(url_list).encode()) stream.write('\n'.join(url_list).encode())
stream.close() stream.close()
@ -357,7 +356,7 @@ class Aria2cFD(ExternalFD):
'id': sanitycheck, 'id': sanitycheck,
'method': method, 'method': method,
'params': [f'token:{rpc_secret}', *params], 'params': [f'token:{rpc_secret}', *params],
}).encode('utf-8') }).encode()
request = Request( request = Request(
f'http://localhost:{rpc_port}/jsonrpc', f'http://localhost:{rpc_port}/jsonrpc',
data=d, headers={ data=d, headers={
@ -416,7 +415,7 @@ class Aria2cFD(ExternalFD):
'total_bytes_estimate': total, 'total_bytes_estimate': total,
'eta': (total - downloaded) / (speed or 1), 'eta': (total - downloaded) / (speed or 1),
'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None, 'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None,
'elapsed': time.time() - started 'elapsed': time.time() - started,
}) })
self._hook_progress(status, info_dict) self._hook_progress(status, info_dict)
@ -458,8 +457,6 @@ class FFmpegFD(ExternalFD):
@classmethod @classmethod
def available(cls, path=None): def available(cls, path=None):
# TODO: Fix path for ffmpeg
# Fixme: This may be wrong when --ffmpeg-location is used
return FFmpegPostProcessor().available return FFmpegPostProcessor().available
def on_process_started(self, proc, stdin): def on_process_started(self, proc, stdin):
@ -491,7 +488,7 @@ class FFmpegFD(ExternalFD):
if not self.params.get('verbose'): if not self.params.get('verbose'):
args += ['-hide_banner'] args += ['-hide_banner']
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[]) args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args', ...))
# These exists only for compatibility. Extractors should use # These exists only for compatibility. Extractors should use
# info_dict['downloader_options']['ffmpeg_args'] instead # info_dict['downloader_options']['ffmpeg_args'] instead
@ -508,13 +505,13 @@ class FFmpegFD(ExternalFD):
env = None env = None
proxy = self.params.get('proxy') proxy = self.params.get('proxy')
if proxy: if proxy:
if not re.match(r'^[\da-zA-Z]+://', proxy): if not re.match(r'[\da-zA-Z]+://', proxy):
proxy = 'http://%s' % proxy proxy = f'http://{proxy}'
if proxy.startswith('socks'): if proxy.startswith('socks'):
self.report_warning( self.report_warning(
'%s does not support SOCKS proxies. Downloading is likely to fail. ' f'{self.get_basename()} does not support SOCKS proxies. Downloading is likely to fail. '
'Consider adding --hls-prefer-native to your command.' % self.get_basename()) 'Consider adding --hls-prefer-native to your command.')
# Since December 2015 ffmpeg supports -http_proxy option (see # Since December 2015 ffmpeg supports -http_proxy option (see
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
@ -559,7 +556,7 @@ class FFmpegFD(ExternalFD):
selected_formats = info_dict.get('requested_formats') or [info_dict] selected_formats = info_dict.get('requested_formats') or [info_dict]
for i, fmt in enumerate(selected_formats): for i, fmt in enumerate(selected_formats):
is_http = re.match(r'^https?://', fmt['url']) is_http = re.match(r'https?://', fmt['url'])
cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else [] cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
if cookies: if cookies:
args.extend(['-cookies', ''.join( args.extend(['-cookies', ''.join(
@ -575,7 +572,7 @@ class FFmpegFD(ExternalFD):
if end_time: if end_time:
args += ['-t', str(end_time - start_time)] args += ['-t', str(end_time - start_time)]
args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']] args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']]
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
args += ['-c', 'copy'] args += ['-c', 'copy']
@ -615,10 +612,12 @@ class FFmpegFD(ExternalFD):
else: else:
args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)] args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args_out', ...))
args += self._configuration_args(('_o1', '_o', '')) args += self._configuration_args(('_o1', '_o', ''))
args = [encodeArgument(opt) for opt in args] args = [encodeArgument(opt) for opt in args]
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) args.append(ffpp._ffmpeg_filename_argument(tmpfilename))
self._debug_cmd(args) self._debug_cmd(args)
piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats) piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats)

View File

@ -67,12 +67,12 @@ class FlvReader(io.BytesIO):
self.read_bytes(3) self.read_bytes(3)
quality_entry_count = self.read_unsigned_char() quality_entry_count = self.read_unsigned_char()
# QualityEntryCount # QualityEntryCount
for i in range(quality_entry_count): for _ in range(quality_entry_count):
self.read_string() self.read_string()
segment_run_count = self.read_unsigned_int() segment_run_count = self.read_unsigned_int()
segments = [] segments = []
for i in range(segment_run_count): for _ in range(segment_run_count):
first_segment = self.read_unsigned_int() first_segment = self.read_unsigned_int()
fragments_per_segment = self.read_unsigned_int() fragments_per_segment = self.read_unsigned_int()
segments.append((first_segment, fragments_per_segment)) segments.append((first_segment, fragments_per_segment))
@ -91,12 +91,12 @@ class FlvReader(io.BytesIO):
quality_entry_count = self.read_unsigned_char() quality_entry_count = self.read_unsigned_char()
# QualitySegmentUrlModifiers # QualitySegmentUrlModifiers
for i in range(quality_entry_count): for _ in range(quality_entry_count):
self.read_string() self.read_string()
fragments_count = self.read_unsigned_int() fragments_count = self.read_unsigned_int()
fragments = [] fragments = []
for i in range(fragments_count): for _ in range(fragments_count):
first = self.read_unsigned_int() first = self.read_unsigned_int()
first_ts = self.read_unsigned_long_long() first_ts = self.read_unsigned_long_long()
duration = self.read_unsigned_int() duration = self.read_unsigned_int()
@ -135,11 +135,11 @@ class FlvReader(io.BytesIO):
self.read_string() # MovieIdentifier self.read_string() # MovieIdentifier
server_count = self.read_unsigned_char() server_count = self.read_unsigned_char()
# ServerEntryTable # ServerEntryTable
for i in range(server_count): for _ in range(server_count):
self.read_string() self.read_string()
quality_count = self.read_unsigned_char() quality_count = self.read_unsigned_char()
# QualityEntryTable # QualityEntryTable
for i in range(quality_count): for _ in range(quality_count):
self.read_string() self.read_string()
# DrmData # DrmData
self.read_string() self.read_string()
@ -148,14 +148,14 @@ class FlvReader(io.BytesIO):
segments_count = self.read_unsigned_char() segments_count = self.read_unsigned_char()
segments = [] segments = []
for i in range(segments_count): for _ in range(segments_count):
box_size, box_type, box_data = self.read_box_info() box_size, box_type, box_data = self.read_box_info()
assert box_type == b'asrt' assert box_type == b'asrt'
segment = FlvReader(box_data).read_asrt() segment = FlvReader(box_data).read_asrt()
segments.append(segment) segments.append(segment)
fragments_run_count = self.read_unsigned_char() fragments_run_count = self.read_unsigned_char()
fragments = [] fragments = []
for i in range(fragments_run_count): for _ in range(fragments_run_count):
box_size, box_type, box_data = self.read_box_info() box_size, box_type, box_data = self.read_box_info()
assert box_type == b'afrt' assert box_type == b'afrt'
fragments.append(FlvReader(box_data).read_afrt()) fragments.append(FlvReader(box_data).read_afrt())
@ -309,7 +309,7 @@ class F4mFD(FragmentFD):
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
man_url = info_dict['url'] man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr') requested_bitrate = info_dict.get('tbr')
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) self.to_screen(f'[{self.FD_NAME}] Downloading f4m manifest')
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.url man_url = urlh.url
@ -326,8 +326,8 @@ class F4mFD(FragmentFD):
formats = sorted(formats, key=lambda f: f[0]) formats = sorted(formats, key=lambda f: f[0])
rate, media = formats[-1] rate, media = formats[-1]
else: else:
rate, media = list(filter( rate, media = next(filter(
lambda f: int(f[0]) == requested_bitrate, formats))[0] lambda f: int(f[0]) == requested_bitrate, formats))
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
man_base_url = get_base_url(doc) or man_url man_base_url = get_base_url(doc) or man_url

View File

@ -9,11 +9,11 @@ import time
from .common import FileDownloader from .common import FileDownloader
from .http import HttpFD from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_os_name
from ..networking import Request from ..networking import Request
from ..networking.exceptions import HTTPError, IncompleteRead from ..networking.exceptions import HTTPError, IncompleteRead
from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj from ..utils import DownloadError, RetryManager, traverse_obj
from ..utils.networking import HTTPHeaderDict from ..utils.networking import HTTPHeaderDict
from ..utils.progress import ProgressCalculator
class HttpQuietDownloader(HttpFD): class HttpQuietDownloader(HttpFD):
@ -151,7 +151,7 @@ class FragmentFD(FileDownloader):
if self.__do_ytdl_file(ctx): if self.__do_ytdl_file(ctx):
self._write_ytdl_file(ctx) self._write_ytdl_file(ctx)
if not self.params.get('keep_fragments', False): if not self.params.get('keep_fragments', False):
self.try_remove(encodeFilename(ctx['fragment_filename_sanitized'])) self.try_remove(ctx['fragment_filename_sanitized'])
del ctx['fragment_filename_sanitized'] del ctx['fragment_filename_sanitized']
def _prepare_frag_download(self, ctx): def _prepare_frag_download(self, ctx):
@ -187,7 +187,7 @@ class FragmentFD(FileDownloader):
}) })
if self.__do_ytdl_file(ctx): if self.__do_ytdl_file(ctx):
ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))) ytdl_file_exists = os.path.isfile(self.ytdl_filename(ctx['filename']))
continuedl = self.params.get('continuedl', True) continuedl = self.params.get('continuedl', True)
if continuedl and ytdl_file_exists: if continuedl and ytdl_file_exists:
self._read_ytdl_file(ctx) self._read_ytdl_file(ctx)
@ -198,7 +198,7 @@ class FragmentFD(FileDownloader):
'.ytdl file is corrupt' if is_corrupt else '.ytdl file is corrupt' if is_corrupt else
'Inconsistent state of incomplete fragment download') 'Inconsistent state of incomplete fragment download')
self.report_warning( self.report_warning(
'%s. Restarting from the beginning ...' % message) f'{message}. Restarting from the beginning ...')
ctx['fragment_index'] = resume_len = 0 ctx['fragment_index'] = resume_len = 0
if 'ytdl_corrupt' in ctx: if 'ytdl_corrupt' in ctx:
del ctx['ytdl_corrupt'] del ctx['ytdl_corrupt']
@ -226,8 +226,7 @@ class FragmentFD(FileDownloader):
resume_len = ctx['complete_frags_downloaded_bytes'] resume_len = ctx['complete_frags_downloaded_bytes']
total_frags = ctx['total_frags'] total_frags = ctx['total_frags']
ctx_id = ctx.get('ctx_id') ctx_id = ctx.get('ctx_id')
# This dict stores the download progress, it's updated by the progress # Stores the download progress, updated by the progress hook
# hook
state = { state = {
'status': 'downloading', 'status': 'downloading',
'downloaded_bytes': resume_len, 'downloaded_bytes': resume_len,
@ -237,14 +236,8 @@ class FragmentFD(FileDownloader):
'tmpfilename': ctx['tmpfilename'], 'tmpfilename': ctx['tmpfilename'],
} }
start = time.time() ctx['started'] = time.time()
ctx.update({ progress = ProgressCalculator(resume_len)
'started': start,
'fragment_started': start,
# Amount of fragment's bytes downloaded by the time of the previous
# frag progress hook invocation
'prev_frag_downloaded_bytes': 0,
})
def frag_progress_hook(s): def frag_progress_hook(s):
if s['status'] not in ('downloading', 'finished'): if s['status'] not in ('downloading', 'finished'):
@ -259,38 +252,35 @@ class FragmentFD(FileDownloader):
state['max_progress'] = ctx.get('max_progress') state['max_progress'] = ctx.get('max_progress')
state['progress_idx'] = ctx.get('progress_idx') state['progress_idx'] = ctx.get('progress_idx')
time_now = time.time() state['elapsed'] = progress.elapsed
state['elapsed'] = time_now - start
frag_total_bytes = s.get('total_bytes') or 0 frag_total_bytes = s.get('total_bytes') or 0
s['fragment_info_dict'] = s.pop('info_dict', {}) s['fragment_info_dict'] = s.pop('info_dict', {})
# XXX: Fragment resume is not accounted for here
if not ctx['live']: if not ctx['live']:
estimated_size = ( estimated_size = (
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
/ (state['fragment_index'] + 1) * total_frags) / (state['fragment_index'] + 1) * total_frags)
state['total_bytes_estimate'] = estimated_size progress.total = estimated_size
progress.update(s.get('downloaded_bytes'))
state['total_bytes_estimate'] = progress.total
else:
progress.update(s.get('downloaded_bytes'))
if s['status'] == 'finished': if s['status'] == 'finished':
state['fragment_index'] += 1 state['fragment_index'] += 1
ctx['fragment_index'] = state['fragment_index'] ctx['fragment_index'] = state['fragment_index']
state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes'] progress.thread_reset()
ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
ctx['speed'] = state['speed'] = self.calc_speed( state['downloaded_bytes'] = ctx['complete_frags_downloaded_bytes'] = progress.downloaded
ctx['fragment_started'], time_now, frag_total_bytes) state['speed'] = ctx['speed'] = progress.speed.smooth
ctx['fragment_started'] = time.time() state['eta'] = progress.eta.smooth
ctx['prev_frag_downloaded_bytes'] = 0
else:
frag_downloaded_bytes = s['downloaded_bytes']
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
ctx['speed'] = state['speed'] = self.calc_speed(
ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx.get('frag_resume_len', 0))
if not ctx['live']:
state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
self._hook_progress(state, info_dict) self._hook_progress(state, info_dict)
ctx['dl'].add_progress_hook(frag_progress_hook) ctx['dl'].add_progress_hook(frag_progress_hook)
return start return ctx['started']
def _finish_frag_download(self, ctx, info_dict): def _finish_frag_download(self, ctx, info_dict):
ctx['dest_stream'].close() ctx['dest_stream'].close()
@ -375,10 +365,10 @@ class FragmentFD(FileDownloader):
return decrypt_fragment return decrypt_fragment
def download_and_append_fragments_multiple(self, *args, **kwargs): def download_and_append_fragments_multiple(self, *args, **kwargs):
''' """
@params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ...
all args must be either tuple or list all args must be either tuple or list
''' """
interrupt_trigger = [True] interrupt_trigger = [True]
max_progress = len(args) max_progress = len(args)
if max_progress == 1: if max_progress == 1:
@ -399,7 +389,7 @@ class FragmentFD(FileDownloader):
def __exit__(self, exc_type, exc_val, exc_tb): def __exit__(self, exc_type, exc_val, exc_tb):
pass pass
if compat_os_name == 'nt': if os.name == 'nt':
def future_result(future): def future_result(future):
while True: while True:
try: try:
@ -433,7 +423,7 @@ class FragmentFD(FileDownloader):
finally: finally:
tpe.shutdown(wait=True) tpe.shutdown(wait=True)
if not interrupt_trigger[0] and not is_live: if not interrupt_trigger[0] and not is_live:
raise KeyboardInterrupt() raise KeyboardInterrupt
# we expect the user wants to stop and DO WANT the preceding postprocessors to run; # we expect the user wants to stop and DO WANT the preceding postprocessors to run;
# so returning a intermediate result here instead of KeyboardInterrupt on live # so returning a intermediate result here instead of KeyboardInterrupt on live
return result return result
@ -500,7 +490,6 @@ class FragmentFD(FileDownloader):
download_fragment(fragment, ctx_copy) download_fragment(fragment, ctx_copy)
return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized') return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')
self.report_warning('The download speed shown is only of one thread. This is a known issue')
with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
try: try:
for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments): for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):

View File

@ -16,6 +16,7 @@ from ..utils import (
update_url_query, update_url_query,
urljoin, urljoin,
) )
from ..utils._utils import _request_dump_filename
class HlsFD(FragmentFD): class HlsFD(FragmentFD):
@ -72,11 +73,23 @@ class HlsFD(FragmentFD):
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
man_url = info_dict['url'] man_url = info_dict['url']
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) s = info_dict.get('hls_media_playlist_data')
man_url = urlh.url if s:
s = urlh.read().decode('utf-8', 'ignore') self.to_screen(f'[{self.FD_NAME}] Using m3u8 manifest from extracted info')
else:
self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest')
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.url
s_bytes = urlh.read()
if self.params.get('write_pages'):
dump_filename = _request_dump_filename(
man_url, info_dict['id'], None,
trim_length=self.params.get('trim_file_name'))
self.to_screen(f'[{self.FD_NAME}] Saving request to {dump_filename}')
with open(dump_filename, 'wb') as outf:
outf.write(s_bytes)
s = s_bytes.decode('utf-8', 'ignore')
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
if can_download: if can_download:
@ -119,12 +132,12 @@ class HlsFD(FragmentFD):
self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}') self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
def is_ad_fragment_start(s): def is_ad_fragment_start(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s)
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')))
def is_ad_fragment_end(s): def is_ad_fragment_end(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s)
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')))
fragments = [] fragments = []
@ -160,10 +173,12 @@ class HlsFD(FragmentFD):
extra_state = ctx.setdefault('extra_state', {}) extra_state = ctx.setdefault('extra_state', {})
format_index = info_dict.get('format_index') format_index = info_dict.get('format_index')
extra_query = None extra_segment_query = None
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') if extra_param_to_segment_url := info_dict.get('extra_param_to_segment_url'):
if extra_param_to_segment_url: extra_segment_query = urllib.parse.parse_qs(extra_param_to_segment_url)
extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) extra_key_query = None
if extra_param_to_key_url := info_dict.get('extra_param_to_key_url'):
extra_key_query = urllib.parse.parse_qs(extra_param_to_key_url)
i = 0 i = 0
media_sequence = 0 media_sequence = 0
decrypt_info = {'METHOD': 'NONE'} decrypt_info = {'METHOD': 'NONE'}
@ -175,6 +190,7 @@ class HlsFD(FragmentFD):
if external_aes_iv: if external_aes_iv:
external_aes_iv = binascii.unhexlify(remove_start(external_aes_iv, '0x').zfill(32)) external_aes_iv = binascii.unhexlify(remove_start(external_aes_iv, '0x').zfill(32))
byte_range = {} byte_range = {}
byte_range_offset = 0
discontinuity_count = 0 discontinuity_count = 0
frag_index = 0 frag_index = 0
ad_frag_next = False ad_frag_next = False
@ -190,8 +206,8 @@ class HlsFD(FragmentFD):
if frag_index <= ctx['fragment_index']: if frag_index <= ctx['fragment_index']:
continue continue
frag_url = urljoin(man_url, line) frag_url = urljoin(man_url, line)
if extra_query: if extra_segment_query:
frag_url = update_url_query(frag_url, extra_query) frag_url = update_url_query(frag_url, extra_segment_query)
fragments.append({ fragments.append({
'frag_index': frag_index, 'frag_index': frag_index,
@ -202,6 +218,11 @@ class HlsFD(FragmentFD):
}) })
media_sequence += 1 media_sequence += 1
# If the byte_range is truthy, reset it after appending a fragment that uses it
if byte_range:
byte_range_offset = byte_range['end']
byte_range = {}
elif line.startswith('#EXT-X-MAP'): elif line.startswith('#EXT-X-MAP'):
if format_index and discontinuity_count != format_index: if format_index and discontinuity_count != format_index:
continue continue
@ -212,13 +233,15 @@ class HlsFD(FragmentFD):
frag_index += 1 frag_index += 1
map_info = parse_m3u8_attributes(line[11:]) map_info = parse_m3u8_attributes(line[11:])
frag_url = urljoin(man_url, map_info.get('URI')) frag_url = urljoin(man_url, map_info.get('URI'))
if extra_query: if extra_segment_query:
frag_url = update_url_query(frag_url, extra_query) frag_url = update_url_query(frag_url, extra_segment_query)
map_byte_range = {}
if map_info.get('BYTERANGE'): if map_info.get('BYTERANGE'):
splitted_byte_range = map_info.get('BYTERANGE').split('@') splitted_byte_range = map_info.get('BYTERANGE').split('@')
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else 0
byte_range = { map_byte_range = {
'start': sub_range_start, 'start': sub_range_start,
'end': sub_range_start + int(splitted_byte_range[0]), 'end': sub_range_start + int(splitted_byte_range[0]),
} }
@ -227,8 +250,8 @@ class HlsFD(FragmentFD):
'frag_index': frag_index, 'frag_index': frag_index,
'url': frag_url, 'url': frag_url,
'decrypt_info': decrypt_info, 'decrypt_info': decrypt_info,
'byte_range': byte_range, 'byte_range': map_byte_range,
'media_sequence': media_sequence 'media_sequence': media_sequence,
}) })
media_sequence += 1 media_sequence += 1
@ -244,8 +267,10 @@ class HlsFD(FragmentFD):
decrypt_info['KEY'] = external_aes_key decrypt_info['KEY'] = external_aes_key
else: else:
decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI']) decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI'])
if extra_query: if extra_key_query or extra_segment_query:
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) # Fall back to extra_segment_query to key for backwards compat
decrypt_info['URI'] = update_url_query(
decrypt_info['URI'], extra_key_query or extra_segment_query)
if decrypt_url != decrypt_info['URI']: if decrypt_url != decrypt_info['URI']:
decrypt_info['KEY'] = None decrypt_info['KEY'] = None
@ -253,7 +278,7 @@ class HlsFD(FragmentFD):
media_sequence = int(line[22:]) media_sequence = int(line[22:])
elif line.startswith('#EXT-X-BYTERANGE'): elif line.startswith('#EXT-X-BYTERANGE'):
splitted_byte_range = line[17:].split('@') splitted_byte_range = line[17:].split('@')
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range_offset
byte_range = { byte_range = {
'start': sub_range_start, 'start': sub_range_start,
'end': sub_range_start + int(splitted_byte_range[0]), 'end': sub_range_start + int(splitted_byte_range[0]),
@ -350,9 +375,8 @@ class HlsFD(FragmentFD):
# XXX: this should probably be silent as well # XXX: this should probably be silent as well
# or verify that all segments contain the same data # or verify that all segments contain the same data
self.report_warning(bug_reports_message( self.report_warning(bug_reports_message(
'Discarding a %s block found in the middle of the stream; ' f'Discarding a {type(block).__name__} block found in the middle of the stream; '
'if the subtitles display incorrectly,' 'if the subtitles display incorrectly,'))
% (type(block).__name__)))
continue continue
block.write_into(output) block.write_into(output)
@ -369,7 +393,10 @@ class HlsFD(FragmentFD):
return output.getvalue().encode() return output.getvalue().encode()
self.download_and_append_fragments( if len(fragments) == 1:
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments) self.download_and_append_fragments(ctx, fragments, info_dict)
else:
self.download_and_append_fragments(
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
else: else:
return self.download_and_append_fragments(ctx, fragments, info_dict) return self.download_and_append_fragments(ctx, fragments, info_dict)

View File

@ -15,7 +15,6 @@ from ..utils import (
ThrottledDownload, ThrottledDownload,
XAttrMetadataError, XAttrMetadataError,
XAttrUnavailableError, XAttrUnavailableError,
encodeFilename,
int_or_none, int_or_none,
parse_http_range, parse_http_range,
try_call, try_call,
@ -58,9 +57,8 @@ class HttpFD(FileDownloader):
if self.params.get('continuedl', True): if self.params.get('continuedl', True):
# Establish possible resume length # Establish possible resume length
if os.path.isfile(encodeFilename(ctx.tmpfilename)): if os.path.isfile(ctx.tmpfilename):
ctx.resume_len = os.path.getsize( ctx.resume_len = os.path.getsize(ctx.tmpfilename)
encodeFilename(ctx.tmpfilename))
ctx.is_resume = ctx.resume_len > 0 ctx.is_resume = ctx.resume_len > 0
@ -176,7 +174,7 @@ class HttpFD(FileDownloader):
'downloaded_bytes': ctx.resume_len, 'downloaded_bytes': ctx.resume_len,
'total_bytes': ctx.resume_len, 'total_bytes': ctx.resume_len,
}, info_dict) }, info_dict)
raise SucceedDownload() raise SucceedDownload
else: else:
# The length does not match, we start the download over # The length does not match, we start the download over
self.report_unable_to_resume() self.report_unable_to_resume()
@ -194,7 +192,7 @@ class HttpFD(FileDownloader):
def close_stream(): def close_stream():
if ctx.stream is not None: if ctx.stream is not None:
if not ctx.tmpfilename == '-': if ctx.tmpfilename != '-':
ctx.stream.close() ctx.stream.close()
ctx.stream = None ctx.stream = None
@ -237,8 +235,13 @@ class HttpFD(FileDownloader):
def retry(e): def retry(e):
close_stream() close_stream()
ctx.resume_len = (byte_counter if ctx.tmpfilename == '-' if ctx.tmpfilename == '-':
else os.path.getsize(encodeFilename(ctx.tmpfilename))) ctx.resume_len = byte_counter
else:
try:
ctx.resume_len = os.path.getsize(ctx.tmpfilename)
except FileNotFoundError:
ctx.resume_len = 0
raise RetryDownload(e) raise RetryDownload(e)
while True: while True:
@ -263,20 +266,20 @@ class HttpFD(FileDownloader):
ctx.filename = self.undo_temp_name(ctx.tmpfilename) ctx.filename = self.undo_temp_name(ctx.tmpfilename)
self.report_destination(ctx.filename) self.report_destination(ctx.filename)
except OSError as err: except OSError as err:
self.report_error('unable to open for writing: %s' % str(err)) self.report_error(f'unable to open for writing: {err}')
return False return False
if self.params.get('xattr_set_filesize', False) and data_len is not None: if self.params.get('xattr_set_filesize', False) and data_len is not None:
try: try:
write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode()) write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode())
except (XAttrUnavailableError, XAttrMetadataError) as err: except (XAttrUnavailableError, XAttrMetadataError) as err:
self.report_error('unable to set filesize xattr: %s' % str(err)) self.report_error(f'unable to set filesize xattr: {err}')
try: try:
ctx.stream.write(data_block) ctx.stream.write(data_block)
except OSError as err: except OSError as err:
self.to_stderr('\n') self.to_stderr('\n')
self.report_error('unable to write data: %s' % str(err)) self.report_error(f'unable to write data: {err}')
return False return False
# Apply rate limit # Apply rate limit
@ -322,7 +325,7 @@ class HttpFD(FileDownloader):
elif now - ctx.throttle_start > 3: elif now - ctx.throttle_start > 3:
if ctx.stream is not None and ctx.tmpfilename != '-': if ctx.stream is not None and ctx.tmpfilename != '-':
ctx.stream.close() ctx.stream.close()
raise ThrottledDownload() raise ThrottledDownload
elif speed: elif speed:
ctx.throttle_start = None ctx.throttle_start = None
@ -333,7 +336,7 @@ class HttpFD(FileDownloader):
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len: if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
ctx.resume_len = byte_counter ctx.resume_len = byte_counter
raise NextFragment() raise NextFragment
if ctx.tmpfilename != '-': if ctx.tmpfilename != '-':
ctx.stream.close() ctx.stream.close()

View File

@ -251,7 +251,7 @@ class IsmFD(FragmentFD):
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
frag_index = 0 frag_index = 0
for i, segment in enumerate(segments): for segment in segments:
frag_index += 1 frag_index += 1
if frag_index <= ctx['fragment_index']: if frag_index <= ctx['fragment_index']:
continue continue

View File

@ -10,7 +10,7 @@ from ..version import __version__ as YT_DLP_VERSION
class MhtmlFD(FragmentFD): class MhtmlFD(FragmentFD):
_STYLESHEET = """\ _STYLESHEET = '''\
html, body { html, body {
margin: 0; margin: 0;
padding: 0; padding: 0;
@ -45,7 +45,7 @@ body > figure > img {
max-width: 100%; max-width: 100%;
max-height: calc(100vh - 5em); max-height: calc(100vh - 5em);
} }
""" '''
_STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET) _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET)
_STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET) _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET)
@ -57,24 +57,19 @@ body > figure > img {
)).decode('us-ascii') + '?=' )).decode('us-ascii') + '?='
def _gen_cid(self, i, fragment, frag_boundary): def _gen_cid(self, i, fragment, frag_boundary):
return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary) return f'{i}.{frag_boundary}@yt-dlp.github.io.invalid'
def _gen_stub(self, *, fragments, frag_boundary, title): def _gen_stub(self, *, fragments, frag_boundary, title):
output = io.StringIO() output = io.StringIO()
output.write(( output.write(
'<!DOCTYPE html>' '<!DOCTYPE html>'
'<html>' '<html>'
'<head>' '<head>'
'' '<meta name="generator" content="yt-dlp {version}">' f'<meta name="generator" content="yt-dlp {escapeHTML(YT_DLP_VERSION)}">'
'' '<title>{title}</title>' f'<title>{escapeHTML(title)}</title>'
'' '<style>{styles}</style>' f'<style>{self._STYLESHEET}</style>'
'<body>' '<body>')
).format(
version=escapeHTML(YT_DLP_VERSION),
styles=self._STYLESHEET,
title=escapeHTML(title)
))
t0 = 0 t0 = 0
for i, frag in enumerate(fragments): for i, frag in enumerate(fragments):
@ -87,15 +82,12 @@ body > figure > img {
num=i + 1, num=i + 1,
t0=srt_subtitles_timecode(t0), t0=srt_subtitles_timecode(t0),
t1=srt_subtitles_timecode(t1), t1=srt_subtitles_timecode(t1),
duration=formatSeconds(frag['duration'], msec=True) duration=formatSeconds(frag['duration'], msec=True),
)) ))
except (KeyError, ValueError, TypeError): except (KeyError, ValueError, TypeError):
t1 = None t1 = None
output.write(( output.write(f'<figcaption>Slide #{i + 1}</figcaption>')
'<figcaption>Slide #{num}</figcaption>' output.write(f'<img src="cid:{self._gen_cid(i, frag, frag_boundary)}">')
).format(num=i + 1))
output.write('<img src="cid:{cid}">'.format(
cid=self._gen_cid(i, frag, frag_boundary)))
output.write('</figure>') output.write('</figure>')
t0 = t1 t0 = t1
@ -126,31 +118,24 @@ body > figure > img {
stub = self._gen_stub( stub = self._gen_stub(
fragments=fragments, fragments=fragments,
frag_boundary=frag_boundary, frag_boundary=frag_boundary,
title=title title=title,
) )
ctx['dest_stream'].write(( ctx['dest_stream'].write((
'MIME-Version: 1.0\r\n' 'MIME-Version: 1.0\r\n'
'From: <nowhere@yt-dlp.github.io.invalid>\r\n' 'From: <nowhere@yt-dlp.github.io.invalid>\r\n'
'To: <nowhere@yt-dlp.github.io.invalid>\r\n' 'To: <nowhere@yt-dlp.github.io.invalid>\r\n'
'Subject: {title}\r\n' f'Subject: {self._escape_mime(title)}\r\n'
'Content-type: multipart/related; ' 'Content-type: multipart/related; '
'' 'boundary="{boundary}"; ' f'boundary="{frag_boundary}"; '
'' 'type="text/html"\r\n' 'type="text/html"\r\n'
'X.yt-dlp.Origin: {origin}\r\n' f'X.yt-dlp.Origin: {origin}\r\n'
'\r\n' '\r\n'
'--{boundary}\r\n' f'--{frag_boundary}\r\n'
'Content-Type: text/html; charset=utf-8\r\n' 'Content-Type: text/html; charset=utf-8\r\n'
'Content-Length: {length}\r\n' f'Content-Length: {len(stub)}\r\n'
'\r\n' '\r\n'
'{stub}\r\n' f'{stub}\r\n').encode())
).format(
origin=origin,
boundary=frag_boundary,
length=len(stub),
title=self._escape_mime(title),
stub=stub
).encode())
extra_state['header_written'] = True extra_state['header_written'] = True
for i, fragment in enumerate(fragments): for i, fragment in enumerate(fragments):

View File

@ -2,58 +2,10 @@ import json
import threading import threading
import time import time
from . import get_suitable_downloader
from .common import FileDownloader from .common import FileDownloader
from .external import FFmpegFD from .external import FFmpegFD
from ..networking import Request from ..networking import Request
from ..utils import DownloadError, WebSocketsWrapper, str_or_none, try_get from ..utils import DownloadError, str_or_none, try_get
class NiconicoDmcFD(FileDownloader):
""" Downloading niconico douga from DMC with heartbeat """
def real_download(self, filename, info_dict):
from ..extractor.niconico import NiconicoIE
self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
ie = NiconicoIE(self.ydl)
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params)
success = download_complete = False
timer = [None]
heartbeat_lock = threading.Lock()
heartbeat_url = heartbeat_info_dict['url']
heartbeat_data = heartbeat_info_dict['data'].encode()
heartbeat_interval = heartbeat_info_dict.get('interval', 30)
request = Request(heartbeat_url, heartbeat_data)
def heartbeat():
try:
self.ydl.urlopen(request).read()
except Exception:
self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
with heartbeat_lock:
if not download_complete:
timer[0] = threading.Timer(heartbeat_interval, heartbeat)
timer[0].start()
heartbeat_info_dict['ping']()
self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
try:
heartbeat()
if type(fd).__name__ == 'HlsFD':
info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
success = fd.real_download(filename, info_dict)
finally:
if heartbeat_lock:
with heartbeat_lock:
timer[0].cancel()
download_complete = True
return success
class NiconicoLiveFD(FileDownloader): class NiconicoLiveFD(FileDownloader):
@ -64,7 +16,6 @@ class NiconicoLiveFD(FileDownloader):
ws_url = info_dict['url'] ws_url = info_dict['url']
ws_extractor = info_dict['ws'] ws_extractor = info_dict['ws']
ws_origin_host = info_dict['origin'] ws_origin_host = info_dict['origin']
cookies = info_dict.get('cookies')
live_quality = info_dict.get('live_quality', 'high') live_quality = info_dict.get('live_quality', 'high')
live_latency = info_dict.get('live_latency', 'high') live_latency = info_dict.get('live_latency', 'high')
dl = FFmpegFD(self.ydl, self.params or {}) dl = FFmpegFD(self.ydl, self.params or {})
@ -76,12 +27,7 @@ class NiconicoLiveFD(FileDownloader):
def communicate_ws(reconnect): def communicate_ws(reconnect):
if reconnect: if reconnect:
ws = WebSocketsWrapper(ws_url, { ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'}))
'Cookies': str_or_none(cookies) or '',
'Origin': f'https://{ws_origin_host}',
'Accept': '*/*',
'User-Agent': self.params['http_headers']['User-Agent'],
})
if self.ydl.params.get('verbose', False): if self.ydl.params.get('verbose', False):
self.to_screen('[debug] Sending startWatching request') self.to_screen('[debug] Sending startWatching request')
ws.send(json.dumps({ ws.send(json.dumps({
@ -91,14 +37,15 @@ class NiconicoLiveFD(FileDownloader):
'quality': live_quality, 'quality': live_quality,
'protocol': 'hls+fmp4', 'protocol': 'hls+fmp4',
'latency': live_latency, 'latency': live_latency,
'chasePlay': False 'accessRightMethod': 'single_cookie',
'chasePlay': False,
}, },
'room': { 'room': {
'protocol': 'webSocket', 'protocol': 'webSocket',
'commentable': True 'commentable': True,
}, },
'reconnect': True, 'reconnect': True,
} },
})) }))
else: else:
ws = ws_extractor ws = ws_extractor
@ -124,7 +71,7 @@ class NiconicoLiveFD(FileDownloader):
elif self.ydl.params.get('verbose', False): elif self.ydl.params.get('verbose', False):
if len(recv) > 100: if len(recv) > 100:
recv = recv[:100] + '...' recv = recv[:100] + '...'
self.to_screen('[debug] Server said: %s' % recv) self.to_screen(f'[debug] Server said: {recv}')
def ws_main(): def ws_main():
reconnect = False reconnect = False
@ -134,7 +81,7 @@ class NiconicoLiveFD(FileDownloader):
if ret is True: if ret is True:
return return
except BaseException as e: except BaseException as e:
self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e))) self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e)))
time.sleep(10) time.sleep(10)
continue continue
finally: finally:

View File

@ -8,7 +8,6 @@ from ..utils import (
Popen, Popen,
check_executable, check_executable,
encodeArgument, encodeArgument,
encodeFilename,
get_exe_version, get_exe_version,
) )
@ -179,15 +178,15 @@ class RtmpFD(FileDownloader):
return False return False
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
prevsize = os.path.getsize(encodeFilename(tmpfilename)) prevsize = os.path.getsize(tmpfilename)
self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize) self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes')
time.sleep(5.0) # This seems to be needed time.sleep(5.0) # This seems to be needed
args = basic_args + ['--resume'] args = [*basic_args, '--resume']
if retval == RD_FAILED: if retval == RD_FAILED:
args += ['--skip', '1'] args += ['--skip', '1']
args = [encodeArgument(a) for a in args] args = [encodeArgument(a) for a in args]
retval = run_rtmpdump(args) retval = run_rtmpdump(args)
cursize = os.path.getsize(encodeFilename(tmpfilename)) cursize = os.path.getsize(tmpfilename)
if prevsize == cursize and retval == RD_FAILED: if prevsize == cursize and retval == RD_FAILED:
break break
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
@ -196,8 +195,8 @@ class RtmpFD(FileDownloader):
retval = RD_SUCCESS retval = RD_SUCCESS
break break
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(tmpfilename)
self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize) self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes')
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
self._hook_progress({ self._hook_progress({
'downloaded_bytes': fsize, 'downloaded_bytes': fsize,

View File

@ -2,7 +2,7 @@ import os
import subprocess import subprocess
from .common import FileDownloader from .common import FileDownloader
from ..utils import check_executable, encodeFilename from ..utils import check_executable
class RtspFD(FileDownloader): class RtspFD(FileDownloader):
@ -26,7 +26,7 @@ class RtspFD(FileDownloader):
retval = subprocess.call(args) retval = subprocess.call(args)
if retval == 0: if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(tmpfilename)
self.to_screen(f'\r[{args[0]}] {fsize} bytes') self.to_screen(f'\r[{args[0]}] {fsize} bytes')
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
self._hook_progress({ self._hook_progress({

View File

@ -18,7 +18,7 @@ class YoutubeLiveChatFD(FragmentFD):
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
video_id = info_dict['video_id'] video_id = info_dict['video_id']
self.to_screen('[%s] Downloading live chat' % self.FD_NAME) self.to_screen(f'[{self.FD_NAME}] Downloading live chat')
if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat': if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
self.report_warning('Live chat download runs until the livestream ends. ' self.report_warning('Live chat download runs until the livestream ends. '
'If you wish to download the video simultaneously, run a separate yt-dlp instance') 'If you wish to download the video simultaneously, run a separate yt-dlp instance')
@ -123,8 +123,8 @@ class YoutubeLiveChatFD(FragmentFD):
data, data,
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live func = ((info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live)
or frag_index == 1 and try_refresh_replay_beginning or (frag_index == 1 and try_refresh_replay_beginning)
or parse_actions_replay) or parse_actions_replay)
return (True, *func(live_chat_continuation)) return (True, *func(live_chat_continuation))
except HTTPError as err: except HTTPError as err:

View File

@ -1,16 +1,25 @@
from ..compat.compat_utils import passthrough_module from ..compat.compat_utils import passthrough_module
from ..globals import extractors as _extractors_context
from ..globals import plugin_ies as _plugin_ies_context
from ..plugins import PluginSpec, register_plugin_spec
passthrough_module(__name__, '.extractors') passthrough_module(__name__, '.extractors')
del passthrough_module del passthrough_module
register_plugin_spec(PluginSpec(
module_name='extractor',
suffix='IE',
destination=_extractors_context,
plugin_destination=_plugin_ies_context,
))
def gen_extractor_classes(): def gen_extractor_classes():
""" Return a list of supported extractors. """ Return a list of supported extractors.
The order does matter; the first extractor matched is the one handling the URL. The order does matter; the first extractor matched is the one handling the URL.
""" """
from .extractors import _ALL_CLASSES import_extractors()
return list(_extractors_context.value.values())
return _ALL_CLASSES
def gen_extractors(): def gen_extractors():
@ -37,6 +46,9 @@ def list_extractors(age_limit=None):
def get_info_extractor(ie_name): def get_info_extractor(ie_name):
"""Returns the info extractor class with the given ie_name""" """Returns the info extractor class with the given ie_name"""
from . import extractors import_extractors()
return _extractors_context.value[f'{ie_name}IE']
return getattr(extractors, f'{ie_name}IE')
def import_extractors():
from . import extractors # noqa: F401

File diff suppressed because it is too large Load Diff

View File

@ -4,18 +4,18 @@ import re
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
dict_get,
ExtractorError, ExtractorError,
js_to_json, dict_get,
int_or_none, int_or_none,
js_to_json,
parse_iso8601, parse_iso8601,
str_or_none, str_or_none,
traverse_obj, traverse_obj,
try_get, try_get,
unescapeHTML, unescapeHTML,
update_url_query, update_url_query,
url_or_none,
) )
@ -66,7 +66,7 @@ class ABCIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'WWI Centenary', 'title': 'WWI Centenary',
'description': 'md5:c2379ec0ca84072e86b446e536954546', 'description': 'md5:c2379ec0ca84072e86b446e536954546',
} },
}, { }, {
'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074', 'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074',
'info_dict': { 'info_dict': {
@ -74,7 +74,7 @@ class ABCIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia', 'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia',
'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f', 'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f',
} },
}, { }, {
'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476', 'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476',
'info_dict': { 'info_dict': {
@ -85,7 +85,7 @@ class ABCIE(InfoExtractor):
'upload_date': '20200813', 'upload_date': '20200813',
'uploader': 'Behind the News', 'uploader': 'Behind the News',
'uploader_id': 'behindthenews', 'uploader_id': 'behindthenews',
} },
}, { }, {
'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540', 'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
'info_dict': { 'info_dict': {
@ -94,7 +94,7 @@ class ABCIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.', 'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485', 'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
} },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -125,7 +125,7 @@ class ABCIE(InfoExtractor):
if mobj is None: if mobj is None:
expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None) expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
if expired: if expired:
raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True) raise ExtractorError(f'{self.IE_NAME} said: {expired}', expected=True)
raise ExtractorError('Unable to extract video urls') raise ExtractorError('Unable to extract video urls')
urls_info = self._parse_json( urls_info = self._parse_json(
@ -163,7 +163,7 @@ class ABCIE(InfoExtractor):
'height': height, 'height': height,
'tbr': bitrate, 'tbr': bitrate,
'filesize': int_or_none(url_info.get('filesize')), 'filesize': int_or_none(url_info.get('filesize')),
'format_id': format_id 'format_id': format_id,
}) })
return { return {
@ -180,20 +180,100 @@ class ABCIViewIE(InfoExtractor):
_VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)' _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
_GEO_COUNTRIES = ['AU'] _GEO_COUNTRIES = ['AU']
# ABC iview programs are normally available for 14 days only.
_TESTS = [{ _TESTS = [{
'url': 'https://iview.abc.net.au/show/utopia/series/1/video/CO1211V001S00',
'md5': '52a942bfd7a0b79a6bfe9b4ce6c9d0ed',
'info_dict': {
'id': 'CO1211V001S00',
'ext': 'mp4',
'title': 'Series 1 Ep 1 Wood For The Trees',
'series': 'Utopia',
'description': 'md5:0cfb2c183c1b952d1548fd65c8a95c00',
'upload_date': '20230726',
'uploader_id': 'abc1',
'series_id': 'CO1211V',
'episode_id': 'CO1211V001S00',
'season_number': 1,
'season': 'Season 1',
'episode_number': 1,
'episode': 'Wood For The Trees',
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/co/CO1211V001S00_5ad8353f4df09_1280.jpg',
'timestamp': 1690403700,
},
'params': {
'skip_download': True,
},
}, {
'note': 'No episode name',
'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00', 'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
'md5': '67715ce3c78426b11ba167d875ac6abf', 'md5': '67715ce3c78426b11ba167d875ac6abf',
'info_dict': { 'info_dict': {
'id': 'LE1927H001S00', 'id': 'LE1927H001S00',
'ext': 'mp4', 'ext': 'mp4',
'title': "Series 11 Ep 1", 'title': 'Series 11 Ep 1',
'series': "Gruen", 'series': 'Gruen',
'description': 'md5:52cc744ad35045baf6aded2ce7287f67', 'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
'upload_date': '20190925', 'upload_date': '20190925',
'uploader_id': 'abc1', 'uploader_id': 'abc1',
'series_id': 'LE1927H',
'episode_id': 'LE1927H001S00',
'season_number': 11,
'season': 'Season 11',
'episode_number': 1,
'episode': 'Episode 1',
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/le/LE1927H001S00_5d954fbd79e25_1280.jpg',
'timestamp': 1569445289, 'timestamp': 1569445289,
}, },
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'params': {
'skip_download': True,
},
}, {
'note': 'No episode number',
'url': 'https://iview.abc.net.au/show/four-corners/series/2022/video/NC2203H039S00',
'md5': '77cb7d8434440e3b28fbebe331c2456a',
'info_dict': {
'id': 'NC2203H039S00',
'ext': 'mp4',
'title': 'Series 2022 Locking Up Kids',
'series': 'Four Corners',
'description': 'md5:54829ca108846d1a70e1fcce2853e720',
'upload_date': '20221114',
'uploader_id': 'abc1',
'series_id': 'NC2203H',
'episode_id': 'NC2203H039S00',
'season_number': 2022,
'season': 'Season 2022',
'episode': 'Locking Up Kids',
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg',
'timestamp': 1668460497,
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'params': {
'skip_download': True,
},
}, {
'note': 'No episode name or number',
'url': 'https://iview.abc.net.au/show/landline/series/2021/video/RF2004Q043S00',
'md5': '2e17dec06b13cc81dc119d2565289396',
'info_dict': {
'id': 'RF2004Q043S00',
'ext': 'mp4',
'title': 'Series 2021',
'series': 'Landline',
'description': 'md5:c9f30d9c0c914a7fd23842f6240be014',
'upload_date': '20211205',
'uploader_id': 'abc1',
'series_id': 'RF2004Q',
'episode_id': 'RF2004Q043S00',
'season_number': 2021,
'season': 'Season 2021',
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg',
'timestamp': 1638710705,
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
@ -207,13 +287,12 @@ class ABCIViewIE(InfoExtractor):
stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
house_number = video_params.get('episodeHouseNumber') or video_id house_number = video_params.get('episodeHouseNumber') or video_id
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format( path = f'/auth/hls/sign?ts={int(time.time())}&hn={house_number}&d=android-tablet'
int(time.time()), house_number)
sig = hmac.new( sig = hmac.new(
b'android.content.res.Resources', b'android.content.res.Resources',
path.encode('utf-8'), hashlib.sha256).hexdigest() path.encode(), hashlib.sha256).hexdigest()
token = self._download_webpage( token = self._download_webpage(
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id) f'http://iview.abc.net.au{path}&sig={sig}', video_id)
def tokenize_url(url, token): def tokenize_url(url, token):
return update_url_query(url, { return update_url_query(url, {
@ -222,7 +301,7 @@ class ABCIViewIE(InfoExtractor):
for sd in ('1080', '720', 'sd', 'sd-low'): for sd in ('1080', '720', 'sd', 'sd-low'):
sd_url = try_get( sd_url = try_get(
stream, lambda x: x['streams']['hls'][sd], compat_str) stream, lambda x: x['streams']['hls'][sd], str)
if not sd_url: if not sd_url:
continue continue
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
@ -255,6 +334,8 @@ class ABCIViewIE(InfoExtractor):
'episode_number': int_or_none(self._search_regex( 'episode_number': int_or_none(self._search_regex(
r'\bEp\s+(\d+)\b', title, 'episode number', default=None)), r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
'episode_id': house_number, 'episode_id': house_number,
'episode': self._search_regex(
r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title, 'episode', default='') or None,
'uploader_id': video_params.get('channel'), 'uploader_id': video_params.get('channel'),
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
@ -275,7 +356,7 @@ class ABCIViewShowSeriesIE(InfoExtractor):
'description': 'md5:93119346c24a7c322d446d8eece430ff', 'description': 'md5:93119346c24a7c322d446d8eece430ff',
'series': 'Upper Middle Bogan', 'series': 'Upper Middle Bogan',
'season': 'Series 1', 'season': 'Series 1',
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$' 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
}, },
'playlist_count': 8, 'playlist_count': 8,
}, { }, {
@ -294,17 +375,39 @@ class ABCIViewShowSeriesIE(InfoExtractor):
'noplaylist': True, 'noplaylist': True,
'skip_download': 'm3u8', 'skip_download': 'm3u8',
}, },
}, {
# 'videoEpisodes' is a dict with `items` key
'url': 'https://iview.abc.net.au/show/7-30-mark-humphries-satire',
'info_dict': {
'id': '178458-0',
'title': 'Episodes',
'description': 'Satirist Mark Humphries brings his unique perspective on current political events for 7.30.',
'series': '7.30 Mark Humphries Satire',
'season': 'Episodes',
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
},
'playlist_count': 15,
'skip': 'This program is not currently available in ABC iview',
}, {
'url': 'https://iview.abc.net.au/show/inbestigators',
'info_dict': {
'id': '175343-1',
'title': 'Series 1',
'description': 'md5:b9976935a6450e5b78ce2a940a755685',
'series': 'The Inbestigators',
'season': 'Series 1',
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.+\.jpg',
},
'playlist_count': 17,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
show_id = self._match_id(url) show_id = self._match_id(url)
webpage = self._download_webpage(url, show_id) webpage = self._download_webpage(url, show_id)
webpage_data = self._search_regex( video_data = self._search_json(
r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;', r'window\.__INITIAL_STATE__\s*=\s*[\'"]', webpage, 'initial state', show_id,
webpage, 'initial state') transform_source=lambda x: x.encode().decode('unicode_escape'),
video_data = self._parse_json( end_pattern=r'[\'"]\s*;')['route']['pageData']['_embedded']
unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id)
video_data = video_data['route']['pageData']['_embedded']
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl']) highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'): if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):
@ -313,12 +416,14 @@ class ABCIViewShowSeriesIE(InfoExtractor):
series = video_data['selectedSeries'] series = video_data['selectedSeries']
return { return {
'_type': 'playlist', '_type': 'playlist',
'entries': [self.url_result(episode['shareUrl']) 'entries': [self.url_result(episode_url, ABCIViewIE)
for episode in series['_embedded']['videoEpisodes']], for episode_url in traverse_obj(series, (
'_embedded', 'videoEpisodes', (None, 'items'), ..., 'shareUrl', {url_or_none}))],
'id': series.get('id'), 'id': series.get('id'),
'title': dict_get(series, ('title', 'displaySubtitle')), 'title': dict_get(series, ('title', 'displaySubtitle')),
'description': series.get('description'), 'description': series.get('description'),
'series': dict_get(series, ('showTitle', 'displayTitle')), 'series': dict_get(series, ('showTitle', 'displayTitle')),
'season': dict_get(series, ('title', 'displaySubtitle')), 'season': dict_get(series, ('title', 'displaySubtitle')),
'thumbnail': series.get('thumbnail'), 'thumbnail': traverse_obj(
series, 'thumbnail', ('images', lambda _, v: v['name'] == 'seriesThumbnail', 'url'), get_all=False),
} }

View File

@ -58,7 +58,7 @@ class AbcNewsVideoIE(AMPIE):
display_id = mobj.group('display_id') display_id = mobj.group('display_id')
video_id = mobj.group('id') video_id = mobj.group('id')
info_dict = self._extract_feed_info( info_dict = self._extract_feed_info(
'http://abcnews.go.com/video/itemfeed?id=%s' % video_id) f'http://abcnews.go.com/video/itemfeed?id={video_id}')
info_dict.update({ info_dict.update({
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,

View File

@ -1,5 +1,4 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
dict_get, dict_get,
int_or_none, int_or_none,
@ -57,11 +56,11 @@ class ABCOTVSIE(InfoExtractor):
data = self._download_json( data = self._download_json(
'https://api.abcotvs.com/v2/content', display_id, query={ 'https://api.abcotvs.com/v2/content', display_id, query={
'id': video_id, 'id': video_id,
'key': 'otv.web.%s.story' % station, 'key': f'otv.web.{station}.story',
'station': station, 'station': station,
})['data'] })['data']
video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id)) video_id = str(dict_get(video, ('id', 'publishedKey'), video_id))
title = video.get('title') or video['linkText'] title = video.get('title') or video['linkText']
formats = [] formats = []

View File

@ -6,53 +6,54 @@ import hmac
import io import io
import json import json
import re import re
import struct
import time import time
import urllib.parse import urllib.parse
import urllib.request
import urllib.response
import uuid import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..aes import aes_ecb_decrypt from ..aes import aes_ecb_decrypt
from ..networking import RequestHandler, Response
from ..networking.exceptions import TransportError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
bytes_to_intlist, OnDemandPagedList,
decode_base_n, decode_base_n,
int_or_none, int_or_none,
intlist_to_bytes,
OnDemandPagedList,
time_seconds, time_seconds,
traverse_obj, traverse_obj,
update_url,
update_url_query, update_url_query,
) )
def add_opener(ydl, handler): # FIXME: Create proper API in .networking class AbemaLicenseRH(RequestHandler):
"""Add a handler for opening URLs, like _download_webpage""" _SUPPORTED_URL_SCHEMES = ('abematv-license',)
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426 _SUPPORTED_PROXY_SCHEMES = None
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605 _SUPPORTED_FEATURES = None
rh = ydl._request_director.handlers['Urllib'] RH_NAME = 'abematv_license'
if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
return
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies)
assert isinstance(opener, urllib.request.OpenerDirector)
opener.add_handler(handler)
rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
_STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
_HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
class AbemaLicenseHandler(urllib.request.BaseHandler): def __init__(self, *, ie: 'AbemaTVIE', **kwargs):
handler_order = 499 super().__init__(**kwargs)
STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
def __init__(self, ie: 'AbemaTVIE'):
# the protocol that this should really handle is 'abematv-license://'
# abematv_license_open is just a placeholder for development purposes
# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open'))
self.ie = ie self.ie = ie
def _send(self, request):
url = request.url
ticket = urllib.parse.urlparse(url).netloc
try:
response_data = self._get_videokey_from_ticket(ticket)
except ExtractorError as e:
raise TransportError(cause=e.cause) from e
except (IndexError, KeyError, TypeError) as e:
raise TransportError(cause=repr(e)) from e
return Response(
io.BytesIO(response_data), url,
headers={'Content-Length': str(len(response_data))})
def _get_videokey_from_ticket(self, ticket): def _get_videokey_from_ticket(self, ticket):
to_show = self.ie.get_param('verbose', False) to_show = self.ie.get_param('verbose', False)
media_token = self.ie._get_media_token(to_show=to_show) media_token = self.ie._get_media_token(to_show=to_show)
@ -62,33 +63,27 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
query={'t': media_token}, query={'t': media_token},
data=json.dumps({ data=json.dumps({
'kv': 'a', 'kv': 'a',
'lt': ticket 'lt': ticket,
}).encode('utf-8'), }).encode(),
headers={ headers={
'Content-Type': 'application/json', 'Content-Type': 'application/json',
}) })
res = decode_base_n(license_response['k'], table=self.STRTABLE) res = decode_base_n(license_response['k'], table=self._STRTABLE)
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff)) encvideokey = list(res.to_bytes(16, 'big'))
h = hmac.new( h = hmac.new(
binascii.unhexlify(self.HKEY), binascii.unhexlify(self._HKEY),
(license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'), (license_response['cid'] + self.ie._DEVICE_ID).encode(),
digestmod=hashlib.sha256) digestmod=hashlib.sha256)
enckey = bytes_to_intlist(h.digest()) enckey = list(h.digest())
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey)) return bytes(aes_ecb_decrypt(encvideokey, enckey))
def abematv_license_open(self, url):
url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
ticket = urllib.parse.urlparse(url).netloc
response_data = self._get_videokey_from_ticket(ticket)
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
'Content-Length': str(len(response_data)),
}, url=url, code=200)
class AbemaTVBaseIE(InfoExtractor): class AbemaTVBaseIE(InfoExtractor):
_NETRC_MACHINE = 'abematv'
_USERTOKEN = None _USERTOKEN = None
_DEVICE_ID = None _DEVICE_ID = None
_MEDIATOKEN = None _MEDIATOKEN = None
@ -97,11 +92,11 @@ class AbemaTVBaseIE(InfoExtractor):
@classmethod @classmethod
def _generate_aks(cls, deviceid): def _generate_aks(cls, deviceid):
deviceid = deviceid.encode('utf-8') deviceid = deviceid.encode()
# add 1 hour and then drop minute and secs # add 1 hour and then drop minute and secs
ts_1hour = int((time_seconds() // 3600 + 1) * 3600) ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
time_struct = time.gmtime(ts_1hour) time_struct = time.gmtime(ts_1hour)
ts_1hour_str = str(ts_1hour).encode('utf-8') ts_1hour_str = str(ts_1hour).encode()
tmp = None tmp = None
@ -113,7 +108,7 @@ class AbemaTVBaseIE(InfoExtractor):
def mix_tmp(count): def mix_tmp(count):
nonlocal tmp nonlocal tmp
for i in range(count): for _ in range(count):
mix_once(tmp) mix_once(tmp)
def mix_twist(nonce): def mix_twist(nonce):
@ -133,11 +128,15 @@ class AbemaTVBaseIE(InfoExtractor):
if self._USERTOKEN: if self._USERTOKEN:
return self._USERTOKEN return self._USERTOKEN
self._downloader._request_director.add_handler(AbemaLicenseRH(ie=self, logger=None))
username, _ = self._get_login_info() username, _ = self._get_login_info()
AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username) auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
AbemaTVBaseIE._USERTOKEN = auth_cache and auth_cache.get('usertoken')
if AbemaTVBaseIE._USERTOKEN: if AbemaTVBaseIE._USERTOKEN:
# try authentication with locally stored token # try authentication with locally stored token
try: try:
AbemaTVBaseIE._DEVICE_ID = auth_cache.get('device_id')
self._get_media_token(True) self._get_media_token(True)
return return
except ExtractorError as e: except ExtractorError as e:
@ -150,13 +149,12 @@ class AbemaTVBaseIE(InfoExtractor):
data=json.dumps({ data=json.dumps({
'deviceId': self._DEVICE_ID, 'deviceId': self._DEVICE_ID,
'applicationKeySecret': aks, 'applicationKeySecret': aks,
}).encode('utf-8'), }).encode(),
headers={ headers={
'Content-Type': 'application/json', 'Content-Type': 'application/json',
}) })
AbemaTVBaseIE._USERTOKEN = user_data['token'] AbemaTVBaseIE._USERTOKEN = user_data['token']
add_opener(self._downloader, AbemaLicenseHandler(self))
return self._USERTOKEN return self._USERTOKEN
def _get_media_token(self, invalidate=False, to_show=True): def _get_media_token(self, invalidate=False, to_show=True):
@ -171,13 +169,44 @@ class AbemaTVBaseIE(InfoExtractor):
'osLang': 'ja_JP', 'osLang': 'ja_JP',
'osTimezone': 'Asia/Tokyo', 'osTimezone': 'Asia/Tokyo',
'appId': 'tv.abema', 'appId': 'tv.abema',
'appVersion': '3.27.1' 'appVersion': '3.27.1',
}, headers={ }, headers={
'Authorization': f'bearer {self._get_device_token()}', 'Authorization': f'bearer {self._get_device_token()}',
})['token'] })['token']
return self._MEDIATOKEN return self._MEDIATOKEN
def _perform_login(self, username, password):
self._get_device_token()
if self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19') and self._get_media_token():
self.write_debug('Skipping logging in')
return
if '@' in username: # don't strictly check if it's email address or not
ep, method = 'user/email', 'email'
else:
ep, method = 'oneTimePassword', 'userId'
login_response = self._download_json(
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
data=json.dumps({
method: username,
'password': password,
}).encode(), headers={
'Authorization': f'bearer {self._get_device_token()}',
'Origin': 'https://abema.tv',
'Referer': 'https://abema.tv/',
'Content-Type': 'application/json',
})
AbemaTVBaseIE._USERTOKEN = login_response['token']
self._get_media_token(True)
auth_cache = {
'device_id': AbemaTVBaseIE._DEVICE_ID,
'usertoken': AbemaTVBaseIE._USERTOKEN,
}
self.cache.store(self._NETRC_MACHINE, username, auth_cache)
def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'): def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
return self._download_json( return self._download_json(
f'https://api.abema.io/{endpoint}', video_id, query=query or {}, f'https://api.abema.io/{endpoint}', video_id, query=query or {},
@ -201,14 +230,14 @@ class AbemaTVBaseIE(InfoExtractor):
class AbemaTVIE(AbemaTVBaseIE): class AbemaTVIE(AbemaTVBaseIE):
_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)' _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
_NETRC_MACHINE = 'abematv'
_TESTS = [{ _TESTS = [{
'url': 'https://abema.tv/video/episode/194-25_s2_p1', 'url': 'https://abema.tv/video/episode/194-25_s2_p1',
'info_dict': { 'info_dict': {
'id': '194-25_s2_p1', 'id': '194-25_s2_p1',
'title': '第1話 「チーズケーキ」 「モーニング再び」', 'title': '第1話 「チーズケーキ」 「モーニング再び」',
'series': '異世界食堂2', 'series': '異世界食堂2',
'series_number': 2, 'season': 'シーズン2',
'season_number': 2,
'episode': '第1話 「チーズケーキ」 「モーニング再び」', 'episode': '第1話 「チーズケーキ」 「モーニング再び」',
'episode_number': 1, 'episode_number': 1,
}, },
@ -220,7 +249,7 @@ class AbemaTVIE(AbemaTVBaseIE):
'title': 'ゆるキャン△ SEASON 全話一挙【無料ビデオ72時間】', 'title': 'ゆるキャン△ SEASON 全話一挙【無料ビデオ72時間】',
'series': 'ゆるキャン△ SEASON', 'series': 'ゆるキャン△ SEASON',
'episode': 'ゆるキャン△ SEASON 全話一挙【無料ビデオ72時間】', 'episode': 'ゆるキャン△ SEASON 全話一挙【無料ビデオ72時間】',
'series_number': 2, 'season_number': 2,
'episode_number': 1, 'episode_number': 1,
'description': 'md5:9c5a3172ae763278f9303922f0ea5b17', 'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
}, },
@ -249,33 +278,6 @@ class AbemaTVIE(AbemaTVBaseIE):
}] }]
_TIMETABLE = None _TIMETABLE = None
def _perform_login(self, username, password):
self._get_device_token()
if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
self.write_debug('Skipping logging in')
return
if '@' in username: # don't strictly check if it's email address or not
ep, method = 'user/email', 'email'
else:
ep, method = 'oneTimePassword', 'userId'
login_response = self._download_json(
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
data=json.dumps({
method: username,
'password': password
}).encode('utf-8'), headers={
'Authorization': f'bearer {self._get_device_token()}',
'Origin': 'https://abema.tv',
'Referer': 'https://abema.tv/',
'Content-Type': 'application/json',
})
AbemaTVBaseIE._USERTOKEN = login_response['token']
self._get_media_token(True)
self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN)
def _real_extract(self, url): def _real_extract(self, url):
# starting download using infojson from this extractor is undefined behavior, # starting download using infojson from this extractor is undefined behavior,
# and never be fixed in the future; you must trigger downloads by directly specifying URL. # and never be fixed in the future; you must trigger downloads by directly specifying URL.
@ -331,7 +333,7 @@ class AbemaTVIE(AbemaTVBaseIE):
description = self._html_search_regex( description = self._html_search_regex(
(r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div', (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',), r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div'),
webpage, 'description', default=None, group=1) webpage, 'description', default=None, group=1)
if not description: if not description:
og_desc = self._html_search_meta( og_desc = self._html_search_meta(
@ -344,17 +346,18 @@ class AbemaTVIE(AbemaTVBaseIE):
)? )?
''', r'\1', og_desc) ''', r'\1', og_desc)
# canonical URL may contain series and episode number # canonical URL may contain season and episode number
mobj = re.search(r's(\d+)_p(\d+)$', canonical_url) mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
if mobj: if mobj:
seri = int_or_none(mobj.group(1), default=float('inf')) seri = int_or_none(mobj.group(1), default=float('inf'))
epis = int_or_none(mobj.group(2), default=float('inf')) epis = int_or_none(mobj.group(2), default=float('inf'))
info['series_number'] = seri if seri < 100 else None info['season_number'] = seri if seri < 100 else None
# some anime like Detective Conan (though not available in AbemaTV) # some anime like Detective Conan (though not available in AbemaTV)
# has more than 1000 episodes (1026 as of 2021/11/15) # has more than 1000 episodes (1026 as of 2021/11/15)
info['episode_number'] = epis if epis < 2000 else None info['episode_number'] = epis if epis < 2000 else None
is_live, m3u8_url = False, None is_live, m3u8_url = False, None
availability = 'public'
if video_type == 'now-on-air': if video_type == 'now-on-air':
is_live = True is_live = True
channel_url = 'https://api.abema.io/v1/channels' channel_url = 'https://api.abema.io/v1/channels'
@ -372,13 +375,13 @@ class AbemaTVIE(AbemaTVBaseIE):
f'https://api.abema.io/v1/video/programs/{video_id}', video_id, f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
note='Checking playability', note='Checking playability',
headers=headers) headers=headers)
ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType')) if not traverse_obj(api_response, ('label', 'free', {bool})):
if 3 not in ondemand_types:
# cannot acquire decryption key for these streams # cannot acquire decryption key for these streams
self.report_warning('This is a premium-only stream') self.report_warning('This is a premium-only stream')
availability = 'premium_only'
info.update(traverse_obj(api_response, { info.update(traverse_obj(api_response, {
'series': ('series', 'title'), 'series': ('series', 'title'),
'season': ('season', 'title'), 'season': ('season', 'name'),
'season_number': ('season', 'sequence'), 'season_number': ('season', 'sequence'),
'episode_number': ('episode', 'number'), 'episode_number': ('episode', 'number'),
})) }))
@ -395,6 +398,7 @@ class AbemaTVIE(AbemaTVBaseIE):
headers=headers) headers=headers)
if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False): if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
self.report_warning('This is a premium-only stream') self.report_warning('This is a premium-only stream')
availability = 'premium_only'
m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8' m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
else: else:
@ -412,19 +416,25 @@ class AbemaTVIE(AbemaTVBaseIE):
'description': description, 'description': description,
'formats': formats, 'formats': formats,
'is_live': is_live, 'is_live': is_live,
'availability': availability,
}) })
if thumbnail := update_url(self._og_search_thumbnail(webpage, default=''), query=None):
info['thumbnails'] = [{'url': thumbnail}]
return info return info
class AbemaTVTitleIE(AbemaTVBaseIE): class AbemaTVTitleIE(AbemaTVBaseIE):
_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)' _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/#]+)/?(?:\?(?:[^#]+&)?s=(?P<season>[^&#]+))?'
_PAGE_SIZE = 25 _PAGE_SIZE = 25
_TESTS = [{ _TESTS = [{
'url': 'https://abema.tv/video/title/90-1597', 'url': 'https://abema.tv/video/title/90-1887',
'info_dict': { 'info_dict': {
'id': '90-1597', 'id': '90-1887',
'title': 'シャッフルアイランド', 'title': 'シャッフルアイランド',
'description': 'md5:61b2425308f41a5282a926edda66f178',
}, },
'playlist_mincount': 2, 'playlist_mincount': 2,
}, { }, {
@ -432,41 +442,54 @@ class AbemaTVTitleIE(AbemaTVBaseIE):
'info_dict': { 'info_dict': {
'id': '193-132', 'id': '193-132',
'title': '真心が届く~僕とスターのオフィス・ラブ!?~', 'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
'description': 'md5:9b59493d1f3a792bafbc7319258e7af8',
}, },
'playlist_mincount': 16, 'playlist_mincount': 16,
}, { }, {
'url': 'https://abema.tv/video/title/25-102', 'url': 'https://abema.tv/video/title/25-1nzan-whrxe',
'info_dict': { 'info_dict': {
'id': '25-102', 'id': '25-1nzan-whrxe',
'title': 'ソードアート・オンライン アリシゼーション', 'title': 'ソードアート・オンライン',
'description': 'md5:c094904052322e6978495532bdbf06e6',
}, },
'playlist_mincount': 24, 'playlist_mincount': 25,
}, {
'url': 'https://abema.tv/video/title/26-2mzbynr-cph?s=26-2mzbynr-cph_s40',
'info_dict': {
'title': '〈物語〉シリーズ',
'id': '26-2mzbynr-cph',
'description': 'md5:e67873de1c88f360af1f0a4b84847a52',
},
'playlist_count': 59,
}] }]
def _fetch_page(self, playlist_id, series_version, page): def _fetch_page(self, playlist_id, series_version, season_id, page):
query = {
'seriesVersion': series_version,
'offset': str(page * self._PAGE_SIZE),
'order': 'seq',
'limit': str(self._PAGE_SIZE),
}
if season_id:
query['seasonId'] = season_id
programs = self._call_api( programs = self._call_api(
f'v1/video/series/{playlist_id}/programs', playlist_id, f'v1/video/series/{playlist_id}/programs', playlist_id,
note=f'Downloading page {page + 1}', note=f'Downloading page {page + 1}',
query={ query=query)
'seriesVersion': series_version,
'offset': str(page * self._PAGE_SIZE),
'order': 'seq',
'limit': str(self._PAGE_SIZE),
})
yield from ( yield from (
self.url_result(f'https://abema.tv/video/episode/{x}') self.url_result(f'https://abema.tv/video/episode/{x}')
for x in traverse_obj(programs, ('programs', ..., 'id'))) for x in traverse_obj(programs, ('programs', ..., 'id')))
def _entries(self, playlist_id, series_version): def _entries(self, playlist_id, series_version, season_id):
return OnDemandPagedList( return OnDemandPagedList(
functools.partial(self._fetch_page, playlist_id, series_version), functools.partial(self._fetch_page, playlist_id, series_version, season_id),
self._PAGE_SIZE) self._PAGE_SIZE)
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id, season_id = self._match_valid_url(url).group('id', 'season')
series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id) series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
return self.playlist_result( return self.playlist_result(
self._entries(playlist_id, series_info['version']), playlist_id=playlist_id, self._entries(playlist_id, series_info['version'], season_id), playlist_id=playlist_id,
playlist_title=series_info.get('title'), playlist_title=series_info.get('title'),
playlist_description=series_info.get('content')) playlist_description=series_info.get('content'))

View File

@ -4,7 +4,7 @@ from .common import InfoExtractor
class AcademicEarthCourseIE(InfoExtractor): class AcademicEarthCourseIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)' _VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
IE_NAME = 'AcademicEarth:Course' IE_NAME = 'AcademicEarth:Course'
_TEST = { _TEST = {
'url': 'http://academicearth.org/playlists/laws-of-nature/', 'url': 'http://academicearth.org/playlists/laws-of-nature/',

View File

@ -43,14 +43,14 @@ class ACastIE(ACastBaseIE):
_VALID_URL = r'''(?x: _VALID_URL = r'''(?x:
https?:// https?://
(?: (?:
(?:(?:embed|www)\.)?acast\.com/| (?:(?:embed|www|shows)\.)?acast\.com/|
play\.acast\.com/s/ play\.acast\.com/s/
) )
(?P<channel>[^/]+)/(?P<id>[^/#?"]+) (?P<channel>[^/?#]+)/(?:episodes/)?(?P<id>[^/#?"]+)
)''' )'''
_EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})'] _EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
_TESTS = [{ _TESTS = [{
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna', 'url': 'https://shows.acast.com/sparpodcast/episodes/2.raggarmordet-rosterurdetforflutna',
'info_dict': { 'info_dict': {
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9', 'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
'ext': 'mp3', 'ext': 'mp3',
@ -59,7 +59,7 @@ class ACastIE(ACastBaseIE):
'timestamp': 1477346700, 'timestamp': 1477346700,
'upload_date': '20161024', 'upload_date': '20161024',
'duration': 2766, 'duration': 2766,
'creator': 'Third Ear Studio', 'creators': ['Third Ear Studio'],
'series': 'Spår', 'series': 'Spår',
'episode': '2. Raggarmordet - Röster ur det förflutna', 'episode': '2. Raggarmordet - Röster ur det förflutna',
'thumbnail': 'https://assets.pippa.io/shows/616ebe1886d7b1398620b943/616ebe33c7e6e70013cae7da.jpg', 'thumbnail': 'https://assets.pippa.io/shows/616ebe1886d7b1398620b943/616ebe33c7e6e70013cae7da.jpg',
@ -67,13 +67,16 @@ class ACastIE(ACastBaseIE):
'display_id': '2.raggarmordet-rosterurdetforflutna', 'display_id': '2.raggarmordet-rosterurdetforflutna',
'season_number': 4, 'season_number': 4,
'season': 'Season 4', 'season': 'Season 4',
} },
}, { }, {
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015', 'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2', 'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
'only_matching': True,
}, { }, {
'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9', 'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
'only_matching': True, 'only_matching': True,
@ -93,13 +96,13 @@ class ACastIE(ACastBaseIE):
'series': 'Democracy Sausage with Mark Kenny', 'series': 'Democracy Sausage with Mark Kenny',
'timestamp': 1684826362, 'timestamp': 1684826362,
'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16', 'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16',
} },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
channel, display_id = self._match_valid_url(url).groups() channel, display_id = self._match_valid_url(url).groups()
episode = self._call_api( episode = self._call_api(
'%s/episodes/%s' % (channel, display_id), f'{channel}/episodes/{display_id}',
display_id, {'showInfo': 'true'}) display_id, {'showInfo': 'true'})
return self._extract_episode( return self._extract_episode(
episode, self._extract_show_info(episode.get('show') or {})) episode, self._extract_show_info(episode.get('show') or {}))
@ -110,7 +113,7 @@ class ACastChannelIE(ACastBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
(?:www\.)?acast\.com/| (?:(?:www|shows)\.)?acast\.com/|
play\.acast\.com/s/ play\.acast\.com/s/
) )
(?P<id>[^/#?]+) (?P<id>[^/#?]+)
@ -120,17 +123,20 @@ class ACastChannelIE(ACastBaseIE):
'info_dict': { 'info_dict': {
'id': '4efc5294-5385-4847-98bd-519799ce5786', 'id': '4efc5294-5385-4847-98bd-519799ce5786',
'title': 'Today in Focus', 'title': 'Today in Focus',
'description': 'md5:c09ce28c91002ce4ffce71d6504abaae', 'description': 'md5:feca253de9947634605080cd9eeea2bf',
}, },
'playlist_mincount': 200, 'playlist_mincount': 200,
}, { }, {
'url': 'http://play.acast.com/s/ft-banking-weekly', 'url': 'http://play.acast.com/s/ft-banking-weekly',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://shows.acast.com/sparpodcast',
'only_matching': True,
}] }]
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url) return False if ACastIE.suitable(url) else super().suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
show_slug = self._match_id(url) show_slug = self._match_id(url)

View File

@ -3,9 +3,10 @@ from ..utils import (
float_or_none, float_or_none,
format_field, format_field,
int_or_none, int_or_none,
traverse_obj,
parse_codecs, parse_codecs,
parse_qs, parse_qs,
str_or_none,
traverse_obj,
) )
@ -24,7 +25,7 @@ class AcFunVideoBaseIE(InfoExtractor):
'width': int_or_none(video.get('width')), 'width': int_or_none(video.get('width')),
'height': int_or_none(video.get('height')), 'height': int_or_none(video.get('height')),
'tbr': float_or_none(video.get('avgBitrate')), 'tbr': float_or_none(video.get('avgBitrate')),
**parse_codecs(video.get('codecs', '')) **parse_codecs(video.get('codecs', '')),
}) })
return { return {
@ -76,7 +77,7 @@ class AcFunVideoIE(AcFunVideoBaseIE):
'comment_count': int, 'comment_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg)', 'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17', 'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17',
} },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -129,7 +130,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
'title': '红孩儿之趴趴蛙寻石记 第5话 ', 'title': '红孩儿之趴趴蛙寻石记 第5话 ',
'duration': 760.0, 'duration': 760.0,
'season': '红孩儿之趴趴蛙寻石记', 'season': '红孩儿之趴趴蛙寻石记',
'season_id': 5023171, 'season_id': '5023171',
'season_number': 1, # series has only 1 season 'season_number': 1, # series has only 1 season
'episode': 'Episode 5', 'episode': 'Episode 5',
'episode_number': 5, 'episode_number': 5,
@ -146,7 +147,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
'title': '叽歪老表(第二季) 第5话 坚不可摧', 'title': '叽歪老表(第二季) 第5话 坚不可摧',
'season': '叽歪老表(第二季)', 'season': '叽歪老表(第二季)',
'season_number': 2, 'season_number': 2,
'season_id': 6065485, 'season_id': '6065485',
'episode': '坚不可摧', 'episode': '坚不可摧',
'episode_number': 5, 'episode_number': 5,
'upload_date': '20220324', 'upload_date': '20220324',
@ -191,7 +192,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
'title': json_bangumi_data.get('showTitle'), 'title': json_bangumi_data.get('showTitle'),
'thumbnail': json_bangumi_data.get('image'), 'thumbnail': json_bangumi_data.get('image'),
'season': json_bangumi_data.get('bangumiTitle'), 'season': json_bangumi_data.get('bangumiTitle'),
'season_id': season_id, 'season_id': str_or_none(season_id),
'season_number': season_number, 'season_number': season_number,
'episode': json_bangumi_data.get('title'), 'episode': json_bangumi_data.get('title'),
'episode_number': episode_number, 'episode_number': episode_number,

View File

@ -3,33 +3,53 @@ import binascii
import json import json
import os import os
import random import random
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_b64decode
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ass_subtitles_timecode,
bytes_to_intlist,
bytes_to_long,
ExtractorError, ExtractorError,
ass_subtitles_timecode,
bytes_to_long,
float_or_none, float_or_none,
int_or_none, int_or_none,
intlist_to_bytes, join_nonempty,
long_to_bytes, long_to_bytes,
parse_iso8601,
pkcs1pad, pkcs1pad,
str_or_none,
strip_or_none, strip_or_none,
try_get, try_get,
unified_strdate, unified_strdate,
urlencode_postdata, urlencode_postdata,
) )
from ..utils.traversal import traverse_obj
class ADNIE(InfoExtractor): class ADNBaseIE(InfoExtractor):
IE_DESC = 'Animation Digital Network' IE_DESC = 'Animation Digital Network'
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)' _NETRC_MACHINE = 'animationdigitalnetwork'
_BASE = 'animationdigitalnetwork.fr'
_API_BASE_URL = f'https://gw.api.{_BASE}/'
_PLAYER_BASE_URL = f'{_API_BASE_URL}player/'
_HEADERS = {}
_LOGIN_ERR_MESSAGE = 'Unable to log in'
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
_POS_ALIGN_MAP = {
'start': 1,
'end': 3,
}
_LINE_ALIGN_MAP = {
'middle': 8,
'end': 4,
}
class ADNIE(ADNBaseIE):
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir', 'url': 'https://animationdigitalnetwork.com/video/558-fruits-basket/9841-episode-1-a-ce-soir',
'md5': '1c9ef066ceb302c86f80c2b371615261', 'md5': '1c9ef066ceb302c86f80c2b371615261',
'info_dict': { 'info_dict': {
'id': '9841', 'id': '9841',
@ -44,29 +64,32 @@ class ADNIE(InfoExtractor):
'season_number': 1, 'season_number': 1,
'episode': 'À ce soir !', 'episode': 'À ce soir !',
'episode_number': 1, 'episode_number': 1,
'thumbnail': str,
'season': 'Season 1',
}, },
'skip': 'Only available in region (FR, ...)', 'skip': 'Only available in French and German speaking Europe',
}, { }, {
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', 'url': 'https://animationdigitalnetwork.com/de/video/973-the-eminence-in-shadow/23550-folge-1',
'only_matching': True, 'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
'info_dict': {
'id': '23550',
'ext': 'mp4',
'episode_number': 1,
'duration': 1417,
'release_date': '20231004',
'series': 'The Eminence in Shadow',
'season_number': 2,
'episode': str,
'title': str,
'thumbnail': str,
'season': 'Season 2',
'comment_count': int,
'average_rating': float,
'description': str,
},
# 'skip': 'Only available in French and German speaking Europe',
}] }]
_NETRC_MACHINE = 'animationdigitalnetwork'
_BASE = 'animationdigitalnetwork.fr'
_API_BASE_URL = 'https://gw.api.' + _BASE + '/'
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
_HEADERS = {}
_LOGIN_ERR_MESSAGE = 'Unable to log in'
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
_POS_ALIGN_MAP = {
'start': 1,
'end': 3,
}
_LINE_ALIGN_MAP = {
'middle': 8,
'end': 4,
}
def _get_subtitles(self, sub_url, video_id): def _get_subtitles(self, sub_url, video_id):
if not sub_url: if not sub_url:
return None return None
@ -83,9 +106,9 @@ class ADNIE(InfoExtractor):
# http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes( dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
compat_b64decode(enc_subtitles[24:]), base64.b64decode(enc_subtitles[24:]),
binascii.unhexlify(self._K + '7fac1178830cfe0c'), binascii.unhexlify(self._K + '7fac1178830cfe0c'),
compat_b64decode(enc_subtitles[:24]))) base64.b64decode(enc_subtitles[:24])))
subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False) subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False)
if not subtitles_json: if not subtitles_json:
return None return None
@ -108,7 +131,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
if start is None or end is None or text is None: if start is None or end is None or text is None:
continue continue
alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0) alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0)
ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % ( ssa += os.linesep + 'Dialogue: Marked=0,{},{},Default,,0,0,0,,{}{}'.format(
ass_subtitles_timecode(start), ass_subtitles_timecode(start),
ass_subtitles_timecode(end), ass_subtitles_timecode(end),
'{\\a%d}' % alignment if alignment != 2 else '', '{\\a%d}' % alignment if alignment != 2 else '',
@ -116,6 +139,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
if sub_lang == 'vostf': if sub_lang == 'vostf':
sub_lang = 'fr' sub_lang = 'fr'
elif sub_lang == 'vostde':
sub_lang = 'de'
subtitles.setdefault(sub_lang, []).extend([{ subtitles.setdefault(sub_lang, []).extend([{
'ext': 'json', 'ext': 'json',
'data': json.dumps(sub), 'data': json.dumps(sub),
@ -137,7 +162,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
'username': username, 'username': username,
})) or {}).get('accessToken') })) or {}).get('accessToken')
if access_token: if access_token:
self._HEADERS = {'authorization': 'Bearer ' + access_token} self._HEADERS['Authorization'] = f'Bearer {access_token}'
except ExtractorError as e: except ExtractorError as e:
message = None message = None
if isinstance(e.cause, HTTPError) and e.cause.status == 401: if isinstance(e.cause, HTTPError) and e.cause.status == 401:
@ -147,8 +172,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
self.report_warning(message or self._LOGIN_ERR_MESSAGE) self.report_warning(message or self._LOGIN_ERR_MESSAGE)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) lang, video_id = self._match_valid_url(url).group('lang', 'id')
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id self._HEADERS['X-Target-Distribution'] = lang or 'fr'
video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/'
player = self._download_json( player = self._download_json(
video_base_url + 'configuration', video_id, video_base_url + 'configuration', video_id,
'Downloading player config JSON metadata', 'Downloading player config JSON metadata',
@ -157,26 +183,29 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
user = options['user'] user = options['user']
if not user.get('hasAccess'): if not user.get('hasAccess'):
self.raise_login_required() start_date = traverse_obj(options, ('video', 'startDate', {str}))
if (parse_iso8601(start_date) or 0) > time.time():
raise ExtractorError(f'This video is not available yet. Release date: {start_date}', expected=True)
self.raise_login_required('This video requires a subscription', method='password')
token = self._download_json( token = self._download_json(
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'), user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
video_id, 'Downloading access token', headers={ video_id, 'Downloading access token', headers={
'x-player-refresh-token': user['refreshToken'] 'X-Player-Refresh-Token': user['refreshToken'],
}, data=b'')['token'] }, data=b'')['token']
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link') links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
self._K = ''.join(random.choices('0123456789abcdef', k=16)) self._K = ''.join(random.choices('0123456789abcdef', k=16))
message = bytes_to_intlist(json.dumps({ message = list(json.dumps({
'k': self._K, 'k': self._K,
't': token, 't': token,
})) }).encode())
# Sometimes authentication fails for no good reason, retry with # Sometimes authentication fails for no good reason, retry with
# a different random padding # a different random padding
links_data = None links_data = None
for _ in range(3): for _ in range(3):
padded_message = intlist_to_bytes(pkcs1pad(message, 128)) padded_message = bytes(pkcs1pad(message, 128))
n, e = self._RSA_KEY n, e = self._RSA_KEY
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n)) encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
authorization = base64.b64encode(encrypted_message).decode() authorization = base64.b64encode(encrypted_message).decode()
@ -184,12 +213,13 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
try: try:
links_data = self._download_json( links_data = self._download_json(
links_url, video_id, 'Downloading links JSON metadata', headers={ links_url, video_id, 'Downloading links JSON metadata', headers={
'X-Player-Token': authorization 'X-Player-Token': authorization,
**self._HEADERS,
}, query={ }, query={
'freeWithAds': 'true', 'freeWithAds': 'true',
'adaptive': 'false', 'adaptive': 'false',
'withMetadata': 'true', 'withMetadata': 'true',
'source': 'Web' 'source': 'Web',
}) })
break break
except ExtractorError as e: except ExtractorError as e:
@ -202,7 +232,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
error = self._parse_json(e.cause.response.read(), video_id) error = self._parse_json(e.cause.response.read(), video_id)
message = error.get('message') message = error.get('message')
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country': if e.cause.status == 403 and error.get('code') == 'player-bad-geolocation-country':
self.raise_geo_restricted(msg=message) self.raise_geo_restricted(msg=message)
raise ExtractorError(message) raise ExtractorError(message)
else: else:
@ -221,7 +251,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
for quality, load_balancer_url in qualities.items(): for quality, load_balancer_url in qualities.items():
load_balancer_data = self._download_json( load_balancer_data = self._download_json(
load_balancer_url, video_id, load_balancer_url, video_id,
'Downloading %s %s JSON metadata' % (format_id, quality), f'Downloading {format_id} {quality} JSON metadata',
headers=self._HEADERS,
fatal=False) or {} fatal=False) or {}
m3u8_url = load_balancer_data.get('location') m3u8_url = load_balancer_data.get('location')
if not m3u8_url: if not m3u8_url:
@ -232,11 +263,17 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
if format_id == 'vf': if format_id == 'vf':
for f in m3u8_formats: for f in m3u8_formats:
f['language'] = 'fr' f['language'] = 'fr'
elif format_id == 'vde':
for f in m3u8_formats:
f['language'] = 'de'
formats.extend(m3u8_formats) formats.extend(m3u8_formats)
if not formats:
self.raise_login_required('This video requires a subscription', method='password')
video = (self._download_json( video = (self._download_json(
self._API_BASE_URL + 'video/%s' % video_id, video_id, self._API_BASE_URL + f'video/{video_id}', video_id,
'Downloading additional video metadata', fatal=False) or {}).get('video') or {} 'Downloading additional video metadata', fatal=False, headers=self._HEADERS) or {}).get('video') or {}
show = video.get('show') or {} show = video.get('show') or {}
return { return {
@ -255,3 +292,38 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
'average_rating': float_or_none(video.get('rating') or metas.get('rating')), 'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
'comment_count': int_or_none(video.get('commentsCount')), 'comment_count': int_or_none(video.get('commentsCount')),
} }
class ADNSeasonIE(ADNBaseIE):
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>\d+)[^/?#]*/?(?:$|[#?])'
_TESTS = [{
'url': 'https://animationdigitalnetwork.com/video/911-tokyo-mew-mew-new',
'playlist_count': 12,
'info_dict': {
'id': '911',
'title': 'Tokyo Mew Mew New',
},
# 'skip': 'Only available in French end German speaking Europe',
}]
def _real_extract(self, url):
lang, video_show_slug = self._match_valid_url(url).group('lang', 'id')
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
show = self._download_json(
f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug,
'Downloading show JSON metadata', headers=self._HEADERS)['show']
show_id = str(show['id'])
episodes = self._download_json(
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
'Downloading episode list', headers=self._HEADERS, query={
'order': 'asc',
'limit': '-1',
})
def entries():
for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})):
yield self.url_result(join_nonempty(
'https://animationdigitalnetwork.com', lang, 'video',
video_show_slug, episode_id, delim='/'), ADNIE, episode_id)
return self.playlist_result(entries(), show_id, show.get('title'))

View File

@ -1,8 +1,6 @@
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urlparse,
)
class AdobeConnectIE(InfoExtractor): class AdobeConnectIE(InfoExtractor):
@ -12,13 +10,13 @@ class AdobeConnectIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_extract_title(webpage) title = self._html_extract_title(webpage)
qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1]) qs = urllib.parse.parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
is_live = qs.get('isLive', ['false'])[0] == 'true' is_live = qs.get('isLive', ['false'])[0] == 'true'
formats = [] formats = []
for con_string in qs['conStrings'][0].split(','): for con_string in qs['conStrings'][0].split(','):
formats.append({ formats.append({
'format_id': con_string.split('://')[0], 'format_id': con_string.split('://')[0],
'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]), 'app': urllib.parse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
'ext': 'flv', 'ext': 'flv',
'play_path': 'mp4:' + qs['streamName'][0], 'play_path': 'mp4:' + qs['streamName'][0],
'rtmp_conn': 'S:' + qs['ticket'][0], 'rtmp_conn': 'S:' + qs['ticket'][0],

File diff suppressed because it is too large Load Diff

View File

@ -2,13 +2,12 @@ import functools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
ISO639Utils,
OnDemandPagedList,
float_or_none, float_or_none,
int_or_none, int_or_none,
ISO639Utils,
join_nonempty, join_nonempty,
OnDemandPagedList,
parse_duration, parse_duration,
str_or_none, str_or_none,
str_to_int, str_to_int,
@ -36,7 +35,7 @@ class AdobeTVBaseIE(InfoExtractor):
return subtitles return subtitles
def _parse_video_data(self, video_data): def _parse_video_data(self, video_data):
video_id = compat_str(video_data['id']) video_id = str(video_data['id'])
title = video_data['title'] title = video_data['title']
s3_extracted = False s3_extracted = False
@ -151,7 +150,7 @@ class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
page += 1 page += 1
query['page'] = page query['page'] = page
for element_data in self._call_api( for element_data in self._call_api(
self._RESOURCE, display_id, query, 'Download Page %d' % page): self._RESOURCE, display_id, query, f'Download Page {page}'):
yield self._process_data(element_data) yield self._process_data(element_data)
def _extract_playlist_entries(self, display_id, query): def _extract_playlist_entries(self, display_id, query):

View File

@ -91,7 +91,7 @@ class AdultSwimIE(TurnerBaseIE):
getShowBySlug(slug:"%s") { getShowBySlug(slug:"%s") {
%%s %%s
} }
}''' % show_path }''' % show_path # noqa: UP031
if episode_path: if episode_path:
query = query % '''title query = query % '''title
getVideoBySlug(slug:"%s") { getVideoBySlug(slug:"%s") {
@ -107,7 +107,6 @@ class AdultSwimIE(TurnerBaseIE):
title title
tvRating tvRating
}''' % episode_path }''' % episode_path
['getVideoBySlug']
else: else:
query = query % '''metaDescription query = query % '''metaDescription
title title
@ -129,7 +128,7 @@ class AdultSwimIE(TurnerBaseIE):
episode_title = title = video_data['title'] episode_title = title = video_data['title']
series = show_data.get('title') series = show_data.get('title')
if series: if series:
title = '%s - %s' % (series, title) title = f'{series} - {title}'
info = { info = {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@ -192,7 +191,7 @@ class AdultSwimIE(TurnerBaseIE):
if not slug: if not slug:
continue continue
entries.append(self.url_result( entries.append(self.url_result(
'http://adultswim.com/videos/%s/%s' % (show_path, slug), f'http://adultswim.com/videos/{show_path}/{slug}',
'AdultSwim', video.get('_id'))) 'AdultSwim', video.get('_id')))
return self.playlist_result( return self.playlist_result(
entries, show_path, show_data.get('title'), entries, show_path, show_data.get('title'),

View File

@ -73,8 +73,8 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
def _extract_aetn_info(self, domain, filter_key, filter_value, url): def _extract_aetn_info(self, domain, filter_key, filter_value, url):
requestor_id, brand = self._DOMAIN_MAP[domain] requestor_id, brand = self._DOMAIN_MAP[domain]
result = self._download_json( result = self._download_json(
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand, f'https://feeds.video.aetnd.com/api/v2/{brand}/videos',
filter_value, query={'filter[%s]' % filter_key: filter_value}) filter_value, query={f'filter[{filter_key}]': filter_value})
result = traverse_obj( result = traverse_obj(
result, ('results', result, ('results',
lambda k, v: k == 0 and v[filter_key] == filter_value), lambda k, v: k == 0 and v[filter_key] == filter_value),
@ -93,7 +93,7 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
resource = self._get_mvpd_resource( resource = self._get_mvpd_resource(
requestor_id, theplatform_metadata['title'], requestor_id, theplatform_metadata['title'],
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'), theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
theplatform_metadata['ratings'][0]['rating']) traverse_obj(theplatform_metadata, ('ratings', 0, 'rating')))
auth = self._extract_mvpd_auth( auth = self._extract_mvpd_auth(
url, video_id, requestor_id, resource) url, video_id, requestor_id, resource)
info.update(self._extract_aen_smil(media_url, video_id, auth)) info.update(self._extract_aen_smil(media_url, video_id, auth))
@ -121,18 +121,28 @@ class AENetworksIE(AENetworksBaseIE):
'info_dict': { 'info_dict': {
'id': '22253814', 'id': '22253814',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Winter is Coming', 'title': 'Winter Is Coming',
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74', 'description': 'md5:a40e370925074260b1c8a633c632c63a',
'timestamp': 1338306241, 'timestamp': 1338306241,
'upload_date': '20120529', 'upload_date': '20120529',
'uploader': 'AENE-NEW', 'uploader': 'AENE-NEW',
'duration': 2592.0,
'thumbnail': r're:^https?://.*\.jpe?g$',
'chapters': 'count:5',
'tags': 'count:14',
'categories': ['Mountain Men'],
'episode_number': 1,
'episode': 'Episode 1',
'season': 'Season 1',
'season_number': 1,
'series': 'Mountain Men',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
'skip': 'Geo-restricted - This content is not available in your location.' 'skip': 'Geo-restricted - This content is not available in your location.',
}, { }, {
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1', 'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
'info_dict': { 'info_dict': {
@ -143,6 +153,15 @@ class AENetworksIE(AENetworksBaseIE):
'timestamp': 1452634428, 'timestamp': 1452634428,
'upload_date': '20160112', 'upload_date': '20160112',
'uploader': 'AENE-NEW', 'uploader': 'AENE-NEW',
'duration': 1277.695,
'thumbnail': r're:^https?://.*\.jpe?g$',
'chapters': 'count:4',
'tags': 'count:23',
'episode': 'Episode 1',
'episode_number': 1,
'season': 'Season 9',
'season_number': 9,
'series': 'Duck Dynasty',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -152,28 +171,28 @@ class AENetworksIE(AENetworksBaseIE):
'skip': 'This video is only available for users of participating TV providers.', 'skip': 'This video is only available for users of participating TV providers.',
}, { }, {
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
'only_matching': True 'only_matching': True,
}, { }, {
'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6', 'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
'only_matching': True 'only_matching': True,
}, { }, {
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie', 'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
'only_matching': True 'only_matching': True,
}, { }, {
'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie', 'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
'only_matching': True 'only_matching': True,
}, { }, {
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special', 'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
'only_matching': True 'only_matching': True,
}, { }, {
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story', 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
'only_matching': True 'only_matching': True,
}, { }, {
'url': 'http://www.history.com/videos/history-of-valentines-day', 'url': 'http://www.history.com/videos/history-of-valentines-day',
'only_matching': True 'only_matching': True,
}, { }, {
'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape', 'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
'only_matching': True 'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -190,14 +209,14 @@ class AENetworksListBaseIE(AENetworksBaseIE):
%s(slug: "%s") { %s(slug: "%s") {
%s %s
} }
}''' % (resource, slug, fields), }''' % (resource, slug, fields), # noqa: UP031
}))['data'][resource] }))['data'][resource]
def _real_extract(self, url): def _real_extract(self, url):
domain, slug = self._match_valid_url(url).groups() domain, slug = self._match_valid_url(url).groups()
_, brand = self._DOMAIN_MAP[domain] _, brand = self._DOMAIN_MAP[domain]
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS) playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
base_url = 'http://watch.%s' % domain base_url = f'http://watch.{domain}'
entries = [] entries = []
for item in (playlist.get(self._ITEMS_KEY) or []): for item in (playlist.get(self._ITEMS_KEY) or []):
@ -229,10 +248,10 @@ class AENetworksCollectionIE(AENetworksListBaseIE):
'playlist_mincount': 12, 'playlist_mincount': 12,
}, { }, {
'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us', 'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
'only_matching': True 'only_matching': True,
}, { }, {
'url': 'https://www.historyvault.com/collections/mysteryquest', 'url': 'https://www.historyvault.com/collections/mysteryquest',
'only_matching': True 'only_matching': True,
}] }]
_RESOURCE = 'list' _RESOURCE = 'list'
_ITEMS_KEY = 'items' _ITEMS_KEY = 'items'
@ -290,7 +309,7 @@ class HistoryTopicIE(AENetworksBaseIE):
'info_dict': { 'info_dict': {
'id': '40700995724', 'id': '40700995724',
'ext': 'mp4', 'ext': 'mp4',
'title': "History of Valentines Day", 'title': 'History of Valentines Day',
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', 'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
'timestamp': 1375819729, 'timestamp': 1375819729,
'upload_date': '20130806', 'upload_date': '20130806',
@ -338,12 +357,13 @@ class BiographyIE(AENetworksBaseIE):
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
'skip': '404 Not Found',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
player_url = self._search_regex( player_url = self._search_regex(
r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL, rf'<phoenix-iframe[^>]+src="({HistoryPlayerIE._VALID_URL})',
webpage, 'player URL') webpage, 'player URL')
return self.url_result(player_url, HistoryPlayerIE.ie_key()) return self.url_result(player_url, HistoryPlayerIE.ie_key())

View File

@ -16,8 +16,8 @@ class AeonCoIE(InfoExtractor):
'uploader': 'Semiconductor', 'uploader': 'Semiconductor',
'uploader_id': 'semiconductor', 'uploader_id': 'semiconductor',
'uploader_url': 'https://vimeo.com/semiconductor', 'uploader_url': 'https://vimeo.com/semiconductor',
'duration': 348 'duration': 348,
} },
}, { }, {
'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it', 'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
'md5': '03582d795382e49f2fd0b427b55de409', 'md5': '03582d795382e49f2fd0b427b55de409',
@ -29,8 +29,8 @@ class AeonCoIE(InfoExtractor):
'uploader': 'Aeon Video', 'uploader': 'Aeon Video',
'uploader_id': 'aeonvideo', 'uploader_id': 'aeonvideo',
'uploader_url': 'https://vimeo.com/aeonvideo', 'uploader_url': 'https://vimeo.com/aeonvideo',
'duration': 1344 'duration': 1344,
} },
}, { }, {
'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out', 'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out',
'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b', 'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b',

View File

@ -1,142 +1,26 @@
import datetime as dt
import functools import functools
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import Request
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList, OnDemandPagedList,
date_from_str, UserNotLive,
determine_ext, determine_ext,
filter_dict,
int_or_none, int_or_none,
qualities, orderedSet,
traverse_obj, parse_iso8601,
unified_strdate,
unified_timestamp,
update_url_query,
url_or_none, url_or_none,
urlencode_postdata, urlencode_postdata,
xpath_text, urljoin,
) )
from ..utils.traversal import traverse_obj
class AfreecaTVIE(InfoExtractor): class AfreecaTVBaseIE(InfoExtractor):
IE_NAME = 'afreecatv'
IE_DESC = 'afreecatv.com'
_VALID_URL = r'''(?x)
https?://
(?:
(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
(?:
/app/(?:index|read_ucc_bbs)\.cgi|
/player/[Pp]layer\.(?:swf|html)
)\?.*?\bnTitleNo=|
vod\.afreecatv\.com/(PLAYER/STATION|player)/
)
(?P<id>\d+)
'''
_NETRC_MACHINE = 'afreecatv' _NETRC_MACHINE = 'afreecatv'
_TESTS = [{
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
'info_dict': {
'id': '36164052',
'ext': 'mp4',
'title': '데일리 에이프릴 요정들의 시상식!',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160503',
},
'skip': 'Video is gone',
}, {
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
'info_dict': {
'id': '36153164',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
},
'playlist_count': 2,
'playlist': [{
'md5': 'd8b7c174568da61d774ef0203159bf97',
'info_dict': {
'id': '36153164_1',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'upload_date': '20160502',
},
}, {
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
'info_dict': {
'id': '36153164_2',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'upload_date': '20160502',
},
}],
'skip': 'Video is gone',
}, {
# non standard key
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
'info_dict': {
'id': '20170411_BE689A0E_190960999_1_2_h',
'ext': 'mp4',
'title': '혼자사는여자집',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': '♥이슬이',
'uploader_id': 'dasl8121',
'upload_date': '20170411',
'duration': 213,
},
'params': {
'skip_download': True,
},
}, {
# adult content
'url': 'https://vod.afreecatv.com/player/97267690',
'info_dict': {
'id': '20180327_27901457_202289533_1',
'ext': 'mp4',
'title': '[생]빨개요♥ (part 1)',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': '[SA]서아',
'uploader_id': 'bjdyrksu',
'upload_date': '20180327',
'duration': 3601,
},
'params': {
'skip_download': True,
},
'skip': 'The VOD does not exist',
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
}, {
'url': 'https://vod.afreecatv.com/player/96753363',
'info_dict': {
'id': '20230108_9FF5BEE1_244432674_1',
'ext': 'mp4',
'uploader_id': 'rlantnghks',
'uploader': '페이즈으',
'duration': 10840,
'thumbnail': 'http://videoimg.afreecatv.com/php/SnapshotLoad.php?rowKey=20230108_9FF5BEE1_244432674_1_r',
'upload_date': '20230108',
'title': '젠지 페이즈',
},
'params': {
'skip_download': True,
},
}]
@staticmethod
def parse_video_key(key):
video_key = {}
m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
if m:
video_key['upload_date'] = m.group('upload_date')
video_key['part'] = int(m.group('part'))
return video_key
def _perform_login(self, username, password): def _perform_login(self, username, password):
login_form = { login_form = {
@ -150,21 +34,21 @@ class AfreecaTVIE(InfoExtractor):
} }
response = self._download_json( response = self._download_json(
'https://login.afreecatv.com/app/LoginAction.php', None, 'https://login.sooplive.co.kr/app/LoginAction.php', None,
'Logging in', data=urlencode_postdata(login_form)) 'Logging in', data=urlencode_postdata(login_form))
_ERRORS = { _ERRORS = {
-4: 'Your account has been suspended due to a violation of our terms and policies.', -4: 'Your account has been suspended due to a violation of our terms and policies.',
-5: 'https://member.afreecatv.com/app/user_delete_progress.php', -5: 'https://member.sooplive.co.kr/app/user_delete_progress.php',
-6: 'https://login.afreecatv.com/membership/changeMember.php', -6: 'https://login.sooplive.co.kr/membership/changeMember.php',
-8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.", -8: "Hello! Soop here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
-9: 'https://member.afreecatv.com/app/pop_login_block.php', -9: 'https://member.sooplive.co.kr/app/pop_login_block.php',
-11: 'https://login.afreecatv.com/afreeca/second_login.php', -11: 'https://login.sooplive.co.kr/afreeca/second_login.php',
-12: 'https://member.afreecatv.com/app/user_security.php', -12: 'https://member.sooplive.co.kr/app/user_security.php',
0: 'The username does not exist or you have entered the wrong password.', 0: 'The username does not exist or you have entered the wrong password.',
-1: 'The username does not exist or you have entered the wrong password.', -1: 'The username does not exist or you have entered the wrong password.',
-3: 'You have entered your username/password incorrectly.', -3: 'You have entered your username/password incorrectly.',
-7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.', -7: 'You cannot use your Global Soop account to access Korean Soop.',
-10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.', -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
-32008: 'You have failed to log in. Please contact our Help Center.', -32008: 'You have failed to log in. Please contact our Help Center.',
} }
@ -173,169 +57,206 @@ class AfreecaTVIE(InfoExtractor):
if result != 1: if result != 1:
error = _ERRORS.get(result, 'You have failed to log in.') error = _ERRORS.get(result, 'You have failed to log in.')
raise ExtractorError( raise ExtractorError(
'Unable to login: %s said: %s' % (self.IE_NAME, error), f'Unable to login: {self.IE_NAME} said: {error}',
expected=True) expected=True)
def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
return self._download_json(Request(
f'https://api.m.sooplive.co.kr/{endpoint}',
data=data, headers=headers, query=query,
extensions={'legacy_ssl': True}), display_id,
'Downloading API JSON', 'Unable to download API JSON')
@staticmethod
def _fixup_thumb(thumb_url):
if not url_or_none(thumb_url):
return None
# Core would determine_ext as 'php' from the url, so we need to provide the real ext
# See: https://github.com/yt-dlp/yt-dlp/issues/11537
return [{'url': thumb_url, 'ext': 'jpg'}]
class AfreecaTVIE(AfreecaTVBaseIE):
IE_NAME = 'soop'
IE_DESC = 'sooplive.co.kr'
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/(?:PLAYER/STATION|player)/(?P<id>\d+)/?(?:$|[?#&])'
_TESTS = [{
'url': 'https://vod.sooplive.co.kr/player/96753363',
'info_dict': {
'id': '20230108_9FF5BEE1_244432674_1',
'ext': 'mp4',
'uploader_id': 'rlantnghks',
'uploader': '페이즈으',
'duration': 10840,
'thumbnail': r're:https?://videoimg\.(?:sooplive\.co\.kr|afreecatv\.com)/.+',
'upload_date': '20230108',
'timestamp': 1673186405,
'title': '젠지 페이즈',
},
'params': {
'skip_download': True,
},
}, {
# non standard key
'url': 'http://vod.sooplive.co.kr/PLAYER/STATION/20515605',
'info_dict': {
'id': '20170411_BE689A0E_190960999_1_2_h',
'ext': 'mp4',
'title': '혼자사는여자집',
'thumbnail': r're:https?://(?:video|st)img\.(?:sooplive\.co\.kr|afreecatv\.com)/.+',
'uploader': '♥이슬이',
'uploader_id': 'dasl8121',
'upload_date': '20170411',
'timestamp': 1491929865,
'duration': 213,
},
'params': {
'skip_download': True,
},
}, {
# adult content
'url': 'https://vod.sooplive.co.kr/player/97267690',
'info_dict': {
'id': '20180327_27901457_202289533_1',
'ext': 'mp4',
'title': '[생]빨개요♥ (part 1)',
'thumbnail': r're:https?://(?:video|st)img\.(?:sooplive\.co\.kr|afreecatv\.com)/.+',
'uploader': '[SA]서아',
'uploader_id': 'bjdyrksu',
'upload_date': '20180327',
'duration': 3601,
},
'params': {
'skip_download': True,
},
'skip': 'The VOD does not exist',
}, {
# adult content
'url': 'https://vod.sooplive.co.kr/player/70395877',
'only_matching': True,
}, {
# subscribers only
'url': 'https://vod.sooplive.co.kr/player/104647403',
'only_matching': True,
}, {
# private
'url': 'https://vod.sooplive.co.kr/player/81669846',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
data = self._call_api(
partial_view = False 'station/video/a/view', video_id, headers={'Referer': url},
adult_view = False data=urlencode_postdata({
for _ in range(2):
data = self._download_json(
'https://api.m.afreecatv.com/station/video/a/view',
video_id, headers={'Referer': url}, data=urlencode_postdata({
'nTitleNo': video_id,
'nApiLevel': 10,
}))['data']
if traverse_obj(data, ('code', {int})) == -6221:
raise ExtractorError('The VOD does not exist', expected=True)
query = {
'nTitleNo': video_id, 'nTitleNo': video_id,
'nStationNo': data['station_no'], 'nApiLevel': 10,
'nBbsNo': data['bbs_no'], }))['data']
}
if partial_view:
query['partialView'] = 'SKIP_ADULT'
if adult_view:
query['adultView'] = 'ADULT_VIEW'
video_xml = self._download_xml(
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
video_id, 'Downloading video info XML%s'
% (' (skipping adult)' if partial_view else ''),
video_id, headers={
'Referer': url,
}, query=query)
flag = xpath_text(video_xml, './track/flag', 'flag', default=None) error_code = traverse_obj(data, ('code', {int}))
if flag and flag == 'SUCCEED': if error_code == -6221:
break raise ExtractorError('The VOD does not exist', expected=True)
if flag == 'PARTIAL_ADULT': elif error_code == -6205:
self.report_warning( raise ExtractorError('This VOD is private', expected=True)
'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
'Only content suitable for all ages will be downloaded. '
'Provide account credentials if you wish to download restricted content.')
partial_view = True
continue
elif flag == 'ADULT':
if not adult_view:
adult_view = True
continue
error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
else:
error = flag
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, error), expected=True)
else:
raise ExtractorError('Unable to download video info')
video_element = video_xml.findall('./track/video')[-1] common_info = traverse_obj(data, {
if video_element is None or video_element.text is None: 'title': ('title', {str}),
raise ExtractorError( 'uploader': ('writer_nick', {str}),
'Video %s does not exist' % video_id, expected=True) 'uploader_id': ('bj_id', {str}),
'duration': ('total_file_duration', {int_or_none(scale=1000)}),
video_url = video_element.text.strip() 'thumbnails': ('thumb', {self._fixup_thumb}),
title = xpath_text(video_xml, './track/title', 'title', fatal=True)
uploader = xpath_text(video_xml, './track/nickname', 'uploader')
uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
duration = int_or_none(xpath_text(
video_xml, './track/duration', 'duration'))
thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
common_entry = {
'uploader': uploader,
'uploader_id': uploader_id,
'thumbnail': thumbnail,
}
info = common_entry.copy()
info.update({
'id': video_id,
'title': title,
'duration': duration,
}) })
if not video_url: entries = []
entries = [] for file_num, file_element in enumerate(
file_elements = video_element.findall('./file') traverse_obj(data, ('files', lambda _, v: url_or_none(v['file']))), start=1):
one = len(file_elements) == 1 file_url = file_element['file']
for file_num, file_element in enumerate(file_elements, start=1): if determine_ext(file_url) == 'm3u8':
file_url = url_or_none(file_element.text) formats = self._extract_m3u8_formats(
if not file_url: file_url, video_id, 'mp4', m3u8_id='hls',
continue note=f'Downloading part {file_num} m3u8 information')
key = file_element.get('key', '') else:
upload_date = unified_strdate(self._search_regex( formats = [{
r'^(\d{8})_', key, 'upload date', default=None)) 'url': file_url,
if upload_date is not None: 'format_id': 'http',
# sometimes the upload date isn't included in the file name }]
# instead, another random ID is, which may parse as a valid
# date but be wildly out of a reasonable range
parsed_date = date_from_str(upload_date)
if parsed_date.year < 2000 or parsed_date.year >= 2100:
upload_date = None
file_duration = int_or_none(file_element.get('duration'))
format_id = key if key else '%s_%s' % (video_id, file_num)
if determine_ext(file_url) == 'm3u8':
formats = self._extract_m3u8_formats(
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls',
note='Downloading part %d m3u8 information' % file_num)
else:
formats = [{
'url': file_url,
'format_id': 'http',
}]
if not formats and not self.get_param('ignore_no_formats'):
continue
file_info = common_entry.copy()
file_info.update({
'id': format_id,
'title': title if one else '%s (part %d)' % (title, file_num),
'upload_date': upload_date,
'duration': file_duration,
'formats': formats,
})
entries.append(file_info)
entries_info = info.copy()
entries_info.update({
'_type': 'multi_video',
'entries': entries,
})
return entries_info
info = { entries.append({
'id': video_id, **common_info,
'title': title, 'id': file_element.get('file_info_key') or f'{video_id}_{file_num}',
'uploader': uploader, 'title': f'{common_info.get("title") or "Untitled"} (part {file_num})',
'uploader_id': uploader_id, 'formats': formats,
'duration': duration, **traverse_obj(file_element, {
'thumbnail': thumbnail, 'duration': ('duration', {int_or_none(scale=1000)}),
} 'timestamp': ('file_start', {parse_iso8601(delimiter=' ', timezone=dt.timedelta(hours=9))}),
}),
if determine_ext(video_url) == 'm3u8':
info['formats'] = self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
else:
app, playpath = video_url.split('mp4:')
info.update({
'url': app,
'ext': 'flv',
'play_path': 'mp4:' + playpath,
'rtmp_live': True, # downloading won't end without this
}) })
return info if traverse_obj(data, ('adult_status', {str})) == 'notLogin':
if not entries:
self.raise_login_required(
'Only users older than 19 are able to watch this video', method='password')
self.report_warning(
'In accordance with local laws and regulations, underage users are '
'restricted from watching adult content. Only content suitable for all '
f'ages will be downloaded. {self._login_hint("password")}')
if not entries and traverse_obj(data, ('sub_upload_type', {str})):
self.raise_login_required('This VOD is for subscribers only', method='password')
if len(entries) == 1:
return {
**entries[0],
'title': common_info.get('title'),
}
common_info['timestamp'] = traverse_obj(entries, (..., 'timestamp'), get_all=False)
return self.playlist_result(entries, video_id, multi_video=True, **common_info)
class AfreecaTVLiveIE(AfreecaTVIE): # XXX: Do not subclass from concrete IE class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
IE_NAME = 'soop:catchstory'
IE_NAME = 'afreecatv:live' IE_DESC = 'sooplive.co.kr catch story'
_VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?' _VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/player/(?P<id>\d+)/catchstory'
_TESTS = [{ _TESTS = [{
'url': 'https://play.afreecatv.com/pyh3646/237852185', 'url': 'https://vod.sooplive.co.kr/player/103247/catchstory',
'info_dict': {
'id': '103247',
},
'playlist_count': 2,
}]
def _real_extract(self, url):
    """Return a playlist built from the catch story referenced by ``url``."""
    story_id = self._match_id(url)
    api_query = {'aStoryListIdx': '', 'nStoryIdx': story_id}
    story_data = self._call_api(
        'catchstory/a/view', story_id, headers={'Referer': url}, query=api_query)
    catch_entries = self._entries(story_data)
    return self.playlist_result(catch_entries, story_id)
def _entries(self, data):
    """Yield one info dict per catch item of the 'catch' stories in ``data``."""
    def is_catch_story(_, story):
        return story['story_type'] == 'catch'

    def has_file_url(_, catch):
        return catch['files'][0]['file']

    # 'files' is always a list with 1 element
    first_file = ('files', 0)
    entry_fields = {
        'id': (*first_file, 'file_info_key', {str}),
        'url': (*first_file, 'file', {url_or_none}),
        'duration': (*first_file, 'duration', {int_or_none(scale=1000)}),
        'title': ('title', {str}),
        'uploader': ('writer_nick', {str}),
        'uploader_id': ('writer_id', {str}),
        'thumbnails': ('thumb', {self._fixup_thumb}),
        'timestamp': ('write_timestamp', {int_or_none}),
    }
    yield from traverse_obj(data, (
        'data', is_catch_story, 'catch_list', has_file_url, entry_fields))
class AfreecaTVLiveIE(AfreecaTVBaseIE):
IE_NAME = 'soop:live'
IE_DESC = 'sooplive.co.kr livestreams'
_VALID_URL = r'https?://play\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)(?:/(?P<bno>\d+))?'
_TESTS = [{
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
'info_dict': { 'info_dict': {
'id': '237852185', 'id': '237852185',
'ext': 'mp4', 'ext': 'mp4',
@ -347,94 +268,121 @@ class AfreecaTVLiveIE(AfreecaTVIE): # XXX: Do not subclass from concrete IE
}, },
'skip': 'Livestream has ended', 'skip': 'Livestream has ended',
}, { }, {
'url': 'http://play.afreeca.com/pyh3646/237852185', 'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://play.afreeca.com/pyh3646', 'url': 'https://play.sooplive.co.kr/pyh3646',
'only_matching': True, 'only_matching': True,
}] }]
_LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php' _LIVE_API_URL = 'https://live.sooplive.co.kr/afreeca/player_live_api.php'
# CDN IDs used as fallbacks: _extract_formats appends them after any API-provided
# CDN ID when it builds the default list of CDNs to try.
# The trailing comment on each entry names the stream host behind that ID.
_WORKING_CDNS = [
'gcp_cdn', # live-global-cdn-v02.sooplive.co.kr
'gs_cdn_pc_app', # pc-app.stream.sooplive.co.kr
'gs_cdn_mobile_web', # mobile-web.stream.sooplive.co.kr
'gs_cdn_pc_web', # pc-web.stream.sooplive.co.kr
]
# CDN IDs that are filtered out of the API-provided value when the default CDN
# list is built; the trailing comment on each entry records the observed failure.
_BAD_CDNS = [
'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve)
'gs_cdn_chromecast', # chromecast.stream.sooplive.co.kr (HTTP Error 400)
'azure_cdn', # live-global-cdn-v01.sooplive.co.kr (cannot resolve)
'aws_cf', # live-global-cdn-v03.sooplive.co.kr (cannot resolve)
'kt_cdn', # kt.stream.sooplive.co.kr (HTTP Error 400)
]
_QUALITIES = ('sd', 'hd', 'hd2k', 'original') def _extract_formats(self, channel_info, broadcast_no, aid):
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.co.kr'
# If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
default_cdn_ids = orderedSet([
*traverse_obj(channel_info, ('CDN', {str}, all, lambda _, v: v not in self._BAD_CDNS)),
*self._WORKING_CDNS,
])
cdn_ids = self._configuration_arg('cdn', default_cdn_ids)
for attempt, cdn_id in enumerate(cdn_ids, start=1):
m3u8_url = traverse_obj(self._download_json(
urljoin(stream_base_url, 'broad_stream_assign.html'), broadcast_no,
f'Downloading {cdn_id} stream info', f'Unable to download {cdn_id} stream info',
fatal=False, query={
'return_type': cdn_id,
'broad_key': f'{broadcast_no}-common-master-hls',
}), ('view_url', {url_or_none}))
try:
return self._extract_m3u8_formats(
m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
headers={'Referer': 'https://play.sooplive.co.kr/'})
except ExtractorError as e:
if attempt == len(cdn_ids):
raise
self.report_warning(
f'{e.cause or e.msg}. Retrying... (attempt {attempt} of {len(cdn_ids)})')
def _real_extract(self, url): def _real_extract(self, url):
broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno') broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno')
password = self.get_param('videopassword') channel_info = traverse_obj(self._download_json(
self._LIVE_API_URL, broadcaster_id, data=urlencode_postdata({'bid': broadcaster_id})),
('CHANNEL', {dict})) or {}
info = self._download_json(self._LIVE_API_URL, broadcaster_id, fatal=False,
data=urlencode_postdata({'bid': broadcaster_id})) or {}
channel_info = info.get('CHANNEL') or {}
broadcaster_id = channel_info.get('BJID') or broadcaster_id broadcaster_id = channel_info.get('BJID') or broadcaster_id
broadcast_no = channel_info.get('BNO') or broadcast_no broadcast_no = channel_info.get('BNO') or broadcast_no
password_protected = channel_info.get('BPWD')
if not broadcast_no: if not broadcast_no:
raise ExtractorError(f'Unable to extract broadcast number ({broadcaster_id} may not be live)', expected=True) result = channel_info.get('RESULT')
if password_protected == 'Y' and password is None: if result == 0:
raise UserNotLive(video_id=broadcaster_id)
elif result == -6:
self.raise_login_required(
'This channel is streaming for subscribers only', method='password')
raise ExtractorError('Unable to extract broadcast number')
password = self.get_param('videopassword')
if channel_info.get('BPWD') == 'Y' and password is None:
raise ExtractorError( raise ExtractorError(
'This livestream is protected by a password, use the --video-password option', 'This livestream is protected by a password, use the --video-password option',
expected=True) expected=True)
formats = [] token_info = traverse_obj(self._download_json(
quality_key = qualities(self._QUALITIES) self._LIVE_API_URL, broadcast_no, 'Downloading access token for stream',
for quality_str in self._QUALITIES: 'Unable to download access token for stream', data=urlencode_postdata(filter_dict({
params = {
'bno': broadcast_no, 'bno': broadcast_no,
'stream_type': 'common', 'stream_type': 'common',
'type': 'aid', 'type': 'aid',
'quality': quality_str, 'quality': 'master',
} 'pwd': password,
if password is not None: }))), ('CHANNEL', {dict})) or {}
params['pwd'] = password aid = token_info.get('AID')
aid_response = self._download_json( if not aid:
self._LIVE_API_URL, broadcast_no, fatal=False, result = token_info.get('RESULT')
data=urlencode_postdata(params), if result == 0:
note=f'Downloading access token for {quality_str} stream', raise ExtractorError('This livestream has ended', expected=True)
errnote=f'Unable to download access token for {quality_str} stream') elif result == -6:
aid = traverse_obj(aid_response, ('CHANNEL', 'AID')) self.raise_login_required('This livestream is for subscribers only', method='password')
if not aid: raise ExtractorError('Unable to extract access token')
continue
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com' formats = self._extract_formats(channel_info, broadcast_no, aid)
stream_info = self._download_json(
f'{stream_base_url}/broad_stream_assign.html', broadcast_no, fatal=False,
query={
'return_type': channel_info.get('CDN', 'gcp_cdn'),
'broad_key': f'{broadcast_no}-common-{quality_str}-hls',
},
note=f'Downloading metadata for {quality_str} stream',
errnote=f'Unable to download metadata for {quality_str} stream') or {}
if stream_info.get('view_url'): station_info = traverse_obj(self._download_json(
formats.append({ 'https://st.sooplive.co.kr/api/get_station_status.php', broadcast_no,
'format_id': quality_str, 'Downloading channel metadata', 'Unable to download channel metadata',
'url': update_url_query(stream_info['view_url'], {'aid': aid}), query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {}
'ext': 'mp4',
'protocol': 'm3u8',
'quality': quality_key(quality_str),
})
station_info = self._download_json(
'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
query={'szBjId': broadcaster_id}, fatal=False,
note='Downloading channel metadata', errnote='Unable to download channel metadata') or {}
return { return {
'id': broadcast_no, 'id': broadcast_no,
'title': channel_info.get('TITLE') or station_info.get('station_title'), 'title': channel_info.get('TITLE') or station_info.get('station_title'),
'uploader': channel_info.get('BJNICK') or station_info.get('station_name'), 'uploader': channel_info.get('BJNICK') or station_info.get('station_name'),
'uploader_id': broadcaster_id, 'uploader_id': broadcaster_id,
'timestamp': unified_timestamp(station_info.get('broad_start')), 'timestamp': parse_iso8601(station_info.get('broad_start'), delimiter=' ', timezone=dt.timedelta(hours=9)),
'formats': formats, 'formats': formats,
'is_live': True, 'is_live': True,
'http_headers': {'Referer': url},
} }
class AfreecaTVUserIE(InfoExtractor): class AfreecaTVUserIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv:user' IE_NAME = 'soop:user'
_VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?' _VALID_URL = r'https?://ch\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)/vods/?(?P<slug_type>[^/?#]+)?'
_TESTS = [{ _TESTS = [{
'url': 'https://bj.afreecatv.com/ryuryu24/vods/review', 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/review',
'info_dict': { 'info_dict': {
'_type': 'playlist', '_type': 'playlist',
'id': 'ryuryu24', 'id': 'ryuryu24',
@ -442,7 +390,7 @@ class AfreecaTVUserIE(InfoExtractor):
}, },
'playlist_count': 218, 'playlist_count': 218,
}, { }, {
'url': 'https://bj.afreecatv.com/parang1995/vods/highlight', 'url': 'https://ch.sooplive.co.kr/parang1995/vods/highlight',
'info_dict': { 'info_dict': {
'_type': 'playlist', '_type': 'playlist',
'id': 'parang1995', 'id': 'parang1995',
@ -450,7 +398,7 @@ class AfreecaTVUserIE(InfoExtractor):
}, },
'playlist_count': 997, 'playlist_count': 997,
}, { }, {
'url': 'https://bj.afreecatv.com/ryuryu24/vods', 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods',
'info_dict': { 'info_dict': {
'_type': 'playlist', '_type': 'playlist',
'id': 'ryuryu24', 'id': 'ryuryu24',
@ -458,7 +406,7 @@ class AfreecaTVUserIE(InfoExtractor):
}, },
'playlist_count': 221, 'playlist_count': 221,
}, { }, {
'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip', 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/balloonclip',
'info_dict': { 'info_dict': {
'_type': 'playlist', '_type': 'playlist',
'id': 'ryuryu24', 'id': 'ryuryu24',
@ -470,12 +418,12 @@ class AfreecaTVUserIE(InfoExtractor):
def _fetch_page(self, user_id, user_type, page): def _fetch_page(self, user_id, user_type, page):
page += 1 page += 1
info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id, info = self._download_json(f'https://chapi.sooplive.co.kr/api/{user_id}/vods/{user_type}', user_id,
query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'}, query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
note=f'Downloading {user_type} video page {page}') note=f'Downloading {user_type} video page {page}')
for item in info['data']: for item in info['data']:
yield self.url_result( yield self.url_result(
f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no']) f'https://vod.sooplive.co.kr/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
def _real_extract(self, url): def _real_extract(self, url):
user_id, user_type = self._match_valid_url(url).group('id', 'slug_type') user_id, user_type = self._match_valid_url(url).group('id', 'slug_type')

View File

@ -146,7 +146,7 @@ class TokFMPodcastIE(InfoExtractor):
'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych', 'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
'info_dict': { 'info_dict': {
'id': '91275', 'id': '91275',
'ext': 'aac', 'ext': 'mp3',
'title': 'md5:a9b15488009065556900169fb8061cce', 'title': 'md5:a9b15488009065556900169fb8061cce',
'episode': 'md5:a9b15488009065556900169fb8061cce', 'episode': 'md5:a9b15488009065556900169fb8061cce',
'series': 'Analizy', 'series': 'Analizy',
@ -164,23 +164,20 @@ class TokFMPodcastIE(InfoExtractor):
raise ExtractorError('No such podcast', expected=True) raise ExtractorError('No such podcast', expected=True)
metadata = metadata[0] metadata = metadata[0]
formats = [] mp3_url = self._download_json(
for ext in ('aac', 'mp3'): 'https://api.podcast.radioagora.pl/api4/getSongUrl',
url_data = self._download_json( media_id, 'Downloading podcast mp3 URL', query={
f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}', 'podcast_id': media_id,
media_id, 'Downloading podcast %s URL' % ext) 'device_id': str(uuid.uuid4()),
# prevents inserting the mp3 (default) multiple times 'ppre': 'false',
if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']: 'audio': 'mp3',
formats.append({ })['link_ssl']
'url': url_data['link_ssl'],
'ext': ext,
'vcodec': 'none',
'acodec': ext,
})
return { return {
'id': media_id, 'id': media_id,
'formats': formats, 'url': mp3_url,
'vcodec': 'none',
'ext': 'mp3',
'title': metadata.get('podcast_name'), 'title': metadata.get('podcast_name'),
'series': metadata.get('series_name'), 'series': metadata.get('series_name'),
'episode': metadata.get('podcast_name'), 'episode': metadata.get('podcast_name'),
@ -206,8 +203,8 @@ class TokFMAuditionIE(InfoExtractor):
} }
@staticmethod @staticmethod
def _create_url(id): def _create_url(video_id):
return f'https://audycje.tokfm.pl/audycja/{id}' return f'https://audycje.tokfm.pl/audycja/{video_id}'
def _real_extract(self, url): def _real_extract(self, url):
audition_id = self._match_id(url) audition_id = self._match_id(url)

View File

@ -1,63 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
)
class AirMozillaIE(InfoExtractor):
    """Extractor for air.mozilla.org pages, whose player is a vid.ly JW Player embed."""

    _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
    _TEST = {
        'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
        'md5': '8d02f53ee39cf006009180e21df1f3ba',
        'info_dict': {
            'id': '6x4q2w',
            'ext': 'mp4',
            'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
            'thumbnail': r're:https?://.*/poster\.jpg',
            'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
            'timestamp': 1422487800,
            'upload_date': '20150128',
            'location': 'SFO Commons',
            'duration': 3780,
            'view_count': int,
            'categories': ['Main', 'Privacy'],
        },
    }

    def _real_extract(self, url):
        """Build the info dict from the vid.ly player config plus values scraped from the page."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')

        # The embed script passes the player configuration to initCallback(...) as JSON
        embed_script = self._download_webpage(f'https://vid.ly/{video_id}/embed', video_id)
        raw_metadata = self._search_regex(
            r'initCallback\((.*)\);', embed_script, 'metadata')
        jwconfig = self._parse_json(raw_metadata, video_id)['config']
        info_dict = self._parse_jwplayer_data(jwconfig, video_id)

        # Optional page values: each lookup is non-fatal, so a miss yields None
        raw_view_count = self._html_search_regex(
            r'Views since archived: ([0-9]+)',
            webpage, 'view count', fatal=False)
        raw_timestamp = self._html_search_regex(
            r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False)
        raw_duration = self._search_regex(
            r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
            webpage, 'duration', fatal=False)

        page_metadata = {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'url': self._og_search_url(webpage),
            'display_id': display_id,
            'description': self._og_search_description(webpage),
            'timestamp': parse_iso8601(raw_timestamp),
            'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
            'duration': parse_duration(raw_duration),
            'view_count': int_or_none(raw_view_count),
            'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
        }
        # Page-level values take precedence over whatever the player config supplied
        info_dict.update(page_metadata)
        return info_dict

View File

@ -5,7 +5,7 @@ from ..utils import (
int_or_none, int_or_none,
mimetype2ext, mimetype2ext,
parse_iso8601, parse_iso8601,
traverse_obj traverse_obj,
) )
@ -26,7 +26,7 @@ class AirTVIE(InfoExtractor):
'view_count': int, 'view_count': int,
'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg', 'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
'timestamp': 1664792603, 'timestamp': 1664792603,
} },
}, { }, {
# with youtube_id # with youtube_id
'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q', 'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
@ -54,7 +54,7 @@ class AirTVIE(InfoExtractor):
'channel': 'Newsflare', 'channel': 'Newsflare',
'duration': 37, 'duration': 37,
'upload_date': '20180511', 'upload_date': '20180511',
} },
}] }]
def _get_formats_and_subtitle(self, json_data, video_id): def _get_formats_and_subtitle(self, json_data, video_id):

View File

@ -22,7 +22,7 @@ class AitubeKZVideoIE(InfoExtractor):
'timestamp': 1667370519, 'timestamp': 1667370519,
'title': 'Ангел хранитель 1 серия', 'title': 'Ангел хранитель 1 серия',
'channel_follower_count': int, 'channel_follower_count': int,
} },
}, { }, {
# embed url # embed url
'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c', 'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c',

View File

@ -1,5 +1,4 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
try_get, try_get,
@ -44,7 +43,7 @@ class AliExpressLiveIE(InfoExtractor):
'title': title, 'title': title,
'thumbnail': data.get('coverUrl'), 'thumbnail': data.get('coverUrl'),
'uploader': try_get( 'uploader': try_get(
data, lambda x: x['followBar']['name'], compat_str), data, lambda x: x['followBar']['name'], str),
'timestamp': float_or_none(data.get('startTimeLong'), scale=1000), 'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
'formats': formats, 'formats': formats,
} }

View File

@ -18,7 +18,7 @@ class AlJazeeraIE(InfoExtractor):
'timestamp': 1636219149, 'timestamp': 1636219149,
'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.', 'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.',
'upload_date': '20211106', 'upload_date': '20211106',
} },
}, { }, {
'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu', 'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu',
'info_dict': { 'info_dict': {
@ -33,7 +33,7 @@ class AlJazeeraIE(InfoExtractor):
BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P<account>\d+)/(?P<player_id>[a-zA-Z0-9]+)_(?P<embed>[^/]+)/index.html\?videoId=(?P<id>\d+)' BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P<account>\d+)/(?P<player_id>[a-zA-Z0-9]+)_(?P<embed>[^/]+)/index.html\?videoId=(?P<id>\d+)'
def _real_extract(self, url): def _real_extract(self, url):
base, post_type, id = self._match_valid_url(url).groups() base, post_type, display_id = self._match_valid_url(url).groups()
wp = { wp = {
'balkans.aljazeera.net': 'ajb', 'balkans.aljazeera.net': 'ajb',
'chinese.aljazeera.net': 'chinese', 'chinese.aljazeera.net': 'chinese',
@ -47,11 +47,11 @@ class AlJazeeraIE(InfoExtractor):
'news': 'news', 'news': 'news',
}[post_type.split('/')[0]] }[post_type.split('/')[0]]
video = self._download_json( video = self._download_json(
f'https://{base}/graphql', id, query={ f'https://{base}/graphql', display_id, query={
'wp-site': wp, 'wp-site': wp,
'operationName': 'ArchipelagoSingleArticleQuery', 'operationName': 'ArchipelagoSingleArticleQuery',
'variables': json.dumps({ 'variables': json.dumps({
'name': id, 'name': display_id,
'postType': post_type, 'postType': post_type,
}), }),
}, headers={ }, headers={
@ -64,7 +64,7 @@ class AlJazeeraIE(InfoExtractor):
embed = 'default' embed = 'default'
if video_id is None: if video_id is None:
webpage = self._download_webpage(url, id) webpage = self._download_webpage(url, display_id)
account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id', account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id',
group=(1, 2, 3, 4), default=(None, None, None, None)) group=(1, 2, 3, 4), default=(None, None, None, None))
@ -73,11 +73,11 @@ class AlJazeeraIE(InfoExtractor):
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': url, 'url': url,
'ie_key': 'Generic' 'ie_key': 'Generic',
} }
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}', 'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}',
'ie_key': 'BrightcoveNew' 'ie_key': 'BrightcoveNew',
} }

View File

@ -1,5 +1,4 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
qualities, qualities,
@ -95,11 +94,11 @@ class AllocineIE(InfoExtractor):
duration = int_or_none(video.get('duration')) duration = int_or_none(video.get('duration'))
view_count = int_or_none(video.get('view_count')) view_count = int_or_none(video.get('view_count'))
timestamp = unified_timestamp(try_get( timestamp = unified_timestamp(try_get(
video, lambda x: x['added_at']['date'], compat_str)) video, lambda x: x['added_at']['date'], str))
else: else:
video_id = display_id video_id = display_id
media_data = self._download_json( media_data = self._download_json(
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id) f'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media={video_id}', display_id)
title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné')) title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné'))
for key, value in media_data['video'].items(): for key, value in media_data['video'].items():
if not key.endswith('Path'): if not key.endswith('Path'):

View File

@ -0,0 +1,252 @@
import functools
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
int_or_none,
join_nonempty,
parse_qs,
urljoin,
)
from ..utils.traversal import traverse_obj
# GraphQL selection set that is interpolated into every query template in _QUERIES.
_FIELDS = '''
_id
clipImageSource
clipImageThumb
clipLink
clipTitle
createdDate
shareId
user { _id }
username
views'''
# Extra selection fields that are only interpolated into the clip queries
# ('clip', 'Clips', 'Mobile Clips'); the montage queries use _FIELDS alone.
_EXTRA_FIELDS = '''
clipLength
clipSizeBytes'''
# GraphQL query strings keyed by name, built once when the module is loaded.
# 'clip' and 'montage' take a single $id and alias their result as `video`;
# 'Clips', 'Montages' and 'Mobile Clips' take $page and $user ('Clips' also
# accepts an optional $game) and alias their result as `videos`.
# NOTE(review): %-formatting is presumably kept (with UP031 suppressed) because the
# templates are full of literal braces - confirm before converting to f-strings.
_QUERIES = {
'clip': '''query ($id: String!) {
video: getClip(clipIdentifier: $id) {
%s %s
}
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
'montage': '''query ($id: String!) {
video: getMontage(clipIdentifier: $id) {
%s
}
}''' % _FIELDS, # noqa: UP031
'Clips': '''query ($page: Int!, $user: String!, $game: Int) {
videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) {
data { %s %s }
}
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
'Montages': '''query ($page: Int!, $user: String!) {
videos: montages(search: createdDate, page: $page, user: $user) {
data { %s }
}
}''' % _FIELDS, # noqa: UP031
'Mobile Clips': '''query ($page: Int!, $user: String!) {
videos: clips(search: createdDate, page: $page, user: $user, mobile: true) {
data { %s %s }
}
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
}
class AllstarBaseIE(InfoExtractor):
    """Shared plumbing for the allstar.gg extractors: GraphQL calls and metadata mapping."""

    @staticmethod
    def _parse_video_data(video_data):
        """Map one clip/montage object from the API onto info-dict keys.

        ``webpage_url`` is added only when both an ID and a media URL were
        found; the extractor identity and ``uploader_url`` keys are then set.
        """
        def media_url_or_none(path):
            # API values are resolved against the media host
            return urljoin('https://media.allstar.gg/', path)

        field_map = {
            'id': ('_id', {str}),
            'display_id': ('shareId', {str}),
            'title': ('clipTitle', {str}),
            'url': ('clipLink', {media_url_or_none}),
            'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}),
            'duration': ('clipLength', {int_or_none}),
            'filesize': ('clipSizeBytes', {int_or_none}),
            'timestamp': ('createdDate', {int_or_none(scale=1000)}),
            'uploader': ('username', {str}),
            'uploader_id': ('user', '_id', {str}),
            'view_count': ('views', {int_or_none}),
        }
        info = traverse_obj(video_data, field_map)

        video_id, media_url = info.get('id'), info.get('url')
        if video_id and media_url:
            # A '/clips/' segment in the media URL marks a clip; anything else is a montage
            basename = 'montage' if '/clips/' not in media_url else 'clip'
            info['webpage_url'] = f'https://allstar.gg/{basename}?{basename}={video_id}'

        # Results are attributed to AllstarIE regardless of which extractor parsed them
        info['extractor_key'] = AllstarIE.ie_key()
        info['extractor'] = AllstarIE.IE_NAME
        info['uploader_url'] = urljoin('https://allstar.gg/u/', info.get('uploader_id'))
        return info

    def _call_api(self, query, variables, path, video_id=None, note=None):
        """Send a GraphQL ``query`` with ``variables`` and return the response data at ``path``.

        Raises ExtractorError with all messages joined by '; ' when the
        response carries error messages.
        """
        payload = json.dumps({'variables': variables, 'query': query}).encode()
        response = self._download_json(
            'https://a1.allstar.gg/graphql', video_id, note=note,
            headers={'content-type': 'application/json'}, data=payload)

        error_messages = traverse_obj(response, ('errors', ..., 'message', {str}))
        if not error_messages:
            return traverse_obj(response, path)
        raise ExtractorError('; '.join(error_messages))
class AllstarIE(AllstarBaseIE):
    """Extractor for single allstar.gg clip and montage pages."""

    _VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?P<type>(?:clip|montage))\?(?P=type)=(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://allstar.gg/clip?clip=64482c2da9eec30008a67d1b',
        'info_dict': {
            'id': '64482c2da9eec30008a67d1b',
            'title': '4K on Inferno',
            'url': 'md5:66befb5381eef0c9456026386c25fa55',
            'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
            'uploader': 'chrk.',
            'ext': 'mp4',
            'duration': 20,
            'filesize': 21199257,
            'timestamp': 1682451501,
            'uploader_id': '62b8bdfc9021052f7905882d',
            'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
            'upload_date': '20230425',
            'view_count': int,
        },
    }, {
        'url': 'https://allstar.gg/clip?clip=8LJLY4JKB',
        'info_dict': {
            'id': '64a1ec6b887f4c0008dc50b8',
            'display_id': '8LJLY4JKB',
            'title': 'AK-47 3K on Mirage',
            'url': 'md5:dde224fd12f035c0e2529a4ae34c4283',
            'ext': 'mp4',
            'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
            'duration': 16,
            'filesize': 30175859,
            'timestamp': 1688333419,
            'uploader': 'cherokee',
            'uploader_id': '62b8bdfc9021052f7905882d',
            'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
            'upload_date': '20230702',
            'view_count': int,
        },
    }, {
        'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c',
        'info_dict': {
            'id': '643e64089da7e9363e1fa66c',
            'display_id': 'APQLGM2IMXW',
            'title': 'cherokee Rapid Fire Snipers Montage',
            'url': 'md5:a3ee356022115db2b27c81321d195945',
            'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
            'ext': 'mp4',
            'timestamp': 1681810448,
            'uploader': 'cherokee',
            'uploader_id': '62b8bdfc9021052f7905882d',
            'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
            'upload_date': '20230418',
            'view_count': int,
        },
    }, {
        'url': 'https://allstar.gg/montage?montage=RILJMH6QOS',
        'info_dict': {
            'id': '64a2697372ce3703de29e868',
            'display_id': 'RILJMH6QOS',
            'title': 'cherokee Rapid Fire Snipers Montage',
            'url': 'md5:d5672e6f88579730c2310a80fdbc4030',
            'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
            'ext': 'mp4',
            'timestamp': 1688365434,
            'uploader': 'cherokee',
            'uploader_id': '62b8bdfc9021052f7905882d',
            'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
            'upload_date': '20230703',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Look up the clip/montage named in the URL and return its info dict."""
        mobj = self._match_valid_url(url)
        # The URL's type segment ('clip' or 'montage') is used as the _QUERIES key
        query_id = mobj.group('type')
        video_id = mobj.group('id')

        video_data = self._call_api(
            _QUERIES.get(query_id), {'id': video_id}, ('data', 'video'), video_id)
        return self._parse_video_data(video_data)
class AllstarProfileIE(AllstarBaseIE):
    """Extractor for allstar.gg user profiles, returned as a paged playlist.

    The `view` URL parameter selects Clips (default), Montages or Mobile Clips;
    the optional `game` parameter is forwarded to the API as a filter.
    """

    _VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?:profile\?user=|u/)(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://allstar.gg/profile?user=62b8bdfc9021052f7905882d',
        'info_dict': {
            'id': '62b8bdfc9021052f7905882d-clips',
            'title': 'cherokee - Clips',
        },
        'playlist_mincount': 15,
    }, {
        'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips',
        'info_dict': {
            'id': '62b8bdfc9021052f7905882d-clips-730',
            'title': 'cherokee - Clips - 730',
        },
        'playlist_mincount': 15,
    }, {
        'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages',
        'info_dict': {
            'id': '62b8bdfc9021052f7905882d-montages',
            'title': 'cherokee - Montages',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips',
        'info_dict': {
            'id': '62b8bdfc9021052f7905882d-mobile',
            'title': 'cherokee - Mobile Clips',
        },
        'playlist_mincount': 1,
    }]

    _PAGE_SIZE = 10

    def _get_page(self, user_id, display_id, game, query, page_num):
        """Yield the parsed entries of one result page.

        `page_num` is the index handed in by the paged list; it is shifted by
        one before being sent to the API.
        """
        page_num += 1

        videos = self._call_api(
            query, {
                'user': user_id,
                'page': page_num,
                'game': game,
            }, ('data', 'videos', 'data'), display_id, f'Downloading page {page_num}')

        # _call_api returns None when the response has no data.videos.data
        # (missing or null); treat that as an empty page instead of letting
        # the loop raise TypeError.
        for video_data in videos or []:
            yield self._parse_video_data(video_data)

    def _real_extract(self, url):
        """Resolve the profile to a user id and build the paged playlist.

        Raises ExtractorError when the user id cannot be determined or the
        requested `view` is not one of the supported playlist types.
        """
        display_id = self._match_id(url)
        profile_data = self._download_json(
            urljoin('https://api.allstar.gg/v1/users/profile/', display_id), display_id)
        user_id = traverse_obj(profile_data, ('data', ('_id'), {str}))
        if not user_id:
            raise ExtractorError('Unable to extract the user id')

        username = traverse_obj(profile_data, ('data', 'profile', ('username'), {str}))
        url_query = parse_qs(url)
        game = traverse_obj(url_query, ('game', 0, {int_or_none}))
        query_id = traverse_obj(url_query, ('view', 0), default='Clips')

        if query_id not in ('Clips', 'Montages', 'Mobile Clips'):
            raise ExtractorError(f'Unsupported playlist URL type {query_id!r}')

        return self.playlist_result(
            OnDemandPagedList(
                functools.partial(
                    self._get_page, user_id, display_id, game, _QUERIES.get(query_id)), self._PAGE_SIZE),
            # Only the first word of the view name goes into the id ('Mobile Clips' -> 'mobile')
            playlist_id=join_nonempty(user_id, query_id.lower().split()[0], game),
            playlist_title=join_nonempty((username or display_id), query_id, game, delim=' - '))

View File

@ -1,9 +1,9 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
parse_iso8601, int_or_none,
parse_duration, parse_duration,
parse_filesize, parse_filesize,
int_or_none, parse_iso8601,
) )
@ -25,7 +25,7 @@ class AlphaPornoIE(InfoExtractor):
'tbr': 1145, 'tbr': 1145,
'categories': list, 'categories': list,
'age_limit': 18, 'age_limit': 18,
} },
} }
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -12,7 +12,7 @@ from ..utils import (
class Alsace20TVBaseIE(InfoExtractor): class Alsace20TVBaseIE(InfoExtractor):
def _extract_video(self, video_id, url=None): def _extract_video(self, video_id, url=None):
info = self._download_json( info = self._download_json(
'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ), f'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key={video_id}&habillage=0&mode=html',
video_id) or {} video_id) or {}
title = info.get('titre') title = info.get('titre')
@ -24,9 +24,9 @@ class Alsace20TVBaseIE(InfoExtractor):
else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False)) else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
webpage = (url and self._download_webpage(url, video_id, fatal=False)) or '' webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage)) thumbnail = url_or_none(dict_get(info, ('image', 'preview')) or self._og_search_thumbnail(webpage))
upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None) upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None upload_date = unified_strdate(f'20{upload_date[:2]}-{upload_date[2:4]}-{upload_date[4:]}') if upload_date else None
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,

View File

@ -0,0 +1,104 @@
import re
from .archiveorg import ArchiveOrgIE
from .common import InfoExtractor
from ..utils import (
InAdvancePagedList,
clean_html,
int_or_none,
orderedSet,
str_to_int,
urljoin,
)
class AltCensoredIE(InfoExtractor):
    """Extractor for altcensored.com watch/embed pages.

    The media is delegated to the matching archive.org item; only the view
    count and category are taken from the altcensored page.
    """

    IE_NAME = 'altcensored'
    _VALID_URL = r'https?://(?:www\.)?altcensored\.com/(?:watch\?v=|embed/)(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.altcensored.com/watch?v=k0srjLSkga8',
        'info_dict': {
            'id': 'youtube-k0srjLSkga8',
            'ext': 'webm',
            'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
            'display_id': 'k0srjLSkga8.webm',
            'release_date': '20180403',
            'creators': ['Virginie Vota'],
            'release_year': 2018,
            'upload_date': '20230318',
            'uploader': 'admin@altcensored.com',
            'description': 'md5:0b38a8fc04103579d5c1db10a247dc30',
            'timestamp': 1679161343,
            'track': 'k0srjLSkga8',
            'duration': 926.09,
            'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
            'view_count': int,
            'categories': ['News & Politics'],
        },
    }]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the archive.org copy."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        raw_category = self._html_search_regex(
            r'<a href="/category/\d+">([^<]+)</a>', webpage, 'category', default=None)
        category = clean_html(raw_category)

        raw_views = self._html_search_regex(
            r'YouTube Views:(?:\s|&nbsp;)*([\d,]+)', webpage, 'view count', default=None)

        return {
            '_type': 'url_transparent',
            'url': f'https://archive.org/details/youtube-{video_id}',
            'ie_key': ArchiveOrgIE.ie_key(),
            'view_count': str_to_int(raw_views),
            'categories': [category] if category else None,
        }
class AltCensoredChannelIE(InfoExtractor):
    """Extractor for altcensored.com channel listings, returned as a paged playlist."""

    IE_NAME = 'altcensored:channel'
    _VALID_URL = r'https?://(?:www\.)?altcensored\.com/channel/(?!page|table)(?P<id>[^/?#]+)'
    _PAGE_SIZE = 24
    _TESTS = [{
        'url': 'https://www.altcensored.com/channel/UCFPTO55xxHqFqkzRZHu4kcw',
        'info_dict': {
            'title': 'Virginie Vota',
            'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
        },
        'playlist_count': 85,
    }, {
        'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
        'info_dict': {
            'title': 'yukikaze775',
            'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
        },
        'playlist_count': 4,
    }, {
        'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw',
        'info_dict': {
            'title': 'Mister Metokur',
            'id': 'UCfYbb7nga6-icsFWWgS-kWw',
        },
        'playlist_count': 121,
    }]

    def _real_extract(self, url):
        """Read the page count from the channel page and build a playlist over all pages."""
        channel_id = self._match_id(url)
        webpage = self._download_webpage(
            url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
        title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False)

        # The backreference only matches pagination links whose text equals
        # the page number in their href; defaults to a single page.
        raw_page_count = self._html_search_regex(
            r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>',
            webpage, 'page count', default='1')
        page_count = int_or_none(raw_page_count)

        def fetch_page(page_idx):
            # The incoming index is shifted by one to form the site's page number
            page_no = page_idx + 1
            page_html = self._download_webpage(
                f'https://altcensored.com/channel/{channel_id}/page/{page_no}',
                channel_id, note=f'Downloading page {page_no}')
            watch_paths = orderedSet(re.findall(r'<a[^>]+href="(/watch\?v=[^"]+)', page_html))
            return [
                self.url_result(urljoin('https://www.altcensored.com', path), AltCensoredIE)
                for path in watch_paths]

        return self.playlist_result(
            InAdvancePagedList(fetch_page, page_count, self._PAGE_SIZE),
            playlist_id=channel_id, playlist_title=title)

View File

@ -1,17 +1,13 @@
import re import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError,
clean_html,
int_or_none,
urlencode_postdata, urlencode_postdata,
urljoin, urljoin,
int_or_none,
clean_html,
ExtractorError
) )
@ -25,7 +21,7 @@ class AluraIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '60095', 'id': '60095',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Referências, ref-set e alter' 'title': 'Referências, ref-set e alter',
}, },
'skip': 'Requires alura account credentials'}, 'skip': 'Requires alura account credentials'},
{ {
@ -34,12 +30,12 @@ class AluraIE(InfoExtractor):
'only_matching': True}, 'only_matching': True},
{ {
'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219', 'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219',
'only_matching': True} 'only_matching': True},
] ]
def _real_extract(self, url): def _real_extract(self, url):
course, video_id = self._match_valid_url(url) course, video_id = self._match_valid_url(url).group('course_name', 'id')
video_url = self._VIDEO_URL % (course, video_id) video_url = self._VIDEO_URL % (course, video_id)
video_dict = self._download_json(video_url, video_id, 'Searching for videos') video_dict = self._download_json(video_url, video_id, 'Searching for videos')
@ -52,7 +48,7 @@ class AluraIE(InfoExtractor):
formats = [] formats = []
for video_obj in video_dict: for video_obj in video_dict:
video_url_m3u8 = video_obj.get('link') video_url_m3u8 = video_obj.get('mp4')
video_format = self._extract_m3u8_formats( video_format = self._extract_m3u8_formats(
video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native', video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False) m3u8_id='hls', fatal=False)
@ -66,7 +62,7 @@ class AluraIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': video_title, 'title': video_title,
"formats": formats 'formats': formats,
} }
def _perform_login(self, username, password): def _perform_login(self, username, password):
@ -95,7 +91,7 @@ class AluraIE(InfoExtractor):
'post url', default=self._LOGIN_URL, group='url') 'post url', default=self._LOGIN_URL, group='url')
if not post_url.startswith('http'): if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url)
response = self._download_webpage( response = self._download_webpage(
post_url, None, 'Logging in', post_url, None, 'Logging in',
@ -107,7 +103,7 @@ class AluraIE(InfoExtractor):
r'(?s)<p[^>]+class="alert-message[^"]*">(.+?)</p>', r'(?s)<p[^>]+class="alert-message[^"]*">(.+?)</p>',
response, 'error message', default=None) response, 'error message', default=None)
if error: if error:
raise ExtractorError('Unable to login: %s' % error, expected=True) raise ExtractorError(f'Unable to login: {error}', expected=True)
raise ExtractorError('Unable to log in') raise ExtractorError('Unable to log in')
@ -123,7 +119,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return False if AluraIE.suitable(url) else super(AluraCourseIE, cls).suitable(url) return False if AluraIE.suitable(url) else super().suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
@ -161,7 +157,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
'url': video_url, 'url': video_url,
'id_key': self.ie_key(), 'id_key': self.ie_key(),
'chapter': chapter, 'chapter': chapter,
'chapter_number': chapter_number 'chapter_number': chapter_number,
} }
entries.append(entry) entries.append(entry)
return self.playlist_result(entries, course_path, course_title) return self.playlist_result(entries, course_path, course_title)

View File

@ -0,0 +1,77 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
parse_iso8601,
url_or_none,
)
from ..utils.traversal import traverse_obj
class AmadeusTVIE(InfoExtractor):
    """Extractor for amadeus.tv library pages; playback info comes from playvideo.qcloud.com."""

    _VALID_URL = r'https?://(?:www\.)?amadeus\.tv/library/(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'http://www.amadeus.tv/library/65091a87ff85af59d9fc54c3',
        'info_dict': {
            'id': '5576678021301411311',
            'ext': 'mp4',
            'title': 'Jieon Park - 第五届珠海莫扎特国际青少年音乐周小提琴C组第三轮',
            'thumbnail': 'http://1253584441.vod2.myqcloud.com/a0046a27vodtransbj1253584441/7db4af535576678021301411311/coverBySnapshot_10_0.jpg',
            'duration': 1264.8,
            'upload_date': '20230918',
            'timestamp': 1695034800,
            'display_id': '65091a87ff85af59d9fc54c3',
            'view_count': int,
            'description': 'md5:a0357b9c215489e2067cbae0b777bb95',
        },
    }]

    def _real_extract(self, url):
        """Resolve the page's video id, fetch its play info and assemble the info dict.

        Raises ExtractorError when the page data carries no video id.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        nuxt_data = self._search_nuxt_data(webpage, display_id, traverse=('fetch', '0'))
        video_id = traverse_obj(nuxt_data, ('item', 'video', {str}))
        if not video_id:
            raise ExtractorError('Unable to extract actual video ID')

        video_data = self._download_json(
            f'http://playvideo.qcloud.com/getplayinfo/v2/1253584441/{video_id}',
            video_id, headers={'Referer': 'http://www.amadeus.tv/'})

        # Candidates are the source video plus every transcode; entries without a valid URL are dropped
        candidates = traverse_obj(
            video_data, ('videoInfo', ('sourceVideo', ('transcodeList', ...)), {dict}))
        formats = [{
            **traverse_obj(video, {
                'url': 'url',
                'format_id': ('definition', {lambda x: f'http-{x or "0"}'}),
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
                'filesize': (('totalSize', 'size'), {int_or_none}),
                'vcodec': ('videoStreamList', 0, 'codec'),
                'acodec': ('audioStreamList', 0, 'codec'),
                'fps': ('videoStreamList', 0, 'fps', {float_or_none}),
            }, get_all=False),
            'http_headers': {'Referer': 'http://www.amadeus.tv/'},
        } for video in candidates if url_or_none(video.get('url'))]

        play_info_meta = traverse_obj(video_data, {
            'title': ('videoInfo', 'basicInfo', 'name', {str}),
            'thumbnail': ('coverInfo', 'coverUrl', {url_or_none}),
            'duration': ('videoInfo', 'sourceVideo', ('floatDuration', 'duration'), {float_or_none}),
        }, get_all=False)
        page_meta = traverse_obj(nuxt_data, ('item', {
            'title': (('title', 'title_en', 'title_cn'), {str}),
            'description': (('description', 'description_en', 'description_cn'), {str}),
            'timestamp': ('date', {parse_iso8601}),
            'view_count': ('view', {int_or_none}),
        }), get_all=False)

        # Page metadata is spread last, so its values win over the play-info ones
        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            **play_info_meta,
            **page_meta,
        }

View File

@ -1,6 +1,6 @@
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE
from .vimeo import VimeoIE from .vimeo import VimeoIE
from .youtube import YoutubeIE
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
@ -25,7 +25,7 @@ class AmaraIE(InfoExtractor):
'uploader': 'PBS NewsHour', 'uploader': 'PBS NewsHour',
'uploader_id': 'PBSNewsHour', 'uploader_id': 'PBSNewsHour',
'timestamp': 1549639570, 'timestamp': 1549639570,
} },
}, { }, {
# Vimeo # Vimeo
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011', 'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
@ -40,8 +40,8 @@ class AmaraIE(InfoExtractor):
'timestamp': 1294763658, 'timestamp': 1294763658,
'upload_date': '20110111', 'upload_date': '20110111',
'uploader': 'Sam Morrill', 'uploader': 'Sam Morrill',
'uploader_id': 'sammorrill' 'uploader_id': 'sammorrill',
} },
}, { }, {
# Direct Link # Direct Link
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/', 'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
@ -55,13 +55,13 @@ class AmaraIE(InfoExtractor):
'subtitles': dict, 'subtitles': dict,
'upload_date': '20091007', 'upload_date': '20091007',
'timestamp': 1254942511, 'timestamp': 1254942511,
} },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
meta = self._download_json( meta = self._download_json(
'https://amara.org/api/videos/%s/' % video_id, f'https://amara.org/api/videos/{video_id}/',
video_id, query={'format': 'json'}) video_id, query={'format': 'json'})
title = meta['title'] title = meta['title']
video_url = meta['all_urls'][0] video_url = meta['all_urls'][0]

View File

@ -61,13 +61,13 @@ class AmazonStoreIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
id = self._match_id(url) playlist_id = self._match_id(url)
for retry in self.RetryManager(): for retry in self.RetryManager():
webpage = self._download_webpage(url, id) webpage = self._download_webpage(url, playlist_id)
try: try:
data_json = self._search_json( data_json = self._search_json(
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id, r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', playlist_id,
transform_source=js_to_json) transform_source=js_to_json)
except ExtractorError as e: except ExtractorError as e:
retry.error = e retry.error = e
@ -81,7 +81,7 @@ class AmazonStoreIE(InfoExtractor):
'height': int_or_none(video.get('videoHeight')), 'height': int_or_none(video.get('videoHeight')),
'width': int_or_none(video.get('videoWidth')), 'width': int_or_none(video.get('videoWidth')),
} for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')] } for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')]
return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title')) return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=data_json.get('title'))
class AmazonReviewsIE(InfoExtractor): class AmazonReviewsIE(InfoExtractor):

View File

@ -22,8 +22,11 @@ class AmazonMiniTVBaseIE(InfoExtractor):
resp = self._download_json( resp = self._download_json(
f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}', f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
asin, note=note, headers={'Content-Type': 'application/json'}, asin, note=note, headers={
data=json.dumps(data).encode() if data else None, 'Content-Type': 'application/json',
'currentpageurl': '/',
'currentplatform': 'dWeb',
}, data=json.dumps(data).encode() if data else None,
query=None if data else { query=None if data else {
'deviceType': 'A1WMMUXPCUJL4N', 'deviceType': 'A1WMMUXPCUJL4N',
'contentId': asin, 'contentId': asin,
@ -46,7 +49,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'May I Kiss You?', 'title': 'May I Kiss You?',
'language': 'Hindi', 'language': 'Hindi',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
'description': 'md5:a549bfc747973e04feb707833474e59d', 'description': 'md5:a549bfc747973e04feb707833474e59d',
'release_timestamp': 1644710400, 'release_timestamp': 1644710400,
'release_date': '20220213', 'release_date': '20220213',
@ -68,7 +71,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Jahaan', 'title': 'Jahaan',
'language': 'Hindi', 'language': 'Hindi',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'description': 'md5:05eb765a77bf703f322f120ec6867339', 'description': 'md5:05eb765a77bf703f322f120ec6867339',
'release_timestamp': 1647475200, 'release_timestamp': 1647475200,
'release_date': '20220317', 'release_date': '20220317',

View File

@ -26,6 +26,7 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'skip': '404 Not Found',
}, { }, {
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge', 'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
'only_matching': True, 'only_matching': True,
@ -63,8 +64,8 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
site, display_id = self._match_valid_url(url).groups() site, display_id = self._match_valid_url(url).groups()
requestor_id = self._REQUESTOR_ID_MAP[site] requestor_id = self._REQUESTOR_ID_MAP[site]
page_data = self._download_json( page_data = self._download_json(
'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}',
% (requestor_id.lower(), display_id), display_id)['data'] display_id)['data']
properties = page_data.get('properties') or {} properties = page_data.get('properties') or {}
query = { query = {
'mbr': 'true', 'mbr': 'true',
@ -75,15 +76,15 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
try: try:
for v in page_data['children']: for v in page_data['children']:
if v.get('type') == 'video-player': if v.get('type') == 'video-player':
releasePid = v['properties']['currentVideo']['meta']['releasePid'] release_pid = v['properties']['currentVideo']['meta']['releasePid']
tp_path = 'M_UwQC/' + releasePid tp_path = 'M_UwQC/' + release_pid
media_url = 'https://link.theplatform.com/s/' + tp_path media_url = 'https://link.theplatform.com/s/' + tp_path
video_player_count += 1 video_player_count += 1
except KeyError: except KeyError:
pass pass
if video_player_count > 1: if video_player_count > 1:
self.report_warning( self.report_warning(
'The JSON data has %d video players. Only one will be extracted' % video_player_count) f'The JSON data has {video_player_count} video players. Only one will be extracted')
# Fall back to videoPid if releasePid not found. # Fall back to videoPid if releasePid not found.
# TODO: Fall back to videoPid if releasePid manifest uses DRM. # TODO: Fall back to videoPid if releasePid manifest uses DRM.
@ -130,7 +131,7 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
}) })
ns_keys = theplatform_metadata.get('$xmlns', {}).keys() ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
if ns_keys: if ns_keys:
ns = list(ns_keys)[0] ns = next(iter(ns_keys))
episode = theplatform_metadata.get(ns + '$episodeTitle') or None episode = theplatform_metadata.get(ns + '$episodeTitle') or None
episode_number = int_or_none( episode_number = int_or_none(
theplatform_metadata.get(ns + '$episode')) theplatform_metadata.get(ns + '$episode'))

View File

@ -87,13 +87,13 @@ class AmericasTestKitchenIE(InfoExtractor):
resource_type = 'episodes' resource_type = 'episodes'
resource = self._download_json( resource = self._download_json(
'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id) f'https://www.americastestkitchen.com/api/v6/{resource_type}/{video_id}', video_id)
video = resource['video'] if is_episode else resource video = resource['video'] if is_episode else resource
episode = resource if is_episode else resource.get('episode') or {} episode = resource if is_episode else resource.get('episode') or {}
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'], 'url': 'https://player.zype.com/embed/{}.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ'.format(video['zypeId']),
'ie_key': 'Zype', 'ie_key': 'Zype',
'description': clean_html(video.get('description')), 'description': clean_html(video.get('description')),
'timestamp': unified_timestamp(video.get('publishDate')), 'timestamp': unified_timestamp(video.get('publishDate')),
@ -174,22 +174,22 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
] ]
if season_number: if season_number:
playlist_id = 'season_%d' % season_number playlist_id = f'season_{season_number}'
playlist_title = 'Season %d' % season_number playlist_title = f'Season {season_number}'
facet_filters.append('search_season_list:' + playlist_title) facet_filters.append('search_season_list:' + playlist_title)
else: else:
playlist_id = show playlist_id = show
playlist_title = title playlist_title = title
season_search = self._download_json( season_search = self._download_json(
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug, f'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_{slug}_season_desc_production',
playlist_id, headers={ playlist_id, headers={
'Origin': 'https://www.americastestkitchen.com', 'Origin': 'https://www.americastestkitchen.com',
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805', 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
'X-Algolia-Application-Id': 'Y1FNZXUI30', 'X-Algolia-Application-Id': 'Y1FNZXUI30',
}, query={ }, query={
'facetFilters': json.dumps(facet_filters), 'facetFilters': json.dumps(facet_filters),
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug, 'attributesToRetrieve': f'description,search_{slug}_episode_number,search_document_date,search_url,title,search_atk_episode_season',
'attributesToHighlight': '', 'attributesToHighlight': '',
'hitsPerPage': 1000, 'hitsPerPage': 1000,
}) })
@ -207,7 +207,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
'description': episode.get('description'), 'description': episode.get('description'),
'timestamp': unified_timestamp(episode.get('search_document_date')), 'timestamp': unified_timestamp(episode.get('search_document_date')),
'season_number': season_number, 'season_number': season_number,
'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)), 'episode_number': int_or_none(episode.get(f'search_{slug}_episode_number')),
'ie_key': AmericasTestKitchenIE.ie_key(), 'ie_key': AmericasTestKitchenIE.ie_key(),
} }

View File

@ -1,7 +1,7 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
ExtractorError, ExtractorError,
determine_ext,
int_or_none, int_or_none,
mimetype2ext, mimetype2ext,
parse_iso8601, parse_iso8601,
@ -19,12 +19,12 @@ class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with
'Unable to download Akamai AMP feed', transform_source=strip_jsonp) 'Unable to download Akamai AMP feed', transform_source=strip_jsonp)
item = feed.get('channel', {}).get('item') item = feed.get('channel', {}).get('item')
if not item: if not item:
raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error'])) raise ExtractorError('{} said: {}'.format(self.IE_NAME, feed['error']))
video_id = item['guid'] video_id = item['guid']
def get_media_node(name, default=None): def get_media_node(name, default=None):
media_name = 'media-%s' % name media_name = f'media-{name}'
media_group = item.get('media-group') or item media_group = item.get('media-group') or item
return media_group.get(media_name) or item.get(media_name) or item.get(name, default) return media_group.get(media_name) or item.get(media_name) or item.get(name, default)

View File

@ -5,7 +5,7 @@ from ..utils import (
int_or_none, int_or_none,
str_or_none, str_or_none,
traverse_obj, traverse_obj,
unified_timestamp unified_timestamp,
) )
@ -29,7 +29,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
'release_date': '20230121', 'release_date': '20230121',
'release_timestamp': 1674285179, 'release_timestamp': 1674285179,
'episode_id': 'e1tpt3d', 'episode_id': 'e1tpt3d',
} },
}, { }, {
# embed url # embed url
'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd', 'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd',
@ -50,7 +50,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
'season': 'Season 2', 'season': 'Season 2',
'season_number': 2, 'season_number': 2,
'episode_id': 'e1shjqd', 'episode_id': 'e1shjqd',
} },
}] }]
_WEBPAGE_TESTS = [{ _WEBPAGE_TESTS = [{
@ -72,7 +72,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg', 'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
'uploader': 'Podcast Tempo', 'uploader': 'Podcast Tempo',
'channel': 'apakatatempo', 'channel': 'apakatatempo',
} },
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1,7 +1,7 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import url_or_none, merge_dicts from ..utils import merge_dicts, url_or_none
class AngelIE(InfoExtractor): class AngelIE(InfoExtractor):
@ -15,8 +15,8 @@ class AngelIE(InfoExtractor):
'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons', 'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons',
'description': 'md5:73b704897c20ab59c433a9c0a8202d5e', 'description': 'md5:73b704897c20ab59c433a9c0a8202d5e',
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$', 'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
'duration': 1359.0 'duration': 1359.0,
} },
}, { }, {
'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name', 'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name',
'md5': 'e4774bad0a5f0ad2e90d175cafdb797d', 'md5': 'e4774bad0a5f0ad2e90d175cafdb797d',
@ -26,8 +26,8 @@ class AngelIE(InfoExtractor):
'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name', 'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name',
'description': 'md5:aadfb4827a94415de5ff6426e6dee3be', 'description': 'md5:aadfb4827a94415de5ff6426e6dee3be',
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$', 'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
'duration': 3276.0 'duration': 3276.0,
} },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -44,7 +44,7 @@ class AngelIE(InfoExtractor):
'title': self._og_search_title(webpage), 'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage),
'formats': formats, 'formats': formats,
'subtitles': subtitles 'subtitles': subtitles,
} }
# Angel uses cloudinary in the background and supports image transformations. # Angel uses cloudinary in the background and supports image transformations.

View File

@ -5,22 +5,26 @@ from ..networking import HEADRequest
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
determine_ext, determine_ext,
make_archive_id,
scale_thumbnails_to_max_format_width, scale_thumbnails_to_max_format_width,
) )
class Ant1NewsGrBaseIE(InfoExtractor): class AntennaBaseIE(InfoExtractor):
def _download_and_extract_api_data(self, video_id, netloc, cid=None): def _download_and_extract_api_data(self, video_id, netloc, cid=None):
url = f'{self.http_scheme()}//{netloc}{self._API_PATH}' info = self._download_json(f'{self.http_scheme()}//{netloc}{self._API_PATH}',
info = self._download_json(url, video_id, query={'cid': cid or video_id}) video_id, query={'cid': cid or video_id})
try: if not info.get('url'):
source = info['url'] raise ExtractorError(f'No source found for {video_id}')
except KeyError:
raise ExtractorError('no source found for %s' % video_id) ext = determine_ext(info['url'])
formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4') if ext == 'm3u8':
if determine_ext(source) == 'm3u8' else ([{'url': source}], {})) formats, subs = self._extract_m3u8_formats_and_subtitles(info['url'], video_id, 'mp4')
else:
formats, subs = [{'url': info['url'], 'format_id': ext}], {}
thumbnails = scale_thumbnails_to_max_format_width( thumbnails = scale_thumbnails_to_max_format_width(
formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+') formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+') if info.get('thumb') else []
return { return {
'id': video_id, 'id': video_id,
'title': info.get('title'), 'title': info.get('title'),
@ -30,21 +34,31 @@ class Ant1NewsGrBaseIE(InfoExtractor):
} }
class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE): class AntennaGrWatchIE(AntennaBaseIE):
IE_NAME = 'ant1newsgr:watch' IE_NAME = 'antenna:watch'
IE_DESC = 'ant1news.gr videos' IE_DESC = 'antenna.gr and ant1news.gr videos'
_VALID_URL = r'https?://(?P<netloc>(?:www\.)?ant1news\.gr)/watch/(?P<id>\d+)/' _VALID_URL = r'https?://(?P<netloc>(?:www\.)?(?:antenna|ant1news)\.gr)/watch/(?P<id>\d+)/'
_API_PATH = '/templates/data/player' _API_PATH = '/templates/data/player'
_TESTS = [{ _TESTS = [{
'url': 'https://www.ant1news.gr/watch/1506168/ant1-news-09112021-stis-18-45', 'url': 'https://www.ant1news.gr/watch/1506168/ant1-news-09112021-stis-18-45',
'md5': '95925e6b32106754235f2417e0d2dfab', 'md5': 'c472d9dd7cd233c63aff2ea42201cda6',
'info_dict': { 'info_dict': {
'id': '1506168', 'id': '1506168',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:0ad00fa66ecf8aa233d26ab0dba7514a', 'title': 'md5:0ad00fa66ecf8aa233d26ab0dba7514a',
'description': 'md5:18665af715a6dcfeac1d6153a44f16b0', 'description': 'md5:18665af715a6dcfeac1d6153a44f16b0',
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/26d46bf6-8158-4f02-b197-7096c714b2de.jpg', 'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/26d46bf6-8158-4f02-b197-7096c714b2de\.jpg',
},
}, {
'url': 'https://www.antenna.gr/watch/1643812/oi-prodotes-epeisodio-01',
'md5': '8f6f7dd3b1dba4d835ba990e25f31243',
'info_dict': {
'id': '1643812',
'ext': 'mp4',
'format_id': 'mp4',
'title': 'ΟΙ ΠΡΟΔΟΤΕΣ ΕΠΕΙΣΟΔΙΟ 01',
'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/b3d63096-e72d-43c4-87a0-00d4363d242f\.jpg',
}, },
}] }]
@ -52,25 +66,26 @@ class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE):
video_id, netloc = self._match_valid_url(url).group('id', 'netloc') video_id, netloc = self._match_valid_url(url).group('id', 'netloc')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
info = self._download_and_extract_api_data(video_id, netloc) info = self._download_and_extract_api_data(video_id, netloc)
info['description'] = self._og_search_description(webpage) info['description'] = self._og_search_description(webpage, default=None)
info['_old_archive_ids'] = [make_archive_id('Ant1NewsGrWatch', video_id)]
return info return info
class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE): class Ant1NewsGrArticleIE(AntennaBaseIE):
IE_NAME = 'ant1newsgr:article' IE_NAME = 'ant1newsgr:article'
IE_DESC = 'ant1news.gr articles' IE_DESC = 'ant1news.gr articles'
_VALID_URL = r'https?://(?:www\.)?ant1news\.gr/[^/]+/article/(?P<id>\d+)/' _VALID_URL = r'https?://(?:www\.)?ant1news\.gr/[^/]+/article/(?P<id>\d+)/'
_TESTS = [{ _TESTS = [{
'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron', 'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron',
'md5': '294f18331bb516539d72d85a82887dcc', 'md5': '57eb8d12181f0fa2b14b0b138e1de9b6',
'info_dict': { 'info_dict': {
'id': '_xvg/m_cmbatw=', 'id': '_xvg/m_cmbatw=',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411', 'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411',
'timestamp': 1603092840, 'timestamp': 1666166520,
'upload_date': '20201019', 'upload_date': '20221019',
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/756206d2-d640-40e2-b201-3555abdfc0db.jpg', 'thumbnail': 'https://ant1media.azureedge.net/imgHandler/1920/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
}, },
}, { }, {
'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn', 'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn',
@ -90,19 +105,19 @@ class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle') info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle')
embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage)) embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage))
if not embed_urls: if not embed_urls:
raise ExtractorError('no videos found for %s' % video_id, expected=True) raise ExtractorError(f'no videos found for {video_id}', expected=True)
return self.playlist_from_matches( return self.playlist_from_matches(
embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(), embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(),
video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')}) video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})
class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE): class Ant1NewsGrEmbedIE(AntennaBaseIE):
IE_NAME = 'ant1newsgr:embed' IE_NAME = 'ant1newsgr:embed'
IE_DESC = 'ant1news.gr embedded videos' IE_DESC = 'ant1news.gr embedded videos'
_BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player' _BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
_VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)' _VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)'
_EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)'] _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
_API_PATH = '/news/templates/data/jsonPlayer' _API_PATH = '/templates/data/jsonPlayer'
_TESTS = [{ _TESTS = [{
'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377', 'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377',

View File

@ -8,10 +8,8 @@ import time
from .common import InfoExtractor from .common import InfoExtractor
from ..aes import aes_encrypt from ..aes import aes_encrypt
from ..utils import ( from ..utils import (
bytes_to_intlist,
determine_ext, determine_ext,
int_or_none, int_or_none,
intlist_to_bytes,
join_nonempty, join_nonempty,
smuggle_url, smuggle_url,
strip_jsonp, strip_jsonp,
@ -33,24 +31,6 @@ class AnvatoIE(InfoExtractor):
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
_TESTS = [{ _TESTS = [{
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
'md5': '921919dab3cd0b849ff3d624831ae3e2',
'info_dict': {
'id': '899441',
'ext': 'mp4',
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
'description': 'md5:85e05a3cc163f8c344340f220521136d',
'upload_date': '20201215',
'timestamp': 1608009755,
'thumbnail': r're:^https?://.*\.jpg',
'uploader': 'NFL',
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
'Player Highlights', 'Cleveland Browns', 'league'],
'duration': 157,
'categories': ['Entertainment', 'Game', 'Highlights'],
},
}, {
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/ # from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455', 'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
'md5': '837718bcfb3a7778d022f857f7a9b19e', 'md5': '837718bcfb3a7778d022f857f7a9b19e',
@ -238,32 +218,7 @@ class AnvatoIE(InfoExtractor):
'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900', 'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99', 'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe', 'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582' 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
}
def _generate_nfl_token(self, anvack, mcp_id):
reroute = self._download_json(
'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
headers={'X-Domain-Id': 100}, note='Fetching token info')
token_type = reroute.get('token_type') or 'Bearer'
auth_token = f'{token_type} {reroute["access_token"]}'
response = self._download_json(
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
'query': '''{
viewer {
mediaToken(anvack: "%s", id: %s) {
token
}
}
}''' % (anvack, mcp_id),
}).encode(), headers={
'Authorization': auth_token,
'Content-Type': 'application/json',
}, note='Fetching NFL API token')
return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
_TOKEN_GENERATORS = {
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
} }
def _server_time(self, access_key, video_id): def _server_time(self, access_key, video_id):
@ -277,8 +232,8 @@ class AnvatoIE(InfoExtractor):
server_time = self._server_time(access_key, video_id) server_time = self._server_time(access_key, video_id)
input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}' input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}'
auth_secret = intlist_to_bytes(aes_encrypt( auth_secret = bytes(aes_encrypt(
bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY))) list(input_data[:64].encode()), list(self._AUTH_KEY)))
query = { query = {
'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'), 'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'),
'rtyp': 'fp', 'rtyp': 'fp',
@ -290,8 +245,6 @@ class AnvatoIE(InfoExtractor):
} }
if extracted_token is not None: if extracted_token is not None:
api['anvstk2'] = extracted_token api['anvstk2'] = extracted_token
elif self._TOKEN_GENERATORS.get(access_key) is not None:
api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
elif self._ANVACK_TABLE.get(access_key) is not None: elif self._ANVACK_TABLE.get(access_key) is not None:
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}') api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
else: else:
@ -299,7 +252,7 @@ class AnvatoIE(InfoExtractor):
return self._download_json( return self._download_json(
video_data_url, video_id, transform_source=strip_jsonp, query=query, video_data_url, video_id, transform_source=strip_jsonp, query=query,
data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8')) data=json.dumps({'api': api}, separators=(',', ':')).encode())
def _get_anvato_videos(self, access_key, video_id, token): def _get_anvato_videos(self, access_key, video_id, token):
video_data = self._get_video_json(access_key, video_id, token) video_data = self._get_video_json(access_key, video_id, token)
@ -358,7 +311,7 @@ class AnvatoIE(InfoExtractor):
for caption in video_data.get('captions', []): for caption in video_data.get('captions', []):
a_caption = { a_caption = {
'url': caption['url'], 'url': caption['url'],
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None,
} }
subtitles.setdefault(caption['language'], []).append(a_caption) subtitles.setdefault(caption['language'], []).append(a_caption)
subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs) subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs)

View File

@ -10,6 +10,7 @@ from ..utils import (
class AolIE(YahooIE): # XXX: Do not subclass from concrete IE class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
_WORKING = False
IE_NAME = 'aol.com' IE_NAME = 'aol.com'
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})' _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
@ -29,7 +30,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
'params': { 'params': {
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
} },
}, { }, {
# video with vidible ID # video with vidible ID
'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', 'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
@ -45,7 +46,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
'params': { 'params': {
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
} },
}, { }, {
'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/', 'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/',
'only_matching': True, 'only_matching': True,
@ -82,10 +83,10 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
return self._extract_yahoo_video(video_id, 'us') return self._extract_yahoo_video(video_id, 'us')
response = self._download_json( response = self._download_json(
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id, f'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/{video_id}/details',
video_id)['response'] video_id)['response']
if response['statusText'] != 'Ok': if response['statusText'] != 'Ok':
raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True) raise ExtractorError('{} said: {}'.format(self.IE_NAME, response['statusText']), expected=True)
video_data = response['data'] video_data = response['data']
formats = [] formats = []

View File

@ -34,7 +34,7 @@ class APAIE(InfoExtractor):
video_id, base_url = mobj.group('id', 'base_url') video_id, base_url = mobj.group('id', 'base_url')
webpage = self._download_webpage( webpage = self._download_webpage(
'%s/player/%s' % (base_url, video_id), video_id) f'{base_url}/player/{video_id}', video_id)
jwplatform_id = self._search_regex( jwplatform_id = self._search_regex(
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage, r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
@ -47,7 +47,7 @@ class APAIE(InfoExtractor):
def extract(field, name=None): def extract(field, name=None):
return self._search_regex( return self._search_regex(
r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field, rf'\b{field}["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
webpage, name or field, default=None, group='value') webpage, name or field, default=None, group='value')
title = extract('title') or video_id title = extract('title') or video_id

View File

@ -1,8 +1,5 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import ExtractorError, str_to_int
str_to_int,
ExtractorError
)
class AppleConnectIE(InfoExtractor): class AppleConnectIE(InfoExtractor):

View File

@ -1,30 +1,45 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
clean_podcast_url, clean_podcast_url,
get_element_by_class,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
try_get,
) )
from ..utils.traversal import traverse_obj
class ApplePodcastsIE(InfoExtractor): class ApplePodcastsIE(InfoExtractor):
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)' _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654',
'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172',
'info_dict': {
'id': '1000665010654',
'ext': 'mp3',
'title': 'Ferreck Dawn - To The Break of Dawn 117',
'episode': 'Ferreck Dawn - To The Break of Dawn 117',
'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc',
'upload_date': '20240812',
'timestamp': 1723449600,
'duration': 3596,
'series': 'Ferreck Dawn - To The Break of Dawn',
'thumbnail': 're:.+[.](png|jpe?g|webp)',
},
}, {
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777', 'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
'md5': '41dc31cd650143e530d9423b6b5a344f', 'md5': 'baf8a6b8b8aa6062dbb4639ed73d0052',
'info_dict': { 'info_dict': {
'id': '1000482637777', 'id': '1000482637777',
'ext': 'mp3', 'ext': 'mp3',
'title': '207 - Whitney Webb Returns', 'title': '207 - Whitney Webb Returns',
'episode': '207 - Whitney Webb Returns',
'episode_number': 207,
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6', 'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
'upload_date': '20200705', 'upload_date': '20200705',
'timestamp': 1593932400, 'timestamp': 1593932400,
'duration': 6454, 'duration': 5369,
'series': 'The Tim Dillon Show', 'series': 'The Tim Dillon Show',
'thumbnail': 're:.+[.](png|jpe?g|webp)', 'thumbnail': 're:.+[.](png|jpe?g|webp)',
} },
}, { }, {
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777', 'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
'only_matching': True, 'only_matching': True,
@ -39,47 +54,24 @@ class ApplePodcastsIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
episode_id = self._match_id(url) episode_id = self._match_id(url)
webpage = self._download_webpage(url, episode_id) webpage = self._download_webpage(url, episode_id)
episode_data = {} server_data = self._search_json(
ember_data = {} r'<script [^>]*\bid=["\']serialized-server-data["\'][^>]*>', webpage,
# new page type 2021-11 'server data', episode_id, contains_pattern=r'\[{(?s:.+)}\]')[0]['data']
amp_data = self._parse_json(self._search_regex( model_data = traverse_obj(server_data, (
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<', 'headerButtonItems', lambda _, v: v['$kind'] == 'bookmark' and v['modelType'] == 'EpisodeOffer',
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {} 'model', {dict}, any))
amp_data = try_get(amp_data,
lambda a: self._parse_json(
next(a[x] for x in iter(a) if episode_id in x),
episode_id),
dict) or {}
amp_data = amp_data.get('d') or []
episode_data = try_get(
amp_data,
lambda a: next(x for x in a
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
dict)
if not episode_data:
# try pre 2021-11 page type: TODO: consider deleting if no longer used
ember_data = self._parse_json(self._search_regex(
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
webpage, 'ember data'), episode_id) or {}
ember_data = ember_data.get(episode_id) or ember_data
episode_data = try_get(ember_data, lambda x: x['data'], dict)
episode = episode_data['attributes']
description = episode.get('description') or {}
series = None
for inc in (amp_data or ember_data.get('included') or []):
if inc.get('type') == 'media/podcast':
series = try_get(inc, lambda x: x['attributes']['name'])
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
return { return {
'id': episode_id, 'id': episode_id,
'title': episode.get('name'), **self._json_ld(
'url': clean_podcast_url(episode['assetUrl']), traverse_obj(server_data, ('seoData', 'schemaContent', {dict}))
'description': description.get('standard') or description.get('short'), or self._yield_json_ld(webpage, episode_id, fatal=False), episode_id, fatal=False),
'timestamp': parse_iso8601(episode.get('releaseDateTime')), **traverse_obj(model_data, {
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000), 'title': ('title', {str}),
'series': series, 'url': ('streamUrl', {clean_podcast_url}),
'timestamp': ('releaseDate', {parse_iso8601}),
'duration': ('duration', {int_or_none}),
}),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
'vcodec': 'none', 'vcodec': 'none',
} }

View File

@ -1,8 +1,8 @@
import re
import json import json
import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_duration, parse_duration,
@ -64,7 +64,7 @@ class AppleTrailersIE(InfoExtractor):
'uploader_id': 'wb', 'uploader_id': 'wb',
}, },
}, },
] ],
}, { }, {
'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/', 'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
'info_dict': { 'info_dict': {
@ -99,7 +99,7 @@ class AppleTrailersIE(InfoExtractor):
webpage = self._download_webpage(url, movie) webpage = self._download_webpage(url, movie)
film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id') film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
film_data = self._download_json( film_data = self._download_json(
'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id, f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json',
film_id, fatal=False) film_id, fatal=False)
if film_data: if film_data:
@ -114,7 +114,7 @@ class AppleTrailersIE(InfoExtractor):
if not src: if not src:
continue continue
formats.append({ formats.append({
'format_id': '%s-%s' % (version, size), 'format_id': f'{version}-{size}',
'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src), 'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
'width': int_or_none(size_data.get('width')), 'width': int_or_none(size_data.get('width')),
'height': int_or_none(size_data.get('height')), 'height': int_or_none(size_data.get('height')),
@ -134,7 +134,7 @@ class AppleTrailersIE(InfoExtractor):
page_data = film_data.get('page', {}) page_data = film_data.get('page', {})
return self.playlist_result(entries, film_id, page_data.get('movie_title')) return self.playlist_result(entries, film_id, page_data.get('movie_title'))
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc')
def fix_html(s): def fix_html(s):
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s) s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
@ -143,10 +143,9 @@ class AppleTrailersIE(InfoExtractor):
# like: http://trailers.apple.com/trailers/wb/gravity/ # like: http://trailers.apple.com/trailers/wb/gravity/
def _clean_json(m): def _clean_json(m):
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;') return 'iTunes.playURL({});'.format(m.group(1).replace('\'', '&#39;'))
s = re.sub(self._JSON_RE, _clean_json, s) s = re.sub(self._JSON_RE, _clean_json, s)
s = '<html>%s</html>' % s return f'<html>{s}</html>'
return s
doc = self._download_xml(playlist_url, movie, transform_source=fix_html) doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
playlist = [] playlist = []
@ -170,18 +169,18 @@ class AppleTrailersIE(InfoExtractor):
duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower() trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id) settings_json_url = urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json')
settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json') settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
formats = [] formats = []
for format in settings['metadata']['sizes']: for fmt in settings['metadata']['sizes']:
# The src is a file pointing to the real video file # The src is a file pointing to the real video file
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src']) format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src'])
formats.append({ formats.append({
'url': format_url, 'url': format_url,
'format': format['type'], 'format': fmt['type'],
'width': int_or_none(format['width']), 'width': int_or_none(fmt['width']),
'height': int_or_none(format['height']), 'height': int_or_none(fmt['height']),
}) })
playlist.append({ playlist.append({
@ -229,7 +228,7 @@ class AppleTrailersSectionIE(InfoExtractor):
'title': 'Movie Studios', 'title': 'Movie Studios',
}, },
} }
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS) _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>{})'.format('|'.join(_SECTIONS))
_TESTS = [{ _TESTS = [{
'url': 'http://trailers.apple.com/#section=justadded', 'url': 'http://trailers.apple.com/#section=justadded',
'info_dict': { 'info_dict': {
@ -270,7 +269,7 @@ class AppleTrailersSectionIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
section = self._match_id(url) section = self._match_id(url)
section_data = self._download_json( section_data = self._download_json(
'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'], 'http://trailers.apple.com/trailers/home/feeds/{}.json'.format(self._SECTIONS[section]['feed_path']),
section) section)
entries = [ entries = [
self.url_result('http://trailers.apple.com' + e['location']) self.url_result('http://trailers.apple.com' + e['location'])

View File

@ -1,11 +1,11 @@
from __future__ import annotations
import json import json
import re import re
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from .naver import NaverBaseIE
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
from ..compat import compat_urllib_parse_unquote
from ..networking import HEADRequest from ..networking import HEADRequest
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
@ -32,6 +32,7 @@ from ..utils import (
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
urlhandle_detect_ext, urlhandle_detect_ext,
variadic,
) )
@ -50,10 +51,9 @@ class ArchiveOrgIE(InfoExtractor):
'release_date': '19681210', 'release_date': '19681210',
'timestamp': 1268695290, 'timestamp': 1268695290,
'upload_date': '20100315', 'upload_date': '20100315',
'creator': 'SRI International', 'creators': ['SRI International'],
'uploader': 'laura@archive.org', 'uploader': 'laura@archive.org',
'thumbnail': r're:https://archive\.org/download/.*\.jpg', 'thumbnail': r're:https://archive\.org/download/.*\.jpg',
'release_year': 1968,
'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr', 'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr',
'track': 'XD300-23 68HighlightsAResearchCntAugHumanIntellect', 'track': 'XD300-23 68HighlightsAResearchCntAugHumanIntellect',
@ -111,7 +111,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': 'Turning', 'title': 'Turning',
'ext': 'flac', 'ext': 'flac',
'track': 'Turning', 'track': 'Turning',
'creator': 'Grateful Dead', 'creators': ['Grateful Dead'],
'display_id': 'gd1977-05-08d01t01.flac', 'display_id': 'gd1977-05-08d01t01.flac',
'track_number': 1, 'track_number': 1,
'album': '1977-05-08 - Barton Hall - Cornell University', 'album': '1977-05-08 - Barton Hall - Cornell University',
@ -131,11 +131,10 @@ class ArchiveOrgIE(InfoExtractor):
'location': 'Barton Hall - Cornell University', 'location': 'Barton Hall - Cornell University',
'duration': 438.68, 'duration': 438.68,
'track': 'Deal', 'track': 'Deal',
'creator': 'Grateful Dead', 'creators': ['Grateful Dead'],
'album': '1977-05-08 - Barton Hall - Cornell University', 'album': '1977-05-08 - Barton Hall - Cornell University',
'release_date': '19770508', 'release_date': '19770508',
'display_id': 'gd1977-05-08d01t07.flac', 'display_id': 'gd1977-05-08d01t07.flac',
'release_year': 1977,
'track_number': 7, 'track_number': 7,
}, },
}, { }, {
@ -147,7 +146,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': 'Bells Of Rostov', 'title': 'Bells Of Rostov',
'ext': 'mp3', 'ext': 'mp3',
}, },
'skip': 'restricted' 'skip': 'restricted',
}, { }, {
'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3', 'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3',
'md5': '1d0aabe03edca83ca58d9ed3b493a3c3', 'md5': '1d0aabe03edca83ca58d9ed3b493a3c3',
@ -160,7 +159,7 @@ class ArchiveOrgIE(InfoExtractor):
'description': 'md5:012b2d668ae753be36896f343d12a236', 'description': 'md5:012b2d668ae753be36896f343d12a236',
'upload_date': '20190928', 'upload_date': '20190928',
}, },
'skip': 'restricted' 'skip': 'restricted',
}, { }, {
# Original formats are private # Original formats are private
'url': 'https://archive.org/details/irelandthemakingofarepublic', 'url': 'https://archive.org/details/irelandthemakingofarepublic',
@ -170,7 +169,7 @@ class ArchiveOrgIE(InfoExtractor):
'upload_date': '20160610', 'upload_date': '20160610',
'description': 'md5:f70956a156645a658a0dc9513d9e78b7', 'description': 'md5:f70956a156645a658a0dc9513d9e78b7',
'uploader': 'dimitrios@archive.org', 'uploader': 'dimitrios@archive.org',
'creator': ['British Broadcasting Corporation', 'Time-Life Films'], 'creators': ['British Broadcasting Corporation', 'Time-Life Films'],
'timestamp': 1465594947, 'timestamp': 1465594947,
}, },
'playlist': [ 'playlist': [
@ -204,8 +203,28 @@ class ArchiveOrgIE(InfoExtractor):
'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg', 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg',
'display_id': 'irelandthemakingofarepublicreel2.mov', 'display_id': 'irelandthemakingofarepublicreel2.mov',
}, },
} },
] ],
}, {
# The reviewbody is None for one of the reviews; just need to extract data without crashing
'url': 'https://archive.org/details/gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
'info_dict': {
'id': 'gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
'ext': 'mp3',
'title': 'Stuck Inside of Mobile with the Memphis Blues Again',
'creators': ['Grateful Dead'],
'duration': 338.31,
'track': 'Stuck Inside of Mobile with the Memphis Blues Again',
'description': 'md5:764348a470b986f1217ffd38d6ac7b72',
'display_id': 'gd95-04-02d1t04.shn',
'location': 'Pyramid Arena',
'uploader': 'jon@archive.org',
'album': '1995-04-02 - Pyramid Arena',
'upload_date': '20040519',
'track_number': 4,
'release_date': '19950402',
'timestamp': 1084927901,
},
}] }]
@staticmethod @staticmethod
@ -222,7 +241,7 @@ class ArchiveOrgIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = urllib.parse.unquote_plus(self._match_id(url)) video_id = urllib.parse.unquote_plus(self._match_id(url))
identifier, entry_id = (video_id.split('/', 1) + [None])[:2] identifier, _, entry_id = video_id.partition('/')
# Archive.org metadata API doesn't clearly demarcate playlist entries # Archive.org metadata API doesn't clearly demarcate playlist entries
# or subtitle tracks, so we get them from the embeddable player. # or subtitle tracks, so we get them from the embeddable player.
@ -248,7 +267,7 @@ class ArchiveOrgIE(InfoExtractor):
if track['kind'] != 'subtitles': if track['kind'] != 'subtitles':
continue continue
entries[p['orig']][track['label']] = { entries[p['orig']][track['label']] = {
'url': 'https://archive.org/' + track['file'].lstrip('/') 'url': 'https://archive.org/' + track['file'].lstrip('/'),
} }
metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier) metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier)
@ -260,7 +279,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': m['title'], 'title': m['title'],
'description': clean_html(m.get('description')), 'description': clean_html(m.get('description')),
'uploader': dict_get(m, ['uploader', 'adder']), 'uploader': dict_get(m, ['uploader', 'adder']),
'creator': m.get('creator'), 'creators': traverse_obj(m, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
'license': m.get('licenseurl'), 'license': m.get('licenseurl'),
'release_date': unified_strdate(m.get('date')), 'release_date': unified_strdate(m.get('date')),
'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])), 'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])),
@ -275,7 +294,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': f.get('title') or f['name'], 'title': f.get('title') or f['name'],
'display_id': f['name'], 'display_id': f['name'],
'description': clean_html(f.get('description')), 'description': clean_html(f.get('description')),
'creator': f.get('creator'), 'creators': traverse_obj(f, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
'duration': parse_duration(f.get('length')), 'duration': parse_duration(f.get('length')),
'track_number': int_or_none(f.get('track')), 'track_number': int_or_none(f.get('track')),
'album': f.get('album'), 'album': f.get('album'),
@ -295,7 +314,9 @@ class ArchiveOrgIE(InfoExtractor):
'height': int_or_none(f.get('width')), 'height': int_or_none(f.get('width')),
'filesize': int_or_none(f.get('size'))}) 'filesize': int_or_none(f.get('size'))})
extension = (f['name'].rsplit('.', 1) + [None])[1] _, has_ext, extension = f['name'].rpartition('.')
if not has_ext:
extension = None
# We don't want to skip private formats if the user has access to them, # We don't want to skip private formats if the user has access to them,
# however without access to an account with such privileges we can't implement/test this. # however without access to an account with such privileges we can't implement/test this.
@ -303,14 +324,14 @@ class ArchiveOrgIE(InfoExtractor):
is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig')) is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in): if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
entry['formats'].append({ entry['formats'].append({
'url': 'https://archive.org/download/' + identifier + '/' + f['name'], 'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
'format': f.get('format'), 'format': f.get('format'),
'width': int_or_none(f.get('width')), 'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')), 'height': int_or_none(f.get('height')),
'filesize': int_or_none(f.get('size')), 'filesize': int_or_none(f.get('size')),
'protocol': 'https', 'protocol': 'https',
'source_preference': 0 if f.get('source') == 'original' else -1, 'source_preference': 0 if f.get('source') == 'original' else -1,
'format_note': f.get('source') 'format_note': f.get('source'),
}) })
for entry in entries.values(): for entry in entries.values():
@ -334,7 +355,7 @@ class ArchiveOrgIE(InfoExtractor):
info['comments'].append({ info['comments'].append({
'id': review.get('review_id'), 'id': review.get('review_id'),
'author': review.get('reviewer'), 'author': review.get('reviewer'),
'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + review.get('reviewbody'), 'text': join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n'),
'timestamp': unified_timestamp(review.get('createdate')), 'timestamp': unified_timestamp(review.get('createdate')),
'parent': 'root'}) 'parent': 'root'})
@ -373,7 +394,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader_url': 'https://www.youtube.com/user/Zeurel', 'uploader_url': 'https://www.youtube.com/user/Zeurel',
'thumbnail': r're:https?://.*\.(jpg|webp)', 'thumbnail': r're:https?://.*\.(jpg|webp)',
'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg', 'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg',
} },
}, { }, {
# Internal link # Internal link
'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0', 'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0',
@ -390,7 +411,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader_url': 'https://www.youtube.com/user/1veritasium', 'uploader_url': 'https://www.youtube.com/user/1veritasium',
'thumbnail': r're:https?://.*\.(jpg|webp)', 'thumbnail': r're:https?://.*\.(jpg|webp)',
'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA', 'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA',
} },
}, { }, {
# Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description. # Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description.
# Should use the date in the link. Title ends with '- Youtube'. Capture has description in eow-description # Should use the date in the link. Title ends with '- Youtube'. Capture has description in eow-description
@ -405,8 +426,8 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader_id': 'machinima', 'uploader_id': 'machinima',
'uploader_url': 'https://www.youtube.com/user/machinima', 'uploader_url': 'https://www.youtube.com/user/machinima',
'thumbnail': r're:https?://.*\.(jpg|webp)', 'thumbnail': r're:https?://.*\.(jpg|webp)',
'uploader': 'machinima' 'uploader': 'machinima',
} },
}, { }, {
# FLV video. Video file URL does not provide itag information # FLV video. Video file URL does not provide itag information
'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw', 'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw',
@ -423,7 +444,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A', 'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A',
'thumbnail': r're:https?://.*\.(jpg|webp)', 'thumbnail': r're:https?://.*\.(jpg|webp)',
'uploader': 'jawed', 'uploader': 'jawed',
} },
}, { }, {
'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA', 'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
'info_dict': { 'info_dict': {
@ -439,7 +460,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader_url': 'https://www.youtube.com/user/itsmadeon', 'uploader_url': 'https://www.youtube.com/user/itsmadeon',
'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w', 'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w',
'thumbnail': r're:https?://.*\.(jpg|webp)', 'thumbnail': r're:https?://.*\.(jpg|webp)',
} },
}, { }, {
# First capture is of dead video, second is the oldest from CDX response. # First capture is of dead video, second is the oldest from CDX response.
'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E', 'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E',
@ -456,7 +477,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA', 'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA',
'thumbnail': r're:https?://.*\.(jpg|webp)', 'thumbnail': r're:https?://.*\.(jpg|webp)',
'uploader': 'ETC News', 'uploader': 'ETC News',
} },
}, { }, {
# First capture of dead video, capture date in link links to dead capture. # First capture of dead video, capture date in link links to dead capture.
'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E', 'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E',
@ -475,15 +496,15 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader': 'ETC News', 'uploader': 'ETC News',
}, },
'expected_warnings': [ 'expected_warnings': [
r'unable to download capture webpage \(it may not be archived\)' r'unable to download capture webpage \(it may not be archived\)',
] ],
}, { # Very old YouTube page, has - YouTube in title. }, { # Very old YouTube page, has - YouTube in title.
'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg', 'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg',
'info_dict': { 'info_dict': {
'id': '-06-KB9XTzg', 'id': '-06-KB9XTzg',
'ext': 'flv', 'ext': 'flv',
'title': 'New Coin Hack!! 100% Safe!!' 'title': 'New Coin Hack!! 100% Safe!!',
} },
}, { }, {
'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8', 'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8',
'info_dict': { 'info_dict': {
@ -497,7 +518,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc', 'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc',
'thumbnail': r're:https?://.*\.(jpg|webp)', 'thumbnail': r're:https?://.*\.(jpg|webp)',
'uploader': 'DankPods', 'uploader': 'DankPods',
} },
}, { }, {
# player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093 # player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093
'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4', 'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4',
@ -514,7 +535,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader_id': 'PewDiePie', 'uploader_id': 'PewDiePie',
'uploader_url': 'https://www.youtube.com/user/PewDiePie', 'uploader_url': 'https://www.youtube.com/user/PewDiePie',
'thumbnail': r're:https?://.*\.(jpg|webp)', 'thumbnail': r're:https?://.*\.(jpg|webp)',
} },
}, { }, {
# ~June 2010 Capture. swfconfig # ~June 2010 Capture. swfconfig
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y', 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y',
@ -529,7 +550,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'thumbnail': r're:https?://.*\.(jpg|webp)', 'thumbnail': r're:https?://.*\.(jpg|webp)',
'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks', 'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks',
'upload_date': '20090520', 'upload_date': '20090520',
} },
}, { }, {
# Jan 2011: watch-video-date/eow-date surrounded by whitespace # Jan 2011: watch-video-date/eow-date surrounded by whitespace
'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc', 'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc',
@ -544,7 +565,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'thumbnail': r're:https?://.*\.(jpg|webp)', 'thumbnail': r're:https?://.*\.(jpg|webp)',
'duration': 132, 'duration': 132,
'uploader_url': 'https://www.youtube.com/user/claybutlermusic', 'uploader_url': 'https://www.youtube.com/user/claybutlermusic',
} },
}, { }, {
# ~May 2009 swfArgs. ytcfg is spread out over various vars # ~May 2009 swfArgs. ytcfg is spread out over various vars
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY', 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY',
@ -559,7 +580,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'description': 'md5:4ca77d79538064e41e4cc464e93f44f0', 'description': 'md5:4ca77d79538064e41e4cc464e93f44f0',
'thumbnail': r're:https?://.*\.(jpg|webp)', 'thumbnail': r're:https?://.*\.(jpg|webp)',
'duration': 754, 'duration': 754,
} },
}, { }, {
# ~June 2012. Upload date is in another lang so cannot extract. # ~June 2012. Upload date is in another lang so cannot extract.
'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA', 'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA',
@ -573,7 +594,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader': 'BlackNerdComedy', 'uploader': 'BlackNerdComedy',
'duration': 182, 'duration': 182,
'thumbnail': r're:https?://.*\.(jpg|webp)', 'thumbnail': r're:https?://.*\.(jpg|webp)',
} },
}, { }, {
# ~July 2013 # ~July 2013
'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM', 'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM',
@ -589,7 +610,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ', 'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ',
'upload_date': '20060428', 'upload_date': '20060428',
'uploader': 'punkybird', 'uploader': 'punkybird',
} },
}, { }, {
# April 2020: Player response in player config # April 2020: Player response in player config
'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en', 'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en',
@ -606,7 +627,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'thumbnail': r're:https?://.*\.(jpg|webp)', 'thumbnail': r're:https?://.*\.(jpg|webp)',
'description': 'md5:c625bb3c02c4f5fb4205971e468fa341', 'description': 'md5:c625bb3c02c4f5fb4205971e468fa341',
'uploader_url': 'https://www.youtube.com/user/GameGrumps', 'uploader_url': 'https://www.youtube.com/user/GameGrumps',
} },
}, { }, {
# watch7-user-header with yt-user-info # watch7-user-header with yt-user-info
'url': 'ytarchive:kbh4T_b4Ixw:20160307085057', 'url': 'ytarchive:kbh4T_b4Ixw:20160307085057',
@ -621,7 +642,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'thumbnail': r're:https?://.*\.(jpg|webp)', 'thumbnail': r're:https?://.*\.(jpg|webp)',
'upload_date': '20150503', 'upload_date': '20150503',
'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA', 'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA',
} },
}, { }, {
# April 2012 # April 2012
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU', 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU',
@ -636,35 +657,35 @@ class YoutubeWebArchiveIE(InfoExtractor):
'duration': 200, 'duration': 200,
'upload_date': '20120407', 'upload_date': '20120407',
'uploader_id': 'thecomputernerd01', 'uploader_id': 'thecomputernerd01',
} },
}, { }, {
'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw', 'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
'only_matching': True 'only_matching': True,
}, { }, {
'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M', 'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M',
'only_matching': True 'only_matching': True,
}, { }, {
# Video not archived, only capture is unavailable video page # Video not archived, only capture is unavailable video page
'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10', 'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10',
'only_matching': True 'only_matching': True,
}, { # Encoded url }, { # Encoded url
'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den', 'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den',
'only_matching': True 'only_matching': True,
}, { }, {
'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den', 'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den',
'only_matching': True 'only_matching': True,
}, { }, {
'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&amp;search=soccer', 'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&amp;search=soccer',
'only_matching': True 'only_matching': True,
}, { }, {
'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg', 'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg',
'only_matching': True 'only_matching': True,
}, { }, {
'url': 'ytarchive:BaW_jenozKc:20050214000000', 'url': 'ytarchive:BaW_jenozKc:20050214000000',
'only_matching': True 'only_matching': True,
}, { }, {
'url': 'ytarchive:BaW_jenozKc', 'url': 'ytarchive:BaW_jenozKc',
'only_matching': True 'only_matching': True,
}, },
] ]
_YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE _YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
@ -675,13 +696,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
_YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers _YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers
_YT_ALL_THUMB_SERVERS = orderedSet( _YT_ALL_THUMB_SERVERS = orderedSet(
_YT_DEFAULT_THUMB_SERVERS + ['img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(0, 5), 9)]]) [*_YT_DEFAULT_THUMB_SERVERS, 'img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(5), 9)]])
_WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/' _WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/'
_OLDEST_CAPTURE_DATE = 20050214000000 _OLDEST_CAPTURE_DATE = 20050214000000
_NEWEST_CAPTURE_DATE = 20500101000000 _NEWEST_CAPTURE_DATE = 20500101000000
def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False): def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False):
# CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md # CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
query = { query = {
'url': url, 'url': url,
@ -690,14 +711,14 @@ class YoutubeWebArchiveIE(InfoExtractor):
'limit': 500, 'limit': 500,
'filter': ['statuscode:200'] + (filters or []), 'filter': ['statuscode:200'] + (filters or []),
'collapse': collapse or [], 'collapse': collapse or [],
**(query or {}) **(query or {}),
} }
res = self._download_json( res = self._download_json(
'https://web.archive.org/cdx/search/cdx', item_id, 'https://web.archive.org/cdx/search/cdx', item_id,
note or 'Downloading CDX API JSON', query=query, fatal=fatal) note or 'Downloading CDX API JSON', query=query, fatal=fatal)
if isinstance(res, list) and len(res) >= 2: if isinstance(res, list) and len(res) >= 2:
# format response to make it easier to use # format response to make it easier to use
return list(dict(zip(res[0], v)) for v in res[1:]) return [dict(zip(res[0], v)) for v in res[1:]]
elif not isinstance(res, list) or len(res) != 0: elif not isinstance(res, list) or len(res) != 0:
self.report_warning('Error while parsing CDX API response' + bug_reports_message()) self.report_warning('Error while parsing CDX API response' + bug_reports_message())
@ -854,7 +875,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
{ {
'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'), 'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'),
'filesize': int_or_none(thumbnail_dict.get('length')), 'filesize': int_or_none(thumbnail_dict.get('length')),
'preference': int_or_none(thumbnail_dict.get('length')) 'preference': int_or_none(thumbnail_dict.get('length')),
} for thumbnail_dict in response) } for thumbnail_dict in response)
if not try_all: if not try_all:
break break
@ -895,7 +916,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
for retry in retry_manager: for retry in retry_manager:
try: try:
urlh = self._request_webpage( urlh = self._request_webpage(
HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id), HEADRequest(f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}'),
video_id, note='Fetching archived video file url', expected_status=True) video_id, note='Fetching archived video file url', expected_status=True)
except ExtractorError as e: except ExtractorError as e:
# HTTP Error 404 is expected if the video is not saved. # HTTP Error 404 is expected if the video is not saved.
@ -926,258 +947,24 @@ class YoutubeWebArchiveIE(InfoExtractor):
info['thumbnails'] = self._extract_thumbnails(video_id) info['thumbnails'] = self._extract_thumbnails(video_id)
if urlh: if urlh:
url = compat_urllib_parse_unquote(urlh.url) url = urllib.parse.unquote(urlh.url)
video_file_url_qs = parse_qs(url) video_file_url_qs = parse_qs(url)
# Attempt to recover any ext & format info from playback url & response headers # Attempt to recover any ext & format info from playback url & response headers
format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))} fmt = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
itag = try_get(video_file_url_qs, lambda x: x['itag'][0]) itag = try_get(video_file_url_qs, lambda x: x['itag'][0])
if itag and itag in YoutubeIE._formats: if itag and itag in YoutubeIE._formats:
format.update(YoutubeIE._formats[itag]) fmt.update(YoutubeIE._formats[itag])
format.update({'format_id': itag}) fmt.update({'format_id': itag})
else: else:
mime = try_get(video_file_url_qs, lambda x: x['mime'][0]) mime = try_get(video_file_url_qs, lambda x: x['mime'][0])
ext = (mimetype2ext(mime) ext = (mimetype2ext(mime)
or urlhandle_detect_ext(urlh) or urlhandle_detect_ext(urlh)
or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type'))) or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type')))
format.update({'ext': ext}) fmt.update({'ext': ext})
info['formats'] = [format] info['formats'] = [fmt]
if not info.get('duration'): if not info.get('duration'):
info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0])) info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0]))
if not info.get('title'): if not info.get('title'):
info['title'] = video_id info['title'] = video_id
return info return info
class VLiveWebArchiveIE(InfoExtractor):
    """Extract VLive videos from captures stored on web.archive.org.

    All requests go through the Wayback Machine: the archived VLive video
    page, the player's main script, the "inkey" endpoint and the Naver VOD
    API response are fetched from their archived copies, and the resulting
    media URLs are prefixed with the Wayback base URL.
    """
    IE_NAME = 'web.archive:vlive'
    IE_DESC = 'web.archive.org saved vlive videos'
    _VALID_URL = r'''(?x)
            (?:https?://)?web\.archive\.org/
            (?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)? # /web and the version index is optional
            (?:https?(?::|%3[Aa])//)?(?:
                (?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P<id>[0-9]+) # VLive URL
            )
        '''
    _TESTS = [{
        'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
            'uploader_id': 'muploader_a',
            'uploader_url': None,
            'uploader': None,
            'upload_date': '20150817',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': 1439816449,
            'like_count': int,
            'channel': 'Girl\'s Day',
            'channel_id': 'FDF27',
            'comment_count': int,
            'release_timestamp': 1439818140,
            'release_date': '20150817',
            'duration': 1014,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937',
        'info_dict': {
            'id': '16937',
            'ext': 'mp4',
            'title': '첸백시 걍방',
            'creator': 'EXO',
            'view_count': int,
            'subtitles': 'mincount:12',
            'uploader_id': 'muploader_j',
            'uploader_url': 'http://vlive.tv',
            'uploader': None,
            'upload_date': '20161112',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': 1478923074,
            'like_count': int,
            'channel': 'EXO',
            'channel_id': 'F94BD',
            'comment_count': int,
            'release_timestamp': 1478924280,
            'release_date': '20161112',
            'duration': 906,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870',
        'info_dict': {
            'id': '101870',
            'ext': 'mp4',
            'title': '[ⓓ xV] “레벨이들 매력에 반해? 안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)',
            'creator': 'Dispatch',
            'view_count': int,
            'subtitles': 'mincount:6',
            'uploader_id': 'V__FRA08071',
            'uploader_url': 'http://vlive.tv',
            'uploader': None,
            'upload_date': '20181130',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': 1543601327,
            'like_count': int,
            'channel': 'Dispatch',
            'channel_id': 'C796F3',
            'comment_count': int,
            'release_timestamp': 1543601040,
            'release_date': '20181130',
            'duration': 279,
        },
        'params': {
            'skip_download': True,
        },
    }]

    # The wayback machine has special timestamp and "mode" values:
    # timestamp:
    #     1 = the first capture
    #     2 = the last capture
    # mode:
    #     id_ = Identity - perform no alterations of the original resource, return it as it was archived.
    _WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'

    def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs):
        """Download the archived copy of ``url`` in identity (``id_``) mode.

        ``timestamp`` selects the capture; a falsy value (e.g. ``None`` when
        the matched URL carried no capture date) falls back to ``'2'``, the
        last capture, instead of being formatted into the URL verbatim.
        Remaining keyword arguments are forwarded to ``_download_webpage``.

        Raises an expected ``ExtractorError`` when the archive answers with
        HTTP 404; any other ``ExtractorError`` is handed to the retry manager.
        """
        timestamp = timestamp or '2'
        archived_url = f'https://web.archive.org/web/{timestamp}id_/{url}'
        for retry in self.RetryManager():
            try:
                return self._download_webpage(archived_url, video_id, **kwargs)
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 404:
                    raise ExtractorError('Page was not archived', expected=True)
                retry.error = e

    def _download_archived_json(self, url, video_id, **kwargs):
        """Download an archived page and parse it as JSON.

        Raises an expected ``ExtractorError`` if no page content was obtained
        (this also applies when ``fatal=False`` was passed through).
        """
        page = self._download_archived_page(url, video_id, **kwargs)
        if not page:
            raise ExtractorError('Page was not archived', expected=True)
        return self._parse_json(page, video_id)

    def _extract_formats_from_m3u8(self, m3u8_url, params, video_id):
        """Build formats from an archived m3u8 playlist.

        Every URI line of the playlist is rewritten to point at the Wayback
        Machine (with ``params`` appended as query string), then the first
        rewritten URI is probed with a HEAD request.

        Returns the parsed formats, or ``None`` when the playlist could not be
        downloaded, contains no URI line, or its first segment is unavailable.
        """
        m3u8_doc = self._download_archived_page(m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False)
        if not m3u8_doc:
            return None

        # M3U8 document should be changed to archive domain
        url_base = m3u8_url.rsplit('/', 1)[0]
        encoded_params = urllib.parse.urlencode(params)
        rewritten = []
        first_segment = None
        for line in m3u8_doc.splitlines():
            # Blank lines carry no URI; leave them (and tag/comment lines) untouched
            if line.strip() and not line.startswith('#'):
                line = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{encoded_params}'
                first_segment = first_segment or line
            rewritten.append(line)

        if not first_segment:
            # Nothing to probe: the playlist lists no segments at all
            return None

        # Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870
        urlh = self._request_webpage(HEADRequest(first_segment), video_id, errnote=False,
                                     fatal=False, note='Check first segment availablity')
        if not urlh:
            return None

        formats, subtitles = self._parse_m3u8_formats_and_subtitles('\n'.join(rewritten), ext='mp4', video_id=video_id)
        if subtitles:
            self._report_ignoring_subs('m3u8')
        return formats

    # Closely follows the logic of the ArchiveTeam grab script
    # See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua
    def _real_extract(self, url):
        """Extract formats and metadata for an archived VLive video URL."""
        video_id, url_date = self._match_valid_url(url).group('id', 'date')

        webpage = self._download_archived_page(f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date)

        player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id)
        user_country = traverse_obj(player_info, ('common', 'userCountry'))

        main_script_url = self._search_regex(r'<script\s+src="([^"]+/js/main\.[^"]+\.js)"', webpage, 'main script url')
        main_script = self._download_archived_page(main_script_url, video_id, note='Downloading main script')
        app_id = self._search_regex(r'appId\s*=\s*"([^"]+)"', main_script, 'app id')

        inkey = self._download_archived_json(
            f'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/{video_id}/inkey', video_id, note='Fetching inkey', query={
                'appId': app_id,
                'platformType': 'PC',
                'gcc': user_country,
                'locale': 'en_US',
            }, fatal=False)

        vod_id = traverse_obj(player_info, ('postDetail', 'post', 'officialVideo', 'vodId'))

        vod_data = self._download_archived_json(
            f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{vod_id}', video_id, note='Fetching vod data', query={
                'key': inkey.get('inkey'),
                'pid': 'rmcPlayer_16692457559726800',  # partially unix time and partially random. Fixed value used by archiveteam project
                'sid': '2024',
                'ver': '2.0',
                'devt': 'html5_pc',
                'doct': 'json',
                'ptc': 'https',
                'sptc': 'https',
                'cpt': 'vtt',
                'ctls': '%7B%22visible%22%3A%7B%22fullscreen%22%3Atrue%2C%22logo%22%3Afalse%2C%22playbackRate%22%3Afalse%2C%22scrap%22%3Afalse%2C%22playCount%22%3Atrue%2C%22commentCount%22%3Atrue%2C%22title%22%3Atrue%2C%22writer%22%3Atrue%2C%22expand%22%3Afalse%2C%22subtitles%22%3Atrue%2C%22thumbnails%22%3Atrue%2C%22quality%22%3Atrue%2C%22setting%22%3Atrue%2C%22script%22%3Afalse%2C%22logoDimmed%22%3Atrue%2C%22badge%22%3Atrue%2C%22seekingTime%22%3Atrue%2C%22muted%22%3Atrue%2C%22muteButton%22%3Afalse%2C%22viewerNotice%22%3Afalse%2C%22linkCount%22%3Afalse%2C%22createTime%22%3Afalse%2C%22thumbnail%22%3Atrue%7D%2C%22clicked%22%3A%7B%22expand%22%3Afalse%2C%22subtitles%22%3Afalse%7D%7D',
                'pv': '4.26.9',
                'dr': '1920x1080',
                'cpl': 'en_US',
                'lc': 'en_US',
                'adi': '%5B%7B%22type%22%3A%22pre%22%2C%22exposure%22%3Afalse%2C%22replayExposure%22%3Afalse%7D%5D',
                'adu': '%2F',
                'videoId': vod_id,
                'cc': user_country,
            })

        formats = []

        streams = traverse_obj(vod_data, ('streams', ...))
        if len(streams) > 1:
            self.report_warning('Multiple streams found. Only the first stream will be downloaded.')
        # The payload may list no streams at all; fall through to the MP4
        # lookup below instead of failing with an IndexError.
        stream = streams[0] if streams else {}

        max_stream = max(
            stream.get('videos') or [],
            key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
        if max_stream is not None:
            params = {arg.get('name'): arg.get('value') for arg in stream.get('keys', []) if arg.get('type') == 'param'}
            formats = self._extract_formats_from_m3u8(max_stream.get('source'), params, video_id) or []

        # For parts of the project MP4 files were archived
        max_video = max(
            traverse_obj(vod_data, ('videos', 'list', ...)),
            key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
        # An entry without a source URL cannot be probed or downloaded
        video_source = max_video.get('source') if max_video is not None else None
        if video_source:
            video_url = self._WAYBACK_BASE_URL + video_source
            urlh = self._request_webpage(HEADRequest(video_url), video_id, errnote=False,
                                         fatal=False, note='Check video availablity')
            if urlh:
                formats.append({'url': video_url})

        return {
            'id': video_id,
            'formats': formats,
            **traverse_obj(player_info, ('postDetail', 'post', {
                'title': ('officialVideo', 'title', {str}),
                'creator': ('author', 'nickname', {str}),
                'channel': ('channel', 'channelName', {str}),
                'channel_id': ('channel', 'channelCode', {str}),
                'duration': ('officialVideo', 'playTime', {int_or_none}),
                'view_count': ('officialVideo', 'playCount', {int_or_none}),
                'like_count': ('officialVideo', 'likeCount', {int_or_none}),
                'comment_count': ('officialVideo', 'commentCount', {int_or_none}),
                'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}),
                'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}),
            })),
            **traverse_obj(vod_data, ('meta', {
                'uploader_id': ('user', 'id', {str}),
                'uploader': ('user', 'name', {str}),
                'uploader_url': ('user', 'url', {url_or_none}),
                'thumbnail': ('cover', 'source', {url_or_none}),
            }), expected_type=lambda x: x or None),
            **NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]),
        }

View File

@ -4,6 +4,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
extract_attributes, extract_attributes,
int_or_none, int_or_none,
join_nonempty,
parse_iso8601, parse_iso8601,
try_get, try_get,
) )
@ -11,7 +12,7 @@ from ..utils import (
class ArcPublishingIE(InfoExtractor): class ArcPublishingIE(InfoExtractor):
_UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}' _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
_VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX _VALID_URL = rf'arcpublishing:(?P<org>[a-z]+):(?P<id>{_UUID_REGEX})'
_TESTS = [{ _TESTS = [{
# https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/ # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab', 'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
@ -74,12 +75,12 @@ class ArcPublishingIE(InfoExtractor):
def _extract_embed_urls(cls, url, webpage): def _extract_embed_urls(cls, url, webpage):
entries = [] entries = []
# https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage): for powa_el in re.findall(rf'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="{ArcPublishingIE._UUID_REGEX}"[^>]*>)', webpage):
powa = extract_attributes(powa_el) or {} powa = extract_attributes(powa_el) or {}
org = powa.get('data-org') org = powa.get('data-org')
uuid = powa.get('data-uuid') uuid = powa.get('data-uuid')
if org and uuid: if org and uuid:
entries.append('arcpublishing:%s:%s' % (org, uuid)) entries.append(f'arcpublishing:{org}:{uuid}')
return entries return entries
def _real_extract(self, url): def _real_extract(self, url):
@ -122,7 +123,7 @@ class ArcPublishingIE(InfoExtractor):
elif stream_type in ('ts', 'hls'): elif stream_type in ('ts', 'hls'):
m3u8_formats = self._extract_m3u8_formats( m3u8_formats = self._extract_m3u8_formats(
s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False) s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
if all([f.get('acodec') == 'none' for f in m3u8_formats]): if all(f.get('acodec') == 'none' for f in m3u8_formats):
continue continue
for f in m3u8_formats: for f in m3u8_formats:
height = f.get('height') height = f.get('height')
@ -136,7 +137,7 @@ class ArcPublishingIE(InfoExtractor):
else: else:
vbr = int_or_none(s.get('bitrate')) vbr = int_or_none(s.get('bitrate'))
formats.append({ formats.append({
'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type, 'format_id': join_nonempty(stream_type, vbr),
'vbr': vbr, 'vbr': vbr,
'width': int_or_none(s.get('width')), 'width': int_or_none(s.get('width')),
'height': int_or_none(s.get('height')), 'height': int_or_none(s.get('height')),

View File

@ -1,24 +1,25 @@
import json import functools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .generic import GenericIE
from ..utils import ( from ..utils import (
OnDemandPagedList,
bug_reports_message,
determine_ext, determine_ext,
ExtractorError,
int_or_none, int_or_none,
join_nonempty,
jwt_decode_hs256,
make_archive_id,
parse_duration, parse_duration,
qualities, parse_iso8601,
remove_start,
str_or_none, str_or_none,
try_get,
unified_strdate, unified_strdate,
unified_timestamp,
update_url,
update_url_query, update_url_query,
url_or_none, url_or_none,
xpath_text, xpath_text,
) )
from ..compat import compat_etree_fromstring from ..utils.traversal import traverse_obj
class ARDMediathekBaseIE(InfoExtractor): class ARDMediathekBaseIE(InfoExtractor):
@ -61,45 +62,6 @@ class ARDMediathekBaseIE(InfoExtractor):
'subtitles': subtitles, 'subtitles': subtitles,
} }
def _ARD_extract_episode_info(self, title):
"""Try to extract season/episode data from the title."""
res = {}
if not title:
return res
for pattern in [
# Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
# from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*',
# E.g.: title="Fritjof aus Norwegen (2) (AD)"
# from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*',
r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*',
# E.g.: title="Folge 25/42: Symmetrie"
# from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
# E.g.: title="Folge 1063 - Vertrauen"
# from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:| -|) ).*',
]:
m = re.match(pattern, title)
if m:
groupdict = m.groupdict()
res['season_number'] = int_or_none(groupdict.get('season_number'))
res['episode_number'] = int_or_none(groupdict.get('episode_number'))
res['episode'] = str_or_none(groupdict.get('episode'))
# Build the episode title by removing numeric episode information:
if groupdict.get('ep_info') and not res['episode']:
res['episode'] = str_or_none(
title.replace(groupdict.get('ep_info'), ''))
if res['episode']:
res['episode'] = res['episode'].strip()
break
# As a fallback use the whole title as the episode name:
if not res.get('episode'):
res['episode'] = title.strip()
return res
def _extract_formats(self, media_info, video_id): def _extract_formats(self, media_info, video_id):
type_ = media_info.get('_type') type_ = media_info.get('_type')
media_array = media_info.get('_mediaArray', []) media_array = media_info.get('_mediaArray', [])
@ -123,7 +85,7 @@ class ARDMediathekBaseIE(InfoExtractor):
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
update_url_query(stream_url, { update_url_query(stream_url, {
'hdcore': '3.1.1', 'hdcore': '3.1.1',
'plugin': 'aasp-3.1.1.69.124' 'plugin': 'aasp-3.1.1.69.124',
}), video_id, f4m_id='hds', fatal=False)) }), video_id, f4m_id='hds', fatal=False))
elif ext == 'm3u8': elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
@ -134,12 +96,12 @@ class ARDMediathekBaseIE(InfoExtractor):
f = { f = {
'url': server, 'url': server,
'play_path': stream_url, 'play_path': stream_url,
'format_id': 'a%s-rtmp-%s' % (num, quality), 'format_id': f'a{num}-rtmp-{quality}',
} }
else: else:
f = { f = {
'url': stream_url, 'url': stream_url,
'format_id': 'a%s-%s-%s' % (num, ext, quality) 'format_id': f'a{num}-{ext}-{quality}',
} }
m = re.search( m = re.search(
r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',
@ -155,144 +117,12 @@ class ARDMediathekBaseIE(InfoExtractor):
return formats return formats
class ARDMediathekIE(ARDMediathekBaseIE):
    """Extractor for the classic ARD Mediathek hosts matched by ``_VALID_URL``.

    Any URL that ARDBetaMediathekIE also accepts is left to that extractor.
    """
    IE_NAME = 'ARD:mediathek'
    _VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'

    _TESTS = [{
        # available till 26.07.2022
        'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
        'info_dict': {
            'id': '44726822',
            'ext': 'mp4',
            'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
            'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
            'duration': 1740,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
        'only_matching': True,
    }, {
        # audio
        'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
        'only_matching': True,
    }, {
        'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
        'only_matching': True,
    }, {
        # audio
        'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
        'only_matching': True,
    }, {
        'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs ARDBetaMediathekIE accepts, else the inherited verdict."""
        if ARDBetaMediathekIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Extract one item: an RSS result, directly listed streams, or the media JSON."""
        # A numeric documentId in the URL takes precedence over the path segment
        mobj = self._match_valid_url(url)
        doc_id_match = re.search(r'documentId=([0-9]+)', url)
        document_id = doc_id_match.group(1) if doc_id_match else None
        video_id = document_id if doc_id_match else mobj.group('video_id')

        webpage = self._download_webpage(url, video_id)

        # Page snippets that signal an unavailable video, with the error to raise
        unavailable_markers = {
            '>Leider liegt eine Störung vor.': 'Video %s is unavailable',
            '>Der gewünschte Beitrag ist nicht mehr verfügbar.<': 'Video %s is no longer available',
        }
        for marker, template in unavailable_markers.items():
            if marker in webpage:
                raise ExtractorError(template % video_id, expected=True)

        # URLs carrying an "rss" query parameter may be feeds rather than pages
        if re.search(r'[\?&]rss($|[=&])', url):
            doc = compat_etree_fromstring(webpage.encode('utf-8'))
            if doc.tag == 'rss':
                return GenericIE()._extract_rss(url, video_id, doc)

        title = self._og_search_title(webpage, default=None)
        if not title:
            title = self._html_search_regex(
                [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
                 r'<meta name="dcterms\.title" content="(.*?)"/>',
                 r'<h4 class="headline">(.*?)</h4>',
                 r'<title[^>]*>(.*?)</title>'],
                webpage, 'title')

        description = (
            self._og_search_description(webpage, default=None)
            or self._html_search_meta(
                'dcterms.abstract', webpage, 'description', default=None))
        if description is None:
            description = self._html_search_meta(
                'description', webpage, 'meta description', default=None)
            if description is None:
                description = self._html_search_regex(
                    r'<p\s+class="teasertext">(.+?)</p>',
                    webpage, 'teaser text', default=None)

        # Thumbnail is sometimes not present.
        # It is in the mobile version, but that seems to use a different URL
        # structure altogether.
        thumbnail = self._og_search_thumbnail(webpage, default=None)

        media_streams = re.findall(r'''(?x)
            mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
            "([^"]+)"''', webpage)

        if media_streams:
            rank = qualities(['lo', 'hi', 'hq'])

            def stream_format_id(stream_url):
                # '.f4m' manifests get a fixed id; otherwise use the
                # second-to-last dot-separated component of the URL, if any
                if stream_url.endswith('.f4m'):
                    return 'f4m'
                id_match = re.match(r'.*\.([^.]+)\.[^.]+$', stream_url)
                return id_match.group(1) if id_match else None

            formats = []
            for stream_url in set(media_streams):
                format_id = stream_format_id(stream_url)
                formats.append({
                    'quality': rank(format_id),
                    'format_id': format_id,
                    'url': stream_url,
                })
            info = {
                'formats': formats,
            }
        else:  # request JSON file
            if not document_id:
                video_id = self._search_regex(
                    (r'/play/(?:config|media|sola)/(\d+)', r'contentId["\']\s*:\s*(\d+)'),
                    webpage, 'media id', default=None)
            info = self._extract_media_info(
                'http://www.ardmediathek.de/play/media/%s' % video_id,
                webpage, video_id)

        info.update(
            id=video_id,
            title=title,
            description=description,
            thumbnail=thumbnail,
        )
        info.update(self._ARD_extract_episode_info(info['title']))

        return info
class ARDIE(InfoExtractor): class ARDIE(InfoExtractor):
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html' _VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
_TESTS = [{ _TESTS = [{
# available till 7.12.2023 # available till 7.12.2023
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html', 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
'md5': 'a438f671e87a7eba04000336a119ccc4', 'md5': '94812e6438488fb923c361a44469614b',
'info_dict': { 'info_dict': {
'id': 'maischberger-video-424', 'id': 'maischberger-video-424',
'display_id': 'maischberger-video-424', 'display_id': 'maischberger-video-424',
@ -399,31 +229,36 @@ class ARDIE(InfoExtractor):
} }
class ARDBetaMediathekIE(ARDMediathekBaseIE): class ARDBetaMediathekIE(InfoExtractor):
_VALID_URL = r'''(?x)https:// IE_NAME = 'ARDMediathek'
_VALID_URL = r'''(?x)https?://
(?:(?:beta|www)\.)?ardmediathek\.de/ (?:(?:beta|www)\.)?ardmediathek\.de/
(?:(?P<client>[^/]+)/)? (?:[^/]+/)?
(?:player|live|video|(?P<playlist>sendung|sammlung))/ (?:player|live|video)/
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)? (?:[^?#]+/)?
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+) (?P<id>[a-zA-Z0-9]+)
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))''' /?(?:[?#]|$)'''
_GEO_COUNTRIES = ['DE']
_TOKEN_URL = 'https://sso.ardmediathek.de/sso/token'
_TESTS = [{ _TESTS = [{
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI', 'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
'md5': '3fd5fead7a370a819341129c8d713136', 'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
'info_dict': { 'info_dict': {
'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen', 'display_id': 'Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
'id': '12172961', 'id': '12939099',
'title': 'Wolfsland - Die traurigen Schwestern', 'title': 'Liebe auf vier Pfoten',
'description': r're:^Als der Polizeiobermeister Raaben', 'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
'duration': 5241, 'duration': 5222,
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957', 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b',
'timestamp': 1670710500, 'timestamp': 1701343800,
'upload_date': '20221210', 'upload_date': '20231130',
'ext': 'mp4', 'ext': 'mp4',
'age_limit': 12, 'episode': 'Liebe auf vier Pfoten',
'episode': 'Wolfsland - Die traurigen Schwestern', 'series': 'Filme im MDR',
'series': 'Filme im MDR' 'age_limit': 0,
'channel': 'MDR',
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0'],
}, },
}, { }, {
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/', 'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
@ -450,11 +285,49 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
'timestamp': 1636398000, 'timestamp': 1636398000,
'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b', 'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
'upload_date': '20211108', 'upload_date': '20211108',
'display_id': 'tagesschau-oder-tagesschau-20-00-uhr/das-erste', 'display_id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
'duration': 915, 'duration': 915,
'episode': 'tagesschau, 20:00 Uhr', 'episode': 'tagesschau, 20:00 Uhr',
'series': 'tagesschau', 'series': 'tagesschau',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49', 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
'channel': 'ARD-Aktuell',
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll'],
},
}, {
'url': 'https://www.ardmediathek.de/video/7-tage/7-tage-unter-harten-jungs/hr-fernsehen/N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
'md5': 'c428b9effff18ff624d4f903bda26315',
'info_dict': {
'id': '94834686',
'ext': 'mp4',
'duration': 2670,
'episode': '7 Tage ... unter harten Jungs',
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
'upload_date': '20231005',
'timestamp': 1696491171,
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
'series': '7 Tage ...',
'channel': 'HR',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:430c86d233afa42d?w=960&ch=fa32ba69bc87989a',
'title': '7 Tage ... unter harten Jungs',
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
},
}, {
'url': 'https://www.ardmediathek.de/video/lokalzeit-aus-duesseldorf/lokalzeit-aus-duesseldorf-oder-31-10-2024/wdr-duesseldorf/Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
'info_dict': {
'id': '13847165',
'chapters': 'count:8',
'ext': 'mp4',
'channel': 'WDR',
'display_id': 'Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
'episode': 'Lokalzeit aus Düsseldorf | 31.10.2024',
'series': 'Lokalzeit aus Düsseldorf',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f02ec9bd9b7bd5f6?w=960&ch=612491dcd5e09b0c',
'title': 'Lokalzeit aus Düsseldorf | 31.10.2024',
'upload_date': '20241031',
'timestamp': 1730399400,
'description': 'md5:12db30b3b706314efe3778b8df1a7058',
'duration': 1759,
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz'],
}, },
}, { }, {
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
@ -471,203 +344,260 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
}, { }, {
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg', 'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
'only_matching': True,
}]
def _extract_episode_info(self, title):
    """Pull season/episode fields out of ``title`` with ``traverse_obj``.

    Each regex below is matched against the title; the field template is
    applied to the match objects and, because ``get_all=False`` is passed,
    a single result is returned rather than one per pattern.
    """
    title_patterns = [
        # Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
        # from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
        r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*',
        # E.g.: title="Fritjof aus Norwegen (2) (AD)"
        # from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
        r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*',
        r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*',
        # E.g.: title="Folge 25/42: Symmetrie"
        # from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
        # E.g.: title="Folge 1063 - Vertrauen"
        # from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
        r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:| -|) ).*',
        # As a fallback use the full title
        r'(?P<title>.*)',
    ]

    # Bind the title so the traversal can apply re.match to each pattern
    match_title = functools.partial(re.match, string=title)

    # Regex group names on the right feed the info-dict keys on the left
    field_template = {
        'season_number': ('season_number', {int_or_none}),
        'episode_number': ('episode_number', {int_or_none}),
        # Candidate sources for the episode name, each stripped of whitespace:
        # the quoted episode group, the title minus its numbering, the full title
        'episode': ((
            ('episode', {str_or_none}),
            ('ep_info', {lambda x: title.replace(x, '')}),
            ('title', {str}),
        ), {str.strip}),
    }

    return traverse_obj(
        title_patterns, (..., {match_title}, field_template), get_all=False)
def _real_extract(self, url):
    """Extract a single ARD Mediathek item (on-demand video or livestream).

    Fetches the item page from the page-gateway API using the id taken from
    ``url``, optionally authenticating for age-restricted content, and builds
    the info dict from the player widget embedded in that page.

    Raises a login-required error when the player widget is flagged as
    blocked by FSK. If the page has no player widget, an info dict with
    empty ``formats`` is returned.
    """
    display_id = self._match_id(url)
    query = {'embedded': 'false', 'mcV6': 'true'}
    headers = {}

    # Authentication is attempted only when an 'ams' cookie exists for the token URL
    if self._get_cookies(self._TOKEN_URL).get('ams'):
        token = self._download_json(
            self._TOKEN_URL, display_id, 'Fetching token for age verification',
            'Unable to fetch age verification token', fatal=False)
        id_token = traverse_obj(token, ('idToken', {str}))
        decoded_token = traverse_obj(id_token, ({jwt_decode_hs256}, {dict}))
        user_id = traverse_obj(decoded_token, (('user_id', 'sub'), {str}), get_all=False)
        if not user_id:
            self.report_warning('Unable to extract token, continuing without authentication')
        else:
            headers['x-authorization'] = f'Bearer {id_token}'
            query['userId'] = user_id
            if decoded_token.get('age_rating') != 18:
                self.report_warning('Account is not verified as 18+; video may be unavailable')

    page_data = self._download_json(
        f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}',
        display_id, query=query, headers=headers)

    # For user convenience we use the old contentId instead of the longer crid
    # Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283
    old_id = traverse_obj(page_data, ('tracking', 'atiCustomVars', 'contentId', {int}))
    if old_id is not None:
        video_id = str(old_id)
        archive_ids = [make_archive_id(ARDBetaMediathekIE, display_id)]
    else:
        self.report_warning(f'Could not extract contentId{bug_reports_message()}')
        video_id = display_id
        archive_ids = None

    # The traversal yields None when the page has no player widget; fall back
    # to an empty dict so the lookups below do not raise AttributeError
    player_data = traverse_obj(
        page_data, ('widgets', lambda _, v: v['type'] in ('player_ondemand', 'player_live'), {dict}),
        get_all=False) or {}
    is_live = player_data.get('type') == 'player_live'
    media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict}))

    if player_data.get('blockedByFsk'):
        self.raise_login_required('This video is only available for age verified users or after 22:00')

    formats = []
    subtitles = {}
    for stream in traverse_obj(media_data, ('streams', ..., {dict})):
        kind = stream.get('kind')
        # Prioritize main stream over sign language and others
        preference = 1 if kind == 'main' else None
        for media in traverse_obj(stream, ('media', lambda _, v: url_or_none(v['url']))):
            media_url = media['url']

            # Language tag: language code plus the audio kind, with 'standard' dropped
            audio_kind = traverse_obj(media, (
                'audios', 0, 'kind', {str}), default='').replace('standard', '')
            lang_code = traverse_obj(media, ('audios', 0, 'languageCode', {str})) or 'deu'
            lang = join_nonempty(lang_code, audio_kind)
            # Only plain 'deu' audio (no extra audio kind) is preferred
            language_preference = 10 if lang == 'deu' else -10

            if determine_ext(media_url) == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    media_url, video_id, m3u8_id=f'hls-{kind}', preference=preference, fatal=False, live=is_live)
                for f in fmts:
                    f['language'] = lang
                    f['language_preference'] = language_preference
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                formats.append({
                    'url': media_url,
                    'format_id': f'http-{kind}',
                    'preference': preference,
                    'language': lang,
                    'language_preference': language_preference,
                    **traverse_obj(media, {
                        'format_note': ('forcedLabel', {str}),
                        'width': ('maxHResolutionPx', {int_or_none}),
                        'height': ('maxVResolutionPx', {int_or_none}),
                        'vcodec': ('videoCodec', {str}),
                    }),
                })

    for sub in traverse_obj(media_data, ('subtitles', ..., {dict})):
        for sources in traverse_obj(sub, ('sources', lambda _, v: url_or_none(v['url']))):
            subtitles.setdefault(sub.get('languageCode') or 'deu', []).append({
                'url': sources['url'],
                # Map the API's subtitle kind to a file extension (None if unknown)
                'ext': {'webvtt': 'vtt', 'ebutt': 'ttml'}.get(sources.get('kind')),
            })

    # fskRating has its 'FSK' prefix removed before integer conversion
    age_limit = traverse_obj(page_data, ('fskRating', {lambda x: remove_start(x, 'FSK')}, {int_or_none}))

    return {
        'id': video_id,
        'display_id': display_id,
        'formats': formats,
        'subtitles': subtitles,
        'is_live': is_live,
        'age_limit': age_limit,
        **traverse_obj(media_data, {
            'chapters': ('pluginData', 'jumpmarks@all', 'chapterArray', lambda _, v: int_or_none(v['chapterTime']), {
                'start_time': ('chapterTime', {int_or_none}),
                'title': ('chapterTitle', {str}),
            }),
        }),
        **traverse_obj(media_data, ('meta', {
            'title': 'title',
            'description': 'synopsis',
            'timestamp': ('broadcastedOnDateTime', {parse_iso8601}),
            'series': 'seriesTitle',
            'thumbnail': ('images', 0, 'url', {url_or_none}),
            'duration': ('durationSeconds', {int_or_none}),
            'channel': 'clipSourceName',
        })),
        **self._extract_episode_info(page_data.get('title')),
        '_old_archive_ids': archive_ids,
    }
class ARDMediathekCollectionIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://
(?:(?:beta|www)\.)?ardmediathek\.de/
(?:[^/?#]+/)?
(?P<playlist>sendung|serie|sammlung)/
(?:(?P<display_id>[^?#]+?)/)?
(?P<id>[a-zA-Z0-9]+)
(?:/(?P<season>\d+)(?:/(?P<version>OV|AD))?)?/?(?:[?#]|$)'''
_GEO_COUNTRIES = ['DE']
_TESTS = [{
'url': 'https://www.ardmediathek.de/serie/quiz/staffel-1-originalversion/Y3JpZDovL3dkci5kZS9vbmUvcXVpeg/1/OV',
'info_dict': {
'id': 'Y3JpZDovL3dkci5kZS9vbmUvcXVpeg_1_OV',
'display_id': 'quiz/staffel-1-originalversion',
'title': 'Staffel 1 Originalversion',
},
'playlist_count': 3,
}, {
'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-4-mit-audiodeskription/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/4/AD',
'info_dict': {
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_4_AD',
'display_id': 'babylon-berlin/staffel-4-mit-audiodeskription',
'title': 'Staffel 4 mit Audiodeskription',
},
'playlist_count': 12,
}, {
'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/1/',
'info_dict': {
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_1',
'display_id': 'babylon-berlin/staffel-1',
'title': 'Staffel 1',
},
'playlist_count': 8,
}, {
'url': 'https://www.ardmediathek.de/sendung/tatort/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA',
'info_dict': {
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA',
'display_id': 'tatort',
'title': 'Tatort',
},
'playlist_mincount': 500,
}, {
'url': 'https://www.ardmediathek.de/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2',
'info_dict': {
'id': '5eOHzt8XB2sqeFXbIoJlg2',
'display_id': 'die-kirche-bleibt-im-dorf',
'title': 'Die Kirche bleibt im Dorf',
'description': 'Die Kirche bleibt im Dorf',
},
'playlist_count': 4,
}, { }, {
# playlist of type 'sendung' # playlist of type 'sendung'
'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/', 'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
'only_matching': True, 'only_matching': True,
}, {
# playlist of type 'serie'
'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
'only_matching': True,
}, { }, {
# playlist of type 'sammlung' # playlist of type 'sammlung'
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/', 'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
'only_matching': True,
}, {
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
'only_matching': True,
}] }]
def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber): _PAGE_SIZE = 100
""" Query the ARD server for playlist information
and returns the data in "raw" format """
if mode == 'sendung':
graphQL = json.dumps({
'query': '''{
showPage(
client: "%s"
showId: "%s"
pageNumber: %d
) {
pagination {
pageSize
totalElements
}
teasers { # Array
mediumTitle
links { target { id href title } }
type
}
}}''' % (client, playlist_id, pageNumber),
}).encode()
else: # mode == 'sammlung'
graphQL = json.dumps({
'query': '''{
morePage(
client: "%s"
compilationId: "%s"
pageNumber: %d
) {
widget {
pagination {
pageSize
totalElements
}
teasers { # Array
mediumTitle
links { target { id href title } }
type
}
}
}}''' % (client, playlist_id, pageNumber),
}).encode()
# Ressources for ARD graphQL debugging:
# https://api-test.ardmediathek.de/public-gateway
show_page = self._download_json(
'https://api.ardmediathek.de/public-gateway',
'[Playlist] %s' % display_id,
data=graphQL,
headers={'Content-Type': 'application/json'})['data']
# align the structure of the returned data:
if mode == 'sendung':
show_page = show_page['showPage']
else: # mode == 'sammlung'
show_page = show_page['morePage']['widget']
return show_page
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
    """Build a playlist result from every page of an ARD playlist.

    Pages are requested through ``_ARD_load_playlist_snipped`` until
    ``pageSize`` times the number of pages fetched reaches
    ``totalElements``. Handles modes 'sendung' and 'sammlung'; a teaser whose
    target href contains '/compilation/' is linked as a nested 'sammlung',
    any other teaser as a 'video'.
    """
    umlaut_replacements = (('ä', 'ae'), ('ö', 'oe'), ('ü', 'ue'), ('ß', 'ss'))

    entries = []
    pages_fetched = 0
    while True:
        page = self._ARD_load_playlist_snipped(
            playlist_id, display_id, client, mode, pages_fetched)

        for teaser in page['teasers']:
            target = teaser['links']['target']
            # Nested compilation, e.g. like:
            # https://www.ardmediathek.de/ard/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2/
            # (an alternative check would be teaser['type'] == "compilation")
            link_mode = 'sammlung' if '/compilation/' in target['href'] else 'video'

            # Perform HTML quoting of the episode title similar to ARD:
            # lowercase, transliterate umlauts, replace each run of other
            # characters with '-', then drop a leading/trailing '-'
            slug = target['title'].lower()
            for umlaut, ascii_form in umlaut_replacements:
                slug = slug.replace(umlaut, ascii_form)
            slug = re.sub('[^a-zA-Z0-9]+', '-', slug)
            slug = re.sub('^-|-$', '', slug)

            item_url = 'https://www.ardmediathek.de/%s/%s/%s/%s/%s' % (
                client, link_mode, display_id, slug, target['id'])
            entries.append(self.url_result(
                item_url,
                ie=ARDBetaMediathekIE.ie_key()))

        pagination = page['pagination']
        pages_fetched += 1
        # Stop once the pages seen so far cover every playlist entry
        if pagination['pageSize'] * pages_fetched >= pagination['totalElements']:
            break

    return self.playlist_result(entries, playlist_id, playlist_title=display_id)
def _real_extract(self, url): def _real_extract(self, url):
video_id, display_id, playlist_type, client, season_number = self._match_valid_url(url).group( playlist_id, display_id, playlist_type, season_number, version = self._match_valid_url(url).group(
'id', 'display_id', 'playlist', 'client', 'season') 'id', 'display_id', 'playlist', 'season', 'version')
display_id, client = display_id or video_id, client or 'ard'
if playlist_type: def call_api(page_num):
# TODO: Extract only specified season api_path = 'compilations/ard' if playlist_type == 'sammlung' else 'widgets/ard/asset'
return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type) return self._download_json(
f'https://api.ardmediathek.de/page-gateway/{api_path}/{playlist_id}', playlist_id,
f'Downloading playlist page {page_num}', query={
'pageNumber': page_num,
'pageSize': self._PAGE_SIZE,
**({
'seasoned': 'true',
'seasonNumber': season_number,
'withOriginalversion': 'true' if version == 'OV' else 'false',
'withAudiodescription': 'true' if version == 'AD' else 'false',
} if season_number else {}),
})
player_page = self._download_json( def fetch_page(page_num):
'https://api.ardmediathek.de/public-gateway', for item in traverse_obj(call_api(page_num), ('teasers', ..., {dict})):
display_id, data=json.dumps({ item_id = traverse_obj(item, ('links', 'target', ('urlId', 'id')), 'id', get_all=False)
'query': '''{ if not item_id or item_id == playlist_id:
playerPage(client:"%s", clipId: "%s") { continue
blockedByFsk item_mode = 'sammlung' if item.get('type') == 'compilation' else 'video'
broadcastedOn yield self.url_result(
maturityContentRating f'https://www.ardmediathek.de/{item_mode}/{item_id}',
mediaCollection { ie=(ARDMediathekCollectionIE if item_mode == 'sammlung' else ARDBetaMediathekIE),
_duration **traverse_obj(item, {
_geoblocked 'id': ('id', {str}),
_isLive 'title': ('longTitle', {str}),
_mediaArray { 'duration': ('duration', {int_or_none}),
_mediaStreamArray { 'timestamp': ('broadcastedOn', {parse_iso8601}),
_quality }))
_server
_stream
}
}
_previewImage
_subtitleUrl
_type
}
show {
title
}
image {
src
}
synopsis
title
tracking {
atiCustomVars {
contentId
}
}
}
}''' % (client, video_id),
}).encode(), headers={
'Content-Type': 'application/json'
})['data']['playerPage']
title = player_page['title']
content_id = str_or_none(try_get(
player_page, lambda x: x['tracking']['atiCustomVars']['contentId']))
media_collection = player_page.get('mediaCollection') or {}
if not media_collection and content_id:
media_collection = self._download_json(
'https://www.ardmediathek.de/play/media/' + content_id,
content_id, fatal=False) or {}
info = self._parse_media_info(
media_collection, content_id or video_id,
player_page.get('blockedByFsk'))
age_limit = None
description = player_page.get('synopsis')
maturity_content_rating = player_page.get('maturityContentRating')
if maturity_content_rating:
age_limit = int_or_none(maturity_content_rating.lstrip('FSK'))
if not age_limit and description:
age_limit = int_or_none(self._search_regex(
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
info.update({
'age_limit': age_limit,
'display_id': display_id,
'title': title,
'description': description,
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
'series': try_get(player_page, lambda x: x['show']['title']),
'thumbnail': (media_collection.get('_previewImage')
or try_get(player_page, lambda x: update_url(x['image']['src'], query=None, fragment=None))
or self.get_thumbnail_from_html(display_id, url)),
})
info.update(self._ARD_extract_episode_info(info['title']))
return info
def get_thumbnail_from_html(self, display_id, url): page_data = call_api(0)
webpage = self._download_webpage(url, display_id, fatal=False) or '' full_id = join_nonempty(playlist_id, season_number, version, delim='_')
return (
self._og_search_thumbnail(webpage, default=None) return self.playlist_result(
or self._html_search_meta('thumbnailUrl', webpage, default=None)) OnDemandPagedList(fetch_page, self._PAGE_SIZE), full_id, display_id=display_id,
title=page_data.get('title'), description=page_data.get('synopsis'))

View File

@ -64,7 +64,7 @@ class ArkenaIE(InfoExtractor):
raise ExtractorError('Invalid URL', expected=True) raise ExtractorError('Invalid URL', expected=True)
media = self._download_json( media = self._download_json(
'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id), f'https://video.qbrick.com/api/v1/public/accounts/{account_id}/medias/{video_id}',
video_id, query={ video_id, query={
# https://video.qbrick.com/docs/api/examples/library-api.html # https://video.qbrick.com/docs/api/examples/library-api.html
'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags', 'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags',
@ -131,8 +131,8 @@ class ArkenaIE(InfoExtractor):
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
href, video_id, f4m_id='hds', fatal=False)) href, video_id, f4m_id='hds', fatal=False))
elif mime_type == 'application/dash+xml': elif mime_type == 'application/dash+xml':
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_mpd_formats(
href, video_id, f4m_id='hds', fatal=False)) href, video_id, mpd_id='dash', fatal=False))
elif mime_type == 'application/vnd.ms-sstr+xml': elif mime_type == 'application/vnd.ms-sstr+xml':
formats.extend(self._extract_ism_formats( formats.extend(self._extract_ism_formats(
href, video_id, ism_id='mss', fatal=False)) href, video_id, ism_id='mss', fatal=False))

View File

@ -1,11 +1,9 @@
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
format_field,
float_or_none, float_or_none,
format_field,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
remove_start, remove_start,
@ -35,7 +33,7 @@ class ArnesIE(InfoExtractor):
'view_count': int, 'view_count': int,
'tags': ['linearna_algebra'], 'tags': ['linearna_algebra'],
'start_time': 10, 'start_time': 10,
} },
}, { }, {
'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4', 'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
'only_matching': True, 'only_matching': True,
@ -93,6 +91,6 @@ class ArnesIE(InfoExtractor):
'duration': float_or_none(video.get('duration'), 1000), 'duration': float_or_none(video.get('duration'), 1000),
'view_count': int_or_none(video.get('views')), 'view_count': int_or_none(video.get('views')),
'tags': video.get('hashtags'), 'tags': video.get('hashtags'),
'start_time': int_or_none(compat_parse_qs( 'start_time': int_or_none(urllib.parse.parse_qs(
compat_urllib_parse_urlparse(url).query).get('t', [None])[0]), urllib.parse.urlparse(url).query).get('t', [None])[0]),
} }

View File

@ -0,0 +1,303 @@
import re
from .common import InfoExtractor
from ..utils import float_or_none, int_or_none, parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class Art19IE(InfoExtractor):
_UUID_REGEX = r'[\da-f]{8}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{12}'
_VALID_URL = [
rf'https?://(?:www\.)?art19\.com/shows/[^/#?]+/episodes/(?P<id>{_UUID_REGEX})',
rf'https?://rss\.art19\.com/episodes/(?P<id>{_UUID_REGEX})\.mp3',
]
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL[0]})']
_TESTS = [{
'url': 'https://rss.art19.com/episodes/5ba1413c-48b8-472b-9cc3-cfd952340bdb.mp3',
'info_dict': {
'id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
'ext': 'mp3',
'title': 'Why Did DeSantis Drop Out?',
'series': 'The Daily Briefing',
'release_timestamp': 1705941275,
'description': 'md5:da38961da4a3f7e419471365e3c6b49f',
'episode': 'Episode 582',
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'series_id': 'ed52a0ab-08b1-4def-8afc-549e4d93296d',
'upload_date': '20240122',
'timestamp': 1705940815,
'episode_number': 582,
'modified_date': '20240122',
'episode_id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
'modified_timestamp': 1705941275,
'release_date': '20240122',
'duration': 527.4,
},
}, {
'url': 'https://art19.com/shows/scamfluencers/episodes/8319b776-4153-4d22-8630-631f204a03dd',
'info_dict': {
'id': '8319b776-4153-4d22-8630-631f204a03dd',
'ext': 'mp3',
'title': 'Martha Stewart: The Homemaker Hustler Part 2',
'modified_date': '20240116',
'upload_date': '20240105',
'modified_timestamp': 1705435802,
'episode_id': '8319b776-4153-4d22-8630-631f204a03dd',
'series_id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'description': 'md5:4aa7cfd1358dc57e729835bc208d7893',
'release_timestamp': 1705305660,
'release_date': '20240115',
'timestamp': 1704481536,
'episode_number': 88,
'series': 'Scamfluencers',
'duration': 2588.37501,
'episode': 'Episode 88',
},
}]
_WEBPAGE_TESTS = [{
'url': 'https://www.nu.nl/formule-1/6291456/verstappen-wordt-een-synoniem-voor-formule-1.html',
'info_dict': {
'id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
'ext': 'mp3',
'title': "'Verstappen wordt een synoniem voor Formule 1'",
'season': 'Seizoen 6',
'description': 'md5:39a7159a31c4cda312b2e893bdd5c071',
'episode_id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
'duration': 3061.82111,
'series_id': '93f4e113-2a60-4609-a564-755058fa40d8',
'release_date': '20231126',
'modified_timestamp': 1701156004,
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'season_number': 6,
'episode_number': 52,
'modified_date': '20231128',
'upload_date': '20231126',
'timestamp': 1701025981,
'season_id': '36097c1e-7455-490d-a2fe-e2f10b4d5f26',
'series': 'De Boordradio',
'release_timestamp': 1701026308,
'episode': 'Episode 52',
},
}, {
'url': 'https://www.wishtv.com/podcast-episode/larry-bucshon-announces-retirement-from-congress/',
'info_dict': {
'id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
'ext': 'mp3',
'title': 'Larry Bucshon announces retirement from congress',
'upload_date': '20240115',
'episode_number': 148,
'episode': 'Episode 148',
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'release_date': '20240115',
'timestamp': 1705328205,
'release_timestamp': 1705329275,
'series': 'All INdiana Politics',
'modified_date': '20240117',
'modified_timestamp': 1705458901,
'series_id': 'c4af6c27-b10f-4ff2-9f84-0f407df86ff1',
'episode_id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
'description': 'md5:53b5239e4d14973a87125c217c255b2a',
'duration': 1256.18848,
},
}]
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Yield Art19 episode URLs embedded in *webpage*.

    Results of the parent class's ``_extract_embed_urls`` are yielded first.
    After that, one ``rss.art19.com`` MP3 URL is yielded for every ``<div>``
    whose ``class`` contains ``art19-web-player`` and which is followed, in
    the same tag, by a ``data-episode-id`` attribute matching ``_UUID_REGEX``.
    """
    yield from super()._extract_embed_urls(url, webpage)

    # The single capture group is the episode UUID from data-episode-id.
    player_div_re = rf'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-episode-id=[\'"]({cls._UUID_REGEX})[\'"]'
    for player_div in re.finditer(player_div_re, webpage):
        yield f'https://rss.art19.com/episodes/{player_div.group(1)}.mp3'
def _real_extract(self, url):
    """Extract one Art19 episode identified by the UUID in *url*.

    Two JSON documents are requested, both with ``fatal=False``: the player
    metadata from art19.com and the RSS metadata from rss.art19.com. The
    returned info dict always contains the direct MP3 format; further formats
    are added from the RSS metadata's ``content.media`` mapping. Fields from
    the RSS metadata are merged last, so they override same-named fields
    taken from the player metadata.
    """
    episode_id = self._match_id(url)

    player_metadata = self._download_json(
        f'https://art19.com/episodes/{episode_id}', episode_id,
        note='Downloading player metadata', fatal=False,
        headers={'Accept': 'application/vnd.art19.v0+json'})
    rss_metadata = self._download_json(
        f'https://rss.art19.com/episodes/{episode_id}.json', episode_id, fatal=False,
        note='Downloading RSS metadata')

    # The direct MP3 URL is derived from the ID alone, so it is always listed.
    formats = [{
        'format_id': 'direct',
        'url': f'https://rss.art19.com/episodes/{episode_id}.mp3',
        'vcodec': 'none',
        'acodec': 'mp3',
    }]

    media_items = traverse_obj(rss_metadata, ('content', 'media', {dict.items}, ...))
    for media_name, media in media_items:
        # 'waveform_bin' entries are never offered as a format.
        media_url = None
        if media_name != 'waveform_bin':
            media_url = traverse_obj(media, ('url', {url_or_none}))
        if not media_url:
            continue
        formats.append({
            'format_id': media_name,
            'url': media_url,
            'vcodec': 'none',
            'acodec': media_name,
            'quality': -2 if media_name == 'ogg' else -1,
        })

    info = {
        'id': episode_id,
        'formats': formats,
    }
    info.update(traverse_obj(player_metadata, ('episode', {
        'title': ('title', {str}),
        'description': ('description_plain', {str}),
        'episode_id': ('id', {str}),
        'episode_number': ('episode_number', {int_or_none}),
        'season_id': ('season_id', {str}),
        'series_id': ('series_id', {str}),
        'timestamp': ('created_at', {parse_iso8601}),
        'release_timestamp': ('released_at', {parse_iso8601}),
        'modified_timestamp': ('updated_at', {parse_iso8601}),
    })))
    # Applied second on purpose: RSS values win over player values.
    info.update(traverse_obj(rss_metadata, ('content', {
        'title': ('episode_title', {str}),
        'description': ('episode_description_plain', {str}),
        'episode_id': ('episode_id', {str}),
        'episode_number': ('episode_number', {int_or_none}),
        'season': ('season_title', {str}),
        'season_id': ('season_id', {str}),
        'season_number': ('season_number', {int_or_none}),
        'series': ('series_title', {str}),
        'series_id': ('series_id', {str}),
        'thumbnail': ('cover_image', {url_or_none}),
        'duration': ('duration', {float_or_none}),
    })))
    return info
class Art19ShowIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?art19\.com/shows/(?P<id>[\w-]+)(?:/embed)?/?'
_VALID_URL = [
rf'{_VALID_URL_BASE}(?:$|[#?])',
r'https?://rss\.art19\.com/(?P<id>[\w-]+)/?(?:$|[#?])',
]
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL_BASE}[^\'"])']
_TESTS = [{
'url': 'https://www.art19.com/shows/5898c087-a14f-48dc-b6fc-a2280a1ff6e0/',
'info_dict': {
'_type': 'playlist',
'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
'display_id': 'echt-gebeurd',
'title': 'Echt Gebeurd',
'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
'timestamp': 1492642167,
'upload_date': '20170419',
'modified_timestamp': int,
'modified_date': str,
'tags': 'count:7',
},
'playlist_mincount': 425,
}, {
'url': 'https://www.art19.com/shows/echt-gebeurd',
'info_dict': {
'_type': 'playlist',
'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
'display_id': 'echt-gebeurd',
'title': 'Echt Gebeurd',
'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
'timestamp': 1492642167,
'upload_date': '20170419',
'modified_timestamp': int,
'modified_date': str,
'tags': 'count:7',
},
'playlist_mincount': 425,
}, {
'url': 'https://rss.art19.com/scamfluencers',
'info_dict': {
'_type': 'playlist',
'id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
'display_id': 'scamfluencers',
'title': 'Scamfluencers',
'description': 'md5:7d239d670c0ced6dadbf71c4caf764b7',
'timestamp': 1647368573,
'upload_date': '20220315',
'modified_timestamp': int,
'modified_date': str,
'tags': [],
},
'playlist_mincount': 90,
}, {
'url': 'https://art19.com/shows/enthuellt/embed',
'info_dict': {
'_type': 'playlist',
'id': 'e2cacf57-bb8a-4263-aa81-719bcdd4f80c',
'display_id': 'enthuellt',
'title': 'Enthüllt',
'description': 'md5:17752246643414a2fd51744fc9a1c08e',
'timestamp': 1601645860,
'upload_date': '20201002',
'modified_timestamp': int,
'modified_date': str,
'tags': 'count:10',
},
'playlist_mincount': 10,
}]
_WEBPAGE_TESTS = [{
'url': 'https://deconstructingyourself.com/deconstructing-yourself-podcast',
'info_dict': {
'_type': 'playlist',
'id': 'cfbb9b01-c295-4adb-8726-adde7c03cf21',
'display_id': 'deconstructing-yourself',
'title': 'Deconstructing Yourself',
'description': 'md5:dab5082b28b248a35476abf64768854d',
'timestamp': 1570581181,
'upload_date': '20191009',
'modified_timestamp': int,
'modified_date': str,
'tags': 'count:5',
},
'playlist_mincount': 80,
}, {
'url': 'https://chicagoreader.com/columns-opinion/podcasts/ben-joravsky-show-podcast-episodes/',
'info_dict': {
'_type': 'playlist',
'id': '9dfa2c37-ab87-4c13-8388-4897914313ec',
'display_id': 'the-ben-joravsky-show',
'title': 'The Ben Joravsky Show',
'description': 'md5:c0f3ec0ee0dbea764390e521adc8780a',
'timestamp': 1550875095,
'upload_date': '20190222',
'modified_timestamp': int,
'modified_date': str,
'tags': ['Chicago Politics', 'chicago', 'Ben Joravsky'],
},
'playlist_mincount': 1900,
}]
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Yield Art19 show URLs embedded in *webpage*.

    Results of the parent class's ``_extract_embed_urls`` are yielded first.
    After that, one ``art19.com/shows/<id>`` URL is yielded for every
    ``<div>`` whose ``class`` contains ``art19-web-player`` and which is
    followed, in the same tag, by a ``data-series-id`` attribute.
    """
    yield from super()._extract_embed_urls(url, webpage)

    # The single capture group is the value of data-series-id.
    player_div_re = r'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-series-id=[\'"]([\w-]+)[\'"]'
    for player_div in re.finditer(player_div_re, webpage):
        yield f'https://art19.com/shows/{player_div.group(1)}'
def _real_extract(self, url):
    """Extract an Art19 show as a playlist of its episodes.

    Downloads the series metadata JSON and returns a playlist dict with one
    ``Art19IE`` URL result per entry of ``series.episode_ids``, the show-level
    fields from ``series``, and ``tags`` built from the ``name`` of each item
    in the top-level ``tags`` list of the response.
    """
    series_id = self._match_id(url)
    series_metadata = self._download_json(
        f'https://art19.com/series/{series_id}', series_id, note='Downloading series metadata',
        headers={'Accept': 'application/vnd.art19.v0+json'})

    episode_ids = traverse_obj(series_metadata, ('series', 'episode_ids', ..., {str}))
    playlist = {
        '_type': 'playlist',
        'entries': [
            self.url_result(f'https://rss.art19.com/episodes/{episode_id}.mp3', Art19IE)
            for episode_id in episode_ids
        ],
    }
    playlist.update(traverse_obj(series_metadata, ('series', {
        'id': ('id', {str}),
        'display_id': ('slug', {str}),
        'title': ('title', {str}),
        'description': ('description_plain', {str}),
        'timestamp': ('created_at', {parse_iso8601}),
        'modified_timestamp': ('updated_at', {parse_iso8601}),
    })))
    # Tags are read from the top level of the response, not from 'series'.
    playlist['tags'] = traverse_obj(series_metadata, ('tags', ..., 'name', {str}))
    return playlist

View File

@ -5,6 +5,7 @@ from ..utils import (
ExtractorError, ExtractorError,
GeoRestrictedError, GeoRestrictedError,
int_or_none, int_or_none,
join_nonempty,
parse_iso8601, parse_iso8601,
parse_qs, parse_qs,
strip_or_none, strip_or_none,
@ -19,46 +20,22 @@ class ArteTVBaseIE(InfoExtractor):
class ArteTVIE(ArteTVBaseIE): class ArteTVIE(ArteTVBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = rf'''(?x)
(?:https?:// (?:https?://
(?: (?:
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos| (?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos|
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s) api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>{ArteTVBaseIE._ARTE_LANGUAGES})
) )
|arte://program) |arte://program)
/(?P<id>\d{6}-\d{3}-[AF]|LIVE) /(?P<id>\d{{6}}-\d{{3}}-[AF]|LIVE)
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES} '''
_TESTS = [{ _TESTS = [{
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
'info_dict': {
'id': '100103-000-A',
'title': 'USA: Dyskryminacja na porodówce',
'description': 'md5:242017b7cce59ffae340a54baefcafb1',
'alt_title': 'ARTE Reportage',
'upload_date': '20201103',
'duration': 554,
'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530',
'timestamp': 1604417980,
'ext': 'mp4',
},
'params': {'skip_download': 'm3u8'}
}, { }, {
'note': 'No alt_title', 'note': 'No alt_title',
'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/', 'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
'info_dict': { 'only_matching': True,
'id': '110371-000-A',
'ext': 'mp4',
'upload_date': '20220718',
'duration': 154,
'timestamp': 1658162460,
'description': 'md5:5890f36fe7dccfadb8b7c0891de54786',
'title': 'La chaleur, supplice des arbres de rue',
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/CPE2sQDtD8GLQgt8DuYHLf/940x530',
},
'params': {'skip_download': 'm3u8'}
}, { }, {
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A', 'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
'only_matching': True, 'only_matching': True,
@ -67,19 +44,38 @@ class ArteTVIE(ArteTVBaseIE):
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/', 'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
'only_matching': True,
}, {
'url': 'https://www.arte.tv/fr/videos/109067-000-A/la-loi-de-teheran/',
'info_dict': { 'info_dict': {
'id': '110203-006-A', 'id': '109067-000-A',
'chapters': 'count:16', 'ext': 'mp4',
'description': 'md5:cf592f1df52fe52007e3f8eac813c084', 'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739',
'alt_title': 'Zaz', 'timestamp': 1713927600,
'title': 'Baloise Session 2022', 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/3rR6PLzfbigSkkeHtkCZNF/940x530',
'timestamp': 1668445200, 'duration': 7599,
'duration': 4054, 'title': 'La loi de Téhéran',
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/ubQjmVCGyRx3hmBuZEK9QZ/940x530', 'upload_date': '20240424',
'upload_date': '20221114', 'subtitles': {
'fr': 'mincount:1',
'fr-acc': 'mincount:1',
'fr-forced': 'mincount:1',
},
},
}, {
'note': 'age-restricted',
'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/',
'info_dict': {
'id': '006785-000-A',
'description': 'md5:c2f94fdfefc8a280e4dab68ab96ab0ba',
'title': 'The Element of Crime',
'timestamp': 1696111200,
'duration': 5849,
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530',
'upload_date': '20230930',
'ext': 'mp4', 'ext': 'mp4',
}, },
'expected_warnings': ['geo restricted'] 'skip': '404 Not Found',
}] }]
_GEO_BYPASS = True _GEO_BYPASS = True
@ -130,13 +126,27 @@ class ArteTVIE(ArteTVBaseIE):
), ),
} }
@staticmethod
def _fix_accessible_subs_locale(subs):
    """Regroup subtitle tracks under language keys that carry a variant suffix.

    *subs* maps a language code to a list of subtitle format dicts. Each
    format is filed under a key built with ``join_nonempty(lang, suffix)``:

    * ``acc`` when its URL ends with ``-MAL.m3u8``;
    * ``forced`` when its URL does not contain ``_VO``;
    * no suffix otherwise.

    A format without a ``url`` is treated as having an empty URL, which
    therefore falls into the ``forced`` case. Returns the new mapping; *subs*
    itself is not modified.
    """
    regrouped = {}
    for lang, tracks in subs.items():
        for track in tracks:
            track_url = track.get('url') or ''
            if track_url.endswith('-MAL.m3u8'):
                suffix = 'acc'
            elif '_VO' not in track_url:
                suffix = 'forced'
            else:
                suffix = None
            regrouped.setdefault(join_nonempty(lang, suffix), []).append(track)
    return regrouped
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
lang = mobj.group('lang') or mobj.group('lang_2') lang = mobj.group('lang') or mobj.group('lang_2')
langauge_code = self._LANG_MAP.get(lang) language_code = self._LANG_MAP.get(lang)
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id) config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
'x-validated-age': '18',
})
geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {} geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {}
if geoblocking.get('restrictedArea'): if geoblocking.get('restrictedArea'):
@ -160,16 +170,16 @@ class ArteTVIE(ArteTVBaseIE):
m = self._VERSION_CODE_RE.match(stream_version_code) m = self._VERSION_CODE_RE.match(stream_version_code)
if m: if m:
lang_pref = int(''.join('01'[x] for x in ( lang_pref = int(''.join('01'[x] for x in (
m.group('vlang') == langauge_code, # we prefer voice in the requested language m.group('vlang') == language_code, # we prefer voice in the requested language
not m.group('audio_desc'), # and not the audio description version not m.group('audio_desc'), # and not the audio description version
bool(m.group('original_voice')), # but if voice is not in the requested language, at least choose the original voice bool(m.group('original_voice')), # but if voice is not in the requested language, at least choose the original voice
m.group('sub_lang') == langauge_code, # if subtitles are present, we prefer them in the requested language m.group('sub_lang') == language_code, # if subtitles are present, we prefer them in the requested language
not m.group('has_sub'), # but we prefer no subtitles otherwise not m.group('has_sub'), # but we prefer no subtitles otherwise
not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles
))) )))
short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?') short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?')
if stream['protocol'].startswith('HLS'): if 'HLS' in stream['protocol']:
fmts, subs = self._extract_m3u8_formats_and_subtitles( fmts, subs = self._extract_m3u8_formats_and_subtitles(
stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False) stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False)
for fmt in fmts: for fmt in fmts:
@ -181,6 +191,7 @@ class ArteTVIE(ArteTVBaseIE):
secondary_formats.extend(fmts) secondary_formats.extend(fmts)
else: else:
formats.extend(fmts) formats.extend(fmts)
subs = self._fix_accessible_subs_locale(subs)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)
elif stream['protocol'] in ('HTTPS', 'RTMP'): elif stream['protocol'] in ('HTTPS', 'RTMP'):
@ -236,7 +247,7 @@ class ArteTVEmbedIE(InfoExtractor):
'description': 'md5:be40b667f45189632b78c1425c7c2ce1', 'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
'upload_date': '20201116', 'upload_date': '20201116',
}, },
'skip': 'No video available' 'skip': 'No video available',
}, { }, {
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A', 'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
'only_matching': True, 'only_matching': True,
@ -251,7 +262,7 @@ class ArteTVEmbedIE(InfoExtractor):
class ArteTVPlaylistIE(ArteTVBaseIE): class ArteTVPlaylistIE(ArteTVBaseIE):
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES _VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>RC-\d{{6}})'
_TESTS = [{ _TESTS = [{
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/', 'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
'only_matching': True, 'only_matching': True,
@ -287,7 +298,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
class ArteTVCategoryIE(ArteTVBaseIE): class ArteTVCategoryIE(ArteTVBaseIE):
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES _VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$'
_TESTS = [{ _TESTS = [{
'url': 'https://www.arte.tv/en/videos/politics-and-society/', 'url': 'https://www.arte.tv/en/videos/politics-and-society/',
'info_dict': { 'info_dict': {
@ -301,7 +312,7 @@ class ArteTVCategoryIE(ArteTVBaseIE):
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return ( return (
not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, )) not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE))
and super().suitable(url)) and super().suitable(url))
def _real_extract(self, url): def _real_extract(self, url):
@ -310,12 +321,12 @@ class ArteTVCategoryIE(ArteTVBaseIE):
items = [] items = []
for video in re.finditer( for video in re.finditer(
r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang, rf'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/{lang}/videos/[\w/-]+)(?P=q)',
webpage): webpage):
video = video.group('url') video = video.group('url')
if video == url: if video == url:
continue continue
if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )): if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE)):
items.append(video) items.append(video)
title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None

View File

@ -1,196 +0,0 @@
import functools
import re
from .common import InfoExtractor
from .kaltura import KalturaIE
from ..utils import (
extract_attributes,
int_or_none,
OnDemandPagedList,
parse_age_limit,
strip_or_none,
try_get,
)
class AsianCrushBaseIE(InfoExtractor):
    """Shared API access and metadata parsing for the AsianCrush family of sites."""

    _VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
    # Keys of a video object that are searched, in this order, for a Kaltura URL.
    _KALTURA_KEYS = [
        'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
        'widescreen_thumbnail_url', 'screencap_widescreen',
    ]
    # Hosts whose API hostname is "api<suffix>.<host>" rather than "api.<host>".
    _API_SUFFIX = {'retrocrush.tv': '-ott'}

    def _call_api(self, host, endpoint, video_id, query, resource):
        """Request *endpoint* on the API of *host* and return its ``objects`` value."""
        api_url = 'https://api%s.%s/%s' % (self._API_SUFFIX.get(host, ''), host, endpoint)
        response = self._download_json(
            api_url, video_id, 'Downloading %s JSON metadata' % resource,
            query=query, headers=self.geo_verification_headers())
        return response['objects']

    def _download_object_data(self, host, object_id, resource):
        """Return the first object the ``search`` endpoint reports for *object_id*."""
        found_objects = self._call_api(
            host, 'search', object_id, {'id': object_id}, resource)
        return found_objects[0]

    def _get_object_description(self, obj):
        """Return the stripped long description of *obj*, else its short one."""
        raw_description = obj.get('long_description') or obj.get('short_description')
        return strip_or_none(raw_description)

    def _parse_video_data(self, video):
        """Turn an API video object into a ``url_transparent`` Kaltura result.

        The partner and entry IDs are taken from the first ``_KALTURA_KEYS``
        value that contains a ``/p/<partner>/.../entryId/<entry>/`` path. When
        none matches, both stay ``None`` and are formatted into the URL as-is.
        """
        title = video['name']

        partner_id = entry_id = None
        for key in self._KALTURA_KEYS:
            candidate_url = video.get(key)
            if not candidate_url:
                continue
            id_match = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', candidate_url)
            if id_match:
                partner_id, entry_id = id_match.groups()
                break

        meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or []
        # Categories without a truthy name are dropped.
        categories = [
            name for name in [category.get('name') for category in meta_categories]
            if name
        ]

        show_info = video.get('show_info') or {}

        return {
            '_type': 'url_transparent',
            'url': 'kaltura:%s:%s' % (partner_id, entry_id),
            'ie_key': KalturaIE.ie_key(),
            'id': entry_id,
            'title': title,
            'description': self._get_object_description(video),
            'age_limit': parse_age_limit(video.get('mpaa_rating') or video.get('tv_rating')),
            'categories': categories,
            'series': show_info.get('show_name'),
            'season_number': int_or_none(show_info.get('season_num')),
            'season_id': show_info.get('season_id'),
            'episode_number': int_or_none(show_info.get('episode_num')),
        }
class AsianCrushIE(AsianCrushBaseIE):
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE
_TESTS = [{
'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
'info_dict': {
'id': '1_y4tmjm5r',
'ext': 'mp4',
'title': 'Women Who Flirt',
'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
'timestamp': 1496936429,
'upload_date': '20170608',
'uploader_id': 'craig@crifkin.com',
'age_limit': 13,
'categories': 'count:5',
'duration': 5812,
},
}, {
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
'only_matching': True,
}, {
'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
'only_matching': True,
}, {
'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
'only_matching': True,
}, {
'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
'only_matching': True,
}, {
'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
'only_matching': True,
}, {
'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
'only_matching': True,
}, {
'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
'only_matching': True,
}]
def _real_extract(self, url):
    """Extract a single video from one of the AsianCrush family of sites.

    For ``cocoro.tv`` the page is downloaded first and the ``entry_id`` found
    in its ``iEmbedVars`` object, when present, replaces the ID taken from
    the URL. The video object is then fetched from the API and parsed.
    """
    host, video_id = self._match_valid_url(url).groups()

    if host == 'cocoro.tv':
        webpage = self._download_webpage(url, video_id)
        embed_vars_json = self._search_regex(
            r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
            default='{}')
        embed_vars = self._parse_json(embed_vars_json, video_id, fatal=False) or {}
        # Keep the URL-derived ID when the page does not provide one.
        video_id = embed_vars.get('entry_id') or video_id

    return self._parse_video_data(
        self._download_object_data(host, video_id, 'video'))
class AsianCrushPlaylistIE(AsianCrushBaseIE):
_VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE
_TESTS = [{
'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai',
'info_dict': {
'id': '6447',
'title': 'Fruity Samurai',
'description': 'md5:7535174487e4a202d3872a7fc8f2f154',
},
'playlist_count': 13,
}, {
'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
'only_matching': True,
}, {
'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
'only_matching': True,
}, {
'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
'only_matching': True,
}, {
'url': 'https://www.retrocrush.tv/series/012355s/true-tears',
'only_matching': True,
}]
_PAGE_SIZE = 1000000000
def _fetch_page(self, domain, parent_id, page):
    """Yield parsed video results for the zero-based *page* of a series.

    Requests up to ``_PAGE_SIZE`` video objects referenced by *parent_id*,
    starting at offset ``page * _PAGE_SIZE``.
    """
    page_query = {
        'max': self._PAGE_SIZE,
        'object_type': 'video',
        'parent_id': parent_id,
        'start': page * self._PAGE_SIZE,
    }
    # The progress note shows a one-based page number.
    page_videos = self._call_api(
        domain, 'getreferencedobjects', parent_id, page_query, 'page %d' % (page + 1))
    yield from map(self._parse_video_data, page_videos)
def _real_extract(self, url):
    """Extract a series as a playlist.

    For every host except ``cocoro.tv`` the show object comes from the API
    and the entries are paged lazily through ``_fetch_page``. For
    ``cocoro.tv`` the series page itself is scraped: entries are the video
    links whose ``class`` attribute equals ``clearfix``, and the title and
    description are read from the page markup and meta tags.
    """
    host, playlist_id = self._match_valid_url(url).groups()

    if host != 'cocoro.tv':
        show = self._download_object_data(host, playlist_id, 'show')
        title = show.get('name')
        description = self._get_object_description(show)
        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, host, playlist_id),
            self._PAGE_SIZE)
        return self.playlist_result(entries, playlist_id, title, description)

    webpage = self._download_webpage(url, playlist_id)

    video_link_re = r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL
    entries = [
        self.url_result(link.group('url'), ie=AsianCrushIE.ie_key())
        for link in re.finditer(video_link_re, webpage)
        if extract_attributes(link.group(0)).get('class') == 'clearfix'
    ]

    # Title sources are tried in order; later ones only run if earlier ones fail.
    # NOTE(review): `[^>]` before `\bid=` matches exactly one character, so the
    # first pattern only hits when `id` is the first attribute of the <h1> --
    # confirm whether `[^>]*` was intended.
    title = (
        self._html_search_regex(
            r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
            'title', default=None)
        or self._og_search_title(webpage, default=None)
        or self._html_search_meta('twitter:title', webpage, 'title', default=None)
        or self._html_extract_title(webpage))
    if title:
        # Drop everything from the last-matching " | ..." suffix onwards.
        title = re.sub(r'\s*\|\s*.+?$', '', title)

    description = (
        self._og_search_description(webpage, default=None)
        or self._html_search_meta(
            'twitter:description', webpage, 'description', fatal=False))

    return self.playlist_result(entries, playlist_id, title, description)

View File

@ -0,0 +1,168 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
merge_dicts,
parse_iso8601,
url_or_none,
)
from ..utils.traversal import traverse_obj
class AsobiChannelBaseIE(InfoExtractor):
    """Common pieces shared by the ASOBI CHANNEL extractors."""

    # Header carrying the microCMS API key.
    _MICROCMS_HEADER = {'X-MICROCMS-API-KEY': 'qRaKehul9AHU8KtL0dnq1OCLKnFec6yrbcz3'}

    def _extract_info(self, metadata):
        """Map a media metadata object onto the common info-dict fields."""
        field_paths = {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('body', {clean_html}),
            'thumbnail': ('contents', 'video_thumb', 'url', {url_or_none}),
            'timestamp': ('publishedAt', {parse_iso8601}),
            'modified_timestamp': ('updatedAt', {parse_iso8601}),
            'channel': ('channel', 'name', {str}),
            'channel_id': ('channel', 'id', {str}),
        }
        return traverse_obj(metadata, field_paths)
class AsobiChannelIE(AsobiChannelBaseIE):
IE_NAME = 'asobichannel'
IE_DESC = 'ASOBI CHANNEL'
_VALID_URL = r'https?://asobichannel\.asobistore\.jp/watch/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://asobichannel.asobistore.jp/watch/1ypp48qd32p',
'md5': '39df74e872afe032c4eb27b89144fc92',
'info_dict': {
'id': '1ypp48qd32p',
'ext': 'mp4',
'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
'description': 'md5:b930bd2199c9b2fd75951ce4aaa7efd2',
'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/a8e6f84119f54eb9ab4ce16729239905/%E3%82%B5%E3%83%A0%E3%83%8D%20(1).png',
'timestamp': 1697098247,
'upload_date': '20231012',
'modified_timestamp': 1698381162,
'modified_date': '20231027',
'channel': 'アイドルマスター',
'channel_id': 'idolmaster',
},
}, {
'url': 'https://asobichannel.asobistore.jp/watch/redigiwnjzqj',
'md5': '229fa8fb5c591c75ce8c37a497f113f6',
'info_dict': {
'id': 'redigiwnjzqj',
'ext': 'mp4',
'title': '【おまけ放送】アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
'description': 'md5:7d9cd35fb54425a6967822bd564ea2d9',
'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/20e5c1d6184242eebc2512a5dec59bf0/P1_%E5%8E%9F%E3%81%A3%E3%81%B1%E3%82%B5%E3%83%A0%E3%83%8D.png',
'modified_timestamp': 1697797125,
'modified_date': '20231020',
'timestamp': 1697261769,
'upload_date': '20231014',
'channel': 'アイドルマスター',
'channel_id': 'idolmaster',
},
}]
_survapi_header = None
def _real_initialize(self):
    """Fetch an API token and store the resulting Authorization header.

    The header is kept in ``self._survapi_header`` and is sent with the
    survapi requests made by ``_process_vod`` and ``_process_live``.
    """
    api_token = self._download_json(
        'https://asobichannel-api.asobistore.jp/api/v1/vspf/token', None,
        note='Retrieving API token')
    bearer_value = f'Bearer {api_token}'
    self._survapi_header = {'Authorization': bearer_value}
def _process_vod(self, video_id, metadata):
content_id = metadata['contents']['video_id']
vod_data = self._download_json(
f'https://survapi.channel.or.jp/proxy/v1/contents/{content_id}/get_by_cuid', video_id,
headers=self._survapi_header, note='Downloading vod data')
return {
'formats': self._extract_m3u8_formats(vod_data['ex_content']['streaming_url'], video_id),
}
def _process_live(self, video_id, metadata):
content_id = metadata['contents']['video_id']
event_data = self._download_json(
f'https://survapi.channel.or.jp/ex/events/{content_id}?embed=channel', video_id,
headers=self._survapi_header, note='Downloading event data')
player_type = traverse_obj(event_data, ('data', 'Player_type', {str}))
if player_type == 'poster':
self.raise_no_formats('Live event has not yet started', expected=True)
live_status = 'is_upcoming'
formats = []
elif player_type == 'player':
live_status = 'is_live'
formats = self._extract_m3u8_formats(
event_data['data']['Channel']['Custom_live_url'], video_id, live=True)
else:
raise ExtractorError('Unsupported player type {player_type!r}')
return {
'release_timestamp': traverse_obj(metadata, ('period', 'start', {parse_iso8601})),
'live_status': live_status,
'formats': formats,
}
def _real_extract(self, url):
video_id = self._match_id(url)
metadata = self._download_json(
f'https://channel.microcms.io/api/v1/media/{video_id}', video_id,
headers=self._MICROCMS_HEADER)
info = self._extract_info(metadata)
video_type = traverse_obj(metadata, ('contents', 'video_type', 0, {str}))
if video_type == 'VOD':
return merge_dicts(info, self._process_vod(video_id, metadata))
if video_type == 'LIVE':
return merge_dicts(info, self._process_live(video_id, metadata))
raise ExtractorError(f'Unexpected video type {video_type!r}')
class AsobiChannelTagURLIE(AsobiChannelBaseIE):
    """Playlist extractor for ASOBI CHANNEL tag pages."""

    IE_NAME = 'asobichannel:tag'
    IE_DESC = 'ASOBI CHANNEL'
    _VALID_URL = r'https?://asobichannel\.asobistore\.jp/tag/(?P<id>[a-z0-9-_]+)'
    _TESTS = [{
        'url': 'https://asobichannel.asobistore.jp/tag/bjhh-nbcja',
        'info_dict': {
            'id': 'bjhh-nbcja',
            'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://asobichannel.asobistore.jp/tag/hvm5qw3c6od',
        'info_dict': {
            'id': 'hvm5qw3c6od',
            'title': 'アイマスMOIW2023ラジオ',
        },
        'playlist_mincount': 13,
    }]

    def _real_extract(self, url):
        """Return a playlist of watch-page URL entries for the tag in `url`.

        The playlist title is read from the page's Next.js data (looked up
        non-fatally); the entries come from the media listing filtered by tag.
        """
        tag_id = self._match_id(url)
        webpage = self._download_webpage(url, tag_id)
        nextjs_data = self._search_nextjs_data(webpage, tag_id, fatal=False)
        playlist_title = traverse_obj(
            nextjs_data, ('props', 'pageProps', 'data', 'name', {str}))

        listing = self._download_json(
            f'https://channel.microcms.io/api/v1/media?limit=999&filters=(tag[contains]{tag_id})',
            tag_id, headers=self._MICROCMS_HEADER)

        def iter_entries():
            # only items for which v['id'] is truthy are turned into entries
            for item in traverse_obj(listing, ('contents', lambda _, v: v['id'])):
                entry = {
                    '_type': 'url',
                    'url': f'https://asobichannel.asobistore.jp/watch/{item["id"]}',
                    'ie_key': AsobiChannelIE.ie_key(),
                }
                # extracted metadata is layered on top of the base keys
                entry.update(self._extract_info(item))
                yield entry

        return self.playlist_result(iter_entries(), tag_id, playlist_title)

View File

@ -0,0 +1,155 @@
import functools
from .common import InfoExtractor
from ..utils import str_or_none, url_or_none
from ..utils.traversal import traverse_obj
class AsobiStageIE(InfoExtractor):
    """Playlist extractor for ASOBISTAGE event archive/player pages."""

    IE_DESC = 'ASOBISTAGE (アソビステージ)'
    _VALID_URL = r'https?://asobistage\.asobistore\.jp/event/(?P<id>(?P<event>\w+)/(?P<type>archive|player)/(?P<slug>\w+))(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://asobistage.asobistore.jp/event/315passionhour_2022summer/archive/frame',
        'info_dict': {
            'id': '315passionhour_2022summer/archive/frame',
            'title': '315プロダクションプレゼンツ 315パッションアワー!!!',
            'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': 'edff52f2',
                'ext': 'mp4',
                'title': '315passion_FRAME_only',
                'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
            },
        }],
    }, {
        'url': 'https://asobistage.asobistore.jp/event/idolmaster_idolworld2023_goods/archive/live',
        'info_dict': {
            'id': 'idolmaster_idolworld2023_goods/archive/live',
            'title': 'md5:378510b6e830129d505885908bd6c576',
            'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '3aef7110',
                'ext': 'mp4',
                'title': 'asobistore_station_1020_serverREC',
                'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
            },
        }],
    }, {
        'url': 'https://asobistage.asobistore.jp/event/sidem_fclive_bpct/archive/premium_hc',
        'playlist_count': 4,
        'info_dict': {
            'id': 'sidem_fclive_bpct/archive/premium_hc',
            'title': '315 Production presents FNTASTIC COMBINATION LIVE BRAINPOWER!!/CONNECTIME!!!!',
            'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
        },
    }, {
        'url': 'https://asobistage.asobistore.jp/event/ijigenfes_utagassen/player/day1',
        'only_matching': True,
    }]

    _API_HOST = 'https://asobistage-api.asobistore.jp'
    # Class-level default only; _real_initialize() gives each instance its own dict
    _HEADERS = {}
    # Set to True in _real_initialize() when cookies exist for _API_HOST
    _is_logged_in = False

    @functools.cached_property
    def _owned_tickets(self):
        """Set of digital_product_id strings collected from the account history.

        Empty when not logged in. If an endpoint answers 'notlogin', the
        login flag is cleared and collection stops at that point.
        """
        owned_tickets = set()
        if not self._is_logged_in:
            return owned_tickets

        for path, name in [
            ('api/v1/purchase_history/list', 'ticket purchase history'),
            ('api/v1/serialcode/list', 'redemption history'),
        ]:
            # HTTP 400 is accepted so that the error payload can be inspected
            response = self._download_json(
                f'{self._API_HOST}/{path}', None, f'Downloading {name}',
                f'Unable to download {name}', expected_status=400)
            if traverse_obj(response, ('payload', 'error_message'), 'error') == 'notlogin':
                self._is_logged_in = False
                break
            owned_tickets.update(
                traverse_obj(response, ('payload', 'value', ..., 'digital_product_id', {str_or_none})))

        return owned_tickets

    def _get_available_channel_id(self, channel):
        """Return the channel's id if it may be viewed, otherwise None.

        A warning is reported when the channel requires a ticket that is not
        among the owned tickets.
        """
        # 'chennel_vspf_id' is the key spelling used throughout this extractor
        channel_id = traverse_obj(channel, ('chennel_vspf_id', {str}))
        if not channel_id:
            return None
        # if rights_type_id == 6, then 'No conditions (no login required - non-members are OK)'
        if traverse_obj(channel, ('viewrights', lambda _, v: v['rights_type_id'] == 6)):
            return channel_id
        available_tickets = traverse_obj(channel, (
            'viewrights', ..., ('tickets', 'serialcodes'), ..., 'digital_product_id', {str_or_none}))
        if not self._owned_tickets.intersection(available_tickets):
            self.report_warning(
                f'You are not a ticketholder for "{channel.get("channel_name") or channel_id}"')
            return None
        return channel_id

    def _real_initialize(self):
        """Record the login state and fetch the bearer token for later requests."""
        if self._get_cookies(self._API_HOST):
            self._is_logged_in = True
        token = self._download_json(
            f'{self._API_HOST}/api/v1/vspf/token', None, 'Getting token', 'Unable to get token')
        # Build a per-instance dict rather than mutating the class-level
        # _HEADERS in place, which would share this token with every other
        # instance of the extractor
        self._HEADERS = {**self._HEADERS, 'Authorization': f'Bearer {token}'}

    def _real_extract(self, url):
        """Return a playlist with one entry per viewable channel of the event page.

        Raises a login-required error when not logged in and no entry could
        be produced.
        """
        webpage, urlh = self._download_webpage_handle(url, self._match_id(url))
        # the final URL of the response is re-matched, not the requested one
        video_id, event, type_, slug = self._match_valid_url(urlh.url).group('id', 'event', 'type', 'slug')
        video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]

        event_data = traverse_obj(
            self._search_nextjs_data(webpage, video_id, default={}),
            ('props', 'pageProps', 'eventCMSData', {
                'title': ('event_name', {str}),
                'thumbnail': ('event_thumbnail_image', {url_or_none}),
            }))

        # channels of the matching broadcast slug, excluding the '00000' id
        available_channels = traverse_obj(self._download_json(
            f'https://asobistage.asobistore.jp/cdn/v101/events/{event}/{video_type}.json',
            video_id, 'Getting channel list', 'Unable to get channel list'), (
            video_type, lambda _, v: v['broadcast_slug'] == slug,
            'channels', lambda _, v: v['chennel_vspf_id'] != '00000'))

        entries = []
        for channel_id in traverse_obj(available_channels, (..., {self._get_available_channel_id})):
            if video_type == 'archives':
                channel_json = self._download_json(
                    f'https://survapi.channel.or.jp/proxy/v1/contents/{channel_id}/get_by_cuid', channel_id,
                    'Getting archive channel info', 'Unable to get archive channel info', fatal=False,
                    headers=self._HEADERS)
                channel_data = traverse_obj(channel_json, ('ex_content', {
                    'm3u8_url': 'streaming_url',
                    'title': 'title',
                    'thumbnail': ('thumbnail', 'url'),
                }))
            else:  # video_type == 'broadcasts'
                channel_json = self._download_json(
                    f'https://survapi.channel.or.jp/ex/events/{channel_id}', channel_id,
                    'Getting live channel info', 'Unable to get live channel info', fatal=False,
                    headers=self._HEADERS, query={'embed': 'channel'})
                channel_data = traverse_obj(channel_json, ('data', {
                    'm3u8_url': ('Channel', 'Custom_live_url'),
                    'title': 'Name',
                    'thumbnail': 'Poster_url',
                }))

            entries.append({
                'id': channel_id,
                'title': channel_data.get('title'),
                'formats': self._extract_m3u8_formats(channel_data.get('m3u8_url'), channel_id, fatal=False),
                'is_live': video_type == 'broadcasts',
                'thumbnail': url_or_none(channel_data.get('thumbnail')),
            })

        if not self._is_logged_in and not entries:
            self.raise_login_required()

        return self.playlist_result(entries, video_id, **event_data)

View File

@ -1,104 +1,152 @@
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_age_limit,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
from ..utils.traversal import traverse_obj
class AtresPlayerIE(InfoExtractor): class AtresPlayerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})' _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/(?:[^/?#]+/){4}(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
_NETRC_MACHINE = 'atresplayer' _NETRC_MACHINE = 'atresplayer'
_TESTS = [ _TESTS = [{
{ 'url': 'https://www.atresplayer.com/lasexta/programas/el-objetivo/clips/mbappe-describe-como-entrenador-a-carlo-ancelotti-sabe-cuando-tiene-que-ser-padre-jefe-amigo-entrenador_67f2dfb2fb6ab0e4c7203849/',
'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/', 'info_dict': {
'info_dict': { 'ext': 'mp4',
'id': '5d4aa2c57ed1a88fc715a615', 'id': '67f2dfb2fb6ab0e4c7203849',
'ext': 'mp4', 'display_id': 'md5:c203f8d4e425ed115ba56a1c6e4b3e6c',
'title': 'Capítulo 7: Asuntos pendientes', 'title': 'Mbappé describe como entrenador a Carlo Ancelotti: "Sabe cuándo tiene que ser padre, jefe, amigo, entrenador..."',
'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc', 'channel': 'laSexta',
'duration': 3413, 'duration': 31,
}, 'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/06/B02DBE1E-D59B-4683-8404-1A9595D15269/1920x1080.jpg',
'skip': 'This video is only available for registered users' 'tags': ['Entrevista informativa', 'Actualidad', 'Debate informativo', 'Política', 'Economía', 'Sociedad', 'Cara a cara', 'Análisis', 'Más periodismo'],
'series': 'El Objetivo',
'season': 'Temporada 12',
'timestamp': 1743970079,
'upload_date': '20250406',
}, },
{ }, {
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/', 'url': 'https://www.atresplayer.com/antena3/programas/el-hormiguero/clips/revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero_67f836baa4a5b0e4147ca59a/',
'only_matching': True, 'info_dict': {
'ext': 'mp4',
'id': '67f836baa4a5b0e4147ca59a',
'display_id': 'revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero',
'title': 'Revive la entrevista completa a Miguel Bosé en El Hormiguero',
'description': 'md5:c6d2b591408d45a7bc2986dfb938eb72',
'channel': 'Antena 3',
'duration': 2556,
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/10/9076395F-F1FD-48BE-9F18-540DBA10EBAD/1920x1080.jpg',
'tags': ['Entrevista', 'Variedades', 'Humor', 'Entretenimiento', 'Te sigo', 'Buen rollo', 'Cara a cara'],
'series': 'El Hormiguero ',
'season': 'Temporada 14',
'timestamp': 1744320111,
'upload_date': '20250410',
}, },
{ }, {
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/', 'url': 'https://www.atresplayer.com/flooxer/series/biara-proyecto-lazarus/temporada-1/capitulo-3-supervivientes_67a6038b64ceca00070f4f69/',
'only_matching': True, 'info_dict': {
'ext': 'mp4',
'id': '67a6038b64ceca00070f4f69',
'display_id': 'capitulo-3-supervivientes',
'title': 'Capítulo 3: Supervivientes',
'description': 'md5:65b231f20302f776c2b0dd24594599a1',
'channel': 'Flooxer',
'duration': 1196,
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages01/2025/02/14/17CF90D3-FE67-40C5-A941-7825B3E13992/1920x1080.jpg',
'tags': ['Juvenil', 'Terror', 'Piel de gallina', 'Te sigo', 'Un break', 'Del tirón'],
'series': 'BIARA: Proyecto Lázarus',
'season': 'Temporada 1',
'season_number': 1,
'episode': 'Episode 3',
'episode_number': 3,
'timestamp': 1743095191,
'upload_date': '20250327',
}, },
] }, {
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
'only_matching': True,
}, {
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
'only_matching': True,
}]
_API_BASE = 'https://api.atresplayer.com/' _API_BASE = 'https://api.atresplayer.com/'
def _handle_error(self, e, code):
if isinstance(e.cause, HTTPError) and e.cause.status == code:
error = self._parse_json(e.cause.response.read(), None)
if error.get('error') == 'required_registered':
self.raise_login_required()
raise ExtractorError(error['error_description'], expected=True)
raise
def _perform_login(self, username, password): def _perform_login(self, username, password):
self._request_webpage(
self._API_BASE + 'login', None, 'Downloading login page')
try: try:
target_url = self._download_json( self._download_webpage(
'https://account.atresmedia.com/api/login', None, 'https://account.atresplayer.com/auth/v1/login', None,
'Logging in', headers={ 'Logging in', 'Failed to log in', data=urlencode_postdata({
'Content-Type': 'application/x-www-form-urlencoded'
}, data=urlencode_postdata({
'username': username, 'username': username,
'password': password, 'password': password,
}))['targetUrl'] }))
except ExtractorError as e: except ExtractorError as e:
self._handle_error(e, 400) if isinstance(e.cause, HTTPError) and e.cause.status == 400:
raise ExtractorError('Invalid username and/or password', expected=True)
self._request_webpage(target_url, None, 'Following Target URL') raise
def _real_extract(self, url): def _real_extract(self, url):
display_id, video_id = self._match_valid_url(url).groups() display_id, video_id = self._match_valid_url(url).groups()
try: metadata_url = self._download_json(
episode = self._download_json( self._API_BASE + 'client/v1/url', video_id, 'Downloading API endpoint data',
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id) query={'href': urllib.parse.urlparse(url).path})['href']
except ExtractorError as e: metadata = self._download_json(metadata_url, video_id)
self._handle_error(e, 403)
title = episode['titulo'] try:
video_data = self._download_json(metadata['urlVideo'], video_id, 'Downloading video data')
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
error = self._parse_json(e.cause.response.read(), None)
if error.get('error') == 'required_registered':
self.raise_login_required()
raise ExtractorError(error['error_description'], expected=True)
raise
formats = [] formats = []
subtitles = {} subtitles = {}
for source in episode.get('sources', []): for source in traverse_obj(video_data, ('sources', lambda _, v: url_or_none(v['src']))):
src = source.get('src') src_url = source['src']
if not src:
continue
src_type = source.get('type') src_type = source.get('type')
if src_type == 'application/vnd.apple.mpegurl': if src_type in ('application/vnd.apple.mpegurl', 'application/hls+legacy', 'application/hls+hevc'):
formats, subtitles = self._extract_m3u8_formats( fmts, subs = self._extract_m3u8_formats_and_subtitles(
src, video_id, 'mp4', 'm3u8_native', src_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
m3u8_id='hls', fatal=False) elif src_type in ('application/dash+xml', 'application/dash+hevc'):
elif src_type == 'application/dash+xml': fmts, subs = self._extract_mpd_formats_and_subtitles(
formats, subtitles = self._extract_mpd_formats( src_url, video_id, mpd_id='dash', fatal=False)
src, video_id, mpd_id='dash', fatal=False) else:
continue
heartbeat = episode.get('heartbeat') or {} formats.extend(fmts)
omniture = episode.get('omniture') or {} self._merge_subtitles(subs, target=subtitles)
get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
return { return {
'display_id': display_id, 'display_id': display_id,
'id': video_id, 'id': video_id,
'title': title,
'description': episode.get('descripcion'),
'thumbnail': episode.get('imgPoster'),
'duration': int_or_none(episode.get('duration')),
'formats': formats, 'formats': formats,
'channel': get_meta('channel'),
'season': get_meta('season'),
'episode_number': int_or_none(get_meta('episodeNumber')),
'subtitles': subtitles, 'subtitles': subtitles,
**traverse_obj(video_data, {
'title': ('titulo', {str}),
'description': ('descripcion', {str}),
'duration': ('duration', {int_or_none}),
'thumbnail': ('imgPoster', {url_or_none}, {lambda v: f'{v}1920x1080.jpg'}),
'age_limit': ('ageRating', {parse_age_limit}),
}),
**traverse_obj(metadata, {
'title': ('title', {str}),
'description': ('description', {str}),
'duration': ('duration', {int_or_none}),
'tags': ('tags', ..., 'title', {str}),
'age_limit': ('ageRating', {parse_age_limit}),
'series': ('format', 'title', {str}),
'season': ('currentSeason', 'title', {str}),
'season_number': ('currentSeason', 'seasonNumber', {int_or_none}),
'episode_number': ('numberOfEpisode', {int_or_none}),
'timestamp': ('publicationDate', {int_or_none(scale=1000)}),
'channel': ('channel', 'title', {str}),
}),
} }

View File

@ -12,7 +12,7 @@ class AtScaleConfEventIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'data-scale-spring-2022', 'id': 'data-scale-spring-2022',
'title': 'Data @Scale Spring 2022', 'title': 'Data @Scale Spring 2022',
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55' 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55',
}, },
}, { }, {
'url': 'https://atscaleconference.com/events/video-scale-2021/', 'url': 'https://atscaleconference.com/events/video-scale-2021/',
@ -20,15 +20,15 @@ class AtScaleConfEventIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'video-scale-2021', 'id': 'video-scale-2021',
'title': 'Video @Scale 2021', 'title': 'Video @Scale 2021',
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55' 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55',
}, },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
id = self._match_id(url) playlist_id = self._match_id(url)
webpage = self._download_webpage(url, id) webpage = self._download_webpage(url, playlist_id)
return self.playlist_from_matches( return self.playlist_from_matches(
re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage), re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage),
ie='Generic', playlist_id=id, ie='Generic', playlist_id=playlist_id,
title=self._og_search_title(webpage), description=self._og_search_description(webpage)) title=self._og_search_title(webpage), description=self._og_search_description(webpage))

View File

@ -1,53 +0,0 @@
from .common import InfoExtractor
from ..utils import unified_strdate
class ATTTechChannelIE(InfoExtractor):
    """Extractor for techchannel.att.com play-video pages."""

    _VALID_URL = r'https?://techchannel\.att\.com/play-video\.cfm/([^/]+/)*(?P<id>.+)'
    _TEST = {
        'url': 'http://techchannel.att.com/play-video.cfm/2014/1/27/ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
        'info_dict': {
            'id': '11316',
            'display_id': 'ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
            'ext': 'flv',
            'title': 'AT&T Archives : The UNIX System: Making Computers Easier to Use',
            'description': 'A 1982 film about UNIX is the foundation for software in use around Bell Labs and AT&T.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20140127',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Scrape the page for the rtmp URL, media id and Open Graph metadata."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        # The mandatory stream URL is looked up first; the remaining fields
        # are resolved in dict order below
        rtmp_url = self._search_regex(
            r"url\s*:\s*'(rtmp://[^']+)'",
            page, 'video URL')

        return {
            'id': self._search_regex(
                r'mediaid\s*=\s*(\d+)',
                page, 'video id', fatal=False),
            'display_id': display_id,
            'url': rtmp_url,
            'ext': 'flv',
            'title': self._og_search_title(page),
            'description': self._og_search_description(page),
            'thumbnail': self._og_search_thumbnail(page),
            'upload_date': unified_strdate(
                self._search_regex(
                    r'[Rr]elease\s+date:\s*(\d{1,2}/\d{1,2}/\d{4})',
                    page, 'upload date', fatal=False),
                False),
        }

View File

@ -1,11 +1,11 @@
import datetime import datetime as dt
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError,
float_or_none, float_or_none,
jwt_encode_hs256, jwt_encode_hs256,
try_get, try_get,
ExtractorError,
) )
@ -19,7 +19,7 @@ class ATVAtIE(InfoExtractor):
'id': 'v-ce9cgn1e70n5-1', 'id': 'v-ce9cgn1e70n5-1',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen', 'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
} },
}, { }, {
'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1', 'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
'only_matching': True, 'only_matching': True,
@ -66,14 +66,14 @@ class ATVAtIE(InfoExtractor):
video_id=video_id) video_id=video_id)
video_title = json_data['views']['default']['page']['title'] video_title = json_data['views']['default']['page']['title']
contentResource = json_data['views']['default']['page']['contentResource'] content_resource = json_data['views']['default']['page']['contentResource']
content_id = contentResource[0]['id'] content_id = content_resource[0]['id']
content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']} content_ids = [{'id': id_, 'subclip_start': content['start'], 'subclip_end': content['end']}
for id, content in enumerate(contentResource)] for id_, content in enumerate(content_resource)]
time_of_request = datetime.datetime.now() time_of_request = dt.datetime.now()
not_before = time_of_request - datetime.timedelta(minutes=5) not_before = time_of_request - dt.timedelta(minutes=5)
expire = time_of_request + datetime.timedelta(minutes=5) expire = time_of_request + dt.timedelta(minutes=5)
payload = { payload = {
'content_ids': { 'content_ids': {
content_id: content_ids, content_id: content_ids,
@ -87,17 +87,17 @@ class ATVAtIE(InfoExtractor):
videos = self._download_json( videos = self._download_json(
'https://vas-v4.p7s1video.net/4.0/getsources', 'https://vas-v4.p7s1video.net/4.0/getsources',
content_id, 'Downloading videos JSON', query={ content_id, 'Downloading videos JSON', query={
'token': jwt_token.decode('utf-8') 'token': jwt_token.decode('utf-8'),
}) })
video_id, videos_data = list(videos['data'].items())[0] video_id, videos_data = next(iter(videos['data'].items()))
error_msg = try_get(videos_data, lambda x: x['error']['title']) error_msg = try_get(videos_data, lambda x: x['error']['title'])
if error_msg == 'Geo check failed': if error_msg == 'Geo check failed':
self.raise_geo_restricted(error_msg) self.raise_geo_restricted(error_msg)
elif error_msg: elif error_msg:
raise ExtractorError(error_msg) raise ExtractorError(error_msg)
entries = [ entries = [
self._extract_video_info(url, contentResource[video['id']], video) self._extract_video_info(url, content_resource[video['id']], video)
for video in videos_data] for video in videos_data]
return { return {

View File

@ -19,7 +19,7 @@ class AudiMediaIE(InfoExtractor):
'timestamp': 1448354940, 'timestamp': 1448354940,
'duration': 74022, 'duration': 74022,
'view_count': int, 'view_count': int,
} },
}, { }, {
'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991', 'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
'only_matching': True, 'only_matching': True,
@ -73,7 +73,7 @@ class AudiMediaIE(InfoExtractor):
bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None) bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
if bitrate: if bitrate:
f.update({ f.update({
'format_id': 'http-%s' % bitrate, 'format_id': f'http-{bitrate}',
}) })
formats.append(f) formats.append(f)

View File

@ -15,7 +15,7 @@ class AudioBoomIE(InfoExtractor):
'duration': 4000.99, 'duration': 4000.99,
'uploader': 'Sue Perkins: An hour or so with...', 'uploader': 'Sue Perkins: An hour or so with...',
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins', 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
} },
}, { # Direct mp3-file link }, { # Direct mp3-file link
'url': 'https://audioboom.com/posts/8128496.mp3', 'url': 'https://audioboom.com/posts/8128496.mp3',
'md5': 'e329edf304d450def95c7f86a9165ee1', 'md5': 'e329edf304d450def95c7f86a9165ee1',
@ -27,7 +27,7 @@ class AudioBoomIE(InfoExtractor):
'duration': 1689.7, 'duration': 1689.7,
'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race', 'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race',
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904', 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904',
} },
}, { }, {
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0', 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
'only_matching': True, 'only_matching': True,

View File

@ -9,7 +9,7 @@ class AudiodraftBaseIE(InfoExtractor):
headers={ headers={
'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8', 'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'X-Requested-With': 'XMLHttpRequest', 'X-Requested-With': 'XMLHttpRequest',
}, data=f'id={player_entry_id}'.encode('utf-8')) }, data=f'id={player_entry_id}'.encode())
return { return {
'id': str(data_json['entry_id']), 'id': str(data_json['entry_id']),
@ -65,9 +65,10 @@ class AudiodraftCustomIE(AudiodraftBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, id) webpage = self._download_webpage(url, video_id)
player_entry_id = self._search_regex(r'playAudio\(\'(player_entry_\d+)\'\);', webpage, id, 'play entry id') player_entry_id = self._search_regex(
r'playAudio\(\'(player_entry_\d+)\'\);', webpage, video_id, 'play entry id')
return self._audiodraft_extract_from_id(player_entry_id) return self._audiodraft_extract_from_id(player_entry_id)
@ -89,5 +90,5 @@ class AudiodraftGenericIE(AudiodraftBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
id = self._match_id(url) video_id = self._match_id(url)
return self._audiodraft_extract_from_id(f'player_entry_{id}') return self._audiodraft_extract_from_id(f'player_entry_{video_id}')

View File

@ -3,7 +3,6 @@ import time
from .common import InfoExtractor from .common import InfoExtractor
from .soundcloud import SoundcloudIE from .soundcloud import SoundcloudIE
from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
url_basename, url_basename,
@ -22,8 +21,8 @@ class AudiomackIE(InfoExtractor):
'id': '310086', 'id': '310086',
'ext': 'mp3', 'ext': 'mp3',
'uploader': 'Roosh Williams', 'uploader': 'Roosh Williams',
'title': 'Extraordinary' 'title': 'Extraordinary',
} },
}, },
# audiomack wrapper around soundcloud song # audiomack wrapper around soundcloud song
# Needs new test URL. # Needs new test URL.
@ -56,7 +55,7 @@ class AudiomackIE(InfoExtractor):
# API is inconsistent with errors # API is inconsistent with errors
if 'url' not in api_response or not api_response['url'] or 'error' in api_response: if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
raise ExtractorError('Invalid url %s' % url) raise ExtractorError(f'Invalid url {url}')
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper # Audiomack wraps a lot of soundcloud tracks in their branded wrapper
# if so, pass the work off to the soundcloud extractor # if so, pass the work off to the soundcloud extractor
@ -64,7 +63,7 @@ class AudiomackIE(InfoExtractor):
return self.url_result(api_response['url'], SoundcloudIE.ie_key()) return self.url_result(api_response['url'], SoundcloudIE.ie_key())
return { return {
'id': compat_str(api_response.get('id', album_url_tag)), 'id': str(api_response.get('id', album_url_tag)),
'uploader': api_response.get('artist'), 'uploader': api_response.get('artist'),
'title': api_response.get('title'), 'title': api_response.get('title'),
'url': api_response['url'], 'url': api_response['url'],
@ -82,8 +81,8 @@ class AudiomackAlbumIE(InfoExtractor):
'info_dict': 'info_dict':
{ {
'id': '812251', 'id': '812251',
'title': 'Tha Tour: Part 2 (Official Mixtape)' 'title': 'Tha Tour: Part 2 (Official Mixtape)',
} },
}, },
# Album playlist ripped from fakeshoredrive with no metadata # Album playlist ripped from fakeshoredrive with no metadata
{ {
@ -98,16 +97,16 @@ class AudiomackAlbumIE(InfoExtractor):
'id': '837576', 'id': '837576',
'ext': 'mp3', 'ext': 'mp3',
'uploader': 'Lil Herb a.k.a. G Herbo', 'uploader': 'Lil Herb a.k.a. G Herbo',
} },
}, { }, {
'info_dict': { 'info_dict': {
'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)', 'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)',
'id': '837580', 'id': '837580',
'ext': 'mp3', 'ext': 'mp3',
'uploader': 'Lil Herb a.k.a. G Herbo', 'uploader': 'Lil Herb a.k.a. G Herbo',
} },
}], }],
} },
] ]
def _real_extract(self, url): def _real_extract(self, url):
@ -123,12 +122,12 @@ class AudiomackAlbumIE(InfoExtractor):
api_response = self._download_json( api_response = self._download_json(
'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d' 'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
% (album_url_tag, track_no, time.time()), album_url_tag, % (album_url_tag, track_no, time.time()), album_url_tag,
note='Querying song information (%d)' % (track_no + 1)) note=f'Querying song information ({track_no + 1})')
# Total failure, only occurs when url is totally wrong # Total failure, only occurs when url is totally wrong
# Won't happen in middle of valid playlist (next case) # Won't happen in middle of valid playlist (next case)
if 'url' not in api_response or 'error' in api_response: if 'url' not in api_response or 'error' in api_response:
raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url)) raise ExtractorError(f'Invalid url for track {track_no} of album url {url}')
# URL is good but song id doesn't exist - usually means end of playlist # URL is good but song id doesn't exist - usually means end of playlist
elif not api_response['url']: elif not api_response['url']:
break break
@ -136,10 +135,10 @@ class AudiomackAlbumIE(InfoExtractor):
# Pull out the album metadata and add to result (if it exists) # Pull out the album metadata and add to result (if it exists)
for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]: for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
if apikey in api_response and resultkey not in result: if apikey in api_response and resultkey not in result:
result[resultkey] = compat_str(api_response[apikey]) result[resultkey] = str(api_response[apikey])
song_id = url_basename(api_response['url']).rpartition('.')[0] song_id = url_basename(api_response['url']).rpartition('.')[0]
result['entries'].append({ result['entries'].append({
'id': compat_str(api_response.get('id', song_id)), 'id': str(api_response.get('id', song_id)),
'uploader': api_response.get('artist'), 'uploader': api_response.get('artist'),
'title': api_response.get('title', song_id), 'title': api_response.get('title', song_id),
'url': api_response['url'], 'url': api_response['url'],

View File

@ -1,7 +1,7 @@
import random import random
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str, compat_urllib_parse_unquote
from ..utils import ExtractorError, str_or_none, try_get from ..utils import ExtractorError, str_or_none, try_get
@ -15,13 +15,13 @@ class AudiusBaseIE(InfoExtractor):
if response_data is not None: if response_data is not None:
return response_data return response_data
if len(response) == 1 and 'message' in response: if len(response) == 1 and 'message' in response:
raise ExtractorError('API error: %s' % response['message'], raise ExtractorError('API error: {}'.format(response['message']),
expected=True) expected=True)
raise ExtractorError('Unexpected API response') raise ExtractorError('Unexpected API response')
def _select_api_base(self): def _select_api_base(self):
"""Selecting one of the currently available API hosts""" """Selecting one of the currently available API hosts"""
response = super(AudiusBaseIE, self)._download_json( response = super()._download_json(
'https://api.audius.co/', None, 'https://api.audius.co/', None,
note='Requesting available API hosts', note='Requesting available API hosts',
errnote='Unable to request available API hosts') errnote='Unable to request available API hosts')
@ -41,8 +41,8 @@ class AudiusBaseIE(InfoExtractor):
anything from this link, since the Audius API won't be able to resolve anything from this link, since the Audius API won't be able to resolve
this url this url
""" """
url = compat_urllib_parse_unquote(url) url = urllib.parse.unquote(url)
title = compat_urllib_parse_unquote(title) title = urllib.parse.unquote(title)
if '/' in title or '%2F' in title: if '/' in title or '%2F' in title:
fixed_title = title.replace('/', '%5C').replace('%2F', '%5C') fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
return url.replace(title, fixed_title) return url.replace(title, fixed_title)
@ -54,19 +54,19 @@ class AudiusBaseIE(InfoExtractor):
if self._API_BASE is None: if self._API_BASE is None:
self._select_api_base() self._select_api_base()
try: try:
response = super(AudiusBaseIE, self)._download_json( response = super()._download_json(
'%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note, f'{self._API_BASE}{self._API_V}{path}', item_id, note=note,
errnote=errnote, expected_status=expected_status) errnote=errnote, expected_status=expected_status)
except ExtractorError as exc: except ExtractorError as exc:
# some of Audius API hosts may not work as expected and return HTML # some of Audius API hosts may not work as expected and return HTML
if 'Failed to parse JSON' in compat_str(exc): if 'Failed to parse JSON' in str(exc):
raise ExtractorError('An error occurred while receiving data. Try again', raise ExtractorError('An error occurred while receiving data. Try again',
expected=True) expected=True)
raise exc raise exc
return self._get_response_data(response) return self._get_response_data(response)
def _resolve_url(self, url, item_id): def _resolve_url(self, url, item_id):
return self._api_request('/resolve?url=%s' % url, item_id, return self._api_request(f'/resolve?url={url}', item_id,
expected_status=404) expected_status=404)
@ -91,7 +91,7 @@ class AudiusIE(AudiusBaseIE):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'repost_count': int, 'repost_count': int,
} },
}, },
{ {
# Regular track # Regular track
@ -109,14 +109,14 @@ class AudiusIE(AudiusBaseIE):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'repost_count': int, 'repost_count': int,
} },
}, },
] ]
_ARTWORK_MAP = { _ARTWORK_MAP = {
"150x150": 150, '150x150': 150,
"480x480": 480, '480x480': 480,
"1000x1000": 1000 '1000x1000': 1000,
} }
def _real_extract(self, url): def _real_extract(self, url):
@ -130,7 +130,7 @@ class AudiusIE(AudiusBaseIE):
else: # API link else: # API link
title = None title = None
# uploader = None # uploader = None
track_data = self._api_request('/tracks/%s' % track_id, track_id) track_data = self._api_request(f'/tracks/{track_id}', track_id)
if not isinstance(track_data, dict): if not isinstance(track_data, dict):
raise ExtractorError('Unexpected API response') raise ExtractorError('Unexpected API response')
@ -144,7 +144,7 @@ class AudiusIE(AudiusBaseIE):
if isinstance(artworks_data, dict): if isinstance(artworks_data, dict):
for quality_key, thumbnail_url in artworks_data.items(): for quality_key, thumbnail_url in artworks_data.items():
thumbnail = { thumbnail = {
"url": thumbnail_url 'url': thumbnail_url,
} }
quality_code = self._ARTWORK_MAP.get(quality_key) quality_code = self._ARTWORK_MAP.get(quality_key)
if quality_code is not None: if quality_code is not None:
@ -154,12 +154,12 @@ class AudiusIE(AudiusBaseIE):
return { return {
'id': track_id, 'id': track_id,
'title': track_data.get('title', title), 'title': track_data.get('title', title),
'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id), 'url': f'{self._API_BASE}/v1/tracks/{track_id}/stream',
'ext': 'mp3', 'ext': 'mp3',
'description': track_data.get('description'), 'description': track_data.get('description'),
'duration': track_data.get('duration'), 'duration': track_data.get('duration'),
'track': track_data.get('title'), 'track': track_data.get('title'),
'artist': try_get(track_data, lambda x: x['user']['name'], compat_str), 'artist': try_get(track_data, lambda x: x['user']['name'], str),
'genre': track_data.get('genre'), 'genre': track_data.get('genre'),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'view_count': track_data.get('play_count'), 'view_count': track_data.get('play_count'),
@ -175,11 +175,11 @@ class AudiusTrackIE(AudiusIE): # XXX: Do not subclass from concrete IE
_TESTS = [ _TESTS = [
{ {
'url': 'audius:9RWlo', 'url': 'audius:9RWlo',
'only_matching': True 'only_matching': True,
}, },
{ {
'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo', 'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
'only_matching': True 'only_matching': True,
}, },
] ]
@ -207,7 +207,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
if not track_id: if not track_id:
raise ExtractorError('Unable to get track ID from playlist') raise ExtractorError('Unable to get track ID from playlist')
entries.append(self.url_result( entries.append(self.url_result(
'audius:%s' % track_id, f'audius:{track_id}',
ie=AudiusTrackIE.ie_key(), video_id=track_id)) ie=AudiusTrackIE.ie_key(), video_id=track_id))
return entries return entries
@ -231,7 +231,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
raise ExtractorError('Unable to get playlist ID') raise ExtractorError('Unable to get playlist ID')
playlist_tracks = self._api_request( playlist_tracks = self._api_request(
'/playlists/%s/tracks' % playlist_id, f'/playlists/{playlist_id}/tracks',
title, note='Downloading playlist tracks metadata', title, note='Downloading playlist tracks metadata',
errnote='Unable to download playlist tracks metadata') errnote='Unable to download playlist tracks metadata')
if not isinstance(playlist_tracks, list): if not isinstance(playlist_tracks, list):
@ -267,5 +267,5 @@ class AudiusProfileIE(AudiusPlaylistIE): # XXX: Do not subclass from concrete I
profile_audius_id = _profile_data[0]['id'] profile_audius_id = _profile_data[0]['id']
profile_bio = _profile_data[0].get('bio') profile_bio = _profile_data[0].get('bio')
api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id) api_call = self._api_request(f'/full/users/handle/{profile_id}/tracks', profile_id)
return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio) return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)

View File

@ -1,10 +1,7 @@
import base64 import base64
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode,
compat_str,
)
from ..utils import ( from ..utils import (
format_field, format_field,
int_or_none, int_or_none,
@ -22,14 +19,14 @@ class AWAANIE(InfoExtractor):
show_id, video_id, season_id = self._match_valid_url(url).groups() show_id, video_id, season_id = self._match_valid_url(url).groups()
if video_id and int(video_id) > 0: if video_id and int(video_id) > 0:
return self.url_result( return self.url_result(
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo') f'http://awaan.ae/media/{video_id}', 'AWAANVideo')
elif season_id and int(season_id) > 0: elif season_id and int(season_id) > 0:
return self.url_result(smuggle_url( return self.url_result(smuggle_url(
'http://awaan.ae/program/season/%s' % season_id, f'http://awaan.ae/program/season/{season_id}',
{'show_id': show_id}), 'AWAANSeason') {'show_id': show_id}), 'AWAANSeason')
else: else:
return self.url_result( return self.url_result(
'http://awaan.ae/program/%s' % show_id, 'AWAANSeason') f'http://awaan.ae/program/{show_id}', 'AWAANSeason')
class AWAANBaseIE(InfoExtractor): class AWAANBaseIE(InfoExtractor):
@ -75,11 +72,11 @@ class AWAANVideoIE(AWAANBaseIE):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json( video_data = self._download_json(
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, f'http://admin.mangomolo.com/analytics/index.php/plus/video?id={video_id}',
video_id, headers={'Origin': 'http://awaan.ae'}) video_id, headers={'Origin': 'http://awaan.ae'})
info = self._parse_video_data(video_data, video_id, False) info = self._parse_video_data(video_data, video_id, False)
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({ embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + urllib.parse.urlencode({
'id': video_data['id'], 'id': video_data['id'],
'user_id': video_data['user_id'], 'user_id': video_data['user_id'],
'signature': video_data['signature'], 'signature': video_data['signature'],
@ -117,11 +114,11 @@ class AWAANLiveIE(AWAANBaseIE):
channel_id = self._match_id(url) channel_id = self._match_id(url)
channel_data = self._download_json( channel_data = self._download_json(
'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id, f'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id={channel_id}',
channel_id, headers={'Origin': 'http://awaan.ae'}) channel_id, headers={'Origin': 'http://awaan.ae'})
info = self._parse_video_data(channel_data, channel_id, True) info = self._parse_video_data(channel_data, channel_id, True)
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({ embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + urllib.parse.urlencode({
'id': base64.b64encode(channel_data['user_id'].encode()).decode(), 'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
'channelid': base64.b64encode(channel_data['id'].encode()).decode(), 'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
'signature': channel_data['signature'], 'signature': channel_data['signature'],
@ -159,7 +156,7 @@ class AWAANSeasonIE(InfoExtractor):
show_id = smuggled_data.get('show_id') show_id = smuggled_data.get('show_id')
if show_id is None: if show_id is None:
season = self._download_json( season = self._download_json(
'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id, f'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id={season_id}',
season_id, headers={'Origin': 'http://awaan.ae'}) season_id, headers={'Origin': 'http://awaan.ae'})
show_id = season['id'] show_id = season['id']
data['show_id'] = show_id data['show_id'] = show_id
@ -167,7 +164,7 @@ class AWAANSeasonIE(InfoExtractor):
'http://admin.mangomolo.com/analytics/index.php/plus/show', 'http://admin.mangomolo.com/analytics/index.php/plus/show',
show_id, data=urlencode_postdata(data), headers={ show_id, data=urlencode_postdata(data), headers={
'Origin': 'http://awaan.ae', 'Origin': 'http://awaan.ae',
'Content-Type': 'application/x-www-form-urlencoded' 'Content-Type': 'application/x-www-form-urlencoded',
}) })
if not season_id: if not season_id:
season_id = show['default_season'] season_id = show['default_season']
@ -177,8 +174,8 @@ class AWAANSeasonIE(InfoExtractor):
entries = [] entries = []
for video in show['videos']: for video in show['videos']:
video_id = compat_str(video['id']) video_id = str(video['id'])
entries.append(self.url_result( entries.append(self.url_result(
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id)) f'http://awaan.ae/media/{video_id}', 'AWAANVideo', video_id))
return self.playlist_result(entries, season_id, title) return self.playlist_result(entries, season_id, title)

View File

@ -1,9 +1,9 @@
import datetime import datetime as dt
import hashlib import hashlib
import hmac import hmac
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode
class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
@ -12,26 +12,26 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
def _aws_execute_api(self, aws_dict, video_id, query=None): def _aws_execute_api(self, aws_dict, video_id, query=None):
query = query or {} query = query or {}
amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') amz_date = dt.datetime.now(dt.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
date = amz_date[:8] date = amz_date[:8]
headers = { headers = {
'Accept': 'application/json', 'Accept': 'application/json',
'Host': self._AWS_PROXY_HOST, 'Host': self._AWS_PROXY_HOST,
'X-Amz-Date': amz_date, 'X-Amz-Date': amz_date,
'X-Api-Key': self._AWS_API_KEY 'X-Api-Key': self._AWS_API_KEY,
} }
session_token = aws_dict.get('session_token') session_token = aws_dict.get('session_token')
if session_token: if session_token:
headers['X-Amz-Security-Token'] = session_token headers['X-Amz-Security-Token'] = session_token
def aws_hash(s): def aws_hash(s):
return hashlib.sha256(s.encode('utf-8')).hexdigest() return hashlib.sha256(s.encode()).hexdigest()
# Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
canonical_querystring = compat_urllib_parse_urlencode(query) canonical_querystring = urllib.parse.urlencode(query)
canonical_headers = '' canonical_headers = ''
for header_name, header_value in sorted(headers.items()): for header_name, header_value in sorted(headers.items()):
canonical_headers += '%s:%s\n' % (header_name.lower(), header_value) canonical_headers += f'{header_name.lower()}:{header_value}\n'
signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())]) signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())])
canonical_request = '\n'.join([ canonical_request = '\n'.join([
'GET', 'GET',
@ -39,7 +39,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
canonical_querystring, canonical_querystring,
canonical_headers, canonical_headers,
signed_headers, signed_headers,
aws_hash('') aws_hash(''),
]) ])
# Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
@ -49,7 +49,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
# Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
def aws_hmac(key, msg): def aws_hmac(key, msg):
return hmac.new(key, msg.encode('utf-8'), hashlib.sha256) return hmac.new(key, msg.encode(), hashlib.sha256)
def aws_hmac_digest(key, msg): def aws_hmac_digest(key, msg):
return aws_hmac(key, msg).digest() return aws_hmac(key, msg).digest()
@ -57,7 +57,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
def aws_hmac_hexdigest(key, msg): def aws_hmac_hexdigest(key, msg):
return aws_hmac(key, msg).hexdigest() return aws_hmac(key, msg).hexdigest()
k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8') k_signing = ('AWS4' + aws_dict['secret_key']).encode()
for value in credential_scope_list: for value in credential_scope_list:
k_signing = aws_hmac_digest(k_signing, value) k_signing = aws_hmac_digest(k_signing, value)
@ -65,11 +65,11 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
# Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
headers['Authorization'] = ', '.join([ headers['Authorization'] = ', '.join([
'%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope), '{} Credential={}/{}'.format(self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
'SignedHeaders=%s' % signed_headers, f'SignedHeaders={signed_headers}',
'Signature=%s' % signature, f'Signature={signature}',
]) ])
return self._download_json( return self._download_json(
'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''), 'https://{}{}{}'.format(self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
video_id, headers=headers) video_id, headers=headers)

Some files were not shown because too many files have changed in this diff Show More