Upgrade yt_dlp and download script
This commit is contained in:
parent
3a2e8eeb08
commit
d68d9ce4f9
@ -8,12 +8,29 @@
|
||||
|
||||
|
||||
function main() {
|
||||
cd "$(dirname "")"
|
||||
echo "Working Dir: " $(pwd)
|
||||
_STARGET="${1}"
|
||||
_SPATH="${HOME}/.config/solarfm/plugins/youtube_download"
|
||||
LINK=`xclip -selection clipboard -o`
|
||||
|
||||
python "${HOME}/.config/solarfm/plugins/youtube_download/yt_dlp/__main__.py" \
|
||||
--write-sub --embed-sub --sub-langs en \
|
||||
-o "${1}/%(title)s.%(ext)s" "${LINK}"
|
||||
cd "${_SPATH}"
|
||||
echo "Working Dir: " $(pwd)
|
||||
|
||||
rm "${_SPATH}/../../cookies.txt"
|
||||
|
||||
# Note: Export cookies to file
|
||||
python "${_SPATH}/yt_dlp/__main__.py" \
|
||||
--cookies-from-browser firefox --cookies "${_SPATH}/../../cookies.txt"
|
||||
|
||||
# Note: Use cookies from browser directly
|
||||
# python "${_SPATH}/yt_dlp/__main__.py" \
|
||||
# --cookies-from-browser firefox --write-sub --embed-sub --sub-langs en \
|
||||
# -o "${_STARGET}/%(title)s.%(ext)s" "${LINK}"
|
||||
|
||||
# Note: Download video
|
||||
python "${_SPATH}/yt_dlp/__main__.py" \
|
||||
-f "bestvideo[height<=1080][ext=mp4][vcodec^=avc]+bestaudio[ext=m4a]/best[ext=mp4]/best" \
|
||||
--cookies "${_SPATH}/../../cookies.txt" --write-sub --embed-sub --sub-langs en \
|
||||
-o "${_STARGET}/%(title)s.%(ext)s" "${LINK}"
|
||||
|
||||
}
|
||||
main "$@";
|
File diff suppressed because it is too large
Load Diff
@ -1,10 +1,10 @@
|
||||
try:
|
||||
import contextvars # noqa: F401
|
||||
except Exception:
|
||||
raise Exception(
|
||||
f'You are using an unsupported version of Python. Only Python versions 3.7 and above are supported by yt-dlp') # noqa: F541
|
||||
import sys
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
if sys.version_info < (3, 9):
|
||||
raise ImportError(
|
||||
f'You are using an unsupported version of Python. Only Python versions 3.9 and above are supported by yt-dlp') # noqa: F541
|
||||
|
||||
__license__ = 'The Unlicense'
|
||||
|
||||
import collections
|
||||
import getpass
|
||||
@ -12,15 +12,16 @@ import itertools
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
from .compat import compat_shlex_quote
|
||||
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
|
||||
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError
|
||||
from .downloader.external import get_external_downloader
|
||||
from .extractor import list_extractor_classes
|
||||
from .extractor.adobepass import MSO_INFO
|
||||
from .networking.impersonate import ImpersonateTarget
|
||||
from .globals import IN_CLI, plugin_dirs
|
||||
from .options import parseOpts
|
||||
from .plugins import load_all_plugins as _load_all_plugins
|
||||
from .postprocessor import (
|
||||
FFmpegExtractAudioPP,
|
||||
FFmpegMergerPP,
|
||||
@ -43,12 +44,12 @@ from .utils import (
|
||||
GeoUtils,
|
||||
PlaylistEntries,
|
||||
SameFileError,
|
||||
decodeOption,
|
||||
download_range_func,
|
||||
expand_path,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
match_filter_func,
|
||||
parse_bytes,
|
||||
parse_duration,
|
||||
@ -57,15 +58,15 @@ from .utils import (
|
||||
read_stdin,
|
||||
render_table,
|
||||
setproctitle,
|
||||
shell_quote,
|
||||
traverse_obj,
|
||||
variadic,
|
||||
write_string,
|
||||
)
|
||||
from .utils.networking import std_headers
|
||||
from .utils._utils import _UnsafeExtensionError
|
||||
from .YoutubeDL import YoutubeDL
|
||||
|
||||
_IN_CLI = False
|
||||
|
||||
|
||||
def _exit(status=0, *args):
|
||||
for msg in args:
|
||||
@ -74,14 +75,16 @@ def _exit(status=0, *args):
|
||||
|
||||
|
||||
def get_urls(urls, batchfile, verbose):
|
||||
# Batch file verification
|
||||
"""
|
||||
@param verbose -1: quiet, 0: normal, 1: verbose
|
||||
"""
|
||||
batch_urls = []
|
||||
if batchfile is not None:
|
||||
try:
|
||||
batch_urls = read_batch_urls(
|
||||
read_stdin('URLs') if batchfile == '-'
|
||||
read_stdin(None if verbose == -1 else 'URLs') if batchfile == '-'
|
||||
else open(expand_path(batchfile), encoding='utf-8', errors='ignore'))
|
||||
if verbose:
|
||||
if verbose == 1:
|
||||
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
|
||||
except OSError:
|
||||
_exit(f'ERROR: batch file {batchfile} could not be read')
|
||||
@ -112,9 +115,9 @@ def print_extractor_information(opts, urls):
|
||||
ie.description(markdown=False, search_examples=_SEARCHES)
|
||||
for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False)
|
||||
elif opts.ap_list_mso:
|
||||
out = 'Supported TV Providers:\n%s\n' % render_table(
|
||||
out = 'Supported TV Providers:\n{}\n'.format(render_table(
|
||||
['mso', 'mso name'],
|
||||
[[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()])
|
||||
[[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]))
|
||||
else:
|
||||
return False
|
||||
write_string(out, out=sys.stdout)
|
||||
@ -126,7 +129,7 @@ def set_compat_opts(opts):
|
||||
if name not in opts.compat_opts:
|
||||
return False
|
||||
opts.compat_opts.discard(name)
|
||||
opts.compat_opts.update(['*%s' % name])
|
||||
opts.compat_opts.update([f'*{name}'])
|
||||
return True
|
||||
|
||||
def set_default_compat(compat_name, opt_name, default=True, remove_compat=True):
|
||||
@ -153,6 +156,9 @@ def set_compat_opts(opts):
|
||||
opts.embed_infojson = False
|
||||
if 'format-sort' in opts.compat_opts:
|
||||
opts.format_sort.extend(FormatSorter.ytdl_default)
|
||||
elif 'prefer-vp9-sort' in opts.compat_opts:
|
||||
opts.format_sort.extend(FormatSorter._prefer_vp9_sort)
|
||||
|
||||
_video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
|
||||
_audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False)
|
||||
if _video_multistreams_set is False and _audio_multistreams_set is False:
|
||||
@ -219,7 +225,7 @@ def validate_options(opts):
|
||||
validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval')
|
||||
|
||||
if opts.wait_for_video is not None:
|
||||
min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None])
|
||||
min_wait, max_wait, *_ = map(parse_duration, [*opts.wait_for_video.split('-', 1), None])
|
||||
validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video),
|
||||
'time range to wait for video', opts.wait_for_video)
|
||||
validate_minmax(min_wait, max_wait, 'time range to wait for video')
|
||||
@ -230,6 +236,11 @@ def validate_options(opts):
|
||||
validate_regex('format sorting', f, FormatSorter.regex)
|
||||
|
||||
# Postprocessor formats
|
||||
if opts.convertsubtitles == 'none':
|
||||
opts.convertsubtitles = None
|
||||
if opts.convertthumbnails == 'none':
|
||||
opts.convertthumbnails = None
|
||||
|
||||
validate_regex('merge output format', opts.merge_output_format,
|
||||
r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS))))
|
||||
validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)
|
||||
@ -249,9 +260,11 @@ def validate_options(opts):
|
||||
elif value in ('inf', 'infinite'):
|
||||
return float('inf')
|
||||
try:
|
||||
return int(value)
|
||||
int_value = int(value)
|
||||
except (TypeError, ValueError):
|
||||
validate(False, f'{name} retry count', value)
|
||||
validate_positive(f'{name} retry count', int_value)
|
||||
return int_value
|
||||
|
||||
opts.retries = parse_retries('download', opts.retries)
|
||||
opts.fragment_retries = parse_retries('fragment', opts.fragment_retries)
|
||||
@ -261,9 +274,9 @@ def validate_options(opts):
|
||||
# Retry sleep function
|
||||
def parse_sleep_func(expr):
|
||||
NUMBER_RE = r'\d+(?:\.\d+)?'
|
||||
op, start, limit, step, *_ = tuple(re.fullmatch(
|
||||
op, start, limit, step, *_ = (*tuple(re.fullmatch(
|
||||
rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?',
|
||||
expr.strip()).groups()) + (None, None)
|
||||
expr.strip()).groups()), None, None)
|
||||
|
||||
if op == 'exp':
|
||||
return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf'))
|
||||
@ -281,18 +294,20 @@ def validate_options(opts):
|
||||
raise ValueError(f'invalid {key} retry sleep expression {expr!r}')
|
||||
|
||||
# Bytes
|
||||
def validate_bytes(name, value):
|
||||
def validate_bytes(name, value, strict_positive=False):
|
||||
if value is None:
|
||||
return None
|
||||
numeric_limit = parse_bytes(value)
|
||||
validate(numeric_limit is not None, 'rate limit', value)
|
||||
validate(numeric_limit is not None, name, value)
|
||||
if strict_positive:
|
||||
validate_positive(name, numeric_limit, True)
|
||||
return numeric_limit
|
||||
|
||||
opts.ratelimit = validate_bytes('rate limit', opts.ratelimit)
|
||||
opts.ratelimit = validate_bytes('rate limit', opts.ratelimit, True)
|
||||
opts.throttledratelimit = validate_bytes('throttled rate limit', opts.throttledratelimit)
|
||||
opts.min_filesize = validate_bytes('min filesize', opts.min_filesize)
|
||||
opts.max_filesize = validate_bytes('max filesize', opts.max_filesize)
|
||||
opts.buffersize = validate_bytes('buffer size', opts.buffersize)
|
||||
opts.buffersize = validate_bytes('buffer size', opts.buffersize, True)
|
||||
opts.http_chunk_size = validate_bytes('http chunk size', opts.http_chunk_size)
|
||||
|
||||
# Output templates
|
||||
@ -387,16 +402,19 @@ def validate_options(opts):
|
||||
f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}')
|
||||
opts.cookiesfrombrowser = (browser_name, profile, keyring, container)
|
||||
|
||||
if opts.impersonate is not None:
|
||||
opts.impersonate = ImpersonateTarget.from_str(opts.impersonate.lower())
|
||||
|
||||
# MetadataParser
|
||||
def metadataparser_actions(f):
|
||||
if isinstance(f, str):
|
||||
cmd = '--parse-metadata %s' % compat_shlex_quote(f)
|
||||
cmd = f'--parse-metadata {shell_quote(f)}'
|
||||
try:
|
||||
actions = [MetadataFromFieldPP.to_action(f)]
|
||||
except Exception as err:
|
||||
raise ValueError(f'{cmd} is invalid; {err}')
|
||||
else:
|
||||
cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f))
|
||||
cmd = f'--replace-in-metadata {shell_quote(f)}'
|
||||
actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(','))
|
||||
|
||||
for action in actions:
|
||||
@ -407,13 +425,17 @@ def validate_options(opts):
|
||||
yield action
|
||||
|
||||
if opts.metafromtitle is not None:
|
||||
opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle)
|
||||
opts.parse_metadata.setdefault('pre_process', []).append(f'title:{opts.metafromtitle}')
|
||||
opts.parse_metadata = {
|
||||
k: list(itertools.chain(*map(metadataparser_actions, v)))
|
||||
for k, v in opts.parse_metadata.items()
|
||||
}
|
||||
|
||||
# Other options
|
||||
opts.plugin_dirs = opts.plugin_dirs
|
||||
if opts.plugin_dirs is None:
|
||||
opts.plugin_dirs = ['default']
|
||||
|
||||
if opts.playlist_items is not None:
|
||||
try:
|
||||
tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items))
|
||||
@ -460,7 +482,7 @@ def validate_options(opts):
|
||||
default_downloader = ed.get_basename()
|
||||
|
||||
for policy in opts.color.values():
|
||||
if policy not in ('always', 'auto', 'no_color', 'never'):
|
||||
if policy not in ('always', 'auto', 'auto-tty', 'no_color', 'no_color-tty', 'never'):
|
||||
raise ValueError(f'"{policy}" is not a valid color policy')
|
||||
|
||||
warnings, deprecation_warnings = [], []
|
||||
@ -586,6 +608,13 @@ def validate_options(opts):
|
||||
if opts.ap_username is not None and opts.ap_password is None:
|
||||
opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ')
|
||||
|
||||
# compat option changes global state destructively; only allow from cli
|
||||
if 'allow-unsafe-ext' in opts.compat_opts:
|
||||
warnings.append(
|
||||
'Using allow-unsafe-ext opens you up to potential attacks. '
|
||||
'Use with great care!')
|
||||
_UnsafeExtensionError.sanitize_extension = lambda x, prepend=False: x
|
||||
|
||||
return warnings, deprecation_warnings
|
||||
|
||||
|
||||
@ -596,7 +625,7 @@ def get_postprocessors(opts):
|
||||
yield {
|
||||
'key': 'MetadataParser',
|
||||
'actions': actions,
|
||||
'when': when
|
||||
'when': when,
|
||||
}
|
||||
sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
|
||||
if sponsorblock_query:
|
||||
@ -604,19 +633,19 @@ def get_postprocessors(opts):
|
||||
'key': 'SponsorBlock',
|
||||
'categories': sponsorblock_query,
|
||||
'api': opts.sponsorblock_api,
|
||||
'when': 'after_filter'
|
||||
'when': 'after_filter',
|
||||
}
|
||||
if opts.convertsubtitles:
|
||||
yield {
|
||||
'key': 'FFmpegSubtitlesConvertor',
|
||||
'format': opts.convertsubtitles,
|
||||
'when': 'before_dl'
|
||||
'when': 'before_dl',
|
||||
}
|
||||
if opts.convertthumbnails:
|
||||
yield {
|
||||
'key': 'FFmpegThumbnailsConvertor',
|
||||
'format': opts.convertthumbnails,
|
||||
'when': 'before_dl'
|
||||
'when': 'before_dl',
|
||||
}
|
||||
if opts.extractaudio:
|
||||
yield {
|
||||
@ -641,7 +670,7 @@ def get_postprocessors(opts):
|
||||
yield {
|
||||
'key': 'FFmpegEmbedSubtitle',
|
||||
# already_have_subtitle = True prevents the file from being deleted after embedding
|
||||
'already_have_subtitle': opts.writesubtitles and keep_subs
|
||||
'already_have_subtitle': opts.writesubtitles and keep_subs,
|
||||
}
|
||||
if not opts.writeautomaticsub and keep_subs:
|
||||
opts.writesubtitles = True
|
||||
@ -654,7 +683,7 @@ def get_postprocessors(opts):
|
||||
'remove_sponsor_segments': opts.sponsorblock_remove,
|
||||
'remove_ranges': opts.remove_ranges,
|
||||
'sponsorblock_chapter_title': opts.sponsorblock_chapter_title,
|
||||
'force_keyframes': opts.force_keyframes_at_cuts
|
||||
'force_keyframes': opts.force_keyframes_at_cuts,
|
||||
}
|
||||
# FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
|
||||
# FFmpegExtractAudioPP as containers before conversion may not support
|
||||
@ -688,7 +717,7 @@ def get_postprocessors(opts):
|
||||
yield {
|
||||
'key': 'EmbedThumbnail',
|
||||
# already_have_thumbnail = True prevents the file from being deleted after embedding
|
||||
'already_have_thumbnail': opts.writethumbnail
|
||||
'already_have_thumbnail': opts.writethumbnail,
|
||||
}
|
||||
if not opts.writethumbnail:
|
||||
opts.writethumbnail = True
|
||||
@ -722,7 +751,7 @@ ParsedOptions = collections.namedtuple('ParsedOptions', ('parser', 'options', 'u
|
||||
def parse_options(argv=None):
|
||||
"""@returns ParsedOptions(parser, opts, urls, ydl_opts)"""
|
||||
parser, opts, urls = parseOpts(argv)
|
||||
urls = get_urls(urls, opts.batchfile, opts.verbose)
|
||||
urls = get_urls(urls, opts.batchfile, -1 if opts.quiet and not opts.verbose else opts.verbose)
|
||||
|
||||
set_compat_opts(opts)
|
||||
try:
|
||||
@ -735,7 +764,7 @@ def parse_options(argv=None):
|
||||
print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:])
|
||||
any_getting = any(getattr(opts, k) for k in (
|
||||
'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename',
|
||||
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
|
||||
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl',
|
||||
))
|
||||
if opts.quiet is None:
|
||||
opts.quiet = any_getting or opts.print_json or bool(opts.forceprint)
|
||||
@ -830,6 +859,7 @@ def parse_options(argv=None):
|
||||
'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress,
|
||||
'progress_with_newline': opts.progress_with_newline,
|
||||
'progress_template': opts.progress_template,
|
||||
'progress_delta': opts.progress_delta,
|
||||
'playliststart': opts.playliststart,
|
||||
'playlistend': opts.playlistend,
|
||||
'playlistreverse': opts.playlist_reverse,
|
||||
@ -858,8 +888,8 @@ def parse_options(argv=None):
|
||||
'listsubtitles': opts.listsubtitles,
|
||||
'subtitlesformat': opts.subtitlesformat,
|
||||
'subtitleslangs': opts.subtitleslangs,
|
||||
'matchtitle': decodeOption(opts.matchtitle),
|
||||
'rejecttitle': decodeOption(opts.rejecttitle),
|
||||
'matchtitle': opts.matchtitle,
|
||||
'rejecttitle': opts.rejecttitle,
|
||||
'max_downloads': opts.max_downloads,
|
||||
'prefer_free_formats': opts.prefer_free_formats,
|
||||
'trim_file_name': opts.trim_file_name,
|
||||
@ -910,6 +940,7 @@ def parse_options(argv=None):
|
||||
'postprocessors': postprocessors,
|
||||
'fixup': opts.fixup,
|
||||
'source_address': opts.source_address,
|
||||
'impersonate': opts.impersonate,
|
||||
'call_home': opts.call_home,
|
||||
'sleep_interval_requests': opts.sleep_interval_requests,
|
||||
'sleep_interval': opts.sleep_interval,
|
||||
@ -959,6 +990,11 @@ def _real_main(argv=None):
|
||||
if opts.ffmpeg_location:
|
||||
FFmpegPostProcessor._ffmpeg_location.set(opts.ffmpeg_location)
|
||||
|
||||
# load all plugins into the global lookup
|
||||
plugin_dirs.value = opts.plugin_dirs
|
||||
if plugin_dirs.value:
|
||||
_load_all_plugins()
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
pre_process = opts.update_self or opts.rm_cachedir
|
||||
actual_use = all_urls or opts.load_info_filename
|
||||
@ -979,11 +1015,68 @@ def _real_main(argv=None):
|
||||
traceback.print_exc()
|
||||
ydl._download_retcode = 100
|
||||
|
||||
if opts.list_impersonate_targets:
|
||||
|
||||
known_targets = [
|
||||
# List of simplified targets we know are supported,
|
||||
# to help users know what dependencies may be required.
|
||||
(ImpersonateTarget('chrome'), 'curl_cffi'),
|
||||
(ImpersonateTarget('safari'), 'curl_cffi'),
|
||||
(ImpersonateTarget('firefox'), 'curl_cffi>=0.10'),
|
||||
(ImpersonateTarget('edge'), 'curl_cffi'),
|
||||
]
|
||||
|
||||
available_targets = ydl._get_available_impersonate_targets()
|
||||
|
||||
def make_row(target, handler):
|
||||
return [
|
||||
join_nonempty(target.client.title(), target.version, delim='-') or '-',
|
||||
join_nonempty((target.os or '').title(), target.os_version, delim='-') or '-',
|
||||
handler,
|
||||
]
|
||||
|
||||
rows = [make_row(target, handler) for target, handler in available_targets]
|
||||
|
||||
for known_target, known_handler in known_targets:
|
||||
if not any(
|
||||
known_target in target and known_handler.startswith(handler)
|
||||
for target, handler in available_targets
|
||||
):
|
||||
rows.insert(0, [
|
||||
ydl._format_out(text, ydl.Styles.SUPPRESS)
|
||||
for text in make_row(known_target, f'{known_handler} (unavailable)')
|
||||
])
|
||||
|
||||
ydl.to_screen('[info] Available impersonate targets')
|
||||
ydl.to_stdout(render_table(['Client', 'OS', 'Source'], rows, extra_gap=2, delim='-'))
|
||||
return
|
||||
|
||||
if not actual_use:
|
||||
if pre_process:
|
||||
return ydl._download_retcode
|
||||
|
||||
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
|
||||
args = sys.argv[1:] if argv is None else argv
|
||||
ydl.warn_if_short_id(args)
|
||||
|
||||
# Show a useful error message and wait for keypress if not launched from shell on Windows
|
||||
if not args and os.name == 'nt' and getattr(sys, 'frozen', False):
|
||||
import ctypes.wintypes
|
||||
import msvcrt
|
||||
|
||||
kernel32 = ctypes.WinDLL('Kernel32')
|
||||
|
||||
buffer = (1 * ctypes.wintypes.DWORD)()
|
||||
attached_processes = kernel32.GetConsoleProcessList(buffer, 1)
|
||||
# If we only have a single process attached, then the executable was double clicked
|
||||
# When using `pyinstaller` with `--onefile`, two processes get attached
|
||||
is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
|
||||
if attached_processes == 1 or (is_onefile and attached_processes == 2):
|
||||
print(parser._generate_error_message(
|
||||
'Do not double-click the executable, instead call it from a command line.\n'
|
||||
'Please read the README for further information on how to use yt-dlp: '
|
||||
'https://github.com/yt-dlp/yt-dlp#readme'))
|
||||
msvcrt.getch()
|
||||
_exit(2)
|
||||
parser.error(
|
||||
'You must provide at least one URL.\n'
|
||||
'Type yt-dlp --help to see a list of all options.')
|
||||
@ -1002,11 +1095,10 @@ def _real_main(argv=None):
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
global _IN_CLI
|
||||
_IN_CLI = True
|
||||
IN_CLI.value = True
|
||||
try:
|
||||
_exit(*variadic(_real_main(argv)))
|
||||
except DownloadError:
|
||||
except (CookieLoadError, DownloadError):
|
||||
_exit(1)
|
||||
except SameFileError as e:
|
||||
_exit(f'ERROR: {e}')
|
||||
@ -1024,9 +1116,9 @@ def main(argv=None):
|
||||
from .extractor import gen_extractors, list_extractors
|
||||
|
||||
__all__ = [
|
||||
'main',
|
||||
'YoutubeDL',
|
||||
'parse_options',
|
||||
'gen_extractors',
|
||||
'list_extractors',
|
||||
'main',
|
||||
'parse_options',
|
||||
]
|
||||
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Execute with
|
||||
# $ python -m yt_dlp
|
||||
# $ python3 -m yt_dlp
|
||||
|
||||
import sys
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
import sys
|
||||
|
||||
from PyInstaller.utils.hooks import collect_submodules
|
||||
from PyInstaller.utils.hooks import collect_submodules, collect_data_files
|
||||
|
||||
|
||||
def pycryptodome_module():
|
||||
@ -10,7 +10,7 @@ def pycryptodome_module():
|
||||
try:
|
||||
import Crypto # noqa: F401
|
||||
print('WARNING: Using Crypto since Cryptodome is not available. '
|
||||
'Install with: pip install pycryptodomex', file=sys.stderr)
|
||||
'Install with: python3 -m pip install pycryptodomex', file=sys.stderr)
|
||||
return 'Crypto'
|
||||
except ImportError:
|
||||
pass
|
||||
@ -21,12 +21,16 @@ def get_hidden_imports():
|
||||
yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated')
|
||||
yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated')
|
||||
yield pycryptodome_module()
|
||||
yield from collect_submodules('websockets')
|
||||
# Only `websockets` is required, others are collected just in case
|
||||
for module in ('websockets', 'requests', 'urllib3'):
|
||||
yield from collect_submodules(module)
|
||||
# These are auto-detected, but explicitly add them just in case
|
||||
yield from ('mutagen', 'brotli', 'certifi')
|
||||
yield from ('mutagen', 'brotli', 'certifi', 'secretstorage', 'curl_cffi')
|
||||
|
||||
|
||||
hiddenimports = list(get_hidden_imports())
|
||||
print(f'Adding imports: {hiddenimports}')
|
||||
|
||||
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts']
|
||||
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle']
|
||||
|
||||
datas = collect_data_files('curl_cffi', includes=['cacert.pem'])
|
||||
|
@ -3,7 +3,6 @@ from math import ceil
|
||||
|
||||
from .compat import compat_ord
|
||||
from .dependencies import Cryptodome
|
||||
from .utils import bytes_to_intlist, intlist_to_bytes
|
||||
|
||||
if Cryptodome.AES:
|
||||
def aes_cbc_decrypt_bytes(data, key, iv):
|
||||
@ -17,15 +16,15 @@ if Cryptodome.AES:
|
||||
else:
|
||||
def aes_cbc_decrypt_bytes(data, key, iv):
|
||||
""" Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """
|
||||
return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv))))
|
||||
return bytes(aes_cbc_decrypt(*map(list, (data, key, iv))))
|
||||
|
||||
def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
|
||||
""" Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """
|
||||
return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce))))
|
||||
return bytes(aes_gcm_decrypt_and_verify(*map(list, (data, key, tag, nonce))))
|
||||
|
||||
|
||||
def aes_cbc_encrypt_bytes(data, key, iv, **kwargs):
|
||||
return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs))
|
||||
return bytes(aes_cbc_encrypt(*map(list, (data, key, iv)), **kwargs))
|
||||
|
||||
|
||||
BLOCK_SIZE_BYTES = 16
|
||||
@ -68,7 +67,7 @@ def pad_block(block, padding_mode):
|
||||
raise NotImplementedError(f'Padding mode {padding_mode} is not implemented')
|
||||
|
||||
if padding_mode == 'iso7816' and padding_size:
|
||||
block = block + [0x80] # NB: += mutates list
|
||||
block = [*block, 0x80] # NB: += mutates list
|
||||
padding_size -= 1
|
||||
|
||||
return block + [PADDING_BYTE[padding_mode]] * padding_size
|
||||
@ -84,7 +83,7 @@ def aes_ecb_encrypt(data, key, iv=None):
|
||||
@returns {int[]} encrypted data
|
||||
"""
|
||||
expanded_key = key_expansion(key)
|
||||
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
|
||||
block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
|
||||
|
||||
encrypted_data = []
|
||||
for i in range(block_count):
|
||||
@ -104,15 +103,13 @@ def aes_ecb_decrypt(data, key, iv=None):
|
||||
@returns {int[]} decrypted data
|
||||
"""
|
||||
expanded_key = key_expansion(key)
|
||||
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
|
||||
block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
|
||||
|
||||
encrypted_data = []
|
||||
for i in range(block_count):
|
||||
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
|
||||
encrypted_data += aes_decrypt(block, expanded_key)
|
||||
encrypted_data = encrypted_data[:len(data)]
|
||||
|
||||
return encrypted_data
|
||||
return encrypted_data[:len(data)]
|
||||
|
||||
|
||||
def aes_ctr_decrypt(data, key, iv):
|
||||
@ -137,7 +134,7 @@ def aes_ctr_encrypt(data, key, iv):
|
||||
@returns {int[]} encrypted data
|
||||
"""
|
||||
expanded_key = key_expansion(key)
|
||||
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
|
||||
block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
|
||||
counter = iter_vector(iv)
|
||||
|
||||
encrypted_data = []
|
||||
@ -148,9 +145,7 @@ def aes_ctr_encrypt(data, key, iv):
|
||||
|
||||
cipher_counter_block = aes_encrypt(counter_block, expanded_key)
|
||||
encrypted_data += xor(block, cipher_counter_block)
|
||||
encrypted_data = encrypted_data[:len(data)]
|
||||
|
||||
return encrypted_data
|
||||
return encrypted_data[:len(data)]
|
||||
|
||||
|
||||
def aes_cbc_decrypt(data, key, iv):
|
||||
@ -163,7 +158,7 @@ def aes_cbc_decrypt(data, key, iv):
|
||||
@returns {int[]} decrypted data
|
||||
"""
|
||||
expanded_key = key_expansion(key)
|
||||
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
|
||||
block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
|
||||
|
||||
decrypted_data = []
|
||||
previous_cipher_block = iv
|
||||
@ -174,9 +169,7 @@ def aes_cbc_decrypt(data, key, iv):
|
||||
decrypted_block = aes_decrypt(block, expanded_key)
|
||||
decrypted_data += xor(decrypted_block, previous_cipher_block)
|
||||
previous_cipher_block = block
|
||||
decrypted_data = decrypted_data[:len(data)]
|
||||
|
||||
return decrypted_data
|
||||
return decrypted_data[:len(data)]
|
||||
|
||||
|
||||
def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'):
|
||||
@ -190,7 +183,7 @@ def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'):
|
||||
@returns {int[]} encrypted data
|
||||
"""
|
||||
expanded_key = key_expansion(key)
|
||||
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
|
||||
block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
|
||||
|
||||
encrypted_data = []
|
||||
previous_cipher_block = iv
|
||||
@ -224,10 +217,10 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
|
||||
hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key))
|
||||
|
||||
if len(nonce) == 12:
|
||||
j0 = nonce + [0, 0, 0, 1]
|
||||
j0 = [*nonce, 0, 0, 0, 1]
|
||||
else:
|
||||
fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8
|
||||
ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big'))
|
||||
ghash_in = nonce + [0] * fill + list((8 * len(nonce)).to_bytes(8, 'big'))
|
||||
j0 = ghash(hash_subkey, ghash_in)
|
||||
|
||||
# TODO: add nonce support to aes_ctr_decrypt
|
||||
@ -236,17 +229,17 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
|
||||
iv_ctr = inc(j0)
|
||||
|
||||
decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr)))
|
||||
pad_len = len(data) // 16 * 16
|
||||
pad_len = (BLOCK_SIZE_BYTES - (len(data) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES
|
||||
s_tag = ghash(
|
||||
hash_subkey,
|
||||
data
|
||||
+ [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad
|
||||
+ bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data
|
||||
+ ((len(data) * 8).to_bytes(8, 'big'))) # length of data
|
||||
+ [0] * pad_len # pad
|
||||
+ list((0 * 8).to_bytes(8, 'big') # length of associated data
|
||||
+ ((len(data) * 8).to_bytes(8, 'big'))), # length of data
|
||||
)
|
||||
|
||||
if tag != aes_ctr_encrypt(s_tag, key, j0):
|
||||
raise ValueError("Mismatching authentication tag")
|
||||
raise ValueError('Mismatching authentication tag')
|
||||
|
||||
return decrypted_data
|
||||
|
||||
@ -288,9 +281,7 @@ def aes_decrypt(data, expanded_key):
|
||||
data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV))
|
||||
data = shift_rows_inv(data)
|
||||
data = sub_bytes_inv(data)
|
||||
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
|
||||
|
||||
return data
|
||||
return xor(data, expanded_key[:BLOCK_SIZE_BYTES])
|
||||
|
||||
|
||||
def aes_decrypt_text(data, password, key_size_bytes):
|
||||
@ -308,8 +299,8 @@ def aes_decrypt_text(data, password, key_size_bytes):
|
||||
"""
|
||||
NONCE_LENGTH_BYTES = 8
|
||||
|
||||
data = bytes_to_intlist(base64.b64decode(data))
|
||||
password = bytes_to_intlist(password.encode())
|
||||
data = list(base64.b64decode(data))
|
||||
password = list(password.encode())
|
||||
|
||||
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
|
||||
key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
|
||||
@ -318,9 +309,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
|
||||
cipher = data[NONCE_LENGTH_BYTES:]
|
||||
|
||||
decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES))
|
||||
plaintext = intlist_to_bytes(decrypted_data)
|
||||
|
||||
return plaintext
|
||||
return bytes(decrypted_data)
|
||||
|
||||
|
||||
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
|
||||
@ -428,9 +417,7 @@ def key_expansion(data):
|
||||
for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
|
||||
temp = data[-4:]
|
||||
data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
|
||||
data = data[:expanded_key_size_bytes]
|
||||
|
||||
return data
|
||||
return data[:expanded_key_size_bytes]
|
||||
|
||||
|
||||
def iter_vector(iv):
|
||||
@ -511,7 +498,7 @@ def block_product(block_x, block_y):
|
||||
# NIST SP 800-38D, Algorithm 1
|
||||
|
||||
if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES:
|
||||
raise ValueError("Length of blocks need to be %d bytes" % BLOCK_SIZE_BYTES)
|
||||
raise ValueError(f'Length of blocks need to be {BLOCK_SIZE_BYTES} bytes')
|
||||
|
||||
block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1)
|
||||
block_v = block_y[:]
|
||||
@ -534,7 +521,7 @@ def ghash(subkey, data):
|
||||
# NIST SP 800-38D, Algorithm 2
|
||||
|
||||
if len(data) % BLOCK_SIZE_BYTES:
|
||||
raise ValueError("Length of data should be %d bytes" % BLOCK_SIZE_BYTES)
|
||||
raise ValueError(f'Length of data should be {BLOCK_SIZE_BYTES} bytes')
|
||||
|
||||
last_y = [0] * BLOCK_SIZE_BYTES
|
||||
for i in range(0, len(data), BLOCK_SIZE_BYTES):
|
||||
@ -547,19 +534,17 @@ def ghash(subkey, data):
|
||||
__all__ = [
|
||||
'aes_cbc_decrypt',
|
||||
'aes_cbc_decrypt_bytes',
|
||||
'aes_ctr_decrypt',
|
||||
'aes_decrypt_text',
|
||||
'aes_decrypt',
|
||||
'aes_ecb_decrypt',
|
||||
'aes_gcm_decrypt_and_verify',
|
||||
'aes_gcm_decrypt_and_verify_bytes',
|
||||
|
||||
'aes_cbc_encrypt',
|
||||
'aes_cbc_encrypt_bytes',
|
||||
'aes_ctr_decrypt',
|
||||
'aes_ctr_encrypt',
|
||||
'aes_decrypt',
|
||||
'aes_decrypt_text',
|
||||
'aes_ecb_decrypt',
|
||||
'aes_ecb_encrypt',
|
||||
'aes_encrypt',
|
||||
|
||||
'aes_gcm_decrypt_and_verify',
|
||||
'aes_gcm_decrypt_and_verify_bytes',
|
||||
'key_expansion',
|
||||
'pad_block',
|
||||
'pkcs7_padding',
|
||||
|
@ -81,10 +81,10 @@ class Cache:
|
||||
|
||||
cachedir = self._get_root_dir()
|
||||
if not any((term in cachedir) for term in ('cache', 'tmp')):
|
||||
raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
|
||||
raise Exception(f'Not removing directory {cachedir} - this does not look like a cache dir')
|
||||
|
||||
self._ydl.to_screen(
|
||||
'Removing cache dir %s .' % cachedir, skip_eol=True)
|
||||
f'Removing cache dir {cachedir} .', skip_eol=True)
|
||||
if os.path.exists(cachedir):
|
||||
self._ydl.to_screen('.', skip_eol=True)
|
||||
shutil.rmtree(cachedir)
|
||||
|
@ -1,5 +0,0 @@
|
||||
import warnings
|
||||
|
||||
warnings.warn(DeprecationWarning(f'{__name__} is deprecated'))
|
||||
|
||||
casefold = str.casefold
|
@ -1,5 +1,4 @@
|
||||
import os
|
||||
import sys
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .compat_utils import passthrough_module
|
||||
@ -24,36 +23,14 @@ def compat_etree_fromstring(text):
|
||||
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
|
||||
|
||||
|
||||
compat_os_name = os._name if os.name == 'java' else os.name
|
||||
|
||||
|
||||
if compat_os_name == 'nt':
|
||||
def compat_shlex_quote(s):
|
||||
import re
|
||||
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
|
||||
else:
|
||||
from shlex import quote as compat_shlex_quote # noqa: F401
|
||||
|
||||
|
||||
def compat_ord(c):
|
||||
return c if isinstance(c, int) else ord(c)
|
||||
|
||||
|
||||
if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
||||
# os.path.realpath on Windows does not follow symbolic links
|
||||
# prior to Python 3.8 (see https://bugs.python.org/issue9949)
|
||||
def compat_realpath(path):
|
||||
while os.path.islink(path):
|
||||
path = os.path.abspath(os.readlink(path))
|
||||
return os.path.realpath(path)
|
||||
else:
|
||||
compat_realpath = os.path.realpath
|
||||
|
||||
|
||||
# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/792
|
||||
# https://docs.python.org/3/library/os.path.html#os.path.expanduser
|
||||
if compat_os_name in ('nt', 'ce'):
|
||||
if os.name in ('nt', 'ce'):
|
||||
def compat_expanduser(path):
|
||||
HOME = os.environ.get('HOME')
|
||||
if not HOME:
|
||||
|
@ -8,16 +8,14 @@ passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn(
|
||||
DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
|
||||
del passthrough_module
|
||||
|
||||
import base64
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import functools # noqa: F401
|
||||
import os
|
||||
|
||||
compat_str = str
|
||||
|
||||
compat_b64decode = base64.b64decode
|
||||
compat_os_name = os.name
|
||||
compat_realpath = os.path.realpath
|
||||
|
||||
compat_urlparse = urllib.parse
|
||||
compat_parse_qs = urllib.parse.parse_qs
|
||||
compat_urllib_parse_unquote = urllib.parse.unquote
|
||||
compat_urllib_parse_urlencode = urllib.parse.urlencode
|
||||
compat_urllib_parse_urlparse = urllib.parse.urlparse
|
||||
|
||||
def compat_shlex_quote(s):
    """Compatibility wrapper: quote `s` by delegating to `shell_quote` from `..utils`."""
    # The import happens on each call rather than at module import time
    from ..utils import shell_quote as _shell_quote

    return _shell_quote(s)
|
||||
|
@ -30,11 +30,12 @@ from asyncio import run as compat_asyncio_run # noqa: F401
|
||||
from re import Pattern as compat_Pattern # noqa: F401
|
||||
from re import match as compat_Match # noqa: F401
|
||||
|
||||
from . import compat_expanduser, compat_HTMLParseError, compat_realpath
|
||||
from . import compat_expanduser, compat_HTMLParseError
|
||||
from .compat_utils import passthrough_module
|
||||
from ..dependencies import brotli as compat_brotli # noqa: F401
|
||||
from ..dependencies import websockets as compat_websockets # noqa: F401
|
||||
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
|
||||
from ..networking.exceptions import HTTPError as compat_HTTPError
|
||||
|
||||
passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
|
||||
|
||||
@ -70,7 +71,6 @@ compat_html_parser_HTMLParseError = compat_HTMLParseError
|
||||
compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
|
||||
compat_http_client = http.client
|
||||
compat_http_server = http.server
|
||||
compat_HTTPError = urllib.error.HTTPError
|
||||
compat_input = input
|
||||
compat_integer_types = (int, )
|
||||
compat_itertools_count = itertools.count
|
||||
@ -78,7 +78,7 @@ compat_kwargs = lambda kwargs: kwargs
|
||||
compat_map = map
|
||||
compat_numeric_types = (int, float, complex)
|
||||
compat_os_path_expanduser = compat_expanduser
|
||||
compat_os_path_realpath = compat_realpath
|
||||
compat_os_path_realpath = os.path.realpath
|
||||
compat_print = print
|
||||
compat_shlex_split = shlex.split
|
||||
compat_socket_create_connection = socket.create_connection
|
||||
@ -88,7 +88,7 @@ compat_struct_unpack = struct.unpack
|
||||
compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
|
||||
compat_tokenize_tokenize = tokenize.tokenize
|
||||
compat_urllib_error = urllib.error
|
||||
compat_urllib_HTTPError = urllib.error.HTTPError
|
||||
compat_urllib_HTTPError = compat_HTTPError
|
||||
compat_urllib_parse = urllib.parse
|
||||
compat_urllib_parse_parse_qs = urllib.parse.parse_qs
|
||||
compat_urllib_parse_quote = urllib.parse.quote
|
||||
@ -104,5 +104,12 @@ compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseEr
|
||||
compat_xpath = lambda xpath: xpath
|
||||
compat_zip = zip
|
||||
workaround_optparse_bug9161 = lambda: None
|
||||
compat_str = str
|
||||
compat_b64decode = base64.b64decode
|
||||
compat_urlparse = urllib.parse
|
||||
compat_parse_qs = urllib.parse.parse_qs
|
||||
compat_urllib_parse_unquote = urllib.parse.unquote
|
||||
compat_urllib_parse_urlencode = urllib.parse.urlencode
|
||||
compat_urllib_parse_urlparse = urllib.parse.urlparse
|
||||
|
||||
legacy = []
|
||||
|
@ -15,7 +15,7 @@ def get_package_info(module):
|
||||
name=getattr(module, '_yt_dlp__identifier', module.__name__),
|
||||
version=str(next(filter(None, (
|
||||
getattr(module, attr, None)
|
||||
for attr in ('__version__', 'version_string', 'version')
|
||||
for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version')
|
||||
)), None)))
|
||||
|
||||
|
||||
@ -57,7 +57,7 @@ def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=la
|
||||
callback(attr)
|
||||
return ret
|
||||
|
||||
@functools.lru_cache(maxsize=None)
|
||||
@functools.cache
|
||||
def from_child(attr):
|
||||
nonlocal child
|
||||
if attr not in allowed_attributes:
|
||||
|
@ -1,26 +0,0 @@
|
||||
# flake8: noqa: F405
|
||||
from functools import * # noqa: F403
|
||||
|
||||
from .compat_utils import passthrough_module
|
||||
|
||||
passthrough_module(__name__, 'functools')
|
||||
del passthrough_module
|
||||
|
||||
try:
|
||||
cache # >= 3.9
|
||||
except NameError:
|
||||
cache = lru_cache(maxsize=None)
|
||||
|
||||
try:
|
||||
cached_property # >= 3.8
|
||||
except NameError:
|
||||
class cached_property:
|
||||
def __init__(self, func):
|
||||
update_wrapper(self, func)
|
||||
self.func = func
|
||||
|
||||
def __get__(self, instance, _):
|
||||
if instance is None:
|
||||
return self
|
||||
setattr(instance, self.func.__name__, self.func(instance))
|
||||
return getattr(instance, self.func.__name__)
|
@ -1,16 +1,22 @@
|
||||
tests = {
|
||||
'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP',
|
||||
'png': lambda h: h[:8] == b'\211PNG\r\n\032\n',
|
||||
'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'),
|
||||
'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'),
|
||||
}
|
||||
|
||||
|
||||
def what(file=None, h=None):
    """Detect format of image (Currently supports jpeg, png, webp, gif only)

    Ref: https://github.com/python/cpython/blob/3.11/Lib/imghdr.py
    Ref: https://www.w3.org/Graphics/JPEG/itu-t81.pdf

    `file` is only opened (in binary mode, first 12 bytes read) when `h` is None;
    otherwise `h` is taken as the leading bytes of the image.
    Returns 'webp', 'png', 'jpeg' or 'gif', or None if no signature matches.
    """
    if h is None:
        with open(file, 'rb') as f:
            h = f.read(12)

    # RIFF container whose form type (bytes 8..11) is WEBP
    if h.startswith(b'RIFF') and h.startswith(b'WEBP', 8):
        return 'webp'

    if h.startswith(b'\x89PNG'):
        return 'png'

    # Match on the leading FF D8 FF bytes instead of a JFIF/Exif tag at offset 6,
    # so JPEG streams without such a tag are still recognised
    if h.startswith(b'\xFF\xD8\xFF'):
        return 'jpeg'

    if h.startswith(b'GIF'):
        return 'gif'

    return None
|
||||
|
@ -1,7 +1,7 @@
|
||||
# flake8: noqa: F405
|
||||
from urllib import * # noqa: F403
|
||||
|
||||
del request
|
||||
del request # noqa: F821
|
||||
from . import request # noqa: F401
|
||||
|
||||
from ..compat_utils import passthrough_module
|
||||
|
@ -7,13 +7,13 @@ passthrough_module(__name__, 'urllib.request')
|
||||
del passthrough_module
|
||||
|
||||
|
||||
from .. import compat_os_name
|
||||
import os
|
||||
|
||||
if compat_os_name == 'nt':
|
||||
# On older python versions, proxies are extracted from Windows registry erroneously. [1]
|
||||
if os.name == 'nt':
|
||||
# On older Python versions, proxies are extracted from Windows registry erroneously. [1]
|
||||
# If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2]
|
||||
# It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade
|
||||
# it to http on these older python versions to avoid issues
|
||||
# it to http on these older Python versions to avoid issues
|
||||
# This also applies for ftp proxy type, as ftp:// proxy scheme is not supported.
|
||||
# 1: https://github.com/python/cpython/issues/86793
|
||||
# 2: https://github.com/python/cpython/blob/51f1ae5ceb0673316c4e4b0175384e892e33cc6e/Lib/urllib/request.py#L2683-L2698
|
||||
@ -37,4 +37,4 @@ if compat_os_name == 'nt':
|
||||
def getproxies():
|
||||
return getproxies_environment() or getproxies_registry_patched()
|
||||
|
||||
del compat_os_name
|
||||
del os
|
||||
|
@ -1,6 +1,10 @@
|
||||
import base64
|
||||
import collections
|
||||
import contextlib
|
||||
import datetime as dt
|
||||
import functools
|
||||
import glob
|
||||
import hashlib
|
||||
import http.cookiejar
|
||||
import http.cookies
|
||||
import io
|
||||
@ -14,16 +18,13 @@ import sys
|
||||
import tempfile
|
||||
import time
|
||||
import urllib.request
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from enum import Enum, auto
|
||||
from hashlib import pbkdf2_hmac
|
||||
|
||||
from .aes import (
|
||||
aes_cbc_decrypt_bytes,
|
||||
aes_gcm_decrypt_and_verify_bytes,
|
||||
unpad_pkcs7,
|
||||
)
|
||||
from .compat import functools
|
||||
from .dependencies import (
|
||||
_SECRETSTORAGE_UNAVAILABLE_REASON,
|
||||
secretstorage,
|
||||
@ -31,6 +32,8 @@ from .dependencies import (
|
||||
)
|
||||
from .minicurses import MultilinePrinter, QuietMultilinePrinter
|
||||
from .utils import (
|
||||
DownloadError,
|
||||
YoutubeDLError,
|
||||
Popen,
|
||||
error_to_str,
|
||||
expand_path,
|
||||
@ -43,7 +46,7 @@ from .utils import (
|
||||
from .utils._utils import _YDLLogger
|
||||
from .utils.networking import normalize_url
|
||||
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
|
||||
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
|
||||
|
||||
|
||||
@ -83,24 +86,31 @@ def _create_progress_bar(logger):
|
||||
return printer
|
||||
|
||||
|
||||
class CookieLoadError(YoutubeDLError):
    """Error raised when cookies could not be loaded."""
|
||||
|
||||
|
||||
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser and/or a cookie file.

    @param cookie_file            Cookie source handed to YoutubeDLCookieJar, or None to skip.
                                  When `is_path_like` accepts it, it is passed through
                                  `expand_path` and only loaded if it is readable
    @param browser_specification  Arguments for `_parse_browser_specification`, or None to skip
    @param ydl                    Object wrapped in YDLLogger for browser-extraction logging
    @returns                      The result of `_merge_cookie_jars` over the collected jars
    @raises CookieLoadError       On any exception raised while loading; the original
                                  exception is attached as the cause
    """
    try:
        cookie_jars = []
        if browser_specification is not None:
            browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
            cookie_jars.append(
                extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container))

        if cookie_file is not None:
            is_filename = is_path_like(cookie_file)
            if is_filename:
                cookie_file = expand_path(cookie_file)

            jar = YoutubeDLCookieJar(cookie_file)
            # An unreadable/nonexistent path is not an error here: the jar is kept, but left empty
            if not is_filename or os.access(cookie_file, os.R_OK):
                jar.load()
            cookie_jars.append(jar)

        return _merge_cookie_jars(cookie_jars)
    except Exception as err:
        # Explicit chaining; the original exception also stays available as __context__
        raise CookieLoadError('failed to load cookies') from err
|
||||
|
||||
|
||||
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
|
||||
@ -118,17 +128,18 @@ def _extract_firefox_cookies(profile, container, logger):
|
||||
logger.info('Extracting cookies from firefox')
|
||||
if not sqlite3:
|
||||
logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
|
||||
'Please use a python interpreter compiled with sqlite3 support')
|
||||
'Please use a Python interpreter compiled with sqlite3 support')
|
||||
return YoutubeDLCookieJar()
|
||||
|
||||
if profile is None:
|
||||
search_root = _firefox_browser_dir()
|
||||
search_roots = list(_firefox_browser_dirs())
|
||||
elif _is_path(profile):
|
||||
search_root = profile
|
||||
search_roots = [profile]
|
||||
else:
|
||||
search_root = os.path.join(_firefox_browser_dir(), profile)
|
||||
search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
|
||||
search_root = ', '.join(map(repr, search_roots))
|
||||
|
||||
cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
|
||||
cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
|
||||
if cookie_database_path is None:
|
||||
raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
|
||||
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
||||
@ -142,7 +153,7 @@ def _extract_firefox_cookies(profile, container, logger):
|
||||
identities = json.load(containers).get('identities', [])
|
||||
container_id = next((context.get('userContextId') for context in identities if container in (
|
||||
context.get('name'),
|
||||
try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group())
|
||||
try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()),
|
||||
)), None)
|
||||
if not isinstance(container_id, int):
|
||||
raise ValueError(f'could not find firefox container "{container}" in containers.json')
|
||||
@ -182,12 +193,28 @@ def _extract_firefox_cookies(profile, container, logger):
|
||||
cursor.connection.close()
|
||||
|
||||
|
||||
def _firefox_browser_dir():
|
||||
def _firefox_browser_dirs():
|
||||
if sys.platform in ('cygwin', 'win32'):
|
||||
return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
|
||||
yield from map(os.path.expandvars, (
|
||||
R'%APPDATA%\Mozilla\Firefox\Profiles',
|
||||
R'%LOCALAPPDATA%\Packages\Mozilla.Firefox_n80bbvh6b1yt2\LocalCache\Roaming\Mozilla\Firefox\Profiles',
|
||||
))
|
||||
|
||||
elif sys.platform == 'darwin':
|
||||
return os.path.expanduser('~/Library/Application Support/Firefox')
|
||||
return os.path.expanduser('~/.mozilla/firefox')
|
||||
yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
|
||||
|
||||
else:
|
||||
yield from map(os.path.expanduser, (
|
||||
'~/.mozilla/firefox',
|
||||
'~/snap/firefox/common/.mozilla/firefox',
|
||||
'~/.var/app/org.mozilla.firefox/.mozilla/firefox',
|
||||
))
|
||||
|
||||
|
||||
def _firefox_cookie_dbs(roots):
|
||||
for root in map(os.path.abspath, roots):
|
||||
for pattern in ('', '*/', 'Profiles/*/'):
|
||||
yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite'))
|
||||
|
||||
|
||||
def _get_chromium_based_browser_settings(browser_name):
|
||||
@ -202,6 +229,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
|
||||
'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
|
||||
'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
|
||||
'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'),
|
||||
}[browser_name]
|
||||
|
||||
elif sys.platform == 'darwin':
|
||||
@ -213,6 +241,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
'edge': os.path.join(appdata, 'Microsoft Edge'),
|
||||
'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
|
||||
'vivaldi': os.path.join(appdata, 'Vivaldi'),
|
||||
'whale': os.path.join(appdata, 'Naver/Whale'),
|
||||
}[browser_name]
|
||||
|
||||
else:
|
||||
@ -224,6 +253,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
'edge': os.path.join(config, 'microsoft-edge'),
|
||||
'opera': os.path.join(config, 'opera'),
|
||||
'vivaldi': os.path.join(config, 'vivaldi'),
|
||||
'whale': os.path.join(config, 'naver-whale'),
|
||||
}[browser_name]
|
||||
|
||||
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
|
||||
@ -235,6 +265,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
|
||||
'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
|
||||
'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
|
||||
'whale': 'Whale',
|
||||
}[browser_name]
|
||||
|
||||
browsers_without_profiles = {'opera'}
|
||||
@ -242,7 +273,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
return {
|
||||
'browser_dir': browser_dir,
|
||||
'keyring_name': keyring_name,
|
||||
'supports_profiles': browser_name not in browsers_without_profiles
|
||||
'supports_profiles': browser_name not in browsers_without_profiles,
|
||||
}
|
||||
|
||||
|
||||
@ -251,7 +282,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
|
||||
|
||||
if not sqlite3:
|
||||
logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
|
||||
'Please use a python interpreter compiled with sqlite3 support')
|
||||
'Please use a Python interpreter compiled with sqlite3 support')
|
||||
return YoutubeDLCookieJar()
|
||||
|
||||
config = _get_chromium_based_browser_settings(browser_name)
|
||||
@ -268,17 +299,23 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
|
||||
logger.error(f'{browser_name} does not support profiles')
|
||||
search_root = config['browser_dir']
|
||||
|
||||
cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
|
||||
cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
|
||||
if cookie_database_path is None:
|
||||
raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
|
||||
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
||||
|
||||
decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)
|
||||
|
||||
with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
|
||||
cursor = None
|
||||
try:
|
||||
cursor = _open_database_copy(cookie_database_path, tmpdir)
|
||||
|
||||
# meta_version is necessary to determine if we need to trim the hash prefix from the cookies
|
||||
# Ref: https://chromium.googlesource.com/chromium/src/+/b02dcebd7cafab92770734dc2bc317bd07f1d891/net/extras/sqlite/sqlite_persistent_cookie_store.cc#223
|
||||
meta_version = int(cursor.execute('SELECT value FROM meta WHERE key = "version"').fetchone()[0])
|
||||
decryptor = get_cookie_decryptor(
|
||||
config['browser_dir'], config['keyring_name'], logger,
|
||||
keyring=keyring, meta_version=meta_version)
|
||||
|
||||
cursor.connection.text_factory = bytes
|
||||
column_names = _get_column_names(cursor, 'cookies')
|
||||
secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
|
||||
@ -307,6 +344,12 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
|
||||
counts['unencrypted'] = unencrypted_cookies
|
||||
logger.debug(f'cookie version breakdown: {counts}')
|
||||
return jar
|
||||
except PermissionError as error:
|
||||
if os.name == 'nt' and error.errno == 13:
|
||||
message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
|
||||
logger.error(message)
|
||||
raise DownloadError(message) # force exit
|
||||
raise
|
||||
finally:
|
||||
if cursor is not None:
|
||||
cursor.connection.close()
|
||||
@ -324,6 +367,11 @@ def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, pa
|
||||
if value is None:
|
||||
return is_encrypted, None
|
||||
|
||||
# In chrome, session cookies have expires_utc set to 0
|
||||
# In our cookie-store, cookies that do not expire should have expires set to None
|
||||
if not expires_utc:
|
||||
expires_utc = None
|
||||
|
||||
return is_encrypted, http.cookiejar.Cookie(
|
||||
version=0, name=name, value=value, port=None, port_specified=False,
|
||||
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
|
||||
@ -365,22 +413,23 @@ class ChromeCookieDecryptor:
|
||||
raise NotImplementedError('Must be implemented by sub classes')
|
||||
|
||||
|
||||
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None, meta_version=None):
    """Return the ChromeCookieDecryptor implementation for the current platform.

    @param browser_root          Browser data directory; only passed to the Windows decryptor
    @param browser_keyring_name  Keyring name; passed to the macOS and Linux decryptors
    @param logger                Logger handed to the decryptor
    @param keyring               Keyring selection; only passed to the Linux decryptor
    @param meta_version          Forwarded to every decryptor unchanged
    """
    if sys.platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger, meta_version=meta_version)
    elif sys.platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger, meta_version=meta_version)
    # Every other platform falls through to the Linux implementation
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring, meta_version=meta_version)
|
||||
|
||||
|
||||
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
def __init__(self, browser_keyring_name, logger, *, keyring=None):
|
||||
def __init__(self, browser_keyring_name, logger, *, keyring=None, meta_version=None):
|
||||
self._logger = logger
|
||||
self._v10_key = self.derive_key(b'peanuts')
|
||||
self._empty_key = self.derive_key(b'')
|
||||
self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
|
||||
self._browser_keyring_name = browser_keyring_name
|
||||
self._keyring = keyring
|
||||
self._meta_version = meta_version or 0
|
||||
|
||||
@functools.cached_property
|
||||
def _v11_key(self):
|
||||
@ -409,14 +458,18 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
|
||||
if version == b'v10':
|
||||
self._cookie_counts['v10'] += 1
|
||||
return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)
|
||||
return _decrypt_aes_cbc_multi(
|
||||
ciphertext, (self._v10_key, self._empty_key), self._logger,
|
||||
hash_prefix=self._meta_version >= 24)
|
||||
|
||||
elif version == b'v11':
|
||||
self._cookie_counts['v11'] += 1
|
||||
if self._v11_key is None:
|
||||
self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
|
||||
return None
|
||||
return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger)
|
||||
return _decrypt_aes_cbc_multi(
|
||||
ciphertext, (self._v11_key, self._empty_key), self._logger,
|
||||
hash_prefix=self._meta_version >= 24)
|
||||
|
||||
else:
|
||||
self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
|
||||
@ -425,11 +478,12 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
|
||||
|
||||
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
def __init__(self, browser_keyring_name, logger):
|
||||
def __init__(self, browser_keyring_name, logger, meta_version=None):
|
||||
self._logger = logger
|
||||
password = _get_mac_keyring_password(browser_keyring_name, logger)
|
||||
self._v10_key = None if password is None else self.derive_key(password)
|
||||
self._cookie_counts = {'v10': 0, 'other': 0}
|
||||
self._meta_version = meta_version or 0
|
||||
|
||||
@staticmethod
|
||||
def derive_key(password):
|
||||
@ -447,7 +501,8 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
|
||||
return None
|
||||
|
||||
return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger)
|
||||
return _decrypt_aes_cbc_multi(
|
||||
ciphertext, (self._v10_key,), self._logger, hash_prefix=self._meta_version >= 24)
|
||||
|
||||
else:
|
||||
self._cookie_counts['other'] += 1
|
||||
@ -457,10 +512,11 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
|
||||
|
||||
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
def __init__(self, browser_root, logger):
|
||||
def __init__(self, browser_root, logger, meta_version=None):
|
||||
self._logger = logger
|
||||
self._v10_key = _get_windows_v10_key(browser_root, logger)
|
||||
self._cookie_counts = {'v10': 0, 'other': 0}
|
||||
self._meta_version = meta_version or 0
|
||||
|
||||
def decrypt(self, encrypted_value):
|
||||
version = encrypted_value[:3]
|
||||
@ -484,7 +540,9 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
|
||||
authentication_tag = raw_ciphertext[-authentication_tag_length:]
|
||||
|
||||
return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
|
||||
return _decrypt_aes_gcm(
|
||||
ciphertext, self._v10_key, nonce, authentication_tag, self._logger,
|
||||
hash_prefix=self._meta_version >= 24)
|
||||
|
||||
else:
|
||||
self._cookie_counts['other'] += 1
|
||||
@ -575,7 +633,7 @@ class DataParser:
|
||||
|
||||
|
||||
def _mac_absolute_time_to_posix(timestamp):
|
||||
return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
|
||||
return int((dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc) + dt.timedelta(seconds=timestamp)).timestamp())
|
||||
|
||||
|
||||
def _parse_safari_cookies_header(data, logger):
|
||||
@ -708,40 +766,38 @@ def _get_linux_desktop_environment(env, logger):
|
||||
xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
|
||||
desktop_session = env.get('DESKTOP_SESSION', None)
|
||||
if xdg_current_desktop is not None:
|
||||
xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()
|
||||
|
||||
if xdg_current_desktop == 'Unity':
|
||||
if desktop_session is not None and 'gnome-fallback' in desktop_session:
|
||||
for part in map(str.strip, xdg_current_desktop.split(':')):
|
||||
if part == 'Unity':
|
||||
if desktop_session is not None and 'gnome-fallback' in desktop_session:
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.UNITY
|
||||
elif part == 'Deepin':
|
||||
return _LinuxDesktopEnvironment.DEEPIN
|
||||
elif part == 'GNOME':
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.UNITY
|
||||
elif xdg_current_desktop == 'Deepin':
|
||||
return _LinuxDesktopEnvironment.DEEPIN
|
||||
elif xdg_current_desktop == 'GNOME':
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
elif xdg_current_desktop == 'X-Cinnamon':
|
||||
return _LinuxDesktopEnvironment.CINNAMON
|
||||
elif xdg_current_desktop == 'KDE':
|
||||
kde_version = env.get('KDE_SESSION_VERSION', None)
|
||||
if kde_version == '5':
|
||||
return _LinuxDesktopEnvironment.KDE5
|
||||
elif kde_version == '6':
|
||||
return _LinuxDesktopEnvironment.KDE6
|
||||
elif kde_version == '4':
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
else:
|
||||
logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
elif xdg_current_desktop == 'Pantheon':
|
||||
return _LinuxDesktopEnvironment.PANTHEON
|
||||
elif xdg_current_desktop == 'XFCE':
|
||||
return _LinuxDesktopEnvironment.XFCE
|
||||
elif xdg_current_desktop == 'UKUI':
|
||||
return _LinuxDesktopEnvironment.UKUI
|
||||
elif xdg_current_desktop == 'LXQt':
|
||||
return _LinuxDesktopEnvironment.LXQT
|
||||
else:
|
||||
logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
|
||||
elif part == 'X-Cinnamon':
|
||||
return _LinuxDesktopEnvironment.CINNAMON
|
||||
elif part == 'KDE':
|
||||
kde_version = env.get('KDE_SESSION_VERSION', None)
|
||||
if kde_version == '5':
|
||||
return _LinuxDesktopEnvironment.KDE5
|
||||
elif kde_version == '6':
|
||||
return _LinuxDesktopEnvironment.KDE6
|
||||
elif kde_version == '4':
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
else:
|
||||
logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
elif part == 'Pantheon':
|
||||
return _LinuxDesktopEnvironment.PANTHEON
|
||||
elif part == 'XFCE':
|
||||
return _LinuxDesktopEnvironment.XFCE
|
||||
elif part == 'UKUI':
|
||||
return _LinuxDesktopEnvironment.UKUI
|
||||
elif part == 'LXQt':
|
||||
return _LinuxDesktopEnvironment.LXQT
|
||||
logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
|
||||
|
||||
elif desktop_session is not None:
|
||||
if desktop_session == 'deepin':
|
||||
@ -794,7 +850,7 @@ def _choose_linux_keyring(logger):
|
||||
elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
|
||||
linux_keyring = _LinuxKeyring.KWALLET6
|
||||
elif desktop_environment in (
|
||||
_LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
|
||||
_LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER,
|
||||
):
|
||||
linux_keyring = _LinuxKeyring.BASICTEXT
|
||||
else:
|
||||
@ -829,7 +885,7 @@ def _get_kwallet_network_wallet(keyring, logger):
|
||||
'dbus-send', '--session', '--print-reply=literal',
|
||||
f'--dest={service_name}',
|
||||
wallet_path,
|
||||
'org.kde.KWallet.networkWallet'
|
||||
'org.kde.KWallet.networkWallet',
|
||||
], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
|
||||
|
||||
if returncode:
|
||||
@ -859,7 +915,7 @@ def _get_kwallet_password(browser_keyring_name, keyring, logger):
|
||||
'kwallet-query',
|
||||
'--read-password', f'{browser_keyring_name} Safe Storage',
|
||||
'--folder', f'{browser_keyring_name} Keys',
|
||||
network_wallet
|
||||
network_wallet,
|
||||
], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
|
||||
|
||||
if returncode:
|
||||
@ -899,9 +955,8 @@ def _get_gnome_keyring_password(browser_keyring_name, logger):
|
||||
for item in col.get_all_items():
|
||||
if item.get_label() == f'{browser_keyring_name} Safe Storage':
|
||||
return item.get_secret()
|
||||
else:
|
||||
logger.error('failed to read from keyring')
|
||||
return b''
|
||||
logger.error('failed to read from keyring')
|
||||
return b''
|
||||
|
||||
|
||||
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
|
||||
@ -947,7 +1002,7 @@ def _get_windows_v10_key(browser_root, logger):
|
||||
References:
|
||||
- [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
|
||||
"""
|
||||
path = _find_most_recently_used_file(browser_root, 'Local State', logger)
|
||||
path = _newest(_find_files(browser_root, 'Local State', logger))
|
||||
if path is None:
|
||||
logger.error('could not find local state file')
|
||||
return None
|
||||
@ -970,13 +1025,15 @@ def _get_windows_v10_key(browser_root, logger):
|
||||
|
||||
|
||||
def pbkdf2_sha1(password, salt, iterations, key_length):
|
||||
return pbkdf2_hmac('sha1', password, salt, iterations, key_length)
|
||||
return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length)
|
||||
|
||||
|
||||
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
|
||||
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16, hash_prefix=False):
|
||||
for key in keys:
|
||||
plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
|
||||
try:
|
||||
if hash_prefix:
|
||||
return plaintext[32:].decode()
|
||||
return plaintext.decode()
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
@ -984,7 +1041,7 @@ def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' '
|
||||
return None
|
||||
|
||||
|
||||
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
|
||||
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger, hash_prefix=False):
|
||||
try:
|
||||
plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
|
||||
except ValueError:
|
||||
@ -992,6 +1049,8 @@ def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
|
||||
return None
|
||||
|
||||
try:
|
||||
if hash_prefix:
|
||||
return plaintext[32:].decode()
|
||||
return plaintext.decode()
|
||||
except UnicodeDecodeError:
|
||||
logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
|
||||
@ -1021,11 +1080,12 @@ def _decrypt_windows_dpapi(ciphertext, logger):
|
||||
None, # pvReserved: must be NULL
|
||||
None, # pPromptStruct: information about prompts to display
|
||||
0, # dwFlags
|
||||
ctypes.byref(blob_out) # pDataOut
|
||||
ctypes.byref(blob_out), # pDataOut
|
||||
)
|
||||
if not ret:
|
||||
logger.warning('failed to decrypt with DPAPI', only_once=True)
|
||||
return None
|
||||
message = 'Failed to decrypt with DPAPI. See https://github.com/yt-dlp/yt-dlp/issues/10927 for more info'
|
||||
logger.error(message)
|
||||
raise DownloadError(message) # force exit
|
||||
|
||||
result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
|
||||
ctypes.windll.kernel32.LocalFree(blob_out.pbData)
|
||||
@ -1049,17 +1109,20 @@ def _get_column_names(cursor, table_name):
|
||||
return [row[1].decode() for row in table_info]
|
||||
|
||||
|
||||
def _find_most_recently_used_file(root, filename, logger):
|
||||
def _newest(files):
|
||||
return max(files, key=lambda path: os.lstat(path).st_mtime, default=None)
|
||||
|
||||
|
||||
def _find_files(root, filename, logger):
|
||||
# if there are multiple browser profiles, take the most recently used one
|
||||
i, paths = 0, []
|
||||
i = 0
|
||||
with _create_progress_bar(logger) as progress_bar:
|
||||
for curr_root, dirs, files in os.walk(root):
|
||||
for curr_root, _, files in os.walk(root):
|
||||
for file in files:
|
||||
i += 1
|
||||
progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
|
||||
if file == filename:
|
||||
paths.append(os.path.join(curr_root, file))
|
||||
return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
|
||||
yield os.path.join(curr_root, file)
|
||||
|
||||
|
||||
def _merge_cookie_jars(jars):
|
||||
@ -1073,7 +1136,7 @@ def _merge_cookie_jars(jars):
|
||||
|
||||
|
||||
def _is_path(value):
|
||||
return os.path.sep in value
|
||||
return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
|
||||
|
||||
|
||||
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
|
||||
@ -1094,24 +1157,24 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
_LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
|
||||
|
||||
_RESERVED = {
|
||||
"expires",
|
||||
"path",
|
||||
"comment",
|
||||
"domain",
|
||||
"max-age",
|
||||
"secure",
|
||||
"httponly",
|
||||
"version",
|
||||
"samesite",
|
||||
'expires',
|
||||
'path',
|
||||
'comment',
|
||||
'domain',
|
||||
'max-age',
|
||||
'secure',
|
||||
'httponly',
|
||||
'version',
|
||||
'samesite',
|
||||
}
|
||||
|
||||
_FLAGS = {"secure", "httponly"}
|
||||
_FLAGS = {'secure', 'httponly'}
|
||||
|
||||
# Added 'bad' group to catch the remaining value
|
||||
_COOKIE_PATTERN = re.compile(r"""
|
||||
_COOKIE_PATTERN = re.compile(r'''
|
||||
\s* # Optional whitespace at start of cookie
|
||||
(?P<key> # Start of group 'key'
|
||||
[""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
|
||||
[''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter
|
||||
) # End of group 'key'
|
||||
( # Optional group: there may not be a value.
|
||||
\s*=\s* # Equal Sign
|
||||
@ -1121,7 +1184,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
| # or
|
||||
\w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
|
||||
| # or
|
||||
[""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string
|
||||
[''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string
|
||||
) # End of group 'val'
|
||||
| # or
|
||||
(?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
|
||||
@ -1129,7 +1192,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
)? # End of optional value group
|
||||
\s* # Any number of spaces.
|
||||
(\s+|;|$) # Ending either at space, semicolon, or EOS.
|
||||
""", re.ASCII | re.VERBOSE)
|
||||
''', re.ASCII | re.VERBOSE)
|
||||
|
||||
def load(self, data):
|
||||
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
|
||||
@ -1216,8 +1279,8 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
|
||||
def _really_save(self, f, ignore_discard, ignore_expires):
|
||||
now = time.time()
|
||||
for cookie in self:
|
||||
if (not ignore_discard and cookie.discard
|
||||
or not ignore_expires and cookie.is_expired(now)):
|
||||
if ((not ignore_discard and cookie.discard)
|
||||
or (not ignore_expires and cookie.is_expired(now))):
|
||||
continue
|
||||
name, value = cookie.name, cookie.value
|
||||
if value is None:
|
||||
@ -1225,14 +1288,14 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
|
||||
# with no name, whereas http.cookiejar regards it as a
|
||||
# cookie with no value.
|
||||
name, value = '', name
|
||||
f.write('%s\n' % '\t'.join((
|
||||
f.write('{}\n'.format('\t'.join((
|
||||
cookie.domain,
|
||||
self._true_or_false(cookie.domain.startswith('.')),
|
||||
cookie.path,
|
||||
self._true_or_false(cookie.secure),
|
||||
str_or_none(cookie.expires, default=''),
|
||||
name, value
|
||||
)))
|
||||
name, value,
|
||||
))))
|
||||
|
||||
def save(self, filename=None, ignore_discard=True, ignore_expires=True):
|
||||
"""
|
||||
@ -1271,10 +1334,10 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
|
||||
return line
|
||||
cookie_list = line.split('\t')
|
||||
if len(cookie_list) != self._ENTRY_LEN:
|
||||
raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
|
||||
raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}')
|
||||
cookie = self._CookieFileEntry(*cookie_list)
|
||||
if cookie.expires_at and not cookie.expires_at.isdigit():
|
||||
raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
|
||||
raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}')
|
||||
return line
|
||||
|
||||
cf = io.StringIO()
|
||||
|
@ -24,7 +24,7 @@ try:
|
||||
from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401
|
||||
from Crypto.Hash import CMAC, SHA1 # noqa: F401
|
||||
from Crypto.PublicKey import RSA # noqa: F401
|
||||
except ImportError:
|
||||
except (ImportError, OSError):
|
||||
__version__ = f'broken {__version__}'.strip()
|
||||
|
||||
|
||||
|
@ -43,19 +43,28 @@ except Exception as _err:
|
||||
|
||||
try:
|
||||
import sqlite3
|
||||
# We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152
|
||||
sqlite3._yt_dlp__version = sqlite3.sqlite_version
|
||||
except ImportError:
|
||||
# although sqlite3 is part of the standard library, it is possible to compile python without
|
||||
# although sqlite3 is part of the standard library, it is possible to compile Python without
|
||||
# sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
|
||||
sqlite3 = None
|
||||
|
||||
|
||||
try:
|
||||
import websockets
|
||||
except (ImportError, SyntaxError):
|
||||
# websockets 3.10 on python 3.6 causes SyntaxError
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/2633
|
||||
except ImportError:
|
||||
websockets = None
|
||||
|
||||
try:
|
||||
import urllib3
|
||||
except ImportError:
|
||||
urllib3 = None
|
||||
|
||||
try:
|
||||
import requests
|
||||
except ImportError:
|
||||
requests = None
|
||||
|
||||
try:
|
||||
import xattr # xattr or pyxattr
|
||||
@ -65,6 +74,10 @@ else:
|
||||
if hasattr(xattr, 'set'): # pyxattr
|
||||
xattr._yt_dlp__identifier = 'pyxattr'
|
||||
|
||||
try:
|
||||
import curl_cffi
|
||||
except ImportError:
|
||||
curl_cffi = None
|
||||
|
||||
from . import Cryptodome
|
||||
|
||||
|
@ -30,11 +30,12 @@ from .hls import HlsFD
|
||||
from .http import HttpFD
|
||||
from .ism import IsmFD
|
||||
from .mhtml import MhtmlFD
|
||||
from .niconico import NiconicoDmcFD, NiconicoLiveFD
|
||||
from .niconico import NiconicoLiveFD
|
||||
from .rtmp import RtmpFD
|
||||
from .rtsp import RtspFD
|
||||
from .websocket import WebSocketFragmentFD
|
||||
from .youtube_live_chat import YoutubeLiveChatFD
|
||||
from .bunnycdn import BunnyCdnFD
|
||||
|
||||
PROTOCOL_MAP = {
|
||||
'rtmp': RtmpFD,
|
||||
@ -49,12 +50,12 @@ PROTOCOL_MAP = {
|
||||
'http_dash_segments_generator': DashSegmentsFD,
|
||||
'ism': IsmFD,
|
||||
'mhtml': MhtmlFD,
|
||||
'niconico_dmc': NiconicoDmcFD,
|
||||
'niconico_live': NiconicoLiveFD,
|
||||
'fc2_live': FC2LiveFD,
|
||||
'websocket_frag': WebSocketFragmentFD,
|
||||
'youtube_live_chat': YoutubeLiveChatFD,
|
||||
'youtube_live_chat_replay': YoutubeLiveChatFD,
|
||||
'bunnycdn': BunnyCdnFD,
|
||||
}
|
||||
|
||||
|
||||
@ -65,7 +66,6 @@ def shorten_protocol_name(proto, simplify=False):
|
||||
'rtmp_ffmpeg': 'rtmpF',
|
||||
'http_dash_segments': 'dash',
|
||||
'http_dash_segments_generator': 'dashG',
|
||||
'niconico_dmc': 'dmc',
|
||||
'websocket_frag': 'WSfrag',
|
||||
}
|
||||
if simplify:
|
||||
|
50
plugins/youtube_download/yt_dlp/downloader/bunnycdn.py
Normal file
50
plugins/youtube_download/yt_dlp/downloader/bunnycdn.py
Normal file
@ -0,0 +1,50 @@
|
||||
import hashlib
|
||||
import random
|
||||
import threading
|
||||
|
||||
from .common import FileDownloader
|
||||
from . import HlsFD
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import network_exceptions
|
||||
|
||||
|
||||
class BunnyCdnFD(FileDownloader):
    """
    Downloads from BunnyCDN while sending the pings the site requires

    The media itself is fetched by HlsFD; a background thread keeps pinging
    until that download returns or raises.

    Note, this is not a part of public API, and will be removed without notice.
    DO NOT USE
    """

    def real_download(self, filename, info_dict):
        """Download through HlsFD while a helper thread pings in the background.

        The keyword arguments for the ping thread are taken from
        info_dict['_bunnycdn_ping_data']. Returns whatever
        HlsFD.real_download returns.
        """
        self.to_screen(f'[{self.FD_NAME}] Downloading from BunnyCDN')

        hls_downloader = HlsFD(self.ydl, self.params)

        done = threading.Event()
        pinger = threading.Thread(
            target=self.ping_thread,
            args=(done,),
            kwargs=info_dict['_bunnycdn_ping_data'],
        )
        pinger.start()

        try:
            return hls_downloader.real_download(filename, info_dict)
        finally:
            # Always tell the ping loop to stop, even if the download raised
            done.set()

    def ping_thread(self, stop_event, url, headers, secret, context_id):
        """Ping `url` at a fixed interval until `stop_event` is set.

        Each ping carries an MD5 hash computed over `secret`, `context_id`,
        the reported playback time, the paused flag and the resolution.
        Network errors are reported on screen and do not end the loop.
        """
        # Site sends ping every 4 seconds, but this throttles the download. Pinging every 2 seconds seems to work.
        interval = 2
        # Hard coded resolution as it doesn't seem to matter
        resolution = 1080
        paused = 'false'
        elapsed = 0

        while True:
            # wait() doubles as the sleep between pings and returns True once the event is set
            if stop_event.wait(interval):
                break
            elapsed += interval

            # Reported time: elapsed seconds plus a random fraction rounded to 6 decimals
            timestamp = elapsed + round(random.random(), 6)
            signature = hashlib.md5(
                f'{secret}_{context_id}_{timestamp}_{paused}_{resolution}'.encode()).hexdigest()
            ping_url = f'{url}?hash={signature}&time={timestamp}&paused={paused}&resolution={resolution}'

            try:
                self.ydl.urlopen(Request(ping_url, headers=headers)).read()
            except network_exceptions as err:
                self.to_screen(f'[{self.FD_NAME}] Ping failed: {err}')
|
@ -4,6 +4,7 @@ import functools
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
|
||||
from ..minicurses import (
|
||||
@ -19,9 +20,7 @@ from ..utils import (
|
||||
Namespace,
|
||||
RetryManager,
|
||||
classproperty,
|
||||
decodeArgument,
|
||||
deprecation_warning,
|
||||
encodeFilename,
|
||||
format_bytes,
|
||||
join_nonempty,
|
||||
parse_bytes,
|
||||
@ -32,6 +31,7 @@ from ..utils import (
|
||||
timetuple_from_msec,
|
||||
try_call,
|
||||
)
|
||||
from ..utils._utils import _ProgressState
|
||||
|
||||
|
||||
class FileDownloader:
|
||||
@ -63,6 +63,7 @@ class FileDownloader:
|
||||
min_filesize: Skip files smaller than this size
|
||||
max_filesize: Skip files larger than this size
|
||||
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
|
||||
progress_delta: The minimum time between progress output, in seconds
|
||||
external_downloader_args: A dictionary of downloader keys (in lower case)
|
||||
and a list of additional command-line arguments for the
|
||||
executable. Use 'default' as the name for arguments to be
|
||||
@ -88,6 +89,9 @@ class FileDownloader:
|
||||
self.params = params
|
||||
self._prepare_multiline_status()
|
||||
self.add_progress_hook(self.report_progress)
|
||||
if self.params.get('progress_delta'):
|
||||
self._progress_delta_lock = threading.Lock()
|
||||
self._progress_delta_time = time.monotonic()
|
||||
|
||||
def _set_ydl(self, ydl):
|
||||
self.ydl = ydl
|
||||
@ -214,7 +218,7 @@ class FileDownloader:
|
||||
def temp_name(self, filename):
|
||||
"""Returns a temporary filename for the given filename."""
|
||||
if self.params.get('nopart', False) or filename == '-' or \
|
||||
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
|
||||
(os.path.exists(filename) and not os.path.isfile(filename)):
|
||||
return filename
|
||||
return filename + '.part'
|
||||
|
||||
@ -268,7 +272,7 @@ class FileDownloader:
|
||||
"""Try to set the last-modified time of the given file."""
|
||||
if last_modified_hdr is None:
|
||||
return
|
||||
if not os.path.isfile(encodeFilename(filename)):
|
||||
if not os.path.isfile(filename):
|
||||
return
|
||||
timestr = last_modified_hdr
|
||||
if timestr is None:
|
||||
@ -330,7 +334,7 @@ class FileDownloader:
|
||||
progress_dict), s.get('progress_idx') or 0)
|
||||
self.to_console_title(self.ydl.evaluate_outtmpl(
|
||||
progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
|
||||
progress_dict))
|
||||
progress_dict), _ProgressState.from_dict(s), s.get('_percent'))
|
||||
|
||||
def _format_progress(self, *args, **kwargs):
|
||||
return self.ydl._format_text(
|
||||
@ -354,6 +358,7 @@ class FileDownloader:
|
||||
'_speed_str': self.format_speed(speed).strip(),
|
||||
'_total_bytes_str': _format_bytes('total_bytes'),
|
||||
'_elapsed_str': self.format_seconds(s.get('elapsed')),
|
||||
'_percent': 100.0,
|
||||
'_percent_str': self.format_percent(100),
|
||||
})
|
||||
self._report_progress_status(s, join_nonempty(
|
||||
@ -366,13 +371,21 @@ class FileDownloader:
|
||||
if s['status'] != 'downloading':
|
||||
return
|
||||
|
||||
if update_delta := self.params.get('progress_delta'):
|
||||
with self._progress_delta_lock:
|
||||
if time.monotonic() < self._progress_delta_time:
|
||||
return
|
||||
self._progress_delta_time += update_delta
|
||||
|
||||
progress = try_call(
|
||||
lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
|
||||
lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
|
||||
lambda: s['downloaded_bytes'] == 0 and 0)
|
||||
s.update({
|
||||
'_eta_str': self.format_eta(s.get('eta')).strip(),
|
||||
'_speed_str': self.format_speed(s.get('speed')),
|
||||
'_percent_str': self.format_percent(try_call(
|
||||
lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
|
||||
lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
|
||||
lambda: s['downloaded_bytes'] == 0 and 0)),
|
||||
'_percent': progress,
|
||||
'_percent_str': self.format_percent(progress),
|
||||
'_total_bytes_str': _format_bytes('total_bytes'),
|
||||
'_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
|
||||
'_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
|
||||
@ -393,7 +406,7 @@ class FileDownloader:
|
||||
|
||||
def report_resuming_byte(self, resume_len):
|
||||
"""Report attempt to resume at given byte."""
|
||||
self.to_screen('[download] Resuming download at byte %s' % resume_len)
|
||||
self.to_screen(f'[download] Resuming download at byte {resume_len}')
|
||||
|
||||
def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
|
||||
"""Report retry"""
|
||||
@ -421,13 +434,13 @@ class FileDownloader:
|
||||
"""
|
||||
nooverwrites_and_exists = (
|
||||
not self.params.get('overwrites', True)
|
||||
and os.path.exists(encodeFilename(filename))
|
||||
and os.path.exists(filename)
|
||||
)
|
||||
|
||||
if not hasattr(filename, 'write'):
|
||||
continuedl_and_exists = (
|
||||
self.params.get('continuedl', True)
|
||||
and os.path.isfile(encodeFilename(filename))
|
||||
and os.path.isfile(filename)
|
||||
and not self.params.get('nopart', False)
|
||||
)
|
||||
|
||||
@ -437,7 +450,7 @@ class FileDownloader:
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'total_bytes': os.path.getsize(encodeFilename(filename)),
|
||||
'total_bytes': os.path.getsize(filename),
|
||||
}, info_dict)
|
||||
self._finish_multiline_status()
|
||||
return True, False
|
||||
@ -478,9 +491,7 @@ class FileDownloader:
|
||||
if not self.params.get('verbose', False):
|
||||
return
|
||||
|
||||
str_args = [decodeArgument(a) for a in args]
|
||||
|
||||
if exe is None:
|
||||
exe = os.path.basename(str_args[0])
|
||||
exe = os.path.basename(args[0])
|
||||
|
||||
self.write_debug(f'{exe} command line: {shell_quote(str_args)}')
|
||||
self.write_debug(f'{exe} command line: {shell_quote(args)}')
|
||||
|
@ -15,12 +15,15 @@ class DashSegmentsFD(FragmentFD):
|
||||
FD_NAME = 'dashsegments'
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
if info_dict.get('is_live') and set(info_dict['protocol'].split('+')) != {'http_dash_segments_generator'}:
|
||||
self.report_error('Live DASH videos are not supported')
|
||||
if 'http_dash_segments_generator' in info_dict['protocol'].split('+'):
|
||||
real_downloader = None # No external FD can support --live-from-start
|
||||
else:
|
||||
if info_dict.get('is_live'):
|
||||
self.report_error('Live DASH videos are not supported')
|
||||
real_downloader = get_suitable_downloader(
|
||||
info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-'))
|
||||
|
||||
real_start = time.time()
|
||||
real_downloader = get_suitable_downloader(
|
||||
info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-'))
|
||||
|
||||
requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])]
|
||||
args = []
|
||||
|
@ -1,4 +1,5 @@
|
||||
import enum
|
||||
import functools
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@ -9,7 +10,6 @@ import time
|
||||
import uuid
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import functools
|
||||
from ..networking import Request
|
||||
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
|
||||
from ..utils import (
|
||||
@ -23,7 +23,6 @@ from ..utils import (
|
||||
cli_valueless_option,
|
||||
determine_ext,
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
find_available_port,
|
||||
remove_end,
|
||||
traverse_obj,
|
||||
@ -55,7 +54,7 @@ class ExternalFD(FragmentFD):
|
||||
# correct and expected termination thus all postprocessing
|
||||
# should take place
|
||||
retval = 0
|
||||
self.to_screen('[%s] Interrupted by user' % self.get_basename())
|
||||
self.to_screen(f'[{self.get_basename()}] Interrupted by user')
|
||||
finally:
|
||||
if self._cookies_tempfile:
|
||||
self.try_remove(self._cookies_tempfile)
|
||||
@ -67,7 +66,7 @@ class ExternalFD(FragmentFD):
|
||||
'elapsed': time.time() - started,
|
||||
}
|
||||
if filename != '-':
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
fsize = os.path.getsize(tmpfilename)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
status.update({
|
||||
'downloaded_bytes': fsize,
|
||||
@ -108,7 +107,7 @@ class ExternalFD(FragmentFD):
|
||||
return all((
|
||||
not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES,
|
||||
'+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES,
|
||||
not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'),
|
||||
not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url', 'extra_param_to_key_url'),
|
||||
all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')),
|
||||
))
|
||||
|
||||
@ -172,7 +171,7 @@ class ExternalFD(FragmentFD):
|
||||
decrypt_fragment = self.decrypter(info_dict)
|
||||
dest, _ = self.sanitize_open(tmpfilename, 'wb')
|
||||
for frag_index, fragment in enumerate(info_dict['fragments']):
|
||||
fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
|
||||
fragment_filename = f'{tmpfilename}-Frag{frag_index}'
|
||||
try:
|
||||
src, _ = self.sanitize_open(fragment_filename, 'rb')
|
||||
except OSError as err:
|
||||
@ -184,9 +183,9 @@ class ExternalFD(FragmentFD):
|
||||
dest.write(decrypt_fragment(fragment, src.read()))
|
||||
src.close()
|
||||
if not self.params.get('keep_fragments', False):
|
||||
self.try_remove(encodeFilename(fragment_filename))
|
||||
self.try_remove(fragment_filename)
|
||||
dest.close()
|
||||
self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename))
|
||||
self.try_remove(f'{tmpfilename}.frag.urls')
|
||||
return 0
|
||||
|
||||
def _call_process(self, cmd, info_dict):
|
||||
@ -335,12 +334,12 @@ class Aria2cFD(ExternalFD):
|
||||
cmd += ['--auto-file-renaming=false']
|
||||
|
||||
if 'fragments' in info_dict:
|
||||
cmd += ['--file-allocation=none', '--uri-selector=inorder']
|
||||
url_list_file = '%s.frag.urls' % tmpfilename
|
||||
cmd += ['--uri-selector=inorder']
|
||||
url_list_file = f'{tmpfilename}.frag.urls'
|
||||
url_list = []
|
||||
for frag_index, fragment in enumerate(info_dict['fragments']):
|
||||
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
|
||||
url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
|
||||
fragment_filename = f'{os.path.basename(tmpfilename)}-Frag{frag_index}'
|
||||
url_list.append('{}\n\tout={}'.format(fragment['url'], self._aria2c_filename(fragment_filename)))
|
||||
stream, _ = self.sanitize_open(url_list_file, 'wb')
|
||||
stream.write('\n'.join(url_list).encode())
|
||||
stream.close()
|
||||
@ -357,7 +356,7 @@ class Aria2cFD(ExternalFD):
|
||||
'id': sanitycheck,
|
||||
'method': method,
|
||||
'params': [f'token:{rpc_secret}', *params],
|
||||
}).encode('utf-8')
|
||||
}).encode()
|
||||
request = Request(
|
||||
f'http://localhost:{rpc_port}/jsonrpc',
|
||||
data=d, headers={
|
||||
@ -416,7 +415,7 @@ class Aria2cFD(ExternalFD):
|
||||
'total_bytes_estimate': total,
|
||||
'eta': (total - downloaded) / (speed or 1),
|
||||
'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None,
|
||||
'elapsed': time.time() - started
|
||||
'elapsed': time.time() - started,
|
||||
})
|
||||
self._hook_progress(status, info_dict)
|
||||
|
||||
@ -458,8 +457,6 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
@classmethod
|
||||
def available(cls, path=None):
|
||||
# TODO: Fix path for ffmpeg
|
||||
# Fixme: This may be wrong when --ffmpeg-location is used
|
||||
return FFmpegPostProcessor().available
|
||||
|
||||
def on_process_started(self, proc, stdin):
|
||||
@ -491,7 +488,7 @@ class FFmpegFD(ExternalFD):
|
||||
if not self.params.get('verbose'):
|
||||
args += ['-hide_banner']
|
||||
|
||||
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[])
|
||||
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args', ...))
|
||||
|
||||
# These exist only for compatibility. Extractors should use
|
||||
# info_dict['downloader_options']['ffmpeg_args'] instead
|
||||
@ -508,13 +505,13 @@ class FFmpegFD(ExternalFD):
|
||||
env = None
|
||||
proxy = self.params.get('proxy')
|
||||
if proxy:
|
||||
if not re.match(r'^[\da-zA-Z]+://', proxy):
|
||||
proxy = 'http://%s' % proxy
|
||||
if not re.match(r'[\da-zA-Z]+://', proxy):
|
||||
proxy = f'http://{proxy}'
|
||||
|
||||
if proxy.startswith('socks'):
|
||||
self.report_warning(
|
||||
'%s does not support SOCKS proxies. Downloading is likely to fail. '
|
||||
'Consider adding --hls-prefer-native to your command.' % self.get_basename())
|
||||
f'{self.get_basename()} does not support SOCKS proxies. Downloading is likely to fail. '
|
||||
'Consider adding --hls-prefer-native to your command.')
|
||||
|
||||
# Since December 2015 ffmpeg supports -http_proxy option (see
|
||||
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
|
||||
@ -559,7 +556,7 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
selected_formats = info_dict.get('requested_formats') or [info_dict]
|
||||
for i, fmt in enumerate(selected_formats):
|
||||
is_http = re.match(r'^https?://', fmt['url'])
|
||||
is_http = re.match(r'https?://', fmt['url'])
|
||||
cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
|
||||
if cookies:
|
||||
args.extend(['-cookies', ''.join(
|
||||
@ -575,7 +572,7 @@ class FFmpegFD(ExternalFD):
|
||||
if end_time:
|
||||
args += ['-t', str(end_time - start_time)]
|
||||
|
||||
args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']]
|
||||
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']]
|
||||
|
||||
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
|
||||
args += ['-c', 'copy']
|
||||
@ -615,10 +612,12 @@ class FFmpegFD(ExternalFD):
|
||||
else:
|
||||
args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
|
||||
|
||||
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args_out', ...))
|
||||
|
||||
args += self._configuration_args(('_o1', '_o', ''))
|
||||
|
||||
args = [encodeArgument(opt) for opt in args]
|
||||
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
||||
args.append(ffpp._ffmpeg_filename_argument(tmpfilename))
|
||||
self._debug_cmd(args)
|
||||
|
||||
piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats)
|
||||
|
@ -67,12 +67,12 @@ class FlvReader(io.BytesIO):
|
||||
self.read_bytes(3)
|
||||
quality_entry_count = self.read_unsigned_char()
|
||||
# QualityEntryCount
|
||||
for i in range(quality_entry_count):
|
||||
for _ in range(quality_entry_count):
|
||||
self.read_string()
|
||||
|
||||
segment_run_count = self.read_unsigned_int()
|
||||
segments = []
|
||||
for i in range(segment_run_count):
|
||||
for _ in range(segment_run_count):
|
||||
first_segment = self.read_unsigned_int()
|
||||
fragments_per_segment = self.read_unsigned_int()
|
||||
segments.append((first_segment, fragments_per_segment))
|
||||
@ -91,12 +91,12 @@ class FlvReader(io.BytesIO):
|
||||
|
||||
quality_entry_count = self.read_unsigned_char()
|
||||
# QualitySegmentUrlModifiers
|
||||
for i in range(quality_entry_count):
|
||||
for _ in range(quality_entry_count):
|
||||
self.read_string()
|
||||
|
||||
fragments_count = self.read_unsigned_int()
|
||||
fragments = []
|
||||
for i in range(fragments_count):
|
||||
for _ in range(fragments_count):
|
||||
first = self.read_unsigned_int()
|
||||
first_ts = self.read_unsigned_long_long()
|
||||
duration = self.read_unsigned_int()
|
||||
@ -135,11 +135,11 @@ class FlvReader(io.BytesIO):
|
||||
self.read_string() # MovieIdentifier
|
||||
server_count = self.read_unsigned_char()
|
||||
# ServerEntryTable
|
||||
for i in range(server_count):
|
||||
for _ in range(server_count):
|
||||
self.read_string()
|
||||
quality_count = self.read_unsigned_char()
|
||||
# QualityEntryTable
|
||||
for i in range(quality_count):
|
||||
for _ in range(quality_count):
|
||||
self.read_string()
|
||||
# DrmData
|
||||
self.read_string()
|
||||
@ -148,14 +148,14 @@ class FlvReader(io.BytesIO):
|
||||
|
||||
segments_count = self.read_unsigned_char()
|
||||
segments = []
|
||||
for i in range(segments_count):
|
||||
for _ in range(segments_count):
|
||||
box_size, box_type, box_data = self.read_box_info()
|
||||
assert box_type == b'asrt'
|
||||
segment = FlvReader(box_data).read_asrt()
|
||||
segments.append(segment)
|
||||
fragments_run_count = self.read_unsigned_char()
|
||||
fragments = []
|
||||
for i in range(fragments_run_count):
|
||||
for _ in range(fragments_run_count):
|
||||
box_size, box_type, box_data = self.read_box_info()
|
||||
assert box_type == b'afrt'
|
||||
fragments.append(FlvReader(box_data).read_afrt())
|
||||
@ -309,7 +309,7 @@ class F4mFD(FragmentFD):
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
requested_bitrate = info_dict.get('tbr')
|
||||
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
|
||||
self.to_screen(f'[{self.FD_NAME}] Downloading f4m manifest')
|
||||
|
||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||
man_url = urlh.url
|
||||
@ -326,8 +326,8 @@ class F4mFD(FragmentFD):
|
||||
formats = sorted(formats, key=lambda f: f[0])
|
||||
rate, media = formats[-1]
|
||||
else:
|
||||
rate, media = list(filter(
|
||||
lambda f: int(f[0]) == requested_bitrate, formats))[0]
|
||||
rate, media = next(filter(
|
||||
lambda f: int(f[0]) == requested_bitrate, formats))
|
||||
|
||||
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
|
||||
man_base_url = get_base_url(doc) or man_url
|
||||
|
@ -9,11 +9,11 @@ import time
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..compat import compat_os_name
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import HTTPError, IncompleteRead
|
||||
from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj
|
||||
from ..utils import DownloadError, RetryManager, traverse_obj
|
||||
from ..utils.networking import HTTPHeaderDict
|
||||
from ..utils.progress import ProgressCalculator
|
||||
|
||||
|
||||
class HttpQuietDownloader(HttpFD):
|
||||
@ -151,7 +151,7 @@ class FragmentFD(FileDownloader):
|
||||
if self.__do_ytdl_file(ctx):
|
||||
self._write_ytdl_file(ctx)
|
||||
if not self.params.get('keep_fragments', False):
|
||||
self.try_remove(encodeFilename(ctx['fragment_filename_sanitized']))
|
||||
self.try_remove(ctx['fragment_filename_sanitized'])
|
||||
del ctx['fragment_filename_sanitized']
|
||||
|
||||
def _prepare_frag_download(self, ctx):
|
||||
@ -187,7 +187,7 @@ class FragmentFD(FileDownloader):
|
||||
})
|
||||
|
||||
if self.__do_ytdl_file(ctx):
|
||||
ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
|
||||
ytdl_file_exists = os.path.isfile(self.ytdl_filename(ctx['filename']))
|
||||
continuedl = self.params.get('continuedl', True)
|
||||
if continuedl and ytdl_file_exists:
|
||||
self._read_ytdl_file(ctx)
|
||||
@ -198,7 +198,7 @@ class FragmentFD(FileDownloader):
|
||||
'.ytdl file is corrupt' if is_corrupt else
|
||||
'Inconsistent state of incomplete fragment download')
|
||||
self.report_warning(
|
||||
'%s. Restarting from the beginning ...' % message)
|
||||
f'{message}. Restarting from the beginning ...')
|
||||
ctx['fragment_index'] = resume_len = 0
|
||||
if 'ytdl_corrupt' in ctx:
|
||||
del ctx['ytdl_corrupt']
|
||||
@ -226,8 +226,7 @@ class FragmentFD(FileDownloader):
|
||||
resume_len = ctx['complete_frags_downloaded_bytes']
|
||||
total_frags = ctx['total_frags']
|
||||
ctx_id = ctx.get('ctx_id')
|
||||
# This dict stores the download progress, it's updated by the progress
|
||||
# hook
|
||||
# Stores the download progress, updated by the progress hook
|
||||
state = {
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': resume_len,
|
||||
@ -237,14 +236,8 @@ class FragmentFD(FileDownloader):
|
||||
'tmpfilename': ctx['tmpfilename'],
|
||||
}
|
||||
|
||||
start = time.time()
|
||||
ctx.update({
|
||||
'started': start,
|
||||
'fragment_started': start,
|
||||
# Amount of fragment's bytes downloaded by the time of the previous
|
||||
# frag progress hook invocation
|
||||
'prev_frag_downloaded_bytes': 0,
|
||||
})
|
||||
ctx['started'] = time.time()
|
||||
progress = ProgressCalculator(resume_len)
|
||||
|
||||
def frag_progress_hook(s):
|
||||
if s['status'] not in ('downloading', 'finished'):
|
||||
@ -259,38 +252,35 @@ class FragmentFD(FileDownloader):
|
||||
state['max_progress'] = ctx.get('max_progress')
|
||||
state['progress_idx'] = ctx.get('progress_idx')
|
||||
|
||||
time_now = time.time()
|
||||
state['elapsed'] = time_now - start
|
||||
state['elapsed'] = progress.elapsed
|
||||
frag_total_bytes = s.get('total_bytes') or 0
|
||||
s['fragment_info_dict'] = s.pop('info_dict', {})
|
||||
|
||||
# XXX: Fragment resume is not accounted for here
|
||||
if not ctx['live']:
|
||||
estimated_size = (
|
||||
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
|
||||
/ (state['fragment_index'] + 1) * total_frags)
|
||||
state['total_bytes_estimate'] = estimated_size
|
||||
progress.total = estimated_size
|
||||
progress.update(s.get('downloaded_bytes'))
|
||||
state['total_bytes_estimate'] = progress.total
|
||||
else:
|
||||
progress.update(s.get('downloaded_bytes'))
|
||||
|
||||
if s['status'] == 'finished':
|
||||
state['fragment_index'] += 1
|
||||
ctx['fragment_index'] = state['fragment_index']
|
||||
state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
|
||||
ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
|
||||
ctx['speed'] = state['speed'] = self.calc_speed(
|
||||
ctx['fragment_started'], time_now, frag_total_bytes)
|
||||
ctx['fragment_started'] = time.time()
|
||||
ctx['prev_frag_downloaded_bytes'] = 0
|
||||
else:
|
||||
frag_downloaded_bytes = s['downloaded_bytes']
|
||||
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
||||
ctx['speed'] = state['speed'] = self.calc_speed(
|
||||
ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx.get('frag_resume_len', 0))
|
||||
if not ctx['live']:
|
||||
state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
|
||||
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
||||
progress.thread_reset()
|
||||
|
||||
state['downloaded_bytes'] = ctx['complete_frags_downloaded_bytes'] = progress.downloaded
|
||||
state['speed'] = ctx['speed'] = progress.speed.smooth
|
||||
state['eta'] = progress.eta.smooth
|
||||
|
||||
self._hook_progress(state, info_dict)
|
||||
|
||||
ctx['dl'].add_progress_hook(frag_progress_hook)
|
||||
|
||||
return start
|
||||
return ctx['started']
|
||||
|
||||
def _finish_frag_download(self, ctx, info_dict):
|
||||
ctx['dest_stream'].close()
|
||||
@ -375,10 +365,10 @@ class FragmentFD(FileDownloader):
|
||||
return decrypt_fragment
|
||||
|
||||
def download_and_append_fragments_multiple(self, *args, **kwargs):
|
||||
'''
|
||||
"""
|
||||
@params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ...
|
||||
all args must be either tuple or list
|
||||
'''
|
||||
"""
|
||||
interrupt_trigger = [True]
|
||||
max_progress = len(args)
|
||||
if max_progress == 1:
|
||||
@ -399,7 +389,7 @@ class FragmentFD(FileDownloader):
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
pass
|
||||
|
||||
if compat_os_name == 'nt':
|
||||
if os.name == 'nt':
|
||||
def future_result(future):
|
||||
while True:
|
||||
try:
|
||||
@ -433,7 +423,7 @@ class FragmentFD(FileDownloader):
|
||||
finally:
|
||||
tpe.shutdown(wait=True)
|
||||
if not interrupt_trigger[0] and not is_live:
|
||||
raise KeyboardInterrupt()
|
||||
raise KeyboardInterrupt
|
||||
# we expect the user wants to stop and DO WANT the preceding postprocessors to run;
|
||||
# so returning a intermediate result here instead of KeyboardInterrupt on live
|
||||
return result
|
||||
@ -500,7 +490,6 @@ class FragmentFD(FileDownloader):
|
||||
download_fragment(fragment, ctx_copy)
|
||||
return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')
|
||||
|
||||
self.report_warning('The download speed shown is only of one thread. This is a known issue')
|
||||
with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
|
||||
try:
|
||||
for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
|
||||
|
@ -16,6 +16,7 @@ from ..utils import (
|
||||
update_url_query,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils._utils import _request_dump_filename
|
||||
|
||||
|
||||
class HlsFD(FragmentFD):
|
||||
@ -72,11 +73,23 @@ class HlsFD(FragmentFD):
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
|
||||
|
||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||
man_url = urlh.url
|
||||
s = urlh.read().decode('utf-8', 'ignore')
|
||||
s = info_dict.get('hls_media_playlist_data')
|
||||
if s:
|
||||
self.to_screen(f'[{self.FD_NAME}] Using m3u8 manifest from extracted info')
|
||||
else:
|
||||
self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest')
|
||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||
man_url = urlh.url
|
||||
s_bytes = urlh.read()
|
||||
if self.params.get('write_pages'):
|
||||
dump_filename = _request_dump_filename(
|
||||
man_url, info_dict['id'], None,
|
||||
trim_length=self.params.get('trim_file_name'))
|
||||
self.to_screen(f'[{self.FD_NAME}] Saving request to {dump_filename}')
|
||||
with open(dump_filename, 'wb') as outf:
|
||||
outf.write(s_bytes)
|
||||
s = s_bytes.decode('utf-8', 'ignore')
|
||||
|
||||
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
|
||||
if can_download:
|
||||
@ -119,12 +132,12 @@ class HlsFD(FragmentFD):
|
||||
self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
|
||||
|
||||
def is_ad_fragment_start(s):
|
||||
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
|
||||
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
|
||||
return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s)
|
||||
or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')))
|
||||
|
||||
def is_ad_fragment_end(s):
|
||||
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
|
||||
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
|
||||
return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s)
|
||||
or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')))
|
||||
|
||||
fragments = []
|
||||
|
||||
@ -160,10 +173,12 @@ class HlsFD(FragmentFD):
|
||||
extra_state = ctx.setdefault('extra_state', {})
|
||||
|
||||
format_index = info_dict.get('format_index')
|
||||
extra_query = None
|
||||
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
|
||||
if extra_param_to_segment_url:
|
||||
extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
|
||||
extra_segment_query = None
|
||||
if extra_param_to_segment_url := info_dict.get('extra_param_to_segment_url'):
|
||||
extra_segment_query = urllib.parse.parse_qs(extra_param_to_segment_url)
|
||||
extra_key_query = None
|
||||
if extra_param_to_key_url := info_dict.get('extra_param_to_key_url'):
|
||||
extra_key_query = urllib.parse.parse_qs(extra_param_to_key_url)
|
||||
i = 0
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
@ -175,6 +190,7 @@ class HlsFD(FragmentFD):
|
||||
if external_aes_iv:
|
||||
external_aes_iv = binascii.unhexlify(remove_start(external_aes_iv, '0x').zfill(32))
|
||||
byte_range = {}
|
||||
byte_range_offset = 0
|
||||
discontinuity_count = 0
|
||||
frag_index = 0
|
||||
ad_frag_next = False
|
||||
@ -190,8 +206,8 @@ class HlsFD(FragmentFD):
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
frag_url = urljoin(man_url, line)
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
if extra_segment_query:
|
||||
frag_url = update_url_query(frag_url, extra_segment_query)
|
||||
|
||||
fragments.append({
|
||||
'frag_index': frag_index,
|
||||
@ -202,6 +218,11 @@ class HlsFD(FragmentFD):
|
||||
})
|
||||
media_sequence += 1
|
||||
|
||||
# If the byte_range is truthy, reset it after appending a fragment that uses it
|
||||
if byte_range:
|
||||
byte_range_offset = byte_range['end']
|
||||
byte_range = {}
|
||||
|
||||
elif line.startswith('#EXT-X-MAP'):
|
||||
if format_index and discontinuity_count != format_index:
|
||||
continue
|
||||
@ -212,13 +233,15 @@ class HlsFD(FragmentFD):
|
||||
frag_index += 1
|
||||
map_info = parse_m3u8_attributes(line[11:])
|
||||
frag_url = urljoin(man_url, map_info.get('URI'))
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
if extra_segment_query:
|
||||
frag_url = update_url_query(frag_url, extra_segment_query)
|
||||
|
||||
map_byte_range = {}
|
||||
|
||||
if map_info.get('BYTERANGE'):
|
||||
splitted_byte_range = map_info.get('BYTERANGE').split('@')
|
||||
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
|
||||
byte_range = {
|
||||
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else 0
|
||||
map_byte_range = {
|
||||
'start': sub_range_start,
|
||||
'end': sub_range_start + int(splitted_byte_range[0]),
|
||||
}
|
||||
@ -227,8 +250,8 @@ class HlsFD(FragmentFD):
|
||||
'frag_index': frag_index,
|
||||
'url': frag_url,
|
||||
'decrypt_info': decrypt_info,
|
||||
'byte_range': byte_range,
|
||||
'media_sequence': media_sequence
|
||||
'byte_range': map_byte_range,
|
||||
'media_sequence': media_sequence,
|
||||
})
|
||||
media_sequence += 1
|
||||
|
||||
@ -244,8 +267,10 @@ class HlsFD(FragmentFD):
|
||||
decrypt_info['KEY'] = external_aes_key
|
||||
else:
|
||||
decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI'])
|
||||
if extra_query:
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
|
||||
if extra_key_query or extra_segment_query:
|
||||
# Fall back to extra_segment_query to key for backwards compat
|
||||
decrypt_info['URI'] = update_url_query(
|
||||
decrypt_info['URI'], extra_key_query or extra_segment_query)
|
||||
if decrypt_url != decrypt_info['URI']:
|
||||
decrypt_info['KEY'] = None
|
||||
|
||||
@ -253,7 +278,7 @@ class HlsFD(FragmentFD):
|
||||
media_sequence = int(line[22:])
|
||||
elif line.startswith('#EXT-X-BYTERANGE'):
|
||||
splitted_byte_range = line[17:].split('@')
|
||||
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
|
||||
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range_offset
|
||||
byte_range = {
|
||||
'start': sub_range_start,
|
||||
'end': sub_range_start + int(splitted_byte_range[0]),
|
||||
@ -350,9 +375,8 @@ class HlsFD(FragmentFD):
|
||||
# XXX: this should probably be silent as well
|
||||
# or verify that all segments contain the same data
|
||||
self.report_warning(bug_reports_message(
|
||||
'Discarding a %s block found in the middle of the stream; '
|
||||
'if the subtitles display incorrectly,'
|
||||
% (type(block).__name__)))
|
||||
f'Discarding a {type(block).__name__} block found in the middle of the stream; '
|
||||
'if the subtitles display incorrectly,'))
|
||||
continue
|
||||
block.write_into(output)
|
||||
|
||||
@ -369,7 +393,10 @@ class HlsFD(FragmentFD):
|
||||
|
||||
return output.getvalue().encode()
|
||||
|
||||
self.download_and_append_fragments(
|
||||
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
|
||||
if len(fragments) == 1:
|
||||
self.download_and_append_fragments(ctx, fragments, info_dict)
|
||||
else:
|
||||
self.download_and_append_fragments(
|
||||
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
|
||||
else:
|
||||
return self.download_and_append_fragments(ctx, fragments, info_dict)
|
||||
|
@ -15,7 +15,6 @@ from ..utils import (
|
||||
ThrottledDownload,
|
||||
XAttrMetadataError,
|
||||
XAttrUnavailableError,
|
||||
encodeFilename,
|
||||
int_or_none,
|
||||
parse_http_range,
|
||||
try_call,
|
||||
@ -58,9 +57,8 @@ class HttpFD(FileDownloader):
|
||||
|
||||
if self.params.get('continuedl', True):
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(ctx.tmpfilename)):
|
||||
ctx.resume_len = os.path.getsize(
|
||||
encodeFilename(ctx.tmpfilename))
|
||||
if os.path.isfile(ctx.tmpfilename):
|
||||
ctx.resume_len = os.path.getsize(ctx.tmpfilename)
|
||||
|
||||
ctx.is_resume = ctx.resume_len > 0
|
||||
|
||||
@ -176,7 +174,7 @@ class HttpFD(FileDownloader):
|
||||
'downloaded_bytes': ctx.resume_len,
|
||||
'total_bytes': ctx.resume_len,
|
||||
}, info_dict)
|
||||
raise SucceedDownload()
|
||||
raise SucceedDownload
|
||||
else:
|
||||
# The length does not match, we start the download over
|
||||
self.report_unable_to_resume()
|
||||
@ -194,7 +192,7 @@ class HttpFD(FileDownloader):
|
||||
|
||||
def close_stream():
|
||||
if ctx.stream is not None:
|
||||
if not ctx.tmpfilename == '-':
|
||||
if ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
ctx.stream = None
|
||||
|
||||
@ -237,8 +235,13 @@ class HttpFD(FileDownloader):
|
||||
|
||||
def retry(e):
|
||||
close_stream()
|
||||
ctx.resume_len = (byte_counter if ctx.tmpfilename == '-'
|
||||
else os.path.getsize(encodeFilename(ctx.tmpfilename)))
|
||||
if ctx.tmpfilename == '-':
|
||||
ctx.resume_len = byte_counter
|
||||
else:
|
||||
try:
|
||||
ctx.resume_len = os.path.getsize(ctx.tmpfilename)
|
||||
except FileNotFoundError:
|
||||
ctx.resume_len = 0
|
||||
raise RetryDownload(e)
|
||||
|
||||
while True:
|
||||
@ -263,20 +266,20 @@ class HttpFD(FileDownloader):
|
||||
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
|
||||
self.report_destination(ctx.filename)
|
||||
except OSError as err:
|
||||
self.report_error('unable to open for writing: %s' % str(err))
|
||||
self.report_error(f'unable to open for writing: {err}')
|
||||
return False
|
||||
|
||||
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
||||
try:
|
||||
write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode())
|
||||
except (XAttrUnavailableError, XAttrMetadataError) as err:
|
||||
self.report_error('unable to set filesize xattr: %s' % str(err))
|
||||
self.report_error(f'unable to set filesize xattr: {err}')
|
||||
|
||||
try:
|
||||
ctx.stream.write(data_block)
|
||||
except OSError as err:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('unable to write data: %s' % str(err))
|
||||
self.report_error(f'unable to write data: {err}')
|
||||
return False
|
||||
|
||||
# Apply rate limit
|
||||
@ -322,7 +325,7 @@ class HttpFD(FileDownloader):
|
||||
elif now - ctx.throttle_start > 3:
|
||||
if ctx.stream is not None and ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
raise ThrottledDownload()
|
||||
raise ThrottledDownload
|
||||
elif speed:
|
||||
ctx.throttle_start = None
|
||||
|
||||
@ -333,7 +336,7 @@ class HttpFD(FileDownloader):
|
||||
|
||||
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
|
||||
ctx.resume_len = byte_counter
|
||||
raise NextFragment()
|
||||
raise NextFragment
|
||||
|
||||
if ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
|
@ -251,7 +251,7 @@ class IsmFD(FragmentFD):
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
frag_index = 0
|
||||
for i, segment in enumerate(segments):
|
||||
for segment in segments:
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
|
@ -10,7 +10,7 @@ from ..version import __version__ as YT_DLP_VERSION
|
||||
|
||||
|
||||
class MhtmlFD(FragmentFD):
|
||||
_STYLESHEET = """\
|
||||
_STYLESHEET = '''\
|
||||
html, body {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
@ -45,7 +45,7 @@ body > figure > img {
|
||||
max-width: 100%;
|
||||
max-height: calc(100vh - 5em);
|
||||
}
|
||||
"""
|
||||
'''
|
||||
_STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET)
|
||||
_STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET)
|
||||
|
||||
@ -57,24 +57,19 @@ body > figure > img {
|
||||
)).decode('us-ascii') + '?='
|
||||
|
||||
def _gen_cid(self, i, fragment, frag_boundary):
|
||||
return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary)
|
||||
return f'{i}.{frag_boundary}@yt-dlp.github.io.invalid'
|
||||
|
||||
def _gen_stub(self, *, fragments, frag_boundary, title):
|
||||
output = io.StringIO()
|
||||
|
||||
output.write((
|
||||
output.write(
|
||||
'<!DOCTYPE html>'
|
||||
'<html>'
|
||||
'<head>'
|
||||
'' '<meta name="generator" content="yt-dlp {version}">'
|
||||
'' '<title>{title}</title>'
|
||||
'' '<style>{styles}</style>'
|
||||
'<body>'
|
||||
).format(
|
||||
version=escapeHTML(YT_DLP_VERSION),
|
||||
styles=self._STYLESHEET,
|
||||
title=escapeHTML(title)
|
||||
))
|
||||
f'<meta name="generator" content="yt-dlp {escapeHTML(YT_DLP_VERSION)}">'
|
||||
f'<title>{escapeHTML(title)}</title>'
|
||||
f'<style>{self._STYLESHEET}</style>'
|
||||
'<body>')
|
||||
|
||||
t0 = 0
|
||||
for i, frag in enumerate(fragments):
|
||||
@ -87,15 +82,12 @@ body > figure > img {
|
||||
num=i + 1,
|
||||
t0=srt_subtitles_timecode(t0),
|
||||
t1=srt_subtitles_timecode(t1),
|
||||
duration=formatSeconds(frag['duration'], msec=True)
|
||||
duration=formatSeconds(frag['duration'], msec=True),
|
||||
))
|
||||
except (KeyError, ValueError, TypeError):
|
||||
t1 = None
|
||||
output.write((
|
||||
'<figcaption>Slide #{num}</figcaption>'
|
||||
).format(num=i + 1))
|
||||
output.write('<img src="cid:{cid}">'.format(
|
||||
cid=self._gen_cid(i, frag, frag_boundary)))
|
||||
output.write(f'<figcaption>Slide #{i + 1}</figcaption>')
|
||||
output.write(f'<img src="cid:{self._gen_cid(i, frag, frag_boundary)}">')
|
||||
output.write('</figure>')
|
||||
t0 = t1
|
||||
|
||||
@ -126,31 +118,24 @@ body > figure > img {
|
||||
stub = self._gen_stub(
|
||||
fragments=fragments,
|
||||
frag_boundary=frag_boundary,
|
||||
title=title
|
||||
title=title,
|
||||
)
|
||||
|
||||
ctx['dest_stream'].write((
|
||||
'MIME-Version: 1.0\r\n'
|
||||
'From: <nowhere@yt-dlp.github.io.invalid>\r\n'
|
||||
'To: <nowhere@yt-dlp.github.io.invalid>\r\n'
|
||||
'Subject: {title}\r\n'
|
||||
f'Subject: {self._escape_mime(title)}\r\n'
|
||||
'Content-type: multipart/related; '
|
||||
'' 'boundary="{boundary}"; '
|
||||
'' 'type="text/html"\r\n'
|
||||
'X.yt-dlp.Origin: {origin}\r\n'
|
||||
f'boundary="{frag_boundary}"; '
|
||||
'type="text/html"\r\n'
|
||||
f'X.yt-dlp.Origin: {origin}\r\n'
|
||||
'\r\n'
|
||||
'--{boundary}\r\n'
|
||||
f'--{frag_boundary}\r\n'
|
||||
'Content-Type: text/html; charset=utf-8\r\n'
|
||||
'Content-Length: {length}\r\n'
|
||||
f'Content-Length: {len(stub)}\r\n'
|
||||
'\r\n'
|
||||
'{stub}\r\n'
|
||||
).format(
|
||||
origin=origin,
|
||||
boundary=frag_boundary,
|
||||
length=len(stub),
|
||||
title=self._escape_mime(title),
|
||||
stub=stub
|
||||
).encode())
|
||||
f'{stub}\r\n').encode())
|
||||
extra_state['header_written'] = True
|
||||
|
||||
for i, fragment in enumerate(fragments):
|
||||
|
@ -2,58 +2,10 @@ import json
|
||||
import threading
|
||||
import time
|
||||
|
||||
from . import get_suitable_downloader
|
||||
from .common import FileDownloader
|
||||
from .external import FFmpegFD
|
||||
from ..networking import Request
|
||||
from ..utils import DownloadError, WebSocketsWrapper, str_or_none, try_get
|
||||
|
||||
|
||||
class NiconicoDmcFD(FileDownloader):
|
||||
""" Downloading niconico douga from DMC with heartbeat """
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
from ..extractor.niconico import NiconicoIE
|
||||
|
||||
self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
|
||||
ie = NiconicoIE(self.ydl)
|
||||
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
|
||||
|
||||
fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params)
|
||||
|
||||
success = download_complete = False
|
||||
timer = [None]
|
||||
heartbeat_lock = threading.Lock()
|
||||
heartbeat_url = heartbeat_info_dict['url']
|
||||
heartbeat_data = heartbeat_info_dict['data'].encode()
|
||||
heartbeat_interval = heartbeat_info_dict.get('interval', 30)
|
||||
|
||||
request = Request(heartbeat_url, heartbeat_data)
|
||||
|
||||
def heartbeat():
|
||||
try:
|
||||
self.ydl.urlopen(request).read()
|
||||
except Exception:
|
||||
self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
|
||||
|
||||
with heartbeat_lock:
|
||||
if not download_complete:
|
||||
timer[0] = threading.Timer(heartbeat_interval, heartbeat)
|
||||
timer[0].start()
|
||||
|
||||
heartbeat_info_dict['ping']()
|
||||
self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
|
||||
try:
|
||||
heartbeat()
|
||||
if type(fd).__name__ == 'HlsFD':
|
||||
info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
|
||||
success = fd.real_download(filename, info_dict)
|
||||
finally:
|
||||
if heartbeat_lock:
|
||||
with heartbeat_lock:
|
||||
timer[0].cancel()
|
||||
download_complete = True
|
||||
return success
|
||||
from ..utils import DownloadError, str_or_none, try_get
|
||||
|
||||
|
||||
class NiconicoLiveFD(FileDownloader):
|
||||
@ -64,7 +16,6 @@ class NiconicoLiveFD(FileDownloader):
|
||||
ws_url = info_dict['url']
|
||||
ws_extractor = info_dict['ws']
|
||||
ws_origin_host = info_dict['origin']
|
||||
cookies = info_dict.get('cookies')
|
||||
live_quality = info_dict.get('live_quality', 'high')
|
||||
live_latency = info_dict.get('live_latency', 'high')
|
||||
dl = FFmpegFD(self.ydl, self.params or {})
|
||||
@ -76,12 +27,7 @@ class NiconicoLiveFD(FileDownloader):
|
||||
|
||||
def communicate_ws(reconnect):
|
||||
if reconnect:
|
||||
ws = WebSocketsWrapper(ws_url, {
|
||||
'Cookies': str_or_none(cookies) or '',
|
||||
'Origin': f'https://{ws_origin_host}',
|
||||
'Accept': '*/*',
|
||||
'User-Agent': self.params['http_headers']['User-Agent'],
|
||||
})
|
||||
ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'}))
|
||||
if self.ydl.params.get('verbose', False):
|
||||
self.to_screen('[debug] Sending startWatching request')
|
||||
ws.send(json.dumps({
|
||||
@ -91,14 +37,15 @@ class NiconicoLiveFD(FileDownloader):
|
||||
'quality': live_quality,
|
||||
'protocol': 'hls+fmp4',
|
||||
'latency': live_latency,
|
||||
'chasePlay': False
|
||||
'accessRightMethod': 'single_cookie',
|
||||
'chasePlay': False,
|
||||
},
|
||||
'room': {
|
||||
'protocol': 'webSocket',
|
||||
'commentable': True
|
||||
'commentable': True,
|
||||
},
|
||||
'reconnect': True,
|
||||
}
|
||||
},
|
||||
}))
|
||||
else:
|
||||
ws = ws_extractor
|
||||
@ -124,7 +71,7 @@ class NiconicoLiveFD(FileDownloader):
|
||||
elif self.ydl.params.get('verbose', False):
|
||||
if len(recv) > 100:
|
||||
recv = recv[:100] + '...'
|
||||
self.to_screen('[debug] Server said: %s' % recv)
|
||||
self.to_screen(f'[debug] Server said: {recv}')
|
||||
|
||||
def ws_main():
|
||||
reconnect = False
|
||||
@ -134,7 +81,7 @@ class NiconicoLiveFD(FileDownloader):
|
||||
if ret is True:
|
||||
return
|
||||
except BaseException as e:
|
||||
self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e)))
|
||||
self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e)))
|
||||
time.sleep(10)
|
||||
continue
|
||||
finally:
|
||||
|
@ -8,7 +8,6 @@ from ..utils import (
|
||||
Popen,
|
||||
check_executable,
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
get_exe_version,
|
||||
)
|
||||
|
||||
@ -179,15 +178,15 @@ class RtmpFD(FileDownloader):
|
||||
return False
|
||||
|
||||
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
|
||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
|
||||
prevsize = os.path.getsize(tmpfilename)
|
||||
self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes')
|
||||
time.sleep(5.0) # This seems to be needed
|
||||
args = basic_args + ['--resume']
|
||||
args = [*basic_args, '--resume']
|
||||
if retval == RD_FAILED:
|
||||
args += ['--skip', '1']
|
||||
args = [encodeArgument(a) for a in args]
|
||||
retval = run_rtmpdump(args)
|
||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
cursize = os.path.getsize(tmpfilename)
|
||||
if prevsize == cursize and retval == RD_FAILED:
|
||||
break
|
||||
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
||||
@ -196,8 +195,8 @@ class RtmpFD(FileDownloader):
|
||||
retval = RD_SUCCESS
|
||||
break
|
||||
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
|
||||
fsize = os.path.getsize(tmpfilename)
|
||||
self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes')
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
|
@ -2,7 +2,7 @@ import os
|
||||
import subprocess
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import check_executable, encodeFilename
|
||||
from ..utils import check_executable
|
||||
|
||||
|
||||
class RtspFD(FileDownloader):
|
||||
@ -26,7 +26,7 @@ class RtspFD(FileDownloader):
|
||||
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
fsize = os.path.getsize(tmpfilename)
|
||||
self.to_screen(f'\r[{args[0]}] {fsize} bytes')
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
|
@ -18,7 +18,7 @@ class YoutubeLiveChatFD(FragmentFD):
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
video_id = info_dict['video_id']
|
||||
self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
|
||||
self.to_screen(f'[{self.FD_NAME}] Downloading live chat')
|
||||
if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
|
||||
self.report_warning('Live chat download runs until the livestream ends. '
|
||||
'If you wish to download the video simultaneously, run a separate yt-dlp instance')
|
||||
@ -123,8 +123,8 @@ class YoutubeLiveChatFD(FragmentFD):
|
||||
data,
|
||||
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
|
||||
|
||||
func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live
|
||||
or frag_index == 1 and try_refresh_replay_beginning
|
||||
func = ((info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live)
|
||||
or (frag_index == 1 and try_refresh_replay_beginning)
|
||||
or parse_actions_replay)
|
||||
return (True, *func(live_chat_continuation))
|
||||
except HTTPError as err:
|
||||
|
@ -1,16 +1,25 @@
|
||||
from ..compat.compat_utils import passthrough_module
|
||||
from ..globals import extractors as _extractors_context
|
||||
from ..globals import plugin_ies as _plugin_ies_context
|
||||
from ..plugins import PluginSpec, register_plugin_spec
|
||||
|
||||
passthrough_module(__name__, '.extractors')
|
||||
del passthrough_module
|
||||
|
||||
register_plugin_spec(PluginSpec(
|
||||
module_name='extractor',
|
||||
suffix='IE',
|
||||
destination=_extractors_context,
|
||||
plugin_destination=_plugin_ies_context,
|
||||
))
|
||||
|
||||
|
||||
def gen_extractor_classes():
|
||||
""" Return a list of supported extractors.
|
||||
The order does matter; the first extractor matched is the one handling the URL.
|
||||
"""
|
||||
from .extractors import _ALL_CLASSES
|
||||
|
||||
return _ALL_CLASSES
|
||||
import_extractors()
|
||||
return list(_extractors_context.value.values())
|
||||
|
||||
|
||||
def gen_extractors():
|
||||
@ -37,6 +46,9 @@ def list_extractors(age_limit=None):
|
||||
|
||||
def get_info_extractor(ie_name):
|
||||
"""Returns the info extractor class with the given ie_name"""
|
||||
from . import extractors
|
||||
import_extractors()
|
||||
return _extractors_context.value[f'{ie_name}IE']
|
||||
|
||||
return getattr(extractors, f'{ie_name}IE')
|
||||
|
||||
def import_extractors():
|
||||
from . import extractors # noqa: F401
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -4,18 +4,18 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -66,7 +66,7 @@ class ABCIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'WWI Centenary',
|
||||
'description': 'md5:c2379ec0ca84072e86b446e536954546',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074',
|
||||
'info_dict': {
|
||||
@ -74,7 +74,7 @@ class ABCIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia',
|
||||
'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476',
|
||||
'info_dict': {
|
||||
@ -85,7 +85,7 @@ class ABCIE(InfoExtractor):
|
||||
'upload_date': '20200813',
|
||||
'uploader': 'Behind the News',
|
||||
'uploader_id': 'behindthenews',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
|
||||
'info_dict': {
|
||||
@ -94,7 +94,7 @@ class ABCIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
|
||||
'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -125,7 +125,7 @@ class ABCIE(InfoExtractor):
|
||||
if mobj is None:
|
||||
expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
|
||||
if expired:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {expired}', expected=True)
|
||||
raise ExtractorError('Unable to extract video urls')
|
||||
|
||||
urls_info = self._parse_json(
|
||||
@ -163,7 +163,7 @@ class ABCIE(InfoExtractor):
|
||||
'height': height,
|
||||
'tbr': bitrate,
|
||||
'filesize': int_or_none(url_info.get('filesize')),
|
||||
'format_id': format_id
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
return {
|
||||
@ -180,20 +180,100 @@ class ABCIViewIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
|
||||
_GEO_COUNTRIES = ['AU']
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'https://iview.abc.net.au/show/utopia/series/1/video/CO1211V001S00',
|
||||
'md5': '52a942bfd7a0b79a6bfe9b4ce6c9d0ed',
|
||||
'info_dict': {
|
||||
'id': 'CO1211V001S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 1 Ep 1 Wood For The Trees',
|
||||
'series': 'Utopia',
|
||||
'description': 'md5:0cfb2c183c1b952d1548fd65c8a95c00',
|
||||
'upload_date': '20230726',
|
||||
'uploader_id': 'abc1',
|
||||
'series_id': 'CO1211V',
|
||||
'episode_id': 'CO1211V001S00',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 1,
|
||||
'episode': 'Wood For The Trees',
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/co/CO1211V001S00_5ad8353f4df09_1280.jpg',
|
||||
'timestamp': 1690403700,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'note': 'No episode name',
|
||||
'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
|
||||
'md5': '67715ce3c78426b11ba167d875ac6abf',
|
||||
'info_dict': {
|
||||
'id': 'LE1927H001S00',
|
||||
'ext': 'mp4',
|
||||
'title': "Series 11 Ep 1",
|
||||
'series': "Gruen",
|
||||
'title': 'Series 11 Ep 1',
|
||||
'series': 'Gruen',
|
||||
'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
|
||||
'upload_date': '20190925',
|
||||
'uploader_id': 'abc1',
|
||||
'series_id': 'LE1927H',
|
||||
'episode_id': 'LE1927H001S00',
|
||||
'season_number': 11,
|
||||
'season': 'Season 11',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/le/LE1927H001S00_5d954fbd79e25_1280.jpg',
|
||||
'timestamp': 1569445289,
|
||||
},
|
||||
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'note': 'No episode number',
|
||||
'url': 'https://iview.abc.net.au/show/four-corners/series/2022/video/NC2203H039S00',
|
||||
'md5': '77cb7d8434440e3b28fbebe331c2456a',
|
||||
'info_dict': {
|
||||
'id': 'NC2203H039S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 2022 Locking Up Kids',
|
||||
'series': 'Four Corners',
|
||||
'description': 'md5:54829ca108846d1a70e1fcce2853e720',
|
||||
'upload_date': '20221114',
|
||||
'uploader_id': 'abc1',
|
||||
'series_id': 'NC2203H',
|
||||
'episode_id': 'NC2203H039S00',
|
||||
'season_number': 2022,
|
||||
'season': 'Season 2022',
|
||||
'episode': 'Locking Up Kids',
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg',
|
||||
'timestamp': 1668460497,
|
||||
|
||||
},
|
||||
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'note': 'No episode name or number',
|
||||
'url': 'https://iview.abc.net.au/show/landline/series/2021/video/RF2004Q043S00',
|
||||
'md5': '2e17dec06b13cc81dc119d2565289396',
|
||||
'info_dict': {
|
||||
'id': 'RF2004Q043S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 2021',
|
||||
'series': 'Landline',
|
||||
'description': 'md5:c9f30d9c0c914a7fd23842f6240be014',
|
||||
'upload_date': '20211205',
|
||||
'uploader_id': 'abc1',
|
||||
'series_id': 'RF2004Q',
|
||||
'episode_id': 'RF2004Q043S00',
|
||||
'season_number': 2021,
|
||||
'season': 'Season 2021',
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg',
|
||||
'timestamp': 1638710705,
|
||||
|
||||
},
|
||||
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
@ -207,13 +287,12 @@ class ABCIViewIE(InfoExtractor):
|
||||
stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
|
||||
|
||||
house_number = video_params.get('episodeHouseNumber') or video_id
|
||||
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
|
||||
int(time.time()), house_number)
|
||||
path = f'/auth/hls/sign?ts={int(time.time())}&hn={house_number}&d=android-tablet'
|
||||
sig = hmac.new(
|
||||
b'android.content.res.Resources',
|
||||
path.encode('utf-8'), hashlib.sha256).hexdigest()
|
||||
path.encode(), hashlib.sha256).hexdigest()
|
||||
token = self._download_webpage(
|
||||
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
|
||||
f'http://iview.abc.net.au{path}&sig={sig}', video_id)
|
||||
|
||||
def tokenize_url(url, token):
|
||||
return update_url_query(url, {
|
||||
@ -222,7 +301,7 @@ class ABCIViewIE(InfoExtractor):
|
||||
|
||||
for sd in ('1080', '720', 'sd', 'sd-low'):
|
||||
sd_url = try_get(
|
||||
stream, lambda x: x['streams']['hls'][sd], compat_str)
|
||||
stream, lambda x: x['streams']['hls'][sd], str)
|
||||
if not sd_url:
|
||||
continue
|
||||
formats = self._extract_m3u8_formats(
|
||||
@ -255,6 +334,8 @@ class ABCIViewIE(InfoExtractor):
|
||||
'episode_number': int_or_none(self._search_regex(
|
||||
r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
|
||||
'episode_id': house_number,
|
||||
'episode': self._search_regex(
|
||||
r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title, 'episode', default='') or None,
|
||||
'uploader_id': video_params.get('channel'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
@ -275,7 +356,7 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
'description': 'md5:93119346c24a7c322d446d8eece430ff',
|
||||
'series': 'Upper Middle Bogan',
|
||||
'season': 'Series 1',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}, {
|
||||
@ -294,17 +375,39 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
'noplaylist': True,
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
# 'videoEpisodes' is a dict with `items` key
|
||||
'url': 'https://iview.abc.net.au/show/7-30-mark-humphries-satire',
|
||||
'info_dict': {
|
||||
'id': '178458-0',
|
||||
'title': 'Episodes',
|
||||
'description': 'Satirist Mark Humphries brings his unique perspective on current political events for 7.30.',
|
||||
'series': '7.30 Mark Humphries Satire',
|
||||
'season': 'Episodes',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
|
||||
},
|
||||
'playlist_count': 15,
|
||||
'skip': 'This program is not currently available in ABC iview',
|
||||
}, {
|
||||
'url': 'https://iview.abc.net.au/show/inbestigators',
|
||||
'info_dict': {
|
||||
'id': '175343-1',
|
||||
'title': 'Series 1',
|
||||
'description': 'md5:b9976935a6450e5b78ce2a940a755685',
|
||||
'series': 'The Inbestigators',
|
||||
'season': 'Series 1',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.+\.jpg',
|
||||
},
|
||||
'playlist_count': 17,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
webpage_data = self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
|
||||
webpage, 'initial state')
|
||||
video_data = self._parse_json(
|
||||
unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id)
|
||||
video_data = video_data['route']['pageData']['_embedded']
|
||||
video_data = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"]', webpage, 'initial state', show_id,
|
||||
transform_source=lambda x: x.encode().decode('unicode_escape'),
|
||||
end_pattern=r'[\'"]\s*;')['route']['pageData']['_embedded']
|
||||
|
||||
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
|
||||
if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):
|
||||
@ -313,12 +416,14 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
series = video_data['selectedSeries']
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': [self.url_result(episode['shareUrl'])
|
||||
for episode in series['_embedded']['videoEpisodes']],
|
||||
'entries': [self.url_result(episode_url, ABCIViewIE)
|
||||
for episode_url in traverse_obj(series, (
|
||||
'_embedded', 'videoEpisodes', (None, 'items'), ..., 'shareUrl', {url_or_none}))],
|
||||
'id': series.get('id'),
|
||||
'title': dict_get(series, ('title', 'displaySubtitle')),
|
||||
'description': series.get('description'),
|
||||
'series': dict_get(series, ('showTitle', 'displayTitle')),
|
||||
'season': dict_get(series, ('title', 'displaySubtitle')),
|
||||
'thumbnail': series.get('thumbnail'),
|
||||
'thumbnail': traverse_obj(
|
||||
series, 'thumbnail', ('images', lambda _, v: v['name'] == 'seriesThumbnail', 'url'), get_all=False),
|
||||
}
|
||||
|
@ -58,7 +58,7 @@ class AbcNewsVideoIE(AMPIE):
|
||||
display_id = mobj.group('display_id')
|
||||
video_id = mobj.group('id')
|
||||
info_dict = self._extract_feed_info(
|
||||
'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
|
||||
f'http://abcnews.go.com/video/itemfeed?id={video_id}')
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
|
@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
int_or_none,
|
||||
@ -57,11 +56,11 @@ class ABCOTVSIE(InfoExtractor):
|
||||
data = self._download_json(
|
||||
'https://api.abcotvs.com/v2/content', display_id, query={
|
||||
'id': video_id,
|
||||
'key': 'otv.web.%s.story' % station,
|
||||
'key': f'otv.web.{station}.story',
|
||||
'station': station,
|
||||
})['data']
|
||||
video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
|
||||
video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id))
|
||||
video_id = str(dict_get(video, ('id', 'publishedKey'), video_id))
|
||||
title = video.get('title') or video['linkText']
|
||||
|
||||
formats = []
|
||||
|
@ -6,53 +6,54 @@ import hmac
|
||||
import io
|
||||
import json
|
||||
import re
|
||||
import struct
|
||||
import time
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import urllib.response
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_decrypt
|
||||
from ..networking import RequestHandler, Response
|
||||
from ..networking.exceptions import TransportError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bytes_to_intlist,
|
||||
OnDemandPagedList,
|
||||
decode_base_n,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
OnDemandPagedList,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
update_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
|
||||
"""Add a handler for opening URLs, like _download_webpage"""
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
|
||||
rh = ydl._request_director.handlers['Urllib']
|
||||
if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
|
||||
return
|
||||
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies)
|
||||
assert isinstance(opener, urllib.request.OpenerDirector)
|
||||
opener.add_handler(handler)
|
||||
rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
|
||||
class AbemaLicenseRH(RequestHandler):
|
||||
_SUPPORTED_URL_SCHEMES = ('abematv-license',)
|
||||
_SUPPORTED_PROXY_SCHEMES = None
|
||||
_SUPPORTED_FEATURES = None
|
||||
RH_NAME = 'abematv_license'
|
||||
|
||||
_STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
||||
_HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
|
||||
|
||||
class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
handler_order = 499
|
||||
STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
||||
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
|
||||
|
||||
def __init__(self, ie: 'AbemaTVIE'):
|
||||
# the protocol that this should really handle is 'abematv-license://'
|
||||
# abematv_license_open is just a placeholder for development purposes
|
||||
# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
|
||||
setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open'))
|
||||
def __init__(self, *, ie: 'AbemaTVIE', **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.ie = ie
|
||||
|
||||
def _send(self, request):
|
||||
url = request.url
|
||||
ticket = urllib.parse.urlparse(url).netloc
|
||||
|
||||
try:
|
||||
response_data = self._get_videokey_from_ticket(ticket)
|
||||
except ExtractorError as e:
|
||||
raise TransportError(cause=e.cause) from e
|
||||
except (IndexError, KeyError, TypeError) as e:
|
||||
raise TransportError(cause=repr(e)) from e
|
||||
|
||||
return Response(
|
||||
io.BytesIO(response_data), url,
|
||||
headers={'Content-Length': str(len(response_data))})
|
||||
|
||||
def _get_videokey_from_ticket(self, ticket):
|
||||
to_show = self.ie.get_param('verbose', False)
|
||||
media_token = self.ie._get_media_token(to_show=to_show)
|
||||
@ -62,33 +63,27 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
query={'t': media_token},
|
||||
data=json.dumps({
|
||||
'kv': 'a',
|
||||
'lt': ticket
|
||||
}).encode('utf-8'),
|
||||
'lt': ticket,
|
||||
}).encode(),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
res = decode_base_n(license_response['k'], table=self.STRTABLE)
|
||||
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
|
||||
res = decode_base_n(license_response['k'], table=self._STRTABLE)
|
||||
encvideokey = list(res.to_bytes(16, 'big'))
|
||||
|
||||
h = hmac.new(
|
||||
binascii.unhexlify(self.HKEY),
|
||||
(license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
|
||||
binascii.unhexlify(self._HKEY),
|
||||
(license_response['cid'] + self.ie._DEVICE_ID).encode(),
|
||||
digestmod=hashlib.sha256)
|
||||
enckey = bytes_to_intlist(h.digest())
|
||||
enckey = list(h.digest())
|
||||
|
||||
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
|
||||
def abematv_license_open(self, url):
|
||||
url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
|
||||
ticket = urllib.parse.urlparse(url).netloc
|
||||
response_data = self._get_videokey_from_ticket(ticket)
|
||||
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
|
||||
'Content-Length': str(len(response_data)),
|
||||
}, url=url, code=200)
|
||||
return bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
|
||||
|
||||
class AbemaTVBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
|
||||
_USERTOKEN = None
|
||||
_DEVICE_ID = None
|
||||
_MEDIATOKEN = None
|
||||
@ -97,11 +92,11 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
|
||||
@classmethod
|
||||
def _generate_aks(cls, deviceid):
|
||||
deviceid = deviceid.encode('utf-8')
|
||||
deviceid = deviceid.encode()
|
||||
# add 1 hour and then drop minute and secs
|
||||
ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
|
||||
time_struct = time.gmtime(ts_1hour)
|
||||
ts_1hour_str = str(ts_1hour).encode('utf-8')
|
||||
ts_1hour_str = str(ts_1hour).encode()
|
||||
|
||||
tmp = None
|
||||
|
||||
@ -113,7 +108,7 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
|
||||
def mix_tmp(count):
|
||||
nonlocal tmp
|
||||
for i in range(count):
|
||||
for _ in range(count):
|
||||
mix_once(tmp)
|
||||
|
||||
def mix_twist(nonce):
|
||||
@ -133,11 +128,15 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
if self._USERTOKEN:
|
||||
return self._USERTOKEN
|
||||
|
||||
self._downloader._request_director.add_handler(AbemaLicenseRH(ie=self, logger=None))
|
||||
|
||||
username, _ = self._get_login_info()
|
||||
AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
|
||||
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
|
||||
AbemaTVBaseIE._USERTOKEN = auth_cache and auth_cache.get('usertoken')
|
||||
if AbemaTVBaseIE._USERTOKEN:
|
||||
# try authentication with locally stored token
|
||||
try:
|
||||
AbemaTVBaseIE._DEVICE_ID = auth_cache.get('device_id')
|
||||
self._get_media_token(True)
|
||||
return
|
||||
except ExtractorError as e:
|
||||
@ -150,13 +149,12 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
data=json.dumps({
|
||||
'deviceId': self._DEVICE_ID,
|
||||
'applicationKeySecret': aks,
|
||||
}).encode('utf-8'),
|
||||
}).encode(),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
AbemaTVBaseIE._USERTOKEN = user_data['token']
|
||||
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
return self._USERTOKEN
|
||||
|
||||
def _get_media_token(self, invalidate=False, to_show=True):
|
||||
@ -171,13 +169,44 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
'osLang': 'ja_JP',
|
||||
'osTimezone': 'Asia/Tokyo',
|
||||
'appId': 'tv.abema',
|
||||
'appVersion': '3.27.1'
|
||||
'appVersion': '3.27.1',
|
||||
}, headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
})['token']
|
||||
|
||||
return self._MEDIATOKEN
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._get_device_token()
|
||||
if self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19') and self._get_media_token():
|
||||
self.write_debug('Skipping logging in')
|
||||
return
|
||||
|
||||
if '@' in username: # don't strictly check if it's email address or not
|
||||
ep, method = 'user/email', 'email'
|
||||
else:
|
||||
ep, method = 'oneTimePassword', 'userId'
|
||||
|
||||
login_response = self._download_json(
|
||||
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
|
||||
data=json.dumps({
|
||||
method: username,
|
||||
'password': password,
|
||||
}).encode(), headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
'Origin': 'https://abema.tv',
|
||||
'Referer': 'https://abema.tv/',
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
AbemaTVBaseIE._USERTOKEN = login_response['token']
|
||||
self._get_media_token(True)
|
||||
auth_cache = {
|
||||
'device_id': AbemaTVBaseIE._DEVICE_ID,
|
||||
'usertoken': AbemaTVBaseIE._USERTOKEN,
|
||||
}
|
||||
self.cache.store(self._NETRC_MACHINE, username, auth_cache)
|
||||
|
||||
def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
|
||||
return self._download_json(
|
||||
f'https://api.abema.io/{endpoint}', video_id, query=query or {},
|
||||
@ -201,14 +230,14 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
|
||||
class AbemaTVIE(AbemaTVBaseIE):
|
||||
_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
_TESTS = [{
|
||||
'url': 'https://abema.tv/video/episode/194-25_s2_p1',
|
||||
'info_dict': {
|
||||
'id': '194-25_s2_p1',
|
||||
'title': '第1話 「チーズケーキ」 「モーニング再び」',
|
||||
'series': '異世界食堂2',
|
||||
'series_number': 2,
|
||||
'season': 'シーズン2',
|
||||
'season_number': 2,
|
||||
'episode': '第1話 「チーズケーキ」 「モーニング再び」',
|
||||
'episode_number': 1,
|
||||
},
|
||||
@ -220,7 +249,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
|
||||
'series': 'ゆるキャン△ SEASON2',
|
||||
'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
|
||||
'series_number': 2,
|
||||
'season_number': 2,
|
||||
'episode_number': 1,
|
||||
'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
|
||||
},
|
||||
@ -249,33 +278,6 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
}]
|
||||
_TIMETABLE = None
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._get_device_token()
|
||||
if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
|
||||
self.write_debug('Skipping logging in')
|
||||
return
|
||||
|
||||
if '@' in username: # don't strictly check if it's email address or not
|
||||
ep, method = 'user/email', 'email'
|
||||
else:
|
||||
ep, method = 'oneTimePassword', 'userId'
|
||||
|
||||
login_response = self._download_json(
|
||||
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
|
||||
data=json.dumps({
|
||||
method: username,
|
||||
'password': password
|
||||
}).encode('utf-8'), headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
'Origin': 'https://abema.tv',
|
||||
'Referer': 'https://abema.tv/',
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
AbemaTVBaseIE._USERTOKEN = login_response['token']
|
||||
self._get_media_token(True)
|
||||
self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN)
|
||||
|
||||
def _real_extract(self, url):
|
||||
# starting download using infojson from this extractor is undefined behavior,
|
||||
# and never be fixed in the future; you must trigger downloads by directly specifying URL.
|
||||
@ -331,7 +333,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
|
||||
description = self._html_search_regex(
|
||||
(r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
|
||||
r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
|
||||
r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div'),
|
||||
webpage, 'description', default=None, group=1)
|
||||
if not description:
|
||||
og_desc = self._html_search_meta(
|
||||
@ -344,17 +346,18 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
)?
|
||||
''', r'\1', og_desc)
|
||||
|
||||
# canonical URL may contain series and episode number
|
||||
# canonical URL may contain season and episode number
|
||||
mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
|
||||
if mobj:
|
||||
seri = int_or_none(mobj.group(1), default=float('inf'))
|
||||
epis = int_or_none(mobj.group(2), default=float('inf'))
|
||||
info['series_number'] = seri if seri < 100 else None
|
||||
info['season_number'] = seri if seri < 100 else None
|
||||
# some anime like Detective Conan (though not available in AbemaTV)
|
||||
# has more than 1000 episodes (1026 as of 2021/11/15)
|
||||
info['episode_number'] = epis if epis < 2000 else None
|
||||
|
||||
is_live, m3u8_url = False, None
|
||||
availability = 'public'
|
||||
if video_type == 'now-on-air':
|
||||
is_live = True
|
||||
channel_url = 'https://api.abema.io/v1/channels'
|
||||
@ -372,13 +375,13 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
|
||||
note='Checking playability',
|
||||
headers=headers)
|
||||
ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
|
||||
if 3 not in ondemand_types:
|
||||
if not traverse_obj(api_response, ('label', 'free', {bool})):
|
||||
# cannot acquire decryption key for these streams
|
||||
self.report_warning('This is a premium-only stream')
|
||||
availability = 'premium_only'
|
||||
info.update(traverse_obj(api_response, {
|
||||
'series': ('series', 'title'),
|
||||
'season': ('season', 'title'),
|
||||
'season': ('season', 'name'),
|
||||
'season_number': ('season', 'sequence'),
|
||||
'episode_number': ('episode', 'number'),
|
||||
}))
|
||||
@ -395,6 +398,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
headers=headers)
|
||||
if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
|
||||
self.report_warning('This is a premium-only stream')
|
||||
availability = 'premium_only'
|
||||
|
||||
m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
|
||||
else:
|
||||
@ -412,19 +416,25 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
'availability': availability,
|
||||
})
|
||||
|
||||
if thumbnail := update_url(self._og_search_thumbnail(webpage, default=''), query=None):
|
||||
info['thumbnails'] = [{'url': thumbnail}]
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class AbemaTVTitleIE(AbemaTVBaseIE):
|
||||
_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
|
||||
_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/#]+)/?(?:\?(?:[^#]+&)?s=(?P<season>[^&#]+))?'
|
||||
_PAGE_SIZE = 25
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://abema.tv/video/title/90-1597',
|
||||
'url': 'https://abema.tv/video/title/90-1887',
|
||||
'info_dict': {
|
||||
'id': '90-1597',
|
||||
'id': '90-1887',
|
||||
'title': 'シャッフルアイランド',
|
||||
'description': 'md5:61b2425308f41a5282a926edda66f178',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}, {
|
||||
@ -432,41 +442,54 @@ class AbemaTVTitleIE(AbemaTVBaseIE):
|
||||
'info_dict': {
|
||||
'id': '193-132',
|
||||
'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
|
||||
'description': 'md5:9b59493d1f3a792bafbc7319258e7af8',
|
||||
},
|
||||
'playlist_mincount': 16,
|
||||
}, {
|
||||
'url': 'https://abema.tv/video/title/25-102',
|
||||
'url': 'https://abema.tv/video/title/25-1nzan-whrxe',
|
||||
'info_dict': {
|
||||
'id': '25-102',
|
||||
'title': 'ソードアート・オンライン アリシゼーション',
|
||||
'id': '25-1nzan-whrxe',
|
||||
'title': 'ソードアート・オンライン',
|
||||
'description': 'md5:c094904052322e6978495532bdbf06e6',
|
||||
},
|
||||
'playlist_mincount': 24,
|
||||
'playlist_mincount': 25,
|
||||
}, {
|
||||
'url': 'https://abema.tv/video/title/26-2mzbynr-cph?s=26-2mzbynr-cph_s40',
|
||||
'info_dict': {
|
||||
'title': '〈物語〉シリーズ',
|
||||
'id': '26-2mzbynr-cph',
|
||||
'description': 'md5:e67873de1c88f360af1f0a4b84847a52',
|
||||
},
|
||||
'playlist_count': 59,
|
||||
}]
|
||||
|
||||
def _fetch_page(self, playlist_id, series_version, page):
|
||||
def _fetch_page(self, playlist_id, series_version, season_id, page):
|
||||
query = {
|
||||
'seriesVersion': series_version,
|
||||
'offset': str(page * self._PAGE_SIZE),
|
||||
'order': 'seq',
|
||||
'limit': str(self._PAGE_SIZE),
|
||||
}
|
||||
if season_id:
|
||||
query['seasonId'] = season_id
|
||||
programs = self._call_api(
|
||||
f'v1/video/series/{playlist_id}/programs', playlist_id,
|
||||
note=f'Downloading page {page + 1}',
|
||||
query={
|
||||
'seriesVersion': series_version,
|
||||
'offset': str(page * self._PAGE_SIZE),
|
||||
'order': 'seq',
|
||||
'limit': str(self._PAGE_SIZE),
|
||||
})
|
||||
query=query)
|
||||
yield from (
|
||||
self.url_result(f'https://abema.tv/video/episode/{x}')
|
||||
for x in traverse_obj(programs, ('programs', ..., 'id')))
|
||||
|
||||
def _entries(self, playlist_id, series_version):
|
||||
def _entries(self, playlist_id, series_version, season_id):
|
||||
return OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, playlist_id, series_version),
|
||||
functools.partial(self._fetch_page, playlist_id, series_version, season_id),
|
||||
self._PAGE_SIZE)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
playlist_id, season_id = self._match_valid_url(url).group('id', 'season')
|
||||
series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(playlist_id, series_info['version']), playlist_id=playlist_id,
|
||||
self._entries(playlist_id, series_info['version'], season_id), playlist_id=playlist_id,
|
||||
playlist_title=series_info.get('title'),
|
||||
playlist_description=series_info.get('content'))
|
||||
|
@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class AcademicEarthCourseIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
IE_NAME = 'AcademicEarth:Course'
|
||||
_TEST = {
|
||||
'url': 'http://academicearth.org/playlists/laws-of-nature/',
|
||||
|
@ -43,14 +43,14 @@ class ACastIE(ACastBaseIE):
|
||||
_VALID_URL = r'''(?x:
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:embed|www)\.)?acast\.com/|
|
||||
(?:(?:embed|www|shows)\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<channel>[^/]+)/(?P<id>[^/#?"]+)
|
||||
(?P<channel>[^/?#]+)/(?:episodes/)?(?P<id>[^/#?"]+)
|
||||
)'''
|
||||
_EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'url': 'https://shows.acast.com/sparpodcast/episodes/2.raggarmordet-rosterurdetforflutna',
|
||||
'info_dict': {
|
||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'ext': 'mp3',
|
||||
@ -59,7 +59,7 @@ class ACastIE(ACastBaseIE):
|
||||
'timestamp': 1477346700,
|
||||
'upload_date': '20161024',
|
||||
'duration': 2766,
|
||||
'creator': 'Third Ear Studio',
|
||||
'creators': ['Third Ear Studio'],
|
||||
'series': 'Spår',
|
||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||
'thumbnail': 'https://assets.pippa.io/shows/616ebe1886d7b1398620b943/616ebe33c7e6e70013cae7da.jpg',
|
||||
@ -67,13 +67,16 @@ class ACastIE(ACastBaseIE):
|
||||
'display_id': '2.raggarmordet-rosterurdetforflutna',
|
||||
'season_number': 4,
|
||||
'season': 'Season 4',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'only_matching': True,
|
||||
@ -93,13 +96,13 @@ class ACastIE(ACastBaseIE):
|
||||
'series': 'Democracy Sausage with Mark Kenny',
|
||||
'timestamp': 1684826362,
|
||||
'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = self._match_valid_url(url).groups()
|
||||
episode = self._call_api(
|
||||
'%s/episodes/%s' % (channel, display_id),
|
||||
f'{channel}/episodes/{display_id}',
|
||||
display_id, {'showInfo': 'true'})
|
||||
return self._extract_episode(
|
||||
episode, self._extract_show_info(episode.get('show') or {}))
|
||||
@ -110,7 +113,7 @@ class ACastChannelIE(ACastBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?acast\.com/|
|
||||
(?:(?:www|shows)\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<id>[^/#?]+)
|
||||
@ -120,17 +123,20 @@ class ACastChannelIE(ACastBaseIE):
|
||||
'info_dict': {
|
||||
'id': '4efc5294-5385-4847-98bd-519799ce5786',
|
||||
'title': 'Today in Focus',
|
||||
'description': 'md5:c09ce28c91002ce4ffce71d6504abaae',
|
||||
'description': 'md5:feca253de9947634605080cd9eeea2bf',
|
||||
},
|
||||
'playlist_mincount': 200,
|
||||
}, {
|
||||
'url': 'http://play.acast.com/s/ft-banking-weekly',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://shows.acast.com/sparpodcast',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
|
||||
return False if ACastIE.suitable(url) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_slug = self._match_id(url)
|
||||
|
@ -3,9 +3,10 @@ from ..utils import (
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
parse_codecs,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
@ -24,7 +25,7 @@ class AcFunVideoBaseIE(InfoExtractor):
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'tbr': float_or_none(video.get('avgBitrate')),
|
||||
**parse_codecs(video.get('codecs', ''))
|
||||
**parse_codecs(video.get('codecs', '')),
|
||||
})
|
||||
|
||||
return {
|
||||
@ -76,7 +77,7 @@ class AcFunVideoIE(AcFunVideoBaseIE):
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
|
||||
'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -129,7 +130,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
|
||||
'title': '红孩儿之趴趴蛙寻石记 第5话 ',
|
||||
'duration': 760.0,
|
||||
'season': '红孩儿之趴趴蛙寻石记',
|
||||
'season_id': 5023171,
|
||||
'season_id': '5023171',
|
||||
'season_number': 1, # series has only 1 season
|
||||
'episode': 'Episode 5',
|
||||
'episode_number': 5,
|
||||
@ -146,7 +147,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
|
||||
'title': '叽歪老表(第二季) 第5话 坚不可摧',
|
||||
'season': '叽歪老表(第二季)',
|
||||
'season_number': 2,
|
||||
'season_id': 6065485,
|
||||
'season_id': '6065485',
|
||||
'episode': '坚不可摧',
|
||||
'episode_number': 5,
|
||||
'upload_date': '20220324',
|
||||
@ -191,7 +192,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
|
||||
'title': json_bangumi_data.get('showTitle'),
|
||||
'thumbnail': json_bangumi_data.get('image'),
|
||||
'season': json_bangumi_data.get('bangumiTitle'),
|
||||
'season_id': season_id,
|
||||
'season_id': str_or_none(season_id),
|
||||
'season_number': season_number,
|
||||
'episode': json_bangumi_data.get('title'),
|
||||
'episode_number': episode_number,
|
||||
|
@ -3,33 +3,53 @@ import binascii
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..compat import compat_b64decode
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ass_subtitles_timecode,
|
||||
bytes_to_intlist,
|
||||
bytes_to_long,
|
||||
ExtractorError,
|
||||
ass_subtitles_timecode,
|
||||
bytes_to_long,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
join_nonempty,
|
||||
long_to_bytes,
|
||||
parse_iso8601,
|
||||
pkcs1pad,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ADNIE(InfoExtractor):
|
||||
class ADNBaseIE(InfoExtractor):
|
||||
IE_DESC = 'Animation Digital Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||
_NETRC_MACHINE = 'animationdigitalnetwork'
|
||||
_BASE = 'animationdigitalnetwork.fr'
|
||||
_API_BASE_URL = f'https://gw.api.{_BASE}/'
|
||||
_PLAYER_BASE_URL = f'{_API_BASE_URL}player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
'end': 3,
|
||||
}
|
||||
_LINE_ALIGN_MAP = {
|
||||
'middle': 8,
|
||||
'end': 4,
|
||||
}
|
||||
|
||||
|
||||
class ADNIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'url': 'https://animationdigitalnetwork.com/video/558-fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
||||
'info_dict': {
|
||||
'id': '9841',
|
||||
@ -44,29 +64,32 @@ class ADNIE(InfoExtractor):
|
||||
'season_number': 1,
|
||||
'episode': 'À ce soir !',
|
||||
'episode_number': 1,
|
||||
'thumbnail': str,
|
||||
'season': 'Season 1',
|
||||
},
|
||||
'skip': 'Only available in region (FR, ...)',
|
||||
'skip': 'Only available in French and German speaking Europe',
|
||||
}, {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'only_matching': True,
|
||||
'url': 'https://animationdigitalnetwork.com/de/video/973-the-eminence-in-shadow/23550-folge-1',
|
||||
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
|
||||
'info_dict': {
|
||||
'id': '23550',
|
||||
'ext': 'mp4',
|
||||
'episode_number': 1,
|
||||
'duration': 1417,
|
||||
'release_date': '20231004',
|
||||
'series': 'The Eminence in Shadow',
|
||||
'season_number': 2,
|
||||
'episode': str,
|
||||
'title': str,
|
||||
'thumbnail': str,
|
||||
'season': 'Season 2',
|
||||
'comment_count': int,
|
||||
'average_rating': float,
|
||||
'description': str,
|
||||
},
|
||||
# 'skip': 'Only available in French and German speaking Europe',
|
||||
}]
|
||||
|
||||
_NETRC_MACHINE = 'animationdigitalnetwork'
|
||||
_BASE = 'animationdigitalnetwork.fr'
|
||||
_API_BASE_URL = 'https://gw.api.' + _BASE + '/'
|
||||
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
'end': 3,
|
||||
}
|
||||
_LINE_ALIGN_MAP = {
|
||||
'middle': 8,
|
||||
'end': 4,
|
||||
}
|
||||
|
||||
def _get_subtitles(self, sub_url, video_id):
|
||||
if not sub_url:
|
||||
return None
|
||||
@ -83,9 +106,9 @@ class ADNIE(InfoExtractor):
|
||||
|
||||
# http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
|
||||
compat_b64decode(enc_subtitles[24:]),
|
||||
base64.b64decode(enc_subtitles[24:]),
|
||||
binascii.unhexlify(self._K + '7fac1178830cfe0c'),
|
||||
compat_b64decode(enc_subtitles[:24])))
|
||||
base64.b64decode(enc_subtitles[:24])))
|
||||
subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False)
|
||||
if not subtitles_json:
|
||||
return None
|
||||
@ -108,7 +131,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
if start is None or end is None or text is None:
|
||||
continue
|
||||
alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0)
|
||||
ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % (
|
||||
ssa += os.linesep + 'Dialogue: Marked=0,{},{},Default,,0,0,0,,{}{}'.format(
|
||||
ass_subtitles_timecode(start),
|
||||
ass_subtitles_timecode(end),
|
||||
'{\\a%d}' % alignment if alignment != 2 else '',
|
||||
@ -116,6 +139,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
if sub_lang == 'vostf':
|
||||
sub_lang = 'fr'
|
||||
elif sub_lang == 'vostde':
|
||||
sub_lang = 'de'
|
||||
subtitles.setdefault(sub_lang, []).extend([{
|
||||
'ext': 'json',
|
||||
'data': json.dumps(sub),
|
||||
@ -137,7 +162,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
'username': username,
|
||||
})) or {}).get('accessToken')
|
||||
if access_token:
|
||||
self._HEADERS = {'authorization': 'Bearer ' + access_token}
|
||||
self._HEADERS['Authorization'] = f'Bearer {access_token}'
|
||||
except ExtractorError as e:
|
||||
message = None
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
@ -147,8 +172,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
|
||||
lang, video_id = self._match_valid_url(url).group('lang', 'id')
|
||||
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
|
||||
video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/'
|
||||
player = self._download_json(
|
||||
video_base_url + 'configuration', video_id,
|
||||
'Downloading player config JSON metadata',
|
||||
@ -157,26 +183,29 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
user = options['user']
|
||||
if not user.get('hasAccess'):
|
||||
self.raise_login_required()
|
||||
start_date = traverse_obj(options, ('video', 'startDate', {str}))
|
||||
if (parse_iso8601(start_date) or 0) > time.time():
|
||||
raise ExtractorError(f'This video is not available yet. Release date: {start_date}', expected=True)
|
||||
self.raise_login_required('This video requires a subscription', method='password')
|
||||
|
||||
token = self._download_json(
|
||||
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
|
||||
video_id, 'Downloading access token', headers={
|
||||
'x-player-refresh-token': user['refreshToken']
|
||||
'X-Player-Refresh-Token': user['refreshToken'],
|
||||
}, data=b'')['token']
|
||||
|
||||
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
|
||||
self._K = ''.join(random.choices('0123456789abcdef', k=16))
|
||||
message = bytes_to_intlist(json.dumps({
|
||||
message = list(json.dumps({
|
||||
'k': self._K,
|
||||
't': token,
|
||||
}))
|
||||
}).encode())
|
||||
|
||||
# Sometimes authentication fails for no good reason, retry with
|
||||
# a different random padding
|
||||
links_data = None
|
||||
for _ in range(3):
|
||||
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
|
||||
padded_message = bytes(pkcs1pad(message, 128))
|
||||
n, e = self._RSA_KEY
|
||||
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
|
||||
authorization = base64.b64encode(encrypted_message).decode()
|
||||
@ -184,12 +213,13 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
try:
|
||||
links_data = self._download_json(
|
||||
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||
'X-Player-Token': authorization
|
||||
'X-Player-Token': authorization,
|
||||
**self._HEADERS,
|
||||
}, query={
|
||||
'freeWithAds': 'true',
|
||||
'adaptive': 'false',
|
||||
'withMetadata': 'true',
|
||||
'source': 'Web'
|
||||
'source': 'Web',
|
||||
})
|
||||
break
|
||||
except ExtractorError as e:
|
||||
@ -202,7 +232,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
error = self._parse_json(e.cause.response.read(), video_id)
|
||||
message = error.get('message')
|
||||
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
if e.cause.status == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
raise ExtractorError(message)
|
||||
else:
|
||||
@ -221,7 +251,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
for quality, load_balancer_url in qualities.items():
|
||||
load_balancer_data = self._download_json(
|
||||
load_balancer_url, video_id,
|
||||
'Downloading %s %s JSON metadata' % (format_id, quality),
|
||||
f'Downloading {format_id} {quality} JSON metadata',
|
||||
headers=self._HEADERS,
|
||||
fatal=False) or {}
|
||||
m3u8_url = load_balancer_data.get('location')
|
||||
if not m3u8_url:
|
||||
@ -232,11 +263,17 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
if format_id == 'vf':
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'fr'
|
||||
elif format_id == 'vde':
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'de'
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
if not formats:
|
||||
self.raise_login_required('This video requires a subscription', method='password')
|
||||
|
||||
video = (self._download_json(
|
||||
self._API_BASE_URL + 'video/%s' % video_id, video_id,
|
||||
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||
self._API_BASE_URL + f'video/{video_id}', video_id,
|
||||
'Downloading additional video metadata', fatal=False, headers=self._HEADERS) or {}).get('video') or {}
|
||||
show = video.get('show') or {}
|
||||
|
||||
return {
|
||||
@ -255,3 +292,38 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
|
||||
'comment_count': int_or_none(video.get('commentsCount')),
|
||||
}
|
||||
|
||||
|
||||
class ADNSeasonIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>\d+)[^/?#]*/?(?:$|[#?])'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.com/video/911-tokyo-mew-mew-new',
|
||||
'playlist_count': 12,
|
||||
'info_dict': {
|
||||
'id': '911',
|
||||
'title': 'Tokyo Mew Mew New',
|
||||
},
|
||||
# 'skip': 'Only available in French and German speaking Europe',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_show_slug = self._match_valid_url(url).group('lang', 'id')
|
||||
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
|
||||
show = self._download_json(
|
||||
f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug,
|
||||
'Downloading show JSON metadata', headers=self._HEADERS)['show']
|
||||
show_id = str(show['id'])
|
||||
episodes = self._download_json(
|
||||
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
|
||||
'Downloading episode list', headers=self._HEADERS, query={
|
||||
'order': 'asc',
|
||||
'limit': '-1',
|
||||
})
|
||||
|
||||
def entries():
|
||||
for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})):
|
||||
yield self.url_result(join_nonempty(
|
||||
'https://animationdigitalnetwork.com', lang, 'video',
|
||||
video_show_slug, episode_id, delim='/'), ADNIE, episode_id)
|
||||
|
||||
return self.playlist_result(entries(), show_id, show.get('title'))
|
||||
|
@ -1,8 +1,6 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class AdobeConnectIE(InfoExtractor):
|
||||
@ -12,13 +10,13 @@ class AdobeConnectIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_extract_title(webpage)
|
||||
qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
|
||||
qs = urllib.parse.parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
|
||||
is_live = qs.get('isLive', ['false'])[0] == 'true'
|
||||
formats = []
|
||||
for con_string in qs['conStrings'][0].split(','):
|
||||
formats.append({
|
||||
'format_id': con_string.split('://')[0],
|
||||
'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
|
||||
'app': urllib.parse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
|
||||
'ext': 'flv',
|
||||
'play_path': 'mp4:' + qs['streamName'][0],
|
||||
'rtmp_conn': 'S:' + qs['ticket'][0],
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2,13 +2,12 @@ import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ISO639Utils,
|
||||
OnDemandPagedList,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
ISO639Utils,
|
||||
join_nonempty,
|
||||
OnDemandPagedList,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
@ -36,7 +35,7 @@ class AdobeTVBaseIE(InfoExtractor):
|
||||
return subtitles
|
||||
|
||||
def _parse_video_data(self, video_data):
|
||||
video_id = compat_str(video_data['id'])
|
||||
video_id = str(video_data['id'])
|
||||
title = video_data['title']
|
||||
|
||||
s3_extracted = False
|
||||
@ -151,7 +150,7 @@ class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
|
||||
page += 1
|
||||
query['page'] = page
|
||||
for element_data in self._call_api(
|
||||
self._RESOURCE, display_id, query, 'Download Page %d' % page):
|
||||
self._RESOURCE, display_id, query, f'Download Page {page}'):
|
||||
yield self._process_data(element_data)
|
||||
|
||||
def _extract_playlist_entries(self, display_id, query):
|
||||
|
@ -91,7 +91,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
getShowBySlug(slug:"%s") {
|
||||
%%s
|
||||
}
|
||||
}''' % show_path
|
||||
}''' % show_path # noqa: UP031
|
||||
if episode_path:
|
||||
query = query % '''title
|
||||
getVideoBySlug(slug:"%s") {
|
||||
@ -107,7 +107,6 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
title
|
||||
tvRating
|
||||
}''' % episode_path
|
||||
['getVideoBySlug']
|
||||
else:
|
||||
query = query % '''metaDescription
|
||||
title
|
||||
@ -129,7 +128,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
episode_title = title = video_data['title']
|
||||
series = show_data.get('title')
|
||||
if series:
|
||||
title = '%s - %s' % (series, title)
|
||||
title = f'{series} - {title}'
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@ -192,7 +191,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
if not slug:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'http://adultswim.com/videos/%s/%s' % (show_path, slug),
|
||||
f'http://adultswim.com/videos/{show_path}/{slug}',
|
||||
'AdultSwim', video.get('_id')))
|
||||
return self.playlist_result(
|
||||
entries, show_path, show_data.get('title'),
|
||||
|
@ -73,8 +73,8 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
def _extract_aetn_info(self, domain, filter_key, filter_value, url):
|
||||
requestor_id, brand = self._DOMAIN_MAP[domain]
|
||||
result = self._download_json(
|
||||
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
|
||||
filter_value, query={'filter[%s]' % filter_key: filter_value})
|
||||
f'https://feeds.video.aetnd.com/api/v2/{brand}/videos',
|
||||
filter_value, query={f'filter[{filter_key}]': filter_value})
|
||||
result = traverse_obj(
|
||||
result, ('results',
|
||||
lambda k, v: k == 0 and v[filter_key] == filter_value),
|
||||
@ -93,7 +93,7 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
traverse_obj(theplatform_metadata, ('ratings', 0, 'rating')))
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||
@ -121,18 +121,28 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'info_dict': {
|
||||
'id': '22253814',
|
||||
'ext': 'mp4',
|
||||
'title': 'Winter is Coming',
|
||||
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
|
||||
'title': 'Winter Is Coming',
|
||||
'description': 'md5:a40e370925074260b1c8a633c632c63a',
|
||||
'timestamp': 1338306241,
|
||||
'upload_date': '20120529',
|
||||
'uploader': 'AENE-NEW',
|
||||
'duration': 2592.0,
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'chapters': 'count:5',
|
||||
'tags': 'count:14',
|
||||
'categories': ['Mountain Men'],
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'series': 'Mountain Men',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': 'Geo-restricted - This content is not available in your location.'
|
||||
'skip': 'Geo-restricted - This content is not available in your location.',
|
||||
}, {
|
||||
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
||||
'info_dict': {
|
||||
@ -143,6 +153,15 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'timestamp': 1452634428,
|
||||
'upload_date': '20160112',
|
||||
'uploader': 'AENE-NEW',
|
||||
'duration': 1277.695,
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'chapters': 'count:4',
|
||||
'tags': 'count:23',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'season': 'Season 9',
|
||||
'season_number': 9,
|
||||
'series': 'Duck Dynasty',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@ -152,28 +171,28 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'skip': 'This video is only available for users of participating TV providers.',
|
||||
}, {
|
||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.history.com/videos/history-of-valentines-day',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -190,14 +209,14 @@ class AENetworksListBaseIE(AENetworksBaseIE):
|
||||
%s(slug: "%s") {
|
||||
%s
|
||||
}
|
||||
}''' % (resource, slug, fields),
|
||||
}''' % (resource, slug, fields), # noqa: UP031
|
||||
}))['data'][resource]
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, slug = self._match_valid_url(url).groups()
|
||||
_, brand = self._DOMAIN_MAP[domain]
|
||||
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
|
||||
base_url = 'http://watch.%s' % domain
|
||||
base_url = f'http://watch.{domain}'
|
||||
|
||||
entries = []
|
||||
for item in (playlist.get(self._ITEMS_KEY) or []):
|
||||
@ -229,10 +248,10 @@ class AENetworksCollectionIE(AENetworksListBaseIE):
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.historyvault.com/collections/mysteryquest',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}]
|
||||
_RESOURCE = 'list'
|
||||
_ITEMS_KEY = 'items'
|
||||
@ -290,7 +309,7 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||
'info_dict': {
|
||||
'id': '40700995724',
|
||||
'ext': 'mp4',
|
||||
'title': "History of Valentine’s Day",
|
||||
'title': 'History of Valentine’s Day',
|
||||
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||
'timestamp': 1375819729,
|
||||
'upload_date': '20130806',
|
||||
@ -338,12 +357,13 @@ class BiographyIE(AENetworksBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': '404 Not Found',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
player_url = self._search_regex(
|
||||
r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
|
||||
rf'<phoenix-iframe[^>]+src="({HistoryPlayerIE._VALID_URL})',
|
||||
webpage, 'player URL')
|
||||
return self.url_result(player_url, HistoryPlayerIE.ie_key())
|
||||
|
@ -16,8 +16,8 @@ class AeonCoIE(InfoExtractor):
|
||||
'uploader': 'Semiconductor',
|
||||
'uploader_id': 'semiconductor',
|
||||
'uploader_url': 'https://vimeo.com/semiconductor',
|
||||
'duration': 348
|
||||
}
|
||||
'duration': 348,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
|
||||
'md5': '03582d795382e49f2fd0b427b55de409',
|
||||
@ -29,8 +29,8 @@ class AeonCoIE(InfoExtractor):
|
||||
'uploader': 'Aeon Video',
|
||||
'uploader_id': 'aeonvideo',
|
||||
'uploader_url': 'https://vimeo.com/aeonvideo',
|
||||
'duration': 1344
|
||||
}
|
||||
'duration': 1344,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out',
|
||||
'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b',
|
||||
|
@ -1,142 +1,26 @@
|
||||
import datetime as dt
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import Request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
date_from_str,
|
||||
UserNotLive,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
int_or_none,
|
||||
qualities,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AfreecaTVIE(InfoExtractor):
|
||||
IE_NAME = 'afreecatv'
|
||||
IE_DESC = 'afreecatv.com'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
|
||||
(?:
|
||||
/app/(?:index|read_ucc_bbs)\.cgi|
|
||||
/player/[Pp]layer\.(?:swf|html)
|
||||
)\?.*?\bnTitleNo=|
|
||||
vod\.afreecatv\.com/(PLAYER/STATION|player)/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
class AfreecaTVBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'afreecatv'
|
||||
_TESTS = [{
|
||||
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
|
||||
'info_dict': {
|
||||
'id': '36164052',
|
||||
'ext': 'mp4',
|
||||
'title': '데일리 에이프릴 요정들의 시상식!',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'upload_date': '20160503',
|
||||
},
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
|
||||
'info_dict': {
|
||||
'id': '36153164',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': 'd8b7c174568da61d774ef0203159bf97',
|
||||
'info_dict': {
|
||||
'id': '36153164_1',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}, {
|
||||
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
|
||||
'info_dict': {
|
||||
'id': '36153164_2',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}],
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
# non standard key
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
|
||||
'info_dict': {
|
||||
'id': '20170411_BE689A0E_190960999_1_2_h',
|
||||
'ext': 'mp4',
|
||||
'title': '혼자사는여자집',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': '♥이슬이',
|
||||
'uploader_id': 'dasl8121',
|
||||
'upload_date': '20170411',
|
||||
'duration': 213,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# adult content
|
||||
'url': 'https://vod.afreecatv.com/player/97267690',
|
||||
'info_dict': {
|
||||
'id': '20180327_27901457_202289533_1',
|
||||
'ext': 'mp4',
|
||||
'title': '[생]빨개요♥ (part 1)',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': '[SA]서아',
|
||||
'uploader_id': 'bjdyrksu',
|
||||
'upload_date': '20180327',
|
||||
'duration': 3601,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'The VOD does not exist',
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vod.afreecatv.com/player/96753363',
|
||||
'info_dict': {
|
||||
'id': '20230108_9FF5BEE1_244432674_1',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'rlantnghks',
|
||||
'uploader': '페이즈으',
|
||||
'duration': 10840,
|
||||
'thumbnail': 'http://videoimg.afreecatv.com/php/SnapshotLoad.php?rowKey=20230108_9FF5BEE1_244432674_1_r',
|
||||
'upload_date': '20230108',
|
||||
'title': '젠지 페이즈',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def parse_video_key(key):
|
||||
video_key = {}
|
||||
m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
|
||||
if m:
|
||||
video_key['upload_date'] = m.group('upload_date')
|
||||
video_key['part'] = int(m.group('part'))
|
||||
return video_key
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_form = {
|
||||
@ -150,21 +34,21 @@ class AfreecaTVIE(InfoExtractor):
|
||||
}
|
||||
|
||||
response = self._download_json(
|
||||
'https://login.afreecatv.com/app/LoginAction.php', None,
|
||||
'https://login.sooplive.co.kr/app/LoginAction.php', None,
|
||||
'Logging in', data=urlencode_postdata(login_form))
|
||||
|
||||
_ERRORS = {
|
||||
-4: 'Your account has been suspended due to a violation of our terms and policies.',
|
||||
-5: 'https://member.afreecatv.com/app/user_delete_progress.php',
|
||||
-6: 'https://login.afreecatv.com/membership/changeMember.php',
|
||||
-8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
|
||||
-9: 'https://member.afreecatv.com/app/pop_login_block.php',
|
||||
-11: 'https://login.afreecatv.com/afreeca/second_login.php',
|
||||
-12: 'https://member.afreecatv.com/app/user_security.php',
|
||||
-5: 'https://member.sooplive.co.kr/app/user_delete_progress.php',
|
||||
-6: 'https://login.sooplive.co.kr/membership/changeMember.php',
|
||||
-8: "Hello! Soop here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
|
||||
-9: 'https://member.sooplive.co.kr/app/pop_login_block.php',
|
||||
-11: 'https://login.sooplive.co.kr/afreeca/second_login.php',
|
||||
-12: 'https://member.sooplive.co.kr/app/user_security.php',
|
||||
0: 'The username does not exist or you have entered the wrong password.',
|
||||
-1: 'The username does not exist or you have entered the wrong password.',
|
||||
-3: 'You have entered your username/password incorrectly.',
|
||||
-7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
|
||||
-7: 'You cannot use your Global Soop account to access Korean Soop.',
|
||||
-10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
|
||||
-32008: 'You have failed to log in. Please contact our Help Center.',
|
||||
}
|
||||
@ -173,169 +57,206 @@ class AfreecaTVIE(InfoExtractor):
|
||||
if result != 1:
|
||||
error = _ERRORS.get(result, 'You have failed to log in.')
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s said: %s' % (self.IE_NAME, error),
|
||||
f'Unable to login: {self.IE_NAME} said: {error}',
|
||||
expected=True)
|
||||
|
||||
def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
|
||||
return self._download_json(Request(
|
||||
f'https://api.m.sooplive.co.kr/{endpoint}',
|
||||
data=data, headers=headers, query=query,
|
||||
extensions={'legacy_ssl': True}), display_id,
|
||||
'Downloading API JSON', 'Unable to download API JSON')
|
||||
|
||||
@staticmethod
|
||||
def _fixup_thumb(thumb_url):
|
||||
if not url_or_none(thumb_url):
|
||||
return None
|
||||
# Core would determine_ext as 'php' from the url, so we need to provide the real ext
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/11537
|
||||
return [{'url': thumb_url, 'ext': 'jpg'}]
|
||||
|
||||
|
||||
class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop'
|
||||
IE_DESC = 'sooplive.co.kr'
|
||||
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/(?:PLAYER/STATION|player)/(?P<id>\d+)/?(?:$|[?#&])'
|
||||
_TESTS = [{
|
||||
'url': 'https://vod.sooplive.co.kr/player/96753363',
|
||||
'info_dict': {
|
||||
'id': '20230108_9FF5BEE1_244432674_1',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'rlantnghks',
|
||||
'uploader': '페이즈으',
|
||||
'duration': 10840,
|
||||
'thumbnail': r're:https?://videoimg\.(?:sooplive\.co\.kr|afreecatv\.com)/.+',
|
||||
'upload_date': '20230108',
|
||||
'timestamp': 1673186405,
|
||||
'title': '젠지 페이즈',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# non standard key
|
||||
'url': 'http://vod.sooplive.co.kr/PLAYER/STATION/20515605',
|
||||
'info_dict': {
|
||||
'id': '20170411_BE689A0E_190960999_1_2_h',
|
||||
'ext': 'mp4',
|
||||
'title': '혼자사는여자집',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.(?:sooplive\.co\.kr|afreecatv\.com)/.+',
|
||||
'uploader': '♥이슬이',
|
||||
'uploader_id': 'dasl8121',
|
||||
'upload_date': '20170411',
|
||||
'timestamp': 1491929865,
|
||||
'duration': 213,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# adult content
|
||||
'url': 'https://vod.sooplive.co.kr/player/97267690',
|
||||
'info_dict': {
|
||||
'id': '20180327_27901457_202289533_1',
|
||||
'ext': 'mp4',
|
||||
'title': '[생]빨개요♥ (part 1)',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.(?:sooplive\.co\.kr|afreecatv\.com)/.+',
|
||||
'uploader': '[SA]서아',
|
||||
'uploader_id': 'bjdyrksu',
|
||||
'upload_date': '20180327',
|
||||
'duration': 3601,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'The VOD does not exist',
|
||||
}, {
|
||||
# adult content
|
||||
'url': 'https://vod.sooplive.co.kr/player/70395877',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# subscribers only
|
||||
'url': 'https://vod.sooplive.co.kr/player/104647403',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# private
|
||||
'url': 'https://vod.sooplive.co.kr/player/81669846',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
partial_view = False
|
||||
adult_view = False
|
||||
for _ in range(2):
|
||||
data = self._download_json(
|
||||
'https://api.m.afreecatv.com/station/video/a/view',
|
||||
video_id, headers={'Referer': url}, data=urlencode_postdata({
|
||||
'nTitleNo': video_id,
|
||||
'nApiLevel': 10,
|
||||
}))['data']
|
||||
if traverse_obj(data, ('code', {int})) == -6221:
|
||||
raise ExtractorError('The VOD does not exist', expected=True)
|
||||
query = {
|
||||
data = self._call_api(
|
||||
'station/video/a/view', video_id, headers={'Referer': url},
|
||||
data=urlencode_postdata({
|
||||
'nTitleNo': video_id,
|
||||
'nStationNo': data['station_no'],
|
||||
'nBbsNo': data['bbs_no'],
|
||||
}
|
||||
if partial_view:
|
||||
query['partialView'] = 'SKIP_ADULT'
|
||||
if adult_view:
|
||||
query['adultView'] = 'ADULT_VIEW'
|
||||
video_xml = self._download_xml(
|
||||
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
|
||||
video_id, 'Downloading video info XML%s'
|
||||
% (' (skipping adult)' if partial_view else ''),
|
||||
video_id, headers={
|
||||
'Referer': url,
|
||||
}, query=query)
|
||||
'nApiLevel': 10,
|
||||
}))['data']
|
||||
|
||||
flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
|
||||
if flag and flag == 'SUCCEED':
|
||||
break
|
||||
if flag == 'PARTIAL_ADULT':
|
||||
self.report_warning(
|
||||
'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
|
||||
'Only content suitable for all ages will be downloaded. '
|
||||
'Provide account credentials if you wish to download restricted content.')
|
||||
partial_view = True
|
||||
continue
|
||||
elif flag == 'ADULT':
|
||||
if not adult_view:
|
||||
adult_view = True
|
||||
continue
|
||||
error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
|
||||
else:
|
||||
error = flag
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to download video info')
|
||||
error_code = traverse_obj(data, ('code', {int}))
|
||||
if error_code == -6221:
|
||||
raise ExtractorError('The VOD does not exist', expected=True)
|
||||
elif error_code == -6205:
|
||||
raise ExtractorError('This VOD is private', expected=True)
|
||||
|
||||
video_element = video_xml.findall('./track/video')[-1]
|
||||
if video_element is None or video_element.text is None:
|
||||
raise ExtractorError(
|
||||
'Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
video_url = video_element.text.strip()
|
||||
|
||||
title = xpath_text(video_xml, './track/title', 'title', fatal=True)
|
||||
|
||||
uploader = xpath_text(video_xml, './track/nickname', 'uploader')
|
||||
uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
|
||||
duration = int_or_none(xpath_text(
|
||||
video_xml, './track/duration', 'duration'))
|
||||
thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
|
||||
|
||||
common_entry = {
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
info = common_entry.copy()
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
common_info = traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('writer_nick', {str}),
|
||||
'uploader_id': ('bj_id', {str}),
|
||||
'duration': ('total_file_duration', {int_or_none(scale=1000)}),
|
||||
'thumbnails': ('thumb', {self._fixup_thumb}),
|
||||
})
|
||||
|
||||
if not video_url:
|
||||
entries = []
|
||||
file_elements = video_element.findall('./file')
|
||||
one = len(file_elements) == 1
|
||||
for file_num, file_element in enumerate(file_elements, start=1):
|
||||
file_url = url_or_none(file_element.text)
|
||||
if not file_url:
|
||||
continue
|
||||
key = file_element.get('key', '')
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'^(\d{8})_', key, 'upload date', default=None))
|
||||
if upload_date is not None:
|
||||
# sometimes the upload date isn't included in the file name
|
||||
# instead, another random ID is, which may parse as a valid
|
||||
# date but be wildly out of a reasonable range
|
||||
parsed_date = date_from_str(upload_date)
|
||||
if parsed_date.year < 2000 or parsed_date.year >= 2100:
|
||||
upload_date = None
|
||||
file_duration = int_or_none(file_element.get('duration'))
|
||||
format_id = key if key else '%s_%s' % (video_id, file_num)
|
||||
if determine_ext(file_url) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls',
|
||||
note='Downloading part %d m3u8 information' % file_num)
|
||||
else:
|
||||
formats = [{
|
||||
'url': file_url,
|
||||
'format_id': 'http',
|
||||
}]
|
||||
if not formats and not self.get_param('ignore_no_formats'):
|
||||
continue
|
||||
file_info = common_entry.copy()
|
||||
file_info.update({
|
||||
'id': format_id,
|
||||
'title': title if one else '%s (part %d)' % (title, file_num),
|
||||
'upload_date': upload_date,
|
||||
'duration': file_duration,
|
||||
'formats': formats,
|
||||
})
|
||||
entries.append(file_info)
|
||||
entries_info = info.copy()
|
||||
entries_info.update({
|
||||
'_type': 'multi_video',
|
||||
'entries': entries,
|
||||
})
|
||||
return entries_info
|
||||
entries = []
|
||||
for file_num, file_element in enumerate(
|
||||
traverse_obj(data, ('files', lambda _, v: url_or_none(v['file']))), start=1):
|
||||
file_url = file_element['file']
|
||||
if determine_ext(file_url) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4', m3u8_id='hls',
|
||||
note=f'Downloading part {file_num} m3u8 information')
|
||||
else:
|
||||
formats = [{
|
||||
'url': file_url,
|
||||
'format_id': 'http',
|
||||
}]
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
info['formats'] = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
else:
|
||||
app, playpath = video_url.split('mp4:')
|
||||
info.update({
|
||||
'url': app,
|
||||
'ext': 'flv',
|
||||
'play_path': 'mp4:' + playpath,
|
||||
'rtmp_live': True, # downloading won't end without this
|
||||
entries.append({
|
||||
**common_info,
|
||||
'id': file_element.get('file_info_key') or f'{video_id}_{file_num}',
|
||||
'title': f'{common_info.get("title") or "Untitled"} (part {file_num})',
|
||||
'formats': formats,
|
||||
**traverse_obj(file_element, {
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('file_start', {parse_iso8601(delimiter=' ', timezone=dt.timedelta(hours=9))}),
|
||||
}),
|
||||
})
|
||||
|
||||
return info
|
||||
if traverse_obj(data, ('adult_status', {str})) == 'notLogin':
|
||||
if not entries:
|
||||
self.raise_login_required(
|
||||
'Only users older than 19 are able to watch this video', method='password')
|
||||
self.report_warning(
|
||||
'In accordance with local laws and regulations, underage users are '
|
||||
'restricted from watching adult content. Only content suitable for all '
|
||||
f'ages will be downloaded. {self._login_hint("password")}')
|
||||
|
||||
if not entries and traverse_obj(data, ('sub_upload_type', {str})):
|
||||
self.raise_login_required('This VOD is for subscribers only', method='password')
|
||||
|
||||
if len(entries) == 1:
|
||||
return {
|
||||
**entries[0],
|
||||
'title': common_info.get('title'),
|
||||
}
|
||||
|
||||
common_info['timestamp'] = traverse_obj(entries, (..., 'timestamp'), get_all=False)
|
||||
|
||||
return self.playlist_result(entries, video_id, multi_video=True, **common_info)
|
||||
|
||||
|
||||
class AfreecaTVLiveIE(AfreecaTVIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
IE_NAME = 'afreecatv:live'
|
||||
_VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
|
||||
class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop:catchstory'
|
||||
IE_DESC = 'sooplive.co.kr catch story'
|
||||
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/player/(?P<id>\d+)/catchstory'
|
||||
_TESTS = [{
|
||||
'url': 'https://play.afreecatv.com/pyh3646/237852185',
|
||||
'url': 'https://vod.sooplive.co.kr/player/103247/catchstory',
|
||||
'info_dict': {
|
||||
'id': '103247',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._call_api(
|
||||
'catchstory/a/view', video_id, headers={'Referer': url},
|
||||
query={'aStoryListIdx': '', 'nStoryIdx': video_id})
|
||||
|
||||
return self.playlist_result(self._entries(data), video_id)
|
||||
|
||||
def _entries(self, data):
|
||||
# 'files' is always a list with 1 element
|
||||
yield from traverse_obj(data, (
|
||||
'data', lambda _, v: v['story_type'] == 'catch',
|
||||
'catch_list', lambda _, v: v['files'][0]['file'], {
|
||||
'id': ('files', 0, 'file_info_key', {str}),
|
||||
'url': ('files', 0, 'file', {url_or_none}),
|
||||
'duration': ('files', 0, 'duration', {int_or_none(scale=1000)}),
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('writer_nick', {str}),
|
||||
'uploader_id': ('writer_id', {str}),
|
||||
'thumbnails': ('thumb', {self._fixup_thumb}),
|
||||
'timestamp': ('write_timestamp', {int_or_none}),
|
||||
}))
|
||||
|
||||
|
||||
class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop:live'
|
||||
IE_DESC = 'sooplive.co.kr livestreams'
|
||||
_VALID_URL = r'https?://play\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)(?:/(?P<bno>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
|
||||
'info_dict': {
|
||||
'id': '237852185',
|
||||
'ext': 'mp4',
|
||||
@ -347,94 +268,121 @@ class AfreecaTVLiveIE(AfreecaTVIE): # XXX: Do not subclass from concrete IE
|
||||
},
|
||||
'skip': 'Livestream has ended',
|
||||
}, {
|
||||
'url': 'http://play.afreeca.com/pyh3646/237852185',
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://play.afreeca.com/pyh3646',
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'
|
||||
_LIVE_API_URL = 'https://live.sooplive.co.kr/afreeca/player_live_api.php'
|
||||
_WORKING_CDNS = [
|
||||
'gcp_cdn', # live-global-cdn-v02.sooplive.co.kr
|
||||
'gs_cdn_pc_app', # pc-app.stream.sooplive.co.kr
|
||||
'gs_cdn_mobile_web', # mobile-web.stream.sooplive.co.kr
|
||||
'gs_cdn_pc_web', # pc-web.stream.sooplive.co.kr
|
||||
]
|
||||
_BAD_CDNS = [
|
||||
'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve)
|
||||
'gs_cdn_chromecast', # chromecast.stream.sooplive.co.kr (HTTP Error 400)
|
||||
'azure_cdn', # live-global-cdn-v01.sooplive.co.kr (cannot resolve)
|
||||
'aws_cf', # live-global-cdn-v03.sooplive.co.kr (cannot resolve)
|
||||
'kt_cdn', # kt.stream.sooplive.co.kr (HTTP Error 400)
|
||||
]
|
||||
|
||||
_QUALITIES = ('sd', 'hd', 'hd2k', 'original')
|
||||
def _extract_formats(self, channel_info, broadcast_no, aid):
|
||||
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.co.kr'
|
||||
|
||||
# If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
|
||||
default_cdn_ids = orderedSet([
|
||||
*traverse_obj(channel_info, ('CDN', {str}, all, lambda _, v: v not in self._BAD_CDNS)),
|
||||
*self._WORKING_CDNS,
|
||||
])
|
||||
cdn_ids = self._configuration_arg('cdn', default_cdn_ids)
|
||||
|
||||
for attempt, cdn_id in enumerate(cdn_ids, start=1):
|
||||
m3u8_url = traverse_obj(self._download_json(
|
||||
urljoin(stream_base_url, 'broad_stream_assign.html'), broadcast_no,
|
||||
f'Downloading {cdn_id} stream info', f'Unable to download {cdn_id} stream info',
|
||||
fatal=False, query={
|
||||
'return_type': cdn_id,
|
||||
'broad_key': f'{broadcast_no}-common-master-hls',
|
||||
}), ('view_url', {url_or_none}))
|
||||
try:
|
||||
return self._extract_m3u8_formats(
|
||||
m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
|
||||
headers={'Referer': 'https://play.sooplive.co.kr/'})
|
||||
except ExtractorError as e:
|
||||
if attempt == len(cdn_ids):
|
||||
raise
|
||||
self.report_warning(
|
||||
f'{e.cause or e.msg}. Retrying... (attempt {attempt} of {len(cdn_ids)})')
|
||||
|
||||
def _real_extract(self, url):
|
||||
broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno')
|
||||
password = self.get_param('videopassword')
|
||||
channel_info = traverse_obj(self._download_json(
|
||||
self._LIVE_API_URL, broadcaster_id, data=urlencode_postdata({'bid': broadcaster_id})),
|
||||
('CHANNEL', {dict})) or {}
|
||||
|
||||
info = self._download_json(self._LIVE_API_URL, broadcaster_id, fatal=False,
|
||||
data=urlencode_postdata({'bid': broadcaster_id})) or {}
|
||||
channel_info = info.get('CHANNEL') or {}
|
||||
broadcaster_id = channel_info.get('BJID') or broadcaster_id
|
||||
broadcast_no = channel_info.get('BNO') or broadcast_no
|
||||
password_protected = channel_info.get('BPWD')
|
||||
if not broadcast_no:
|
||||
raise ExtractorError(f'Unable to extract broadcast number ({broadcaster_id} may not be live)', expected=True)
|
||||
if password_protected == 'Y' and password is None:
|
||||
result = channel_info.get('RESULT')
|
||||
if result == 0:
|
||||
raise UserNotLive(video_id=broadcaster_id)
|
||||
elif result == -6:
|
||||
self.raise_login_required(
|
||||
'This channel is streaming for subscribers only', method='password')
|
||||
raise ExtractorError('Unable to extract broadcast number')
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
if channel_info.get('BPWD') == 'Y' and password is None:
|
||||
raise ExtractorError(
|
||||
'This livestream is protected by a password, use the --video-password option',
|
||||
expected=True)
|
||||
|
||||
formats = []
|
||||
quality_key = qualities(self._QUALITIES)
|
||||
for quality_str in self._QUALITIES:
|
||||
params = {
|
||||
token_info = traverse_obj(self._download_json(
|
||||
self._LIVE_API_URL, broadcast_no, 'Downloading access token for stream',
|
||||
'Unable to download access token for stream', data=urlencode_postdata(filter_dict({
|
||||
'bno': broadcast_no,
|
||||
'stream_type': 'common',
|
||||
'type': 'aid',
|
||||
'quality': quality_str,
|
||||
}
|
||||
if password is not None:
|
||||
params['pwd'] = password
|
||||
aid_response = self._download_json(
|
||||
self._LIVE_API_URL, broadcast_no, fatal=False,
|
||||
data=urlencode_postdata(params),
|
||||
note=f'Downloading access token for {quality_str} stream',
|
||||
errnote=f'Unable to download access token for {quality_str} stream')
|
||||
aid = traverse_obj(aid_response, ('CHANNEL', 'AID'))
|
||||
if not aid:
|
||||
continue
|
||||
'quality': 'master',
|
||||
'pwd': password,
|
||||
}))), ('CHANNEL', {dict})) or {}
|
||||
aid = token_info.get('AID')
|
||||
if not aid:
|
||||
result = token_info.get('RESULT')
|
||||
if result == 0:
|
||||
raise ExtractorError('This livestream has ended', expected=True)
|
||||
elif result == -6:
|
||||
self.raise_login_required('This livestream is for subscribers only', method='password')
|
||||
raise ExtractorError('Unable to extract access token')
|
||||
|
||||
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
|
||||
stream_info = self._download_json(
|
||||
f'{stream_base_url}/broad_stream_assign.html', broadcast_no, fatal=False,
|
||||
query={
|
||||
'return_type': channel_info.get('CDN', 'gcp_cdn'),
|
||||
'broad_key': f'{broadcast_no}-common-{quality_str}-hls',
|
||||
},
|
||||
note=f'Downloading metadata for {quality_str} stream',
|
||||
errnote=f'Unable to download metadata for {quality_str} stream') or {}
|
||||
formats = self._extract_formats(channel_info, broadcast_no, aid)
|
||||
|
||||
if stream_info.get('view_url'):
|
||||
formats.append({
|
||||
'format_id': quality_str,
|
||||
'url': update_url_query(stream_info['view_url'], {'aid': aid}),
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8',
|
||||
'quality': quality_key(quality_str),
|
||||
})
|
||||
|
||||
station_info = self._download_json(
|
||||
'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
|
||||
query={'szBjId': broadcaster_id}, fatal=False,
|
||||
note='Downloading channel metadata', errnote='Unable to download channel metadata') or {}
|
||||
station_info = traverse_obj(self._download_json(
|
||||
'https://st.sooplive.co.kr/api/get_station_status.php', broadcast_no,
|
||||
'Downloading channel metadata', 'Unable to download channel metadata',
|
||||
query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {}
|
||||
|
||||
return {
|
||||
'id': broadcast_no,
|
||||
'title': channel_info.get('TITLE') or station_info.get('station_title'),
|
||||
'uploader': channel_info.get('BJNICK') or station_info.get('station_name'),
|
||||
'uploader_id': broadcaster_id,
|
||||
'timestamp': unified_timestamp(station_info.get('broad_start')),
|
||||
'timestamp': parse_iso8601(station_info.get('broad_start'), delimiter=' ', timezone=dt.timedelta(hours=9)),
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
'http_headers': {'Referer': url},
|
||||
}
|
||||
|
||||
|
||||
class AfreecaTVUserIE(InfoExtractor):
|
||||
IE_NAME = 'afreecatv:user'
|
||||
_VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'
|
||||
class AfreecaTVUserIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop:user'
|
||||
_VALID_URL = r'https?://ch\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)/vods/?(?P<slug_type>[^/?#]+)?'
|
||||
_TESTS = [{
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/review',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@ -442,7 +390,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 218,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
|
||||
'url': 'https://ch.sooplive.co.kr/parang1995/vods/highlight',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'parang1995',
|
||||
@ -450,7 +398,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 997,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@ -458,7 +406,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 221,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/balloonclip',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@ -470,12 +418,12 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
|
||||
def _fetch_page(self, user_id, user_type, page):
|
||||
page += 1
|
||||
info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id,
|
||||
info = self._download_json(f'https://chapi.sooplive.co.kr/api/{user_id}/vods/{user_type}', user_id,
|
||||
query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
|
||||
note=f'Downloading {user_type} video page {page}')
|
||||
for item in info['data']:
|
||||
yield self.url_result(
|
||||
f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
|
||||
f'https://vod.sooplive.co.kr/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id, user_type = self._match_valid_url(url).group('id', 'slug_type')
|
||||
|
@ -146,7 +146,7 @@ class TokFMPodcastIE(InfoExtractor):
|
||||
'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
|
||||
'info_dict': {
|
||||
'id': '91275',
|
||||
'ext': 'aac',
|
||||
'ext': 'mp3',
|
||||
'title': 'md5:a9b15488009065556900169fb8061cce',
|
||||
'episode': 'md5:a9b15488009065556900169fb8061cce',
|
||||
'series': 'Analizy',
|
||||
@ -164,23 +164,20 @@ class TokFMPodcastIE(InfoExtractor):
|
||||
raise ExtractorError('No such podcast', expected=True)
|
||||
metadata = metadata[0]
|
||||
|
||||
formats = []
|
||||
for ext in ('aac', 'mp3'):
|
||||
url_data = self._download_json(
|
||||
f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}',
|
||||
media_id, 'Downloading podcast %s URL' % ext)
|
||||
# prevents inserting the mp3 (default) multiple times
|
||||
if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']:
|
||||
formats.append({
|
||||
'url': url_data['link_ssl'],
|
||||
'ext': ext,
|
||||
'vcodec': 'none',
|
||||
'acodec': ext,
|
||||
})
|
||||
mp3_url = self._download_json(
|
||||
'https://api.podcast.radioagora.pl/api4/getSongUrl',
|
||||
media_id, 'Downloading podcast mp3 URL', query={
|
||||
'podcast_id': media_id,
|
||||
'device_id': str(uuid.uuid4()),
|
||||
'ppre': 'false',
|
||||
'audio': 'mp3',
|
||||
})['link_ssl']
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'formats': formats,
|
||||
'url': mp3_url,
|
||||
'vcodec': 'none',
|
||||
'ext': 'mp3',
|
||||
'title': metadata.get('podcast_name'),
|
||||
'series': metadata.get('series_name'),
|
||||
'episode': metadata.get('podcast_name'),
|
||||
@ -206,8 +203,8 @@ class TokFMAuditionIE(InfoExtractor):
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _create_url(id):
|
||||
return f'https://audycje.tokfm.pl/audycja/{id}'
|
||||
def _create_url(video_id):
|
||||
return f'https://audycje.tokfm.pl/audycja/{video_id}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
audition_id = self._match_id(url)
|
||||
|
@ -1,63 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class AirMozillaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
|
||||
_TEST = {
|
||||
'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
|
||||
'md5': '8d02f53ee39cf006009180e21df1f3ba',
|
||||
'info_dict': {
|
||||
'id': '6x4q2w',
|
||||
'ext': 'mp4',
|
||||
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
|
||||
'thumbnail': r're:https?://.*/poster\.jpg',
|
||||
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
|
||||
'timestamp': 1422487800,
|
||||
'upload_date': '20150128',
|
||||
'location': 'SFO Commons',
|
||||
'duration': 3780,
|
||||
'view_count': int,
|
||||
'categories': ['Main', 'Privacy'],
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')
|
||||
|
||||
embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
|
||||
jwconfig = self._parse_json(self._search_regex(
|
||||
r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config']
|
||||
|
||||
info_dict = self._parse_jwplayer_data(jwconfig, video_id)
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'Views since archived: ([0-9]+)',
|
||||
webpage, 'view count', fatal=False))
|
||||
timestamp = parse_iso8601(self._html_search_regex(
|
||||
r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'url': self._og_search_url(webpage),
|
||||
'display_id': display_id,
|
||||
'description': self._og_search_description(webpage),
|
||||
'timestamp': timestamp,
|
||||
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
|
||||
})
|
||||
|
||||
return info_dict
|
@ -5,7 +5,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
traverse_obj
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
@ -26,7 +26,7 @@ class AirTVIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
|
||||
'timestamp': 1664792603,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# with youtube_id
|
||||
'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
|
||||
@ -54,7 +54,7 @@ class AirTVIE(InfoExtractor):
|
||||
'channel': 'Newsflare',
|
||||
'duration': 37,
|
||||
'upload_date': '20180511',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _get_formats_and_subtitle(self, json_data, video_id):
|
||||
|
@ -22,7 +22,7 @@ class AitubeKZVideoIE(InfoExtractor):
|
||||
'timestamp': 1667370519,
|
||||
'title': 'Ангел хранитель 1 серия',
|
||||
'channel_follower_count': int,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# embed url
|
||||
'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c',
|
||||
|
@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
try_get,
|
||||
@ -44,7 +43,7 @@ class AliExpressLiveIE(InfoExtractor):
|
||||
'title': title,
|
||||
'thumbnail': data.get('coverUrl'),
|
||||
'uploader': try_get(
|
||||
data, lambda x: x['followBar']['name'], compat_str),
|
||||
data, lambda x: x['followBar']['name'], str),
|
||||
'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ class AlJazeeraIE(InfoExtractor):
|
||||
'timestamp': 1636219149,
|
||||
'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.',
|
||||
'upload_date': '20211106',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu',
|
||||
'info_dict': {
|
||||
@ -33,7 +33,7 @@ class AlJazeeraIE(InfoExtractor):
|
||||
BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P<account>\d+)/(?P<player_id>[a-zA-Z0-9]+)_(?P<embed>[^/]+)/index.html\?videoId=(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
base, post_type, id = self._match_valid_url(url).groups()
|
||||
base, post_type, display_id = self._match_valid_url(url).groups()
|
||||
wp = {
|
||||
'balkans.aljazeera.net': 'ajb',
|
||||
'chinese.aljazeera.net': 'chinese',
|
||||
@ -47,11 +47,11 @@ class AlJazeeraIE(InfoExtractor):
|
||||
'news': 'news',
|
||||
}[post_type.split('/')[0]]
|
||||
video = self._download_json(
|
||||
f'https://{base}/graphql', id, query={
|
||||
f'https://{base}/graphql', display_id, query={
|
||||
'wp-site': wp,
|
||||
'operationName': 'ArchipelagoSingleArticleQuery',
|
||||
'variables': json.dumps({
|
||||
'name': id,
|
||||
'name': display_id,
|
||||
'postType': post_type,
|
||||
}),
|
||||
}, headers={
|
||||
@ -64,7 +64,7 @@ class AlJazeeraIE(InfoExtractor):
|
||||
embed = 'default'
|
||||
|
||||
if video_id is None:
|
||||
webpage = self._download_webpage(url, id)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id',
|
||||
group=(1, 2, 3, 4), default=(None, None, None, None))
|
||||
@ -73,11 +73,11 @@ class AlJazeeraIE(InfoExtractor):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': url,
|
||||
'ie_key': 'Generic'
|
||||
'ie_key': 'Generic',
|
||||
}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}',
|
||||
'ie_key': 'BrightcoveNew'
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
@ -95,11 +94,11 @@ class AllocineIE(InfoExtractor):
|
||||
duration = int_or_none(video.get('duration'))
|
||||
view_count = int_or_none(video.get('view_count'))
|
||||
timestamp = unified_timestamp(try_get(
|
||||
video, lambda x: x['added_at']['date'], compat_str))
|
||||
video, lambda x: x['added_at']['date'], str))
|
||||
else:
|
||||
video_id = display_id
|
||||
media_data = self._download_json(
|
||||
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
|
||||
f'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media={video_id}', display_id)
|
||||
title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné'))
|
||||
for key, value in media_data['video'].items():
|
||||
if not key.endswith('Path'):
|
||||
|
252
plugins/youtube_download/yt_dlp/extractor/allstar.py
Normal file
252
plugins/youtube_download/yt_dlp/extractor/allstar.py
Normal file
@ -0,0 +1,252 @@
|
||||
import functools
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_qs,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
# GraphQL selection set requested for every clip and montage.
_FIELDS = '''
    _id
    clipImageSource
    clipImageThumb
    clipLink
    clipTitle
    createdDate
    shareId
    user { _id }
    username
    views'''

# Additional fields that are only requested by the clip queries below.
_EXTRA_FIELDS = '''
    clipLength
    clipSizeBytes'''

# Query documents keyed by the identifier the extractors look them up with:
# 'clip' / 'montage' fetch a single video, while 'Clips' / 'Montages' /
# 'Mobile Clips' fetch one page of a user's videos.
# %-formatting is kept on purpose: the templates are full of literal braces.
_QUERIES = {
    'clip': '''query ($id: String!) {
        video: getClip(clipIdentifier: $id) {
            %s %s
        }
    }''' % (_FIELDS, _EXTRA_FIELDS),  # noqa: UP031
    'montage': '''query ($id: String!) {
        video: getMontage(clipIdentifier: $id) {
            %s
        }
    }''' % _FIELDS,  # noqa: UP031
    'Clips': '''query ($page: Int!, $user: String!, $game: Int) {
        videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) {
            data { %s %s }
        }
    }''' % (_FIELDS, _EXTRA_FIELDS),  # noqa: UP031
    'Montages': '''query ($page: Int!, $user: String!) {
        videos: montages(search: createdDate, page: $page, user: $user) {
            data { %s }
        }
    }''' % _FIELDS,  # noqa: UP031
    'Mobile Clips': '''query ($page: Int!, $user: String!) {
        videos: clips(search: createdDate, page: $page, user: $user, mobile: true) {
            data { %s %s }
        }
    }''' % (_FIELDS, _EXTRA_FIELDS),  # noqa: UP031
}
|
||||
|
||||
|
||||
class AllstarBaseIE(InfoExtractor):
    """Helpers shared by the allstar.gg extractors: metadata mapping and GraphQL access."""

    @staticmethod
    def _parse_video_data(video_data):
        """Translate one clip/montage record from the API into info-dict fields."""

        def _media_url(path):
            # Resolve the value against the media host
            return urljoin('https://media.allstar.gg/', path)

        info = traverse_obj(video_data, {
            'id': ('_id', {str}),
            'display_id': ('shareId', {str}),
            'title': ('clipTitle', {str}),
            'url': ('clipLink', {_media_url}),
            'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {_media_url}}),
            'duration': ('clipLength', {int_or_none}),
            'filesize': ('clipSizeBytes', {int_or_none}),
            'timestamp': ('createdDate', {int_or_none(scale=1000)}),
            'uploader': ('username', {str}),
            'uploader_id': ('user', '_id', {str}),
            'view_count': ('views', {int_or_none}),
        })

        # The page URL is only rebuilt when both the id and the media URL were found;
        # a '/clips/' component in the media URL selects 'clip', anything else 'montage'
        if info.get('id') and info.get('url'):
            kind = 'clip' if '/clips/' in info['url'] else 'montage'
            info['webpage_url'] = f'https://allstar.gg/{kind}?{kind}={info["id"]}'

        # Results are always attributed to the single-video extractor
        info['extractor_key'] = AllstarIE.ie_key()
        info['extractor'] = AllstarIE.IE_NAME
        info['uploader_url'] = urljoin('https://allstar.gg/u/', info.get('uploader_id'))

        return info

    def _call_api(self, query, variables, path, video_id=None, note=None):
        """POST a GraphQL query and return the part of the response found at `path`.

        Raises ExtractorError carrying the joined messages when the response
        lists any errors.
        """
        payload = json.dumps({'variables': variables, 'query': query}).encode()
        response = self._download_json(
            'https://a1.allstar.gg/graphql', video_id, note=note,
            headers={'content-type': 'application/json'}, data=payload)

        error_messages = traverse_obj(response, ('errors', ..., 'message', {str}))
        if error_messages:
            raise ExtractorError('; '.join(error_messages))

        return traverse_obj(response, path)
|
||||
|
||||
|
||||
class AllstarIE(AllstarBaseIE):
    """Extractor for a single allstar.gg clip or montage page."""

    # The query-string key has to repeat the path component (clip?clip=... / montage?montage=...)
    _VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?P<type>(?:clip|montage))\?(?P=type)=(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://allstar.gg/clip?clip=64482c2da9eec30008a67d1b',
        'info_dict': {
            'id': '64482c2da9eec30008a67d1b',
            'title': '4K on Inferno',
            'url': 'md5:66befb5381eef0c9456026386c25fa55',
            'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
            'uploader': 'chrk.',
            'ext': 'mp4',
            'duration': 20,
            'filesize': 21199257,
            'timestamp': 1682451501,
            'uploader_id': '62b8bdfc9021052f7905882d',
            'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
            'upload_date': '20230425',
            'view_count': int,
        },
    }, {
        'url': 'https://allstar.gg/clip?clip=8LJLY4JKB',
        'info_dict': {
            'id': '64a1ec6b887f4c0008dc50b8',
            'display_id': '8LJLY4JKB',
            'title': 'AK-47 3K on Mirage',
            'url': 'md5:dde224fd12f035c0e2529a4ae34c4283',
            'ext': 'mp4',
            'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
            'duration': 16,
            'filesize': 30175859,
            'timestamp': 1688333419,
            'uploader': 'cherokee',
            'uploader_id': '62b8bdfc9021052f7905882d',
            'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
            'upload_date': '20230702',
            'view_count': int,
        },
    }, {
        'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c',
        'info_dict': {
            'id': '643e64089da7e9363e1fa66c',
            'display_id': 'APQLGM2IMXW',
            'title': 'cherokee Rapid Fire Snipers Montage',
            'url': 'md5:a3ee356022115db2b27c81321d195945',
            'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
            'ext': 'mp4',
            'timestamp': 1681810448,
            'uploader': 'cherokee',
            'uploader_id': '62b8bdfc9021052f7905882d',
            'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
            'upload_date': '20230418',
            'view_count': int,
        },
    }, {
        'url': 'https://allstar.gg/montage?montage=RILJMH6QOS',
        'info_dict': {
            'id': '64a2697372ce3703de29e868',
            'display_id': 'RILJMH6QOS',
            'title': 'cherokee Rapid Fire Snipers Montage',
            'url': 'md5:d5672e6f88579730c2310a80fdbc4030',
            'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
            'ext': 'mp4',
            'timestamp': 1688365434,
            'uploader': 'cherokee',
            'uploader_id': '62b8bdfc9021052f7905882d',
            'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
            'upload_date': '20230703',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Fetch the clip or montage named in the URL and map it to an info dict."""
        # The matched path component ('clip' or 'montage') doubles as the _QUERIES key
        kind, video_id = self._match_valid_url(url).group('type', 'id')
        video_data = self._call_api(
            _QUERIES.get(kind), {'id': video_id}, ('data', 'video'), video_id)

        return self._parse_video_data(video_data)
|
||||
|
||||
|
||||
class AllstarProfileIE(AllstarBaseIE):
    """Extractor for an allstar.gg user profile, returned as a paged playlist."""

    _VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?:profile\?user=|u/)(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://allstar.gg/profile?user=62b8bdfc9021052f7905882d',
        'info_dict': {
            'id': '62b8bdfc9021052f7905882d-clips',
            'title': 'cherokee - Clips',
        },
        'playlist_mincount': 15,
    }, {
        'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips',
        'info_dict': {
            'id': '62b8bdfc9021052f7905882d-clips-730',
            'title': 'cherokee - Clips - 730',
        },
        'playlist_mincount': 15,
    }, {
        'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages',
        'info_dict': {
            'id': '62b8bdfc9021052f7905882d-montages',
            'title': 'cherokee - Montages',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips',
        'info_dict': {
            'id': '62b8bdfc9021052f7905882d-mobile',
            'title': 'cherokee - Mobile Clips',
        },
        'playlist_mincount': 1,
    }]

    _PAGE_SIZE = 10

    def _get_page(self, user_id, display_id, game, query, page_num):
        """Yield the parsed entries of one result page.

        `page_num` is the index handed in by the paged list; the API is sent
        that index plus one.
        """
        page_num += 1

        videos = self._call_api(
            query, {
                'user': user_id,
                'page': page_num,
                'game': game,
            }, ('data', 'videos', 'data'), display_id, f'Downloading page {page_num}')

        # _call_api hands back None when the response has nothing at the requested
        # path; treat that as an empty page rather than failing with a TypeError
        for video_data in videos or []:
            yield self._parse_video_data(video_data)

    def _real_extract(self, url):
        """Resolve the profile to a user id and build the playlist for the requested view.

        Raises ExtractorError when the user id cannot be found or when the
        `view` query parameter names an unsupported listing.
        """
        display_id = self._match_id(url)
        profile_data = self._download_json(
            urljoin('https://api.allstar.gg/v1/users/profile/', display_id), display_id)
        user_id = traverse_obj(profile_data, ('data', '_id', {str}))
        if not user_id:
            raise ExtractorError('Unable to extract the user id')

        username = traverse_obj(profile_data, ('data', 'profile', 'username', {str}))
        url_query = parse_qs(url)
        game = traverse_obj(url_query, ('game', 0, {int_or_none}))
        # Without an explicit ?view=... the profile's clips are listed
        query_id = traverse_obj(url_query, ('view', 0), default='Clips')

        if query_id not in ('Clips', 'Montages', 'Mobile Clips'):
            raise ExtractorError(f'Unsupported playlist URL type {query_id!r}')

        return self.playlist_result(
            OnDemandPagedList(
                functools.partial(
                    self._get_page, user_id, display_id, game, _QUERIES.get(query_id)), self._PAGE_SIZE),
            # Only the first word of the view name goes into the id ('Mobile Clips' -> 'mobile')
            playlist_id=join_nonempty(user_id, query_id.lower().split()[0], game),
            playlist_title=join_nonempty((username or display_id), query_id, game, delim=' - '))
|
@ -1,9 +1,9 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@ -25,7 +25,7 @@ class AlphaPornoIE(InfoExtractor):
|
||||
'tbr': 1145,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
class Alsace20TVBaseIE(InfoExtractor):
|
||||
def _extract_video(self, video_id, url=None):
|
||||
info = self._download_json(
|
||||
'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
|
||||
f'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key={video_id}&habillage=0&mode=html',
|
||||
video_id) or {}
|
||||
title = info.get('titre')
|
||||
|
||||
@ -24,9 +24,9 @@ class Alsace20TVBaseIE(InfoExtractor):
|
||||
else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
|
||||
|
||||
webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
|
||||
thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
|
||||
thumbnail = url_or_none(dict_get(info, ('image', 'preview')) or self._og_search_thumbnail(webpage))
|
||||
upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
|
||||
upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None
|
||||
upload_date = unified_strdate(f'20{upload_date[:2]}-{upload_date[2:4]}-{upload_date[4:]}') if upload_date else None
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
|
104
plugins/youtube_download/yt_dlp/extractor/altcensored.py
Normal file
104
plugins/youtube_download/yt_dlp/extractor/altcensored.py
Normal file
@ -0,0 +1,104 @@
|
||||
import re
|
||||
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
InAdvancePagedList,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
str_to_int,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class AltCensoredIE(InfoExtractor):
    """Extractor for altcensored.com watch/embed pages.

    The media itself is delegated to the archive.org extractor; this class
    only contributes the view count and category scraped from the page.
    """

    IE_NAME = 'altcensored'
    _VALID_URL = r'https?://(?:www\.)?altcensored\.com/(?:watch\?v=|embed/)(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.altcensored.com/watch?v=k0srjLSkga8',
        'info_dict': {
            'id': 'youtube-k0srjLSkga8',
            'ext': 'webm',
            'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
            'display_id': 'k0srjLSkga8.webm',
            'release_date': '20180403',
            'creators': ['Virginie Vota'],
            'release_year': 2018,
            'upload_date': '20230318',
            'uploader': 'admin@altcensored.com',
            'description': 'md5:0b38a8fc04103579d5c1db10a247dc30',
            'timestamp': 1679161343,
            'track': 'k0srjLSkga8',
            'duration': 926.09,
            'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
            'view_count': int,
            'categories': ['News & Politics'],
        },
    }]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the archive.org copy of the video."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        category = clean_html(self._html_search_regex(
            r'<a href="/category/\d+">([^<]+)</a>', webpage, 'category', default=None))

        return {
            '_type': 'url_transparent',
            'url': f'https://archive.org/details/youtube-{video_id}',
            'ie_key': ArchiveOrgIE.ie_key(),
            # The pattern is applied to the page as downloaded, so a non-breaking
            # space between the label and the number may appear as the literal
            # entity text '&nbsp;'; \s already covers plain and U+00A0 spaces
            'view_count': str_to_int(self._html_search_regex(
                r'YouTube Views:(?:\s|&nbsp;)*([\d,]+)', webpage, 'view count', default=None)),
            'categories': [category] if category else None,
        }
|
||||
|
||||
|
||||
class AltCensoredChannelIE(InfoExtractor):
    """Extractor for altcensored.com channel listings, returned as a paged playlist."""

    IE_NAME = 'altcensored:channel'
    _VALID_URL = r'https?://(?:www\.)?altcensored\.com/channel/(?!page|table)(?P<id>[^/?#]+)'
    _PAGE_SIZE = 24
    _TESTS = [{
        'url': 'https://www.altcensored.com/channel/UCFPTO55xxHqFqkzRZHu4kcw',
        'info_dict': {
            'title': 'Virginie Vota',
            'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
        },
        'playlist_count': 85,
    }, {
        'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
        'info_dict': {
            'title': 'yukikaze775',
            'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
        },
        'playlist_count': 4,
    }, {
        'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw',
        'info_dict': {
            'title': 'Mister Metokur',
            'id': 'UCfYbb7nga6-icsFWWgS-kWw',
        },
        'playlist_count': 121,
    }]

    def _real_extract(self, url):
        """Read the channel title and page count, then expose the videos page by page."""
        channel_id = self._match_id(url)
        channel_page = self._download_webpage(
            url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
        title = self._html_search_meta('altcen_title', channel_page, 'title', fatal=False)
        # Looks for a pagination link whose label repeats the page number in its href;
        # when no such link is found the channel is taken to have one page
        page_count = int_or_none(self._html_search_regex(
            r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>',
            channel_page, 'page count', default='1'))

        def page_func(page_num):
            # The index passed in is shifted by one to form the page number in the URL
            page_num += 1
            listing = self._download_webpage(
                f'https://altcensored.com/channel/{channel_id}/page/{page_num}',
                channel_id, note=f'Downloading page {page_num}')

            entries = []
            # orderedSet drops repeated links while keeping their first-seen order
            for path in orderedSet(re.findall(r'<a[^>]+href="(/watch\?v=[^"]+)', listing)):
                entries.append(
                    self.url_result(urljoin('https://www.altcensored.com', path), AltCensoredIE))
            return entries

        return self.playlist_result(
            InAdvancePagedList(page_func, page_count, self._PAGE_SIZE),
            playlist_id=channel_id, playlist_title=title)
|
@ -1,17 +1,13 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
int_or_none,
|
||||
clean_html,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
@ -25,7 +21,7 @@ class AluraIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '60095',
|
||||
'ext': 'mp4',
|
||||
'title': 'Referências, ref-set e alter'
|
||||
'title': 'Referências, ref-set e alter',
|
||||
},
|
||||
'skip': 'Requires alura account credentials'},
|
||||
{
|
||||
@ -34,12 +30,12 @@ class AluraIE(InfoExtractor):
|
||||
'only_matching': True},
|
||||
{
|
||||
'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219',
|
||||
'only_matching': True}
|
||||
'only_matching': True},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
course, video_id = self._match_valid_url(url)
|
||||
course, video_id = self._match_valid_url(url).group('course_name', 'id')
|
||||
video_url = self._VIDEO_URL % (course, video_id)
|
||||
|
||||
video_dict = self._download_json(video_url, video_id, 'Searching for videos')
|
||||
@ -52,7 +48,7 @@ class AluraIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for video_obj in video_dict:
|
||||
video_url_m3u8 = video_obj.get('link')
|
||||
video_url_m3u8 = video_obj.get('mp4')
|
||||
video_format = self._extract_m3u8_formats(
|
||||
video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
@ -66,7 +62,7 @@ class AluraIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
"formats": formats
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
@ -95,7 +91,7 @@ class AluraIE(InfoExtractor):
|
||||
'post url', default=self._LOGIN_URL, group='url')
|
||||
|
||||
if not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||
post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
response = self._download_webpage(
|
||||
post_url, None, 'Logging in',
|
||||
@ -107,7 +103,7 @@ class AluraIE(InfoExtractor):
|
||||
r'(?s)<p[^>]+class="alert-message[^"]*">(.+?)</p>',
|
||||
response, 'error message', default=None)
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError(f'Unable to login: {error}', expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
@ -123,7 +119,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if AluraIE.suitable(url) else super(AluraCourseIE, cls).suitable(url)
|
||||
return False if AluraIE.suitable(url) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@ -161,7 +157,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
|
||||
'url': video_url,
|
||||
'id_key': self.ie_key(),
|
||||
'chapter': chapter,
|
||||
'chapter_number': chapter_number
|
||||
'chapter_number': chapter_number,
|
||||
}
|
||||
entries.append(entry)
|
||||
return self.playlist_result(entries, course_path, course_title)
|
||||
|
77
plugins/youtube_download/yt_dlp/extractor/amadeustv.py
Normal file
77
plugins/youtube_download/yt_dlp/extractor/amadeustv.py
Normal file
@ -0,0 +1,77 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AmadeusTVIE(InfoExtractor):
    """Extractor for amadeus.tv library pages."""

    _VALID_URL = r'https?://(?:www\.)?amadeus\.tv/library/(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'http://www.amadeus.tv/library/65091a87ff85af59d9fc54c3',
        'info_dict': {
            'id': '5576678021301411311',
            'ext': 'mp4',
            'title': 'Jieon Park - 第五届珠海莫扎特国际青少年音乐周小提琴C组第三轮',
            'thumbnail': 'http://1253584441.vod2.myqcloud.com/a0046a27vodtransbj1253584441/7db4af535576678021301411311/coverBySnapshot_10_0.jpg',
            'duration': 1264.8,
            'upload_date': '20230918',
            'timestamp': 1695034800,
            'display_id': '65091a87ff85af59d9fc54c3',
            'view_count': int,
            'description': 'md5:a0357b9c215489e2067cbae0b777bb95',
        },
    }]

    def _real_extract(self, url):
        """Find the video id in the page's Nuxt data and collect formats from the play-info endpoint.

        Raises ExtractorError when the page data carries no video id.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        nuxt_data = self._search_nuxt_data(webpage, display_id, traverse=('fetch', '0'))
        video_id = traverse_obj(nuxt_data, ('item', 'video', {str}))
        if not video_id:
            raise ExtractorError('Unable to extract actual video ID')

        # The same Referer is sent with the play-info request and attached to every format
        referer = 'http://www.amadeus.tv/'
        video_data = self._download_json(
            f'http://playvideo.qcloud.com/getplayinfo/v2/1253584441/{video_id}',
            video_id, headers={'Referer': referer})

        # The source video is listed first, followed by every transcoded variant;
        # entries without a usable URL are left out
        streams = traverse_obj(
            video_data, ('videoInfo', ('sourceVideo', ('transcodeList', ...)), {dict}))
        formats = [{
            **traverse_obj(stream, {
                'url': 'url',
                'format_id': ('definition', {lambda x: f'http-{x or "0"}'}),
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
                'filesize': (('totalSize', 'size'), {int_or_none}),
                'vcodec': ('videoStreamList', 0, 'codec'),
                'acodec': ('audioStreamList', 0, 'codec'),
                'fps': ('videoStreamList', 0, 'fps', {float_or_none}),
            }, get_all=False),
            'http_headers': {'Referer': referer},
        } for stream in streams if url_or_none(stream.get('url'))]

        info = {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
        }
        # Metadata from the play-info response is applied first ...
        info.update(traverse_obj(video_data, {
            'title': ('videoInfo', 'basicInfo', 'name', {str}),
            'thumbnail': ('coverInfo', 'coverUrl', {url_or_none}),
            'duration': ('videoInfo', 'sourceVideo', ('floatDuration', 'duration'), {float_or_none}),
        }, get_all=False))
        # ... and values found in the page data override it where both exist
        info.update(traverse_obj(nuxt_data, ('item', {
            'title': (('title', 'title_en', 'title_cn'), {str}),
            'description': (('description', 'description_en', 'description_cn'), {str}),
            'timestamp': ('date', {parse_iso8601}),
            'view_count': ('view', {int_or_none}),
        }), get_all=False))

        return info
|
@ -1,6 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from .vimeo import VimeoIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
@ -25,7 +25,7 @@ class AmaraIE(InfoExtractor):
|
||||
'uploader': 'PBS NewsHour',
|
||||
'uploader_id': 'PBSNewsHour',
|
||||
'timestamp': 1549639570,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Vimeo
|
||||
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
|
||||
@ -40,8 +40,8 @@ class AmaraIE(InfoExtractor):
|
||||
'timestamp': 1294763658,
|
||||
'upload_date': '20110111',
|
||||
'uploader': 'Sam Morrill',
|
||||
'uploader_id': 'sammorrill'
|
||||
}
|
||||
'uploader_id': 'sammorrill',
|
||||
},
|
||||
}, {
|
||||
# Direct Link
|
||||
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
|
||||
@ -55,13 +55,13 @@ class AmaraIE(InfoExtractor):
|
||||
'subtitles': dict,
|
||||
'upload_date': '20091007',
|
||||
'timestamp': 1254942511,
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
meta = self._download_json(
|
||||
'https://amara.org/api/videos/%s/' % video_id,
|
||||
f'https://amara.org/api/videos/{video_id}/',
|
||||
video_id, query={'format': 'json'})
|
||||
title = meta['title']
|
||||
video_url = meta['all_urls'][0]
|
||||
|
@ -61,13 +61,13 @@ class AmazonStoreIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
for retry in self.RetryManager():
|
||||
webpage = self._download_webpage(url, id)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
try:
|
||||
data_json = self._search_json(
|
||||
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id,
|
||||
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', playlist_id,
|
||||
transform_source=js_to_json)
|
||||
except ExtractorError as e:
|
||||
retry.error = e
|
||||
@ -81,7 +81,7 @@ class AmazonStoreIE(InfoExtractor):
|
||||
'height': int_or_none(video.get('videoHeight')),
|
||||
'width': int_or_none(video.get('videoWidth')),
|
||||
} for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')]
|
||||
return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title'))
|
||||
return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=data_json.get('title'))
|
||||
|
||||
|
||||
class AmazonReviewsIE(InfoExtractor):
|
||||
|
@ -22,8 +22,11 @@ class AmazonMiniTVBaseIE(InfoExtractor):
|
||||
|
||||
resp = self._download_json(
|
||||
f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
|
||||
asin, note=note, headers={'Content-Type': 'application/json'},
|
||||
data=json.dumps(data).encode() if data else None,
|
||||
asin, note=note, headers={
|
||||
'Content-Type': 'application/json',
|
||||
'currentpageurl': '/',
|
||||
'currentplatform': 'dWeb',
|
||||
}, data=json.dumps(data).encode() if data else None,
|
||||
query=None if data else {
|
||||
'deviceType': 'A1WMMUXPCUJL4N',
|
||||
'contentId': asin,
|
||||
@ -46,7 +49,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'May I Kiss You?',
|
||||
'language': 'Hindi',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'description': 'md5:a549bfc747973e04feb707833474e59d',
|
||||
'release_timestamp': 1644710400,
|
||||
'release_date': '20220213',
|
||||
@ -68,7 +71,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Jahaan',
|
||||
'language': 'Hindi',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'description': 'md5:05eb765a77bf703f322f120ec6867339',
|
||||
'release_timestamp': 1647475200,
|
||||
'release_date': '20220317',
|
||||
|
@ -26,6 +26,7 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
||||
'only_matching': True,
|
||||
@ -63,8 +64,8 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
site, display_id = self._match_valid_url(url).groups()
|
||||
requestor_id = self._REQUESTOR_ID_MAP[site]
|
||||
page_data = self._download_json(
|
||||
'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s'
|
||||
% (requestor_id.lower(), display_id), display_id)['data']
|
||||
f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}',
|
||||
display_id)['data']
|
||||
properties = page_data.get('properties') or {}
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
@ -75,15 +76,15 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
try:
|
||||
for v in page_data['children']:
|
||||
if v.get('type') == 'video-player':
|
||||
releasePid = v['properties']['currentVideo']['meta']['releasePid']
|
||||
tp_path = 'M_UwQC/' + releasePid
|
||||
release_pid = v['properties']['currentVideo']['meta']['releasePid']
|
||||
tp_path = 'M_UwQC/' + release_pid
|
||||
media_url = 'https://link.theplatform.com/s/' + tp_path
|
||||
video_player_count += 1
|
||||
except KeyError:
|
||||
pass
|
||||
if video_player_count > 1:
|
||||
self.report_warning(
|
||||
'The JSON data has %d video players. Only one will be extracted' % video_player_count)
|
||||
f'The JSON data has {video_player_count} video players. Only one will be extracted')
|
||||
|
||||
# Fall back to videoPid if releasePid not found.
|
||||
# TODO: Fall back to videoPid if releasePid manifest uses DRM.
|
||||
@ -130,7 +131,7 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
})
|
||||
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
|
||||
if ns_keys:
|
||||
ns = list(ns_keys)[0]
|
||||
ns = next(iter(ns_keys))
|
||||
episode = theplatform_metadata.get(ns + '$episodeTitle') or None
|
||||
episode_number = int_or_none(
|
||||
theplatform_metadata.get(ns + '$episode'))
|
||||
|
@ -87,13 +87,13 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
resource_type = 'episodes'
|
||||
|
||||
resource = self._download_json(
|
||||
'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id)
|
||||
f'https://www.americastestkitchen.com/api/v6/{resource_type}/{video_id}', video_id)
|
||||
video = resource['video'] if is_episode else resource
|
||||
episode = resource if is_episode else resource.get('episode') or {}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
|
||||
'url': 'https://player.zype.com/embed/{}.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ'.format(video['zypeId']),
|
||||
'ie_key': 'Zype',
|
||||
'description': clean_html(video.get('description')),
|
||||
'timestamp': unified_timestamp(video.get('publishDate')),
|
||||
@ -174,22 +174,22 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
]
|
||||
|
||||
if season_number:
|
||||
playlist_id = 'season_%d' % season_number
|
||||
playlist_title = 'Season %d' % season_number
|
||||
playlist_id = f'season_{season_number}'
|
||||
playlist_title = f'Season {season_number}'
|
||||
facet_filters.append('search_season_list:' + playlist_title)
|
||||
else:
|
||||
playlist_id = show
|
||||
playlist_title = title
|
||||
|
||||
season_search = self._download_json(
|
||||
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
|
||||
f'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_{slug}_season_desc_production',
|
||||
playlist_id, headers={
|
||||
'Origin': 'https://www.americastestkitchen.com',
|
||||
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
|
||||
'X-Algolia-Application-Id': 'Y1FNZXUI30',
|
||||
}, query={
|
||||
'facetFilters': json.dumps(facet_filters),
|
||||
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
|
||||
'attributesToRetrieve': f'description,search_{slug}_episode_number,search_document_date,search_url,title,search_atk_episode_season',
|
||||
'attributesToHighlight': '',
|
||||
'hitsPerPage': 1000,
|
||||
})
|
||||
@ -207,7 +207,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
'description': episode.get('description'),
|
||||
'timestamp': unified_timestamp(episode.get('search_document_date')),
|
||||
'season_number': season_number,
|
||||
'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
|
||||
'episode_number': int_or_none(episode.get(f'search_{slug}_episode_number')),
|
||||
'ie_key': AmericasTestKitchenIE.ie_key(),
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
@ -19,12 +19,12 @@ class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
'Unable to download Akamai AMP feed', transform_source=strip_jsonp)
|
||||
item = feed.get('channel', {}).get('item')
|
||||
if not item:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
|
||||
raise ExtractorError('{} said: {}'.format(self.IE_NAME, feed['error']))
|
||||
|
||||
video_id = item['guid']
|
||||
|
||||
def get_media_node(name, default=None):
|
||||
media_name = 'media-%s' % name
|
||||
media_name = f'media-{name}'
|
||||
media_group = item.get('media-group') or item
|
||||
return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
|
||||
|
||||
|
@ -5,7 +5,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
@ -29,7 +29,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
|
||||
'release_date': '20230121',
|
||||
'release_timestamp': 1674285179,
|
||||
'episode_id': 'e1tpt3d',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# embed url
|
||||
'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd',
|
||||
@ -50,7 +50,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
|
||||
'season': 'Season 2',
|
||||
'season_number': 2,
|
||||
'episode_id': 'e1shjqd',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
_WEBPAGE_TESTS = [{
|
||||
@ -72,7 +72,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
|
||||
'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
|
||||
'uploader': 'Podcast Tempo',
|
||||
'channel': 'apakatatempo',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -1,7 +1,7 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_or_none, merge_dicts
|
||||
from ..utils import merge_dicts, url_or_none
|
||||
|
||||
|
||||
class AngelIE(InfoExtractor):
|
||||
@ -15,8 +15,8 @@ class AngelIE(InfoExtractor):
|
||||
'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons',
|
||||
'description': 'md5:73b704897c20ab59c433a9c0a8202d5e',
|
||||
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
|
||||
'duration': 1359.0
|
||||
}
|
||||
'duration': 1359.0,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name',
|
||||
'md5': 'e4774bad0a5f0ad2e90d175cafdb797d',
|
||||
@ -26,8 +26,8 @@ class AngelIE(InfoExtractor):
|
||||
'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name',
|
||||
'description': 'md5:aadfb4827a94415de5ff6426e6dee3be',
|
||||
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
|
||||
'duration': 3276.0
|
||||
}
|
||||
'duration': 3276.0,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -44,7 +44,7 @@ class AngelIE(InfoExtractor):
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
# Angel uses cloudinary in the background and supports image transformations.
|
||||
|
@ -5,22 +5,26 @@ from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
make_archive_id,
|
||||
scale_thumbnails_to_max_format_width,
|
||||
)
|
||||
|
||||
|
||||
class Ant1NewsGrBaseIE(InfoExtractor):
|
||||
class AntennaBaseIE(InfoExtractor):
|
||||
def _download_and_extract_api_data(self, video_id, netloc, cid=None):
|
||||
url = f'{self.http_scheme()}//{netloc}{self._API_PATH}'
|
||||
info = self._download_json(url, video_id, query={'cid': cid or video_id})
|
||||
try:
|
||||
source = info['url']
|
||||
except KeyError:
|
||||
raise ExtractorError('no source found for %s' % video_id)
|
||||
formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4')
|
||||
if determine_ext(source) == 'm3u8' else ([{'url': source}], {}))
|
||||
info = self._download_json(f'{self.http_scheme()}//{netloc}{self._API_PATH}',
|
||||
video_id, query={'cid': cid or video_id})
|
||||
if not info.get('url'):
|
||||
raise ExtractorError(f'No source found for {video_id}')
|
||||
|
||||
ext = determine_ext(info['url'])
|
||||
if ext == 'm3u8':
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(info['url'], video_id, 'mp4')
|
||||
else:
|
||||
formats, subs = [{'url': info['url'], 'format_id': ext}], {}
|
||||
|
||||
thumbnails = scale_thumbnails_to_max_format_width(
|
||||
formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+')
|
||||
formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+') if info.get('thumb') else []
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.get('title'),
|
||||
@ -30,21 +34,31 @@ class Ant1NewsGrBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE):
|
||||
IE_NAME = 'ant1newsgr:watch'
|
||||
IE_DESC = 'ant1news.gr videos'
|
||||
_VALID_URL = r'https?://(?P<netloc>(?:www\.)?ant1news\.gr)/watch/(?P<id>\d+)/'
|
||||
class AntennaGrWatchIE(AntennaBaseIE):
|
||||
IE_NAME = 'antenna:watch'
|
||||
IE_DESC = 'antenna.gr and ant1news.gr videos'
|
||||
_VALID_URL = r'https?://(?P<netloc>(?:www\.)?(?:antenna|ant1news)\.gr)/watch/(?P<id>\d+)/'
|
||||
_API_PATH = '/templates/data/player'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ant1news.gr/watch/1506168/ant1-news-09112021-stis-18-45',
|
||||
'md5': '95925e6b32106754235f2417e0d2dfab',
|
||||
'md5': 'c472d9dd7cd233c63aff2ea42201cda6',
|
||||
'info_dict': {
|
||||
'id': '1506168',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:0ad00fa66ecf8aa233d26ab0dba7514a',
|
||||
'description': 'md5:18665af715a6dcfeac1d6153a44f16b0',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/26d46bf6-8158-4f02-b197-7096c714b2de.jpg',
|
||||
'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/26d46bf6-8158-4f02-b197-7096c714b2de\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.antenna.gr/watch/1643812/oi-prodotes-epeisodio-01',
|
||||
'md5': '8f6f7dd3b1dba4d835ba990e25f31243',
|
||||
'info_dict': {
|
||||
'id': '1643812',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'mp4',
|
||||
'title': 'ΟΙ ΠΡΟΔΟΤΕΣ – ΕΠΕΙΣΟΔΙΟ 01',
|
||||
'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/b3d63096-e72d-43c4-87a0-00d4363d242f\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
@ -52,25 +66,26 @@ class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE):
|
||||
video_id, netloc = self._match_valid_url(url).group('id', 'netloc')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
info = self._download_and_extract_api_data(video_id, netloc)
|
||||
info['description'] = self._og_search_description(webpage)
|
||||
info['description'] = self._og_search_description(webpage, default=None)
|
||||
info['_old_archive_ids'] = [make_archive_id('Ant1NewsGrWatch', video_id)]
|
||||
return info
|
||||
|
||||
|
||||
class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
|
||||
class Ant1NewsGrArticleIE(AntennaBaseIE):
|
||||
IE_NAME = 'ant1newsgr:article'
|
||||
IE_DESC = 'ant1news.gr articles'
|
||||
_VALID_URL = r'https?://(?:www\.)?ant1news\.gr/[^/]+/article/(?P<id>\d+)/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron',
|
||||
'md5': '294f18331bb516539d72d85a82887dcc',
|
||||
'md5': '57eb8d12181f0fa2b14b0b138e1de9b6',
|
||||
'info_dict': {
|
||||
'id': '_xvg/m_cmbatw=',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411',
|
||||
'timestamp': 1603092840,
|
||||
'upload_date': '20201019',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
|
||||
'timestamp': 1666166520,
|
||||
'upload_date': '20221019',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/1920/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn',
|
||||
@ -90,19 +105,19 @@ class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
|
||||
info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle')
|
||||
embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage))
|
||||
if not embed_urls:
|
||||
raise ExtractorError('no videos found for %s' % video_id, expected=True)
|
||||
raise ExtractorError(f'no videos found for {video_id}', expected=True)
|
||||
return self.playlist_from_matches(
|
||||
embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(),
|
||||
video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})
|
||||
|
||||
|
||||
class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
|
||||
class Ant1NewsGrEmbedIE(AntennaBaseIE):
|
||||
IE_NAME = 'ant1newsgr:embed'
|
||||
IE_DESC = 'ant1news.gr embedded videos'
|
||||
_BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
|
||||
_VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
|
||||
_API_PATH = '/news/templates/data/jsonPlayer'
|
||||
_API_PATH = '/templates/data/jsonPlayer'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377',
|
@ -8,10 +8,8 @@ import time
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_encrypt
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
join_nonempty,
|
||||
smuggle_url,
|
||||
strip_jsonp,
|
||||
@ -33,24 +31,6 @@ class AnvatoIE(InfoExtractor):
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
|
||||
|
||||
_TESTS = [{
|
||||
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
|
||||
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
|
||||
'md5': '921919dab3cd0b849ff3d624831ae3e2',
|
||||
'info_dict': {
|
||||
'id': '899441',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
|
||||
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
||||
'upload_date': '20201215',
|
||||
'timestamp': 1608009755,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': 'NFL',
|
||||
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
|
||||
'Player Highlights', 'Cleveland Browns', 'league'],
|
||||
'duration': 157,
|
||||
'categories': ['Entertainment', 'Game', 'Highlights'],
|
||||
},
|
||||
}, {
|
||||
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
|
||||
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
|
||||
'md5': '837718bcfb3a7778d022f857f7a9b19e',
|
||||
@ -238,32 +218,7 @@ class AnvatoIE(InfoExtractor):
|
||||
'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
|
||||
'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
|
||||
'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
|
||||
}
|
||||
|
||||
def _generate_nfl_token(self, anvack, mcp_id):
|
||||
reroute = self._download_json(
|
||||
'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
|
||||
headers={'X-Domain-Id': 100}, note='Fetching token info')
|
||||
token_type = reroute.get('token_type') or 'Bearer'
|
||||
auth_token = f'{token_type} {reroute["access_token"]}'
|
||||
response = self._download_json(
|
||||
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
|
||||
'query': '''{
|
||||
viewer {
|
||||
mediaToken(anvack: "%s", id: %s) {
|
||||
token
|
||||
}
|
||||
}
|
||||
}''' % (anvack, mcp_id),
|
||||
}).encode(), headers={
|
||||
'Authorization': auth_token,
|
||||
'Content-Type': 'application/json',
|
||||
}, note='Fetching NFL API token')
|
||||
return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
|
||||
|
||||
_TOKEN_GENERATORS = {
|
||||
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
|
||||
}
|
||||
|
||||
def _server_time(self, access_key, video_id):
|
||||
@ -277,8 +232,8 @@ class AnvatoIE(InfoExtractor):
|
||||
server_time = self._server_time(access_key, video_id)
|
||||
input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}'
|
||||
|
||||
auth_secret = intlist_to_bytes(aes_encrypt(
|
||||
bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))
|
||||
auth_secret = bytes(aes_encrypt(
|
||||
list(input_data[:64].encode()), list(self._AUTH_KEY)))
|
||||
query = {
|
||||
'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'),
|
||||
'rtyp': 'fp',
|
||||
@ -290,8 +245,6 @@ class AnvatoIE(InfoExtractor):
|
||||
}
|
||||
if extracted_token is not None:
|
||||
api['anvstk2'] = extracted_token
|
||||
elif self._TOKEN_GENERATORS.get(access_key) is not None:
|
||||
api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
|
||||
elif self._ANVACK_TABLE.get(access_key) is not None:
|
||||
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
|
||||
else:
|
||||
@ -299,7 +252,7 @@ class AnvatoIE(InfoExtractor):
|
||||
|
||||
return self._download_json(
|
||||
video_data_url, video_id, transform_source=strip_jsonp, query=query,
|
||||
data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8'))
|
||||
data=json.dumps({'api': api}, separators=(',', ':')).encode())
|
||||
|
||||
def _get_anvato_videos(self, access_key, video_id, token):
|
||||
video_data = self._get_video_json(access_key, video_id, token)
|
||||
@ -358,7 +311,7 @@ class AnvatoIE(InfoExtractor):
|
||||
for caption in video_data.get('captions', []):
|
||||
a_caption = {
|
||||
'url': caption['url'],
|
||||
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
|
||||
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None,
|
||||
}
|
||||
subtitles.setdefault(caption['language'], []).append(a_caption)
|
||||
subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs)
|
||||
|
@ -10,6 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
|
||||
_WORKING = False
|
||||
IE_NAME = 'aol.com'
|
||||
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
|
||||
|
||||
@ -29,7 +30,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# video with vidible ID
|
||||
'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
|
||||
@ -45,7 +46,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/',
|
||||
'only_matching': True,
|
||||
@ -82,10 +83,10 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
|
||||
return self._extract_yahoo_video(video_id, 'us')
|
||||
|
||||
response = self._download_json(
|
||||
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
|
||||
f'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/{video_id}/details',
|
||||
video_id)['response']
|
||||
if response['statusText'] != 'Ok':
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True)
|
||||
raise ExtractorError('{} said: {}'.format(self.IE_NAME, response['statusText']), expected=True)
|
||||
|
||||
video_data = response['data']
|
||||
formats = []
|
||||
|
@ -34,7 +34,7 @@ class APAIE(InfoExtractor):
|
||||
video_id, base_url = mobj.group('id', 'base_url')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'%s/player/%s' % (base_url, video_id), video_id)
|
||||
f'{base_url}/player/{video_id}', video_id)
|
||||
|
||||
jwplatform_id = self._search_regex(
|
||||
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
|
||||
@ -47,7 +47,7 @@ class APAIE(InfoExtractor):
|
||||
|
||||
def extract(field, name=None):
|
||||
return self._search_regex(
|
||||
r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
|
||||
rf'\b{field}["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||
webpage, name or field, default=None, group='value')
|
||||
|
||||
title = extract('title') or video_id
|
||||
|
@ -1,8 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
str_to_int,
|
||||
ExtractorError
|
||||
)
|
||||
from ..utils import ExtractorError, str_to_int
|
||||
|
||||
|
||||
class AppleConnectIE(InfoExtractor):
|
||||
|
@ -1,30 +1,45 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ApplePodcastsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654',
|
||||
'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172',
|
||||
'info_dict': {
|
||||
'id': '1000665010654',
|
||||
'ext': 'mp3',
|
||||
'title': 'Ferreck Dawn - To The Break of Dawn 117',
|
||||
'episode': 'Ferreck Dawn - To The Break of Dawn 117',
|
||||
'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc',
|
||||
'upload_date': '20240812',
|
||||
'timestamp': 1723449600,
|
||||
'duration': 3596,
|
||||
'series': 'Ferreck Dawn - To The Break of Dawn',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'md5': '41dc31cd650143e530d9423b6b5a344f',
|
||||
'md5': 'baf8a6b8b8aa6062dbb4639ed73d0052',
|
||||
'info_dict': {
|
||||
'id': '1000482637777',
|
||||
'ext': 'mp3',
|
||||
'title': '207 - Whitney Webb Returns',
|
||||
'episode': '207 - Whitney Webb Returns',
|
||||
'episode_number': 207,
|
||||
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
|
||||
'upload_date': '20200705',
|
||||
'timestamp': 1593932400,
|
||||
'duration': 6454,
|
||||
'duration': 5369,
|
||||
'series': 'The Tim Dillon Show',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'only_matching': True,
|
||||
@ -39,47 +54,24 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
episode_data = {}
|
||||
ember_data = {}
|
||||
# new page type 2021-11
|
||||
amp_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
|
||||
amp_data = try_get(amp_data,
|
||||
lambda a: self._parse_json(
|
||||
next(a[x] for x in iter(a) if episode_id in x),
|
||||
episode_id),
|
||||
dict) or {}
|
||||
amp_data = amp_data.get('d') or []
|
||||
episode_data = try_get(
|
||||
amp_data,
|
||||
lambda a: next(x for x in a
|
||||
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
|
||||
dict)
|
||||
if not episode_data:
|
||||
# try pre 2021-11 page type: TODO: consider deleting if no longer used
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id) or {}
|
||||
ember_data = ember_data.get(episode_id) or ember_data
|
||||
episode_data = try_get(ember_data, lambda x: x['data'], dict)
|
||||
episode = episode_data['attributes']
|
||||
description = episode.get('description') or {}
|
||||
|
||||
series = None
|
||||
for inc in (amp_data or ember_data.get('included') or []):
|
||||
if inc.get('type') == 'media/podcast':
|
||||
series = try_get(inc, lambda x: x['attributes']['name'])
|
||||
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
|
||||
server_data = self._search_json(
|
||||
r'<script [^>]*\bid=["\']serialized-server-data["\'][^>]*>', webpage,
|
||||
'server data', episode_id, contains_pattern=r'\[{(?s:.+)}\]')[0]['data']
|
||||
model_data = traverse_obj(server_data, (
|
||||
'headerButtonItems', lambda _, v: v['$kind'] == 'bookmark' and v['modelType'] == 'EpisodeOffer',
|
||||
'model', {dict}, any))
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'title': episode.get('name'),
|
||||
'url': clean_podcast_url(episode['assetUrl']),
|
||||
'description': description.get('standard') or description.get('short'),
|
||||
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
||||
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
||||
'series': series,
|
||||
**self._json_ld(
|
||||
traverse_obj(server_data, ('seoData', 'schemaContent', {dict}))
|
||||
or self._yield_json_ld(webpage, episode_id, fatal=False), episode_id, fatal=False),
|
||||
**traverse_obj(model_data, {
|
||||
'title': ('title', {str}),
|
||||
'url': ('streamUrl', {clean_podcast_url}),
|
||||
'timestamp': ('releaseDate', {parse_iso8601}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
}),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'vcodec': 'none',
|
||||
}
|
||||
|
@ -1,8 +1,8 @@
|
||||
import re
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
@ -64,7 +64,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
'uploader_id': 'wb',
|
||||
},
|
||||
},
|
||||
]
|
||||
],
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
|
||||
'info_dict': {
|
||||
@ -99,7 +99,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, movie)
|
||||
film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
|
||||
film_data = self._download_json(
|
||||
'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
|
||||
f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json',
|
||||
film_id, fatal=False)
|
||||
|
||||
if film_data:
|
||||
@ -114,7 +114,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
if not src:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (version, size),
|
||||
'format_id': f'{version}-{size}',
|
||||
'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
|
||||
'width': int_or_none(size_data.get('width')),
|
||||
'height': int_or_none(size_data.get('height')),
|
||||
@ -134,7 +134,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
page_data = film_data.get('page', {})
|
||||
return self.playlist_result(entries, film_id, page_data.get('movie_title'))
|
||||
|
||||
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||
playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||
|
||||
def fix_html(s):
|
||||
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
|
||||
@ -143,10 +143,9 @@ class AppleTrailersIE(InfoExtractor):
|
||||
# like: http://trailers.apple.com/trailers/wb/gravity/
|
||||
|
||||
def _clean_json(m):
|
||||
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''')
|
||||
return 'iTunes.playURL({});'.format(m.group(1).replace('\'', '''))
|
||||
s = re.sub(self._JSON_RE, _clean_json, s)
|
||||
s = '<html>%s</html>' % s
|
||||
return s
|
||||
return f'<html>{s}</html>'
|
||||
doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
|
||||
|
||||
playlist = []
|
||||
@ -170,18 +169,18 @@ class AppleTrailersIE(InfoExtractor):
|
||||
duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
|
||||
|
||||
trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
|
||||
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
|
||||
settings_json_url = urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json')
|
||||
settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
|
||||
|
||||
formats = []
|
||||
for format in settings['metadata']['sizes']:
|
||||
for fmt in settings['metadata']['sizes']:
|
||||
# The src is a file pointing to the real video file
|
||||
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
|
||||
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src'])
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format': format['type'],
|
||||
'width': int_or_none(format['width']),
|
||||
'height': int_or_none(format['height']),
|
||||
'format': fmt['type'],
|
||||
'width': int_or_none(fmt['width']),
|
||||
'height': int_or_none(fmt['height']),
|
||||
})
|
||||
|
||||
playlist.append({
|
||||
@ -229,7 +228,7 @@ class AppleTrailersSectionIE(InfoExtractor):
|
||||
'title': 'Movie Studios',
|
||||
},
|
||||
}
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>{})'.format('|'.join(_SECTIONS))
|
||||
_TESTS = [{
|
||||
'url': 'http://trailers.apple.com/#section=justadded',
|
||||
'info_dict': {
|
||||
@ -270,7 +269,7 @@ class AppleTrailersSectionIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
section = self._match_id(url)
|
||||
section_data = self._download_json(
|
||||
'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
|
||||
'http://trailers.apple.com/trailers/home/feeds/{}.json'.format(self._SECTIONS[section]['feed_path']),
|
||||
section)
|
||||
entries = [
|
||||
self.url_result('http://trailers.apple.com' + e['location'])
|
||||
|
@ -1,11 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .naver import NaverBaseIE
|
||||
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
@ -32,6 +32,7 @@ from ..utils import (
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
@ -50,10 +51,9 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'release_date': '19681210',
|
||||
'timestamp': 1268695290,
|
||||
'upload_date': '20100315',
|
||||
'creator': 'SRI International',
|
||||
'creators': ['SRI International'],
|
||||
'uploader': 'laura@archive.org',
|
||||
'thumbnail': r're:https://archive\.org/download/.*\.jpg',
|
||||
'release_year': 1968,
|
||||
'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr',
|
||||
'track': 'XD300-23 68HighlightsAResearchCntAugHumanIntellect',
|
||||
|
||||
@ -111,7 +111,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'title': 'Turning',
|
||||
'ext': 'flac',
|
||||
'track': 'Turning',
|
||||
'creator': 'Grateful Dead',
|
||||
'creators': ['Grateful Dead'],
|
||||
'display_id': 'gd1977-05-08d01t01.flac',
|
||||
'track_number': 1,
|
||||
'album': '1977-05-08 - Barton Hall - Cornell University',
|
||||
@ -131,11 +131,10 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'location': 'Barton Hall - Cornell University',
|
||||
'duration': 438.68,
|
||||
'track': 'Deal',
|
||||
'creator': 'Grateful Dead',
|
||||
'creators': ['Grateful Dead'],
|
||||
'album': '1977-05-08 - Barton Hall - Cornell University',
|
||||
'release_date': '19770508',
|
||||
'display_id': 'gd1977-05-08d01t07.flac',
|
||||
'release_year': 1977,
|
||||
'track_number': 7,
|
||||
},
|
||||
}, {
|
||||
@ -147,7 +146,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'title': 'Bells Of Rostov',
|
||||
'ext': 'mp3',
|
||||
},
|
||||
'skip': 'restricted'
|
||||
'skip': 'restricted',
|
||||
}, {
|
||||
'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3',
|
||||
'md5': '1d0aabe03edca83ca58d9ed3b493a3c3',
|
||||
@ -160,7 +159,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'description': 'md5:012b2d668ae753be36896f343d12a236',
|
||||
'upload_date': '20190928',
|
||||
},
|
||||
'skip': 'restricted'
|
||||
'skip': 'restricted',
|
||||
}, {
|
||||
# Original formats are private
|
||||
'url': 'https://archive.org/details/irelandthemakingofarepublic',
|
||||
@ -170,7 +169,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'upload_date': '20160610',
|
||||
'description': 'md5:f70956a156645a658a0dc9513d9e78b7',
|
||||
'uploader': 'dimitrios@archive.org',
|
||||
'creator': ['British Broadcasting Corporation', 'Time-Life Films'],
|
||||
'creators': ['British Broadcasting Corporation', 'Time-Life Films'],
|
||||
'timestamp': 1465594947,
|
||||
},
|
||||
'playlist': [
|
||||
@ -204,8 +203,28 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg',
|
||||
'display_id': 'irelandthemakingofarepublicreel2.mov',
|
||||
},
|
||||
}
|
||||
]
|
||||
},
|
||||
],
|
||||
}, {
|
||||
# The reviewbody is None for one of the reviews; just need to extract data without crashing
|
||||
'url': 'https://archive.org/details/gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
|
||||
'info_dict': {
|
||||
'id': 'gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
|
||||
'ext': 'mp3',
|
||||
'title': 'Stuck Inside of Mobile with the Memphis Blues Again',
|
||||
'creators': ['Grateful Dead'],
|
||||
'duration': 338.31,
|
||||
'track': 'Stuck Inside of Mobile with the Memphis Blues Again',
|
||||
'description': 'md5:764348a470b986f1217ffd38d6ac7b72',
|
||||
'display_id': 'gd95-04-02d1t04.shn',
|
||||
'location': 'Pyramid Arena',
|
||||
'uploader': 'jon@archive.org',
|
||||
'album': '1995-04-02 - Pyramid Arena',
|
||||
'upload_date': '20040519',
|
||||
'track_number': 4,
|
||||
'release_date': '19950402',
|
||||
'timestamp': 1084927901,
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@ -222,7 +241,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = urllib.parse.unquote_plus(self._match_id(url))
|
||||
identifier, entry_id = (video_id.split('/', 1) + [None])[:2]
|
||||
identifier, _, entry_id = video_id.partition('/')
|
||||
|
||||
# Archive.org metadata API doesn't clearly demarcate playlist entries
|
||||
# or subtitle tracks, so we get them from the embeddable player.
|
||||
@ -248,7 +267,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
if track['kind'] != 'subtitles':
|
||||
continue
|
||||
entries[p['orig']][track['label']] = {
|
||||
'url': 'https://archive.org/' + track['file'].lstrip('/')
|
||||
'url': 'https://archive.org/' + track['file'].lstrip('/'),
|
||||
}
|
||||
|
||||
metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier)
|
||||
@ -260,7 +279,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'title': m['title'],
|
||||
'description': clean_html(m.get('description')),
|
||||
'uploader': dict_get(m, ['uploader', 'adder']),
|
||||
'creator': m.get('creator'),
|
||||
'creators': traverse_obj(m, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
|
||||
'license': m.get('licenseurl'),
|
||||
'release_date': unified_strdate(m.get('date')),
|
||||
'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])),
|
||||
@ -275,7 +294,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'title': f.get('title') or f['name'],
|
||||
'display_id': f['name'],
|
||||
'description': clean_html(f.get('description')),
|
||||
'creator': f.get('creator'),
|
||||
'creators': traverse_obj(f, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
|
||||
'duration': parse_duration(f.get('length')),
|
||||
'track_number': int_or_none(f.get('track')),
|
||||
'album': f.get('album'),
|
||||
@ -295,7 +314,9 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'height': int_or_none(f.get('width')),
|
||||
'filesize': int_or_none(f.get('size'))})
|
||||
|
||||
extension = (f['name'].rsplit('.', 1) + [None])[1]
|
||||
_, has_ext, extension = f['name'].rpartition('.')
|
||||
if not has_ext:
|
||||
extension = None
|
||||
|
||||
# We don't want to skip private formats if the user has access to them,
|
||||
# however without access to an account with such privileges we can't implement/test this.
|
||||
@ -303,14 +324,14 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
|
||||
if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
|
||||
entry['formats'].append({
|
||||
'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
|
||||
'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
|
||||
'format': f.get('format'),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'filesize': int_or_none(f.get('size')),
|
||||
'protocol': 'https',
|
||||
'source_preference': 0 if f.get('source') == 'original' else -1,
|
||||
'format_note': f.get('source')
|
||||
'format_note': f.get('source'),
|
||||
})
|
||||
|
||||
for entry in entries.values():
|
||||
@ -334,7 +355,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
info['comments'].append({
|
||||
'id': review.get('review_id'),
|
||||
'author': review.get('reviewer'),
|
||||
'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + review.get('reviewbody'),
|
||||
'text': join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n'),
|
||||
'timestamp': unified_timestamp(review.get('createdate')),
|
||||
'parent': 'root'})
|
||||
|
||||
@ -373,7 +394,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/user/Zeurel',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Internal link
|
||||
'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0',
|
||||
@ -390,7 +411,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/user/1veritasium',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description.
|
||||
# Should use the date in the link. Title ends with '- Youtube'. Capture has description in eow-description
|
||||
@ -405,8 +426,8 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_id': 'machinima',
|
||||
'uploader_url': 'https://www.youtube.com/user/machinima',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'machinima'
|
||||
}
|
||||
'uploader': 'machinima',
|
||||
},
|
||||
}, {
|
||||
# FLV video. Video file URL does not provide itag information
|
||||
'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw',
|
||||
@ -423,7 +444,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'jawed',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
|
||||
'info_dict': {
|
||||
@ -439,7 +460,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/user/itsmadeon',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# First capture is of dead video, second is the oldest from CDX response.
|
||||
'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E',
|
||||
@ -456,7 +477,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'ETC News',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# First capture of dead video, capture date in link links to dead capture.
|
||||
'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E',
|
||||
@ -475,15 +496,15 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader': 'ETC News',
|
||||
},
|
||||
'expected_warnings': [
|
||||
r'unable to download capture webpage \(it may not be archived\)'
|
||||
]
|
||||
r'unable to download capture webpage \(it may not be archived\)',
|
||||
],
|
||||
}, { # Very old YouTube page, has - YouTube in title.
|
||||
'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg',
|
||||
'info_dict': {
|
||||
'id': '-06-KB9XTzg',
|
||||
'ext': 'flv',
|
||||
'title': 'New Coin Hack!! 100% Safe!!'
|
||||
}
|
||||
'title': 'New Coin Hack!! 100% Safe!!',
|
||||
},
|
||||
}, {
|
||||
'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8',
|
||||
'info_dict': {
|
||||
@ -497,7 +518,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'DankPods',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093
|
||||
'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4',
|
||||
@ -514,7 +535,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_id': 'PewDiePie',
|
||||
'uploader_url': 'https://www.youtube.com/user/PewDiePie',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# ~June 2010 Capture. swfconfig
|
||||
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y',
|
||||
@ -529,7 +550,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks',
|
||||
'upload_date': '20090520',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Jan 2011: watch-video-date/eow-date surrounded by whitespace
|
||||
'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc',
|
||||
@ -544,7 +565,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'duration': 132,
|
||||
'uploader_url': 'https://www.youtube.com/user/claybutlermusic',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# ~May 2009 swfArgs. ytcfg is spread out over various vars
|
||||
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY',
|
||||
@ -559,7 +580,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'description': 'md5:4ca77d79538064e41e4cc464e93f44f0',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'duration': 754,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# ~June 2012. Upload date is in another lang so cannot extract.
|
||||
'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA',
|
||||
@ -573,7 +594,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader': 'BlackNerdComedy',
|
||||
'duration': 182,
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# ~July 2013
|
||||
'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM',
|
||||
@ -589,7 +610,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ',
|
||||
'upload_date': '20060428',
|
||||
'uploader': 'punkybird',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# April 2020: Player response in player config
|
||||
'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en',
|
||||
@ -606,7 +627,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'description': 'md5:c625bb3c02c4f5fb4205971e468fa341',
|
||||
'uploader_url': 'https://www.youtube.com/user/GameGrumps',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# watch7-user-header with yt-user-info
|
||||
'url': 'ytarchive:kbh4T_b4Ixw:20160307085057',
|
||||
@ -621,7 +642,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'upload_date': '20150503',
|
||||
'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# April 2012
|
||||
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU',
|
||||
@ -636,35 +657,35 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'duration': 200,
|
||||
'upload_date': '20120407',
|
||||
'uploader_id': 'thecomputernerd01',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Video not archived, only capture is unavailable video page
|
||||
'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, { # Encoded url
|
||||
'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&search=soccer',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'ytarchive:BaW_jenozKc:20050214000000',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'ytarchive:BaW_jenozKc',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
_YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
|
||||
@ -675,13 +696,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
|
||||
_YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers
|
||||
_YT_ALL_THUMB_SERVERS = orderedSet(
|
||||
_YT_DEFAULT_THUMB_SERVERS + ['img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(0, 5), 9)]])
|
||||
[*_YT_DEFAULT_THUMB_SERVERS, 'img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(5), 9)]])
|
||||
|
||||
_WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/'
|
||||
_OLDEST_CAPTURE_DATE = 20050214000000
|
||||
_NEWEST_CAPTURE_DATE = 20500101000000
|
||||
|
||||
def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False):
|
||||
def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False):
|
||||
# CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
|
||||
query = {
|
||||
'url': url,
|
||||
@ -690,14 +711,14 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'limit': 500,
|
||||
'filter': ['statuscode:200'] + (filters or []),
|
||||
'collapse': collapse or [],
|
||||
**(query or {})
|
||||
**(query or {}),
|
||||
}
|
||||
res = self._download_json(
|
||||
'https://web.archive.org/cdx/search/cdx', item_id,
|
||||
note or 'Downloading CDX API JSON', query=query, fatal=fatal)
|
||||
if isinstance(res, list) and len(res) >= 2:
|
||||
# format response to make it easier to use
|
||||
return list(dict(zip(res[0], v)) for v in res[1:])
|
||||
return [dict(zip(res[0], v)) for v in res[1:]]
|
||||
elif not isinstance(res, list) or len(res) != 0:
|
||||
self.report_warning('Error while parsing CDX API response' + bug_reports_message())
|
||||
|
||||
@ -854,7 +875,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
{
|
||||
'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'),
|
||||
'filesize': int_or_none(thumbnail_dict.get('length')),
|
||||
'preference': int_or_none(thumbnail_dict.get('length'))
|
||||
'preference': int_or_none(thumbnail_dict.get('length')),
|
||||
} for thumbnail_dict in response)
|
||||
if not try_all:
|
||||
break
|
||||
@ -895,7 +916,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
for retry in retry_manager:
|
||||
try:
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id),
|
||||
HEADRequest(f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}'),
|
||||
video_id, note='Fetching archived video file url', expected_status=True)
|
||||
except ExtractorError as e:
|
||||
# HTTP Error 404 is expected if the video is not saved.
|
||||
@ -926,258 +947,24 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
info['thumbnails'] = self._extract_thumbnails(video_id)
|
||||
|
||||
if urlh:
|
||||
url = compat_urllib_parse_unquote(urlh.url)
|
||||
url = urllib.parse.unquote(urlh.url)
|
||||
video_file_url_qs = parse_qs(url)
|
||||
# Attempt to recover any ext & format info from playback url & response headers
|
||||
format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
|
||||
fmt = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
|
||||
itag = try_get(video_file_url_qs, lambda x: x['itag'][0])
|
||||
if itag and itag in YoutubeIE._formats:
|
||||
format.update(YoutubeIE._formats[itag])
|
||||
format.update({'format_id': itag})
|
||||
fmt.update(YoutubeIE._formats[itag])
|
||||
fmt.update({'format_id': itag})
|
||||
else:
|
||||
mime = try_get(video_file_url_qs, lambda x: x['mime'][0])
|
||||
ext = (mimetype2ext(mime)
|
||||
or urlhandle_detect_ext(urlh)
|
||||
or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type')))
|
||||
format.update({'ext': ext})
|
||||
info['formats'] = [format]
|
||||
fmt.update({'ext': ext})
|
||||
info['formats'] = [fmt]
|
||||
if not info.get('duration'):
|
||||
info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0]))
|
||||
|
||||
if not info.get('title'):
|
||||
info['title'] = video_id
|
||||
return info
|
||||
|
||||
|
||||
class VLiveWebArchiveIE(InfoExtractor):
|
||||
IE_NAME = 'web.archive:vlive'
|
||||
IE_DESC = 'web.archive.org saved vlive videos'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://)?web\.archive\.org/
|
||||
(?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)? # /web and the version index is optional
|
||||
(?:https?(?::|%3[Aa])//)?(?:
|
||||
(?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P<id>[0-9]+) # VLive URL
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326',
|
||||
'md5': 'cc7314812855ce56de70a06a27314983',
|
||||
'info_dict': {
|
||||
'id': '1326',
|
||||
'ext': 'mp4',
|
||||
'title': "Girl's Day's Broadcast",
|
||||
'creator': "Girl's Day",
|
||||
'view_count': int,
|
||||
'uploader_id': 'muploader_a',
|
||||
'uploader_url': None,
|
||||
'uploader': None,
|
||||
'upload_date': '20150817',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': 1439816449,
|
||||
'like_count': int,
|
||||
'channel': 'Girl\'s Day',
|
||||
'channel_id': 'FDF27',
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1439818140,
|
||||
'release_date': '20150817',
|
||||
'duration': 1014,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937',
|
||||
'info_dict': {
|
||||
'id': '16937',
|
||||
'ext': 'mp4',
|
||||
'title': '첸백시 걍방',
|
||||
'creator': 'EXO',
|
||||
'view_count': int,
|
||||
'subtitles': 'mincount:12',
|
||||
'uploader_id': 'muploader_j',
|
||||
'uploader_url': 'http://vlive.tv',
|
||||
'uploader': None,
|
||||
'upload_date': '20161112',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': 1478923074,
|
||||
'like_count': int,
|
||||
'channel': 'EXO',
|
||||
'channel_id': 'F94BD',
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1478924280,
|
||||
'release_date': '20161112',
|
||||
'duration': 906,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870',
|
||||
'info_dict': {
|
||||
'id': '101870',
|
||||
'ext': 'mp4',
|
||||
'title': '[ⓓ xV] “레벨이들 매력에 반해? 안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)',
|
||||
'creator': 'Dispatch',
|
||||
'view_count': int,
|
||||
'subtitles': 'mincount:6',
|
||||
'uploader_id': 'V__FRA08071',
|
||||
'uploader_url': 'http://vlive.tv',
|
||||
'uploader': None,
|
||||
'upload_date': '20181130',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': 1543601327,
|
||||
'like_count': int,
|
||||
'channel': 'Dispatch',
|
||||
'channel_id': 'C796F3',
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1543601040,
|
||||
'release_date': '20181130',
|
||||
'duration': 279,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
# The wayback machine has special timestamp and "mode" values:
|
||||
# timestamp:
|
||||
# 1 = the first capture
|
||||
# 2 = the last capture
|
||||
# mode:
|
||||
# id_ = Identity - perform no alterations of the original resource, return it as it was archived.
|
||||
_WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'
|
||||
|
||||
def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs):
    """
    Download the Wayback Machine copy of a page in "id_" (identity) mode

    @param url          Original (non-archive) URL of the resource
    @param video_id     Identifier passed through to _download_webpage
    @param timestamp    Wayback timestamp of the wanted capture.
                        '2' (the default) selects the last capture;
                        a falsy value (e.g. None) is treated as '2'
    @param kwargs       Passed through to _download_webpage
    @returns            Whatever _download_webpage returns
    @raises             ExtractorError (expected) if the archive answers HTTP 404
    """
    # The capture date is optional in the URLs this extractor matches, so callers
    # may hand over None; without the fallback the request would go to ".../web/Noneid_/..."
    archive_url = f'https://web.archive.org/web/{timestamp or "2"}id_/{url}'

    for retry in self.RetryManager():
        try:
            return self._download_webpage(archive_url, video_id, **kwargs)
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 404:
                raise ExtractorError('Page was not archived', expected=True)
            # Any other failure is handed to the retry manager
            retry.error = e
|
||||
|
||||
def _download_archived_json(self, url, video_id, **kwargs):
    """
    Download an archived page and parse it as JSON

    Arguments are forwarded unchanged to _download_archived_page.
    Raises an expected ExtractorError when nothing was downloaded.
    """
    archived = self._download_archived_page(url, video_id, **kwargs)
    if archived:
        return self._parse_json(archived, video_id)
    raise ExtractorError('Page was not archived', expected=True)
|
||||
|
||||
def _extract_formats_from_m3u8(self, m3u8_url, params, video_id):
    """
    Extract formats from an archived m3u8 playlist

    Every URI line of the playlist is rewritten to point at the archive
    (self._WAYBACK_BASE_URL + directory of m3u8_url + line + encoded params),
    then the first rewritten URI is probed with a HEAD request.

    @param m3u8_url     Original (non-archive) URL of the playlist
    @param params       Query parameters; sent with the playlist request
                        and appended to every rewritten URI
    @param video_id     Identifier used for the requests
    @returns            List of formats, or None if the playlist could not be
                        downloaded, has no URI lines, or its first URI is unavailable
    """
    m3u8_doc = self._download_archived_page(m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False)
    if not m3u8_doc:
        return

    # M3U8 document should be changed to archive domain
    m3u8_doc = m3u8_doc.splitlines()
    url_base = m3u8_url.rsplit('/', 1)[0]
    encoded_params = urllib.parse.urlencode(params)  # Same for every line
    first_segment = None
    for i, line in enumerate(m3u8_doc):
        # Blank lines are not URIs; rewriting them would inject bogus entries
        # into the playlist and could end up being probed as the "first segment"
        if not line.strip() or line.startswith('#'):
            continue
        m3u8_doc[i] = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{encoded_params}'
        first_segment = first_segment or m3u8_doc[i]

    if not first_segment:
        # No URI lines at all, so there is nothing to probe or to extract
        return

    # Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870
    urlh = self._request_webpage(HEADRequest(first_segment), video_id, errnote=False,
                                 fatal=False, note='Check first segment availablity')
    if urlh:
        formats, subtitles = self._parse_m3u8_formats_and_subtitles('\n'.join(m3u8_doc), ext='mp4', video_id=video_id)
        if subtitles:
            self._report_ignoring_subs('m3u8')
        return formats
|
||||
|
||||
# Closely follows the logic of the ArchiveTeam grab script
|
||||
# See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua
|
||||
def _real_extract(self, url):
    """
    Extract an archived VLive video

    Steps: download the archived watch page, read the preloaded player state,
    find the app id in the main script, fetch the "inkey", fetch the VOD data
    and build formats from the best HLS stream and/or the best archived MP4.

    @param url      URL matched by _VALID_URL (groups "id" and optional "date")
    @returns        Info dict with id, formats, metadata and subtitles
    """
    video_id, url_date = self._match_valid_url(url).group('id', 'date')

    webpage = self._download_archived_page(f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date)

    player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id)
    user_country = traverse_obj(player_info, ('common', 'userCountry'))

    main_script_url = self._search_regex(r'<script\s+src="([^"]+/js/main\.[^"]+\.js)"', webpage, 'main script url')
    main_script = self._download_archived_page(main_script_url, video_id, note='Downloading main script')
    app_id = self._search_regex(r'appId\s*=\s*"([^"]+)"', main_script, 'app id')

    inkey = self._download_archived_json(
        f'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/{video_id}/inkey', video_id, note='Fetching inkey', query={
            'appId': app_id,
            'platformType': 'PC',
            'gcc': user_country,
            'locale': 'en_US',
        }, fatal=False)

    vod_id = traverse_obj(player_info, ('postDetail', 'post', 'officialVideo', 'vodId'))

    vod_data = self._download_archived_json(
        f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{vod_id}', video_id, note='Fetching vod data', query={
            'key': inkey.get('inkey'),
            'pid': 'rmcPlayer_16692457559726800',  # partially unix time and partially random. Fixed value used by archiveteam project
            'sid': '2024',
            'ver': '2.0',
            'devt': 'html5_pc',
            'doct': 'json',
            'ptc': 'https',
            'sptc': 'https',
            'cpt': 'vtt',
            'ctls': '%7B%22visible%22%3A%7B%22fullscreen%22%3Atrue%2C%22logo%22%3Afalse%2C%22playbackRate%22%3Afalse%2C%22scrap%22%3Afalse%2C%22playCount%22%3Atrue%2C%22commentCount%22%3Atrue%2C%22title%22%3Atrue%2C%22writer%22%3Atrue%2C%22expand%22%3Afalse%2C%22subtitles%22%3Atrue%2C%22thumbnails%22%3Atrue%2C%22quality%22%3Atrue%2C%22setting%22%3Atrue%2C%22script%22%3Afalse%2C%22logoDimmed%22%3Atrue%2C%22badge%22%3Atrue%2C%22seekingTime%22%3Atrue%2C%22muted%22%3Atrue%2C%22muteButton%22%3Afalse%2C%22viewerNotice%22%3Afalse%2C%22linkCount%22%3Afalse%2C%22createTime%22%3Afalse%2C%22thumbnail%22%3Atrue%7D%2C%22clicked%22%3A%7B%22expand%22%3Afalse%2C%22subtitles%22%3Afalse%7D%7D',
            'pv': '4.26.9',
            'dr': '1920x1080',
            'cpl': 'en_US',
            'lc': 'en_US',
            'adi': '%5B%7B%22type%22%3A%22pre%22%2C%22exposure%22%3Afalse%2C%22replayExposure%22%3Afalse%7D%5D',
            'adu': '%2F',
            'videoId': vod_id,
            'cc': user_country,
        })

    formats = []

    streams = traverse_obj(vod_data, ('streams', ...))
    if len(streams) > 1:
        self.report_warning('Multiple streams found. Only the first stream will be downloaded.')
    # The VOD data may carry no streams at all (only MP4 files);
    # indexing an empty list would abort the extraction with IndexError
    stream = streams[0] if streams else {}

    max_stream = max(
        stream.get('videos') or [],
        key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
    # Without a playlist URL there is nothing to request
    if max_stream is not None and max_stream.get('source'):
        params = {arg.get('name'): arg.get('value') for arg in stream.get('keys', []) if arg.get('type') == 'param'}
        formats = self._extract_formats_from_m3u8(max_stream.get('source'), params, video_id) or []

    # For parts of the project MP4 files were archived
    max_video = max(
        traverse_obj(vod_data, ('videos', 'list', ...)),
        key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
    # A missing source would make the string concatenation below raise TypeError
    if max_video is not None and max_video.get('source'):
        video_url = self._WAYBACK_BASE_URL + max_video.get('source')
        urlh = self._request_webpage(HEADRequest(video_url), video_id, errnote=False,
                                     fatal=False, note='Check video availablity')
        if urlh:
            formats.append({'url': video_url})

    return {
        'id': video_id,
        'formats': formats,
        **traverse_obj(player_info, ('postDetail', 'post', {
            'title': ('officialVideo', 'title', {str}),
            'creator': ('author', 'nickname', {str}),
            'channel': ('channel', 'channelName', {str}),
            'channel_id': ('channel', 'channelCode', {str}),
            'duration': ('officialVideo', 'playTime', {int_or_none}),
            'view_count': ('officialVideo', 'playCount', {int_or_none}),
            'like_count': ('officialVideo', 'likeCount', {int_or_none}),
            'comment_count': ('officialVideo', 'commentCount', {int_or_none}),
            'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}),
            'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}),
        })),
        **traverse_obj(vod_data, ('meta', {
            'uploader_id': ('user', 'id', {str}),
            'uploader': ('user', 'name', {str}),
            'uploader_url': ('user', 'url', {url_or_none}),
            'thumbnail': ('cover', 'source', {url_or_none}),
        }), expected_type=lambda x: x or None),
        **NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]),
    }
|
||||
|
@ -4,6 +4,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
@ -11,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class ArcPublishingIE(InfoExtractor):
|
||||
_UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
|
||||
_VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX
|
||||
_VALID_URL = rf'arcpublishing:(?P<org>[a-z]+):(?P<id>{_UUID_REGEX})'
|
||||
_TESTS = [{
|
||||
# https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
|
||||
'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
|
||||
@ -74,12 +75,12 @@ class ArcPublishingIE(InfoExtractor):
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
entries = []
|
||||
# https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
|
||||
for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
|
||||
for powa_el in re.findall(rf'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="{ArcPublishingIE._UUID_REGEX}"[^>]*>)', webpage):
|
||||
powa = extract_attributes(powa_el) or {}
|
||||
org = powa.get('data-org')
|
||||
uuid = powa.get('data-uuid')
|
||||
if org and uuid:
|
||||
entries.append('arcpublishing:%s:%s' % (org, uuid))
|
||||
entries.append(f'arcpublishing:{org}:{uuid}')
|
||||
return entries
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -122,7 +123,7 @@ class ArcPublishingIE(InfoExtractor):
|
||||
elif stream_type in ('ts', 'hls'):
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
|
||||
if all([f.get('acodec') == 'none' for f in m3u8_formats]):
|
||||
if all(f.get('acodec') == 'none' for f in m3u8_formats):
|
||||
continue
|
||||
for f in m3u8_formats:
|
||||
height = f.get('height')
|
||||
@ -136,7 +137,7 @@ class ArcPublishingIE(InfoExtractor):
|
||||
else:
|
||||
vbr = int_or_none(s.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type,
|
||||
'format_id': join_nonempty(stream_type, vbr),
|
||||
'vbr': vbr,
|
||||
'width': int_or_none(s.get('width')),
|
||||
'height': int_or_none(s.get('height')),
|
||||
|
@ -1,24 +1,25 @@
|
||||
import json
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .generic import GenericIE
|
||||
from ..utils import (
|
||||
OnDemandPagedList,
|
||||
bug_reports_message,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
jwt_decode_hs256,
|
||||
make_archive_id,
|
||||
parse_duration,
|
||||
qualities,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ARDMediathekBaseIE(InfoExtractor):
|
||||
@ -61,45 +62,6 @@ class ARDMediathekBaseIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _ARD_extract_episode_info(self, title):
    """Derive season number, episode number and episode name from a title.

    Returns an empty dict for a falsy title. Otherwise the result always has
    an 'episode' key; 'season_number' and 'episode_number' are only present
    when one of the known title patterns matched.
    """
    if not title:
        return {}

    # Tried in order; the first pattern that matches wins
    title_patterns = (
        # Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
        # from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
        r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*',
        # E.g.: title="Fritjof aus Norwegen (2) (AD)"
        # from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
        r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*',
        r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*',
        # E.g.: title="Folge 25/42: Symmetrie"
        # from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
        # E.g.: title="Folge 1063 - Vertrauen"
        # from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
        r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) ).*',
    )
    hit = next(filter(None, (re.match(regex, title) for regex in title_patterns)), None)

    info = {}
    if hit is not None:
        groups = hit.groupdict()
        episode_name = str_or_none(groups.get('episode'))
        marker = groups.get('ep_info')
        # No explicit episode name captured: use the title minus the numbering part
        if marker and not episode_name:
            episode_name = str_or_none(title.replace(marker, ''))
        if episode_name:
            episode_name = episode_name.strip()
        info = {
            'season_number': int_or_none(groups.get('season_number')),
            'episode_number': int_or_none(groups.get('episode_number')),
            'episode': episode_name,
        }

    # Last resort: the whole title serves as the episode name
    if not info.get('episode'):
        info['episode'] = title.strip()
    return info
|
||||
|
||||
def _extract_formats(self, media_info, video_id):
|
||||
type_ = media_info.get('_type')
|
||||
media_array = media_info.get('_mediaArray', [])
|
||||
@ -123,7 +85,7 @@ class ARDMediathekBaseIE(InfoExtractor):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(stream_url, {
|
||||
'hdcore': '3.1.1',
|
||||
'plugin': 'aasp-3.1.1.69.124'
|
||||
'plugin': 'aasp-3.1.1.69.124',
|
||||
}), video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
@ -134,12 +96,12 @@ class ARDMediathekBaseIE(InfoExtractor):
|
||||
f = {
|
||||
'url': server,
|
||||
'play_path': stream_url,
|
||||
'format_id': 'a%s-rtmp-%s' % (num, quality),
|
||||
'format_id': f'a{num}-rtmp-{quality}',
|
||||
}
|
||||
else:
|
||||
f = {
|
||||
'url': stream_url,
|
||||
'format_id': 'a%s-%s-%s' % (num, ext, quality)
|
||||
'format_id': f'a{num}-{ext}-{quality}',
|
||||
}
|
||||
m = re.search(
|
||||
r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',
|
||||
@ -155,144 +117,12 @@ class ARDMediathekBaseIE(InfoExtractor):
|
||||
return formats
|
||||
|
||||
|
||||
class ARDMediathekIE(ARDMediathekBaseIE):
    """Extractor for the classic ARD Mediathek page layout.

    Handles the hosts listed in `_VALID_URL` (ardmediathek.de incl. the
    `www`/`classic` subdomains, mediathek.daserste.de, mediathek.rbb-online.de
    and one.ard.de). URLs that `ARDBetaMediathekIE` accepts are declined here.
    """

    IE_NAME = 'ARD:mediathek'
    _VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'

    _TESTS = [{
        # available till 26.07.2022
        'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
        'info_dict': {
            'id': '44726822',
            'ext': 'mp4',
            'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
            'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
            'duration': 1740,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
        'only_matching': True,
    }, {
        # audio
        'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
        'only_matching': True,
    }, {
        'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
        'only_matching': True,
    }, {
        # audio
        'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
        'only_matching': True,
    }, {
        'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs ARDBetaMediathekIE accepts, else the parent's verdict."""
        if ARDBetaMediathekIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Build the info dict for a classic Mediathek page.

        Formats come either from `mediaCollection.addMediaStream(...)` calls
        embedded in the page or, when there are none, from the
        `/play/media/<id>` resource handled by `_extract_media_info`.
        Raises ExtractorError (expected) when the page shows one of the known
        "unavailable" banners.
        """
        url_match = self._match_valid_url(url)

        # A numeric documentId in the URL wins over the path-derived id
        doc_match = re.search(r'documentId=([0-9]+)', url)
        document_id = doc_match.group(1) if doc_match else None
        video_id = document_id if doc_match else url_match.group('video_id')

        webpage = self._download_webpage(url, video_id)

        # (page marker, message template) pairs for known error banners
        for marker, template in (
            ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
            ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
             'Video %s is no longer available'),
        ):
            if marker in webpage:
                raise ExtractorError(template % video_id, expected=True)

        # URLs carrying an `rss` query parameter may be feeds rather than HTML pages
        if re.search(r'[\?&]rss($|[=&])', url):
            feed = compat_etree_fromstring(webpage.encode('utf-8'))
            if feed.tag == 'rss':
                return GenericIE()._extract_rss(url, video_id, feed)

        title = self._og_search_title(webpage, default=None)
        if not title:
            title = self._html_search_regex(
                [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
                 r'<meta name="dcterms\.title" content="(.*?)"/>',
                 r'<h4 class="headline">(.*?)</h4>',
                 r'<title[^>]*>(.*?)</title>'],
                webpage, 'title')

        description = self._og_search_description(webpage, default=None)
        if not description:
            description = self._html_search_meta(
                'dcterms.abstract', webpage, 'description', default=None)
        if description is None:
            description = self._html_search_meta(
                'description', webpage, 'meta description', default=None)
        if description is None:
            description = self._html_search_regex(
                r'<p\s+class="teasertext">(.+?)</p>',
                webpage, 'teaser text', default=None)

        # Thumbnail is sometimes not present.
        # It is in the mobile version, but that seems to use a different URL
        # structure altogether.
        thumbnail = self._og_search_thumbnail(webpage, default=None)

        media_streams = re.findall(r'''(?x)
            mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
            "([^"]+)"''', webpage)

        if media_streams:
            quality_of = qualities(['lo', 'hi', 'hq'])

            def stream_format_id(stream_url):
                # '.f4m' manifests get a fixed id; otherwise use the second-to-last
                # dot-separated component of the URL, if there is one
                if stream_url.endswith('.f4m'):
                    return 'f4m'
                parts = re.match(r'.*\.([^.]+)\.[^.]+$', stream_url)
                return parts.group(1) if parts else None

            formats = []
            for stream_url in set(media_streams):
                format_id = stream_format_id(stream_url)
                formats.append({
                    'quality': quality_of(format_id),
                    'format_id': format_id,
                    'url': stream_url,
                })
            info = {
                'formats': formats,
            }
        else:
            # No inline streams: request the JSON file instead
            if not document_id:
                video_id = self._search_regex(
                    (r'/play/(?:config|media|sola)/(\d+)', r'contentId["\']\s*:\s*(\d+)'),
                    webpage, 'media id', default=None)
            info = self._extract_media_info(
                f'http://www.ardmediathek.de/play/media/{video_id}',
                webpage, video_id)

        info.update({
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        })
        info.update(self._ARD_extract_episode_info(title))

        return info
|
||||
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
|
||||
_TESTS = [{
|
||||
# available till 7.12.2023
|
||||
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
|
||||
'md5': 'a438f671e87a7eba04000336a119ccc4',
|
||||
'md5': '94812e6438488fb923c361a44469614b',
|
||||
'info_dict': {
|
||||
'id': 'maischberger-video-424',
|
||||
'display_id': 'maischberger-video-424',
|
||||
@ -399,31 +229,36 @@ class ARDIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
_VALID_URL = r'''(?x)https://
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARDMediathek'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:(?P<client>[^/]+)/)?
|
||||
(?:player|live|video|(?P<playlist>sendung|sammlung))/
|
||||
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
|
||||
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
|
||||
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
|
||||
(?:[^/]+/)?
|
||||
(?:player|live|video)/
|
||||
(?:[^?#]+/)?
|
||||
(?P<id>[a-zA-Z0-9]+)
|
||||
/?(?:[?#]|$)'''
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
_TOKEN_URL = 'https://sso.ardmediathek.de/sso/token'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
|
||||
'md5': '3fd5fead7a370a819341129c8d713136',
|
||||
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||
'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
|
||||
'info_dict': {
|
||||
'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
|
||||
'id': '12172961',
|
||||
'title': 'Wolfsland - Die traurigen Schwestern',
|
||||
'description': r're:^Als der Polizeiobermeister Raaben',
|
||||
'duration': 5241,
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
|
||||
'timestamp': 1670710500,
|
||||
'upload_date': '20221210',
|
||||
'display_id': 'Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||
'id': '12939099',
|
||||
'title': 'Liebe auf vier Pfoten',
|
||||
'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
|
||||
'duration': 5222,
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b',
|
||||
'timestamp': 1701343800,
|
||||
'upload_date': '20231130',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 12,
|
||||
'episode': 'Wolfsland - Die traurigen Schwestern',
|
||||
'series': 'Filme im MDR'
|
||||
'episode': 'Liebe auf vier Pfoten',
|
||||
'series': 'Filme im MDR',
|
||||
'age_limit': 0,
|
||||
'channel': 'MDR',
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
||||
@ -450,11 +285,49 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
'timestamp': 1636398000,
|
||||
'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
|
||||
'upload_date': '20211108',
|
||||
'display_id': 'tagesschau-oder-tagesschau-20-00-uhr/das-erste',
|
||||
'display_id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
|
||||
'duration': 915,
|
||||
'episode': 'tagesschau, 20:00 Uhr',
|
||||
'series': 'tagesschau',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
|
||||
'channel': 'ARD-Aktuell',
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/7-tage/7-tage-unter-harten-jungs/hr-fernsehen/N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'md5': 'c428b9effff18ff624d4f903bda26315',
|
||||
'info_dict': {
|
||||
'id': '94834686',
|
||||
'ext': 'mp4',
|
||||
'duration': 2670,
|
||||
'episode': '7 Tage ... unter harten Jungs',
|
||||
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
|
||||
'upload_date': '20231005',
|
||||
'timestamp': 1696491171,
|
||||
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'series': '7 Tage ...',
|
||||
'channel': 'HR',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:430c86d233afa42d?w=960&ch=fa32ba69bc87989a',
|
||||
'title': '7 Tage ... unter harten Jungs',
|
||||
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/lokalzeit-aus-duesseldorf/lokalzeit-aus-duesseldorf-oder-31-10-2024/wdr-duesseldorf/Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
|
||||
'info_dict': {
|
||||
'id': '13847165',
|
||||
'chapters': 'count:8',
|
||||
'ext': 'mp4',
|
||||
'channel': 'WDR',
|
||||
'display_id': 'Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
|
||||
'episode': 'Lokalzeit aus Düsseldorf | 31.10.2024',
|
||||
'series': 'Lokalzeit aus Düsseldorf',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f02ec9bd9b7bd5f6?w=960&ch=612491dcd5e09b0c',
|
||||
'title': 'Lokalzeit aus Düsseldorf | 31.10.2024',
|
||||
'upload_date': '20241031',
|
||||
'timestamp': 1730399400,
|
||||
'description': 'md5:12db30b3b706314efe3778b8df1a7058',
|
||||
'duration': 1759,
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
@ -471,203 +344,260 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_episode_info(self, title):
    """Derive season/episode metadata from an ARD title string.

    The title is tried against a list of patterns ordered from most to
    least specific; the final pattern captures the whole title so that a
    string title always produces a result.

    @param title  title to parse; any non-`str` value (e.g. None) yields `{}`
    @returns      dict that may contain `season_number`, `episode_number`
                  and `episode`
    """
    # `re.match` cannot handle non-string input, and callers splat the result
    # with `**`, so hand back an empty mapping rather than None or an error.
    if not isinstance(title, str):
        return {}

    patterns = [
        # Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
        # from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
        r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*',
        # E.g.: title="Fritjof aus Norwegen (2) (AD)"
        # from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
        r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*',
        r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*',
        # E.g.: title="Folge 25/42: Symmetrie"
        # from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
        # E.g.: title="Folge 1063 - Vertrauen"
        # from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
        r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:| -|) ).*',
        # As a fallback use the full title
        r'(?P<title>.*)',
    ]

    # Each pattern is applied via `re.match`; named groups of the match are then
    # mapped to output fields. For `episode` the candidates are, in order: an
    # explicit `episode` group, the title with the `ep_info` part removed, and
    # the whole title. NOTE(review): `get_all=False` presumably selects the
    # first successful pattern/candidate - confirm against traverse_obj.
    return traverse_obj(patterns, (..., {functools.partial(re.match, string=title)}, {
        'season_number': ('season_number', {int_or_none}),
        'episode_number': ('episode_number', {int_or_none}),
        'episode': ((
            ('episode', {str_or_none}),
            ('ep_info', {lambda x: title.replace(x, '')}),
            ('title', {str}),
        ), {str.strip}),
    }), get_all=False)
|
||||
|
||||
def _real_extract(self, url):
    """Extract a single ARD Mediathek item via the page-gateway API.

    Steps: optionally fetch an SSO token for age verification (only when an
    `ams` cookie is present for `_TOKEN_URL`), download the item page, pick
    the player widget, then collect formats, subtitles and metadata.

    @param url  URL matched by `_VALID_URL`
    @returns    info dict for the item
    """
    display_id = self._match_id(url)
    query = {'embedded': 'false', 'mcV6': 'true'}
    headers = {}

    if self._get_cookies(self._TOKEN_URL).get('ams'):
        token = self._download_json(
            self._TOKEN_URL, display_id, 'Fetching token for age verification',
            'Unable to fetch age verification token', fatal=False)
        id_token = traverse_obj(token, ('idToken', {str}))
        decoded_token = traverse_obj(id_token, ({jwt_decode_hs256}, {dict}))
        user_id = traverse_obj(decoded_token, (('user_id', 'sub'), {str}), get_all=False)
        if not user_id:
            self.report_warning('Unable to extract token, continuing without authentication')
        else:
            headers['x-authorization'] = f'Bearer {id_token}'
            query['userId'] = user_id
            if decoded_token.get('age_rating') != 18:
                self.report_warning('Account is not verified as 18+; video may be unavailable')

    page_data = self._download_json(
        f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}',
        display_id, query=query, headers=headers)

    # For user convenience we use the old contentId instead of the longer crid
    # Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283
    old_id = traverse_obj(page_data, ('tracking', 'atiCustomVars', 'contentId', {int}))
    if old_id is not None:
        video_id = str(old_id)
        archive_ids = [make_archive_id(ARDBetaMediathekIE, display_id)]
    else:
        self.report_warning(f'Could not extract contentId{bug_reports_message()}')
        video_id = display_id
        archive_ids = None

    # Fall back to an empty mapping when the page has no player widget, so the
    # `.get()` calls below do not fail on a missing lookup result.
    player_data = traverse_obj(
        page_data, ('widgets', lambda _, v: v['type'] in ('player_ondemand', 'player_live'), {dict}),
        get_all=False) or {}
    is_live = player_data.get('type') == 'player_live'
    media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict}))

    if player_data.get('blockedByFsk'):
        self.raise_login_required('This video is only available for age verified users or after 22:00')

    formats = []
    subtitles = {}
    for stream in traverse_obj(media_data, ('streams', ..., {dict})):
        kind = stream.get('kind')
        # Prioritize main stream over sign language and others
        preference = 1 if kind == 'main' else None
        for media in traverse_obj(stream, ('media', lambda _, v: url_or_none(v['url']))):
            media_url = media['url']

            # The language tag is the first audio track's language code (default
            # 'deu') joined with its kind, where the 'standard' kind is dropped
            audio_kind = traverse_obj(media, (
                'audios', 0, 'kind', {str}), default='').replace('standard', '')
            lang_code = traverse_obj(media, ('audios', 0, 'languageCode', {str})) or 'deu'
            lang = join_nonempty(lang_code, audio_kind)
            language_preference = 10 if lang == 'deu' else -10

            if determine_ext(media_url) == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    media_url, video_id, m3u8_id=f'hls-{kind}', preference=preference, fatal=False, live=is_live)
                for f in fmts:
                    f['language'] = lang
                    f['language_preference'] = language_preference
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                formats.append({
                    'url': media_url,
                    'format_id': f'http-{kind}',
                    'preference': preference,
                    'language': lang,
                    'language_preference': language_preference,
                    **traverse_obj(media, {
                        'format_note': ('forcedLabel', {str}),
                        'width': ('maxHResolutionPx', {int_or_none}),
                        'height': ('maxVResolutionPx', {int_or_none}),
                        'vcodec': ('videoCodec', {str}),
                    }),
                })

    for sub in traverse_obj(media_data, ('subtitles', ..., {dict})):
        for sources in traverse_obj(sub, ('sources', lambda _, v: url_or_none(v['url']))):
            subtitles.setdefault(sub.get('languageCode') or 'deu', []).append({
                'url': sources['url'],
                'ext': {'webvtt': 'vtt', 'ebutt': 'ttml'}.get(sources.get('kind')),
            })

    age_limit = traverse_obj(page_data, ('fskRating', {lambda x: remove_start(x, 'FSK')}, {int_or_none}))
    return {
        'id': video_id,
        'display_id': display_id,
        'formats': formats,
        'subtitles': subtitles,
        'is_live': is_live,
        'age_limit': age_limit,
        **traverse_obj(media_data, {
            'chapters': ('pluginData', 'jumpmarks@all', 'chapterArray', lambda _, v: int_or_none(v['chapterTime']), {
                'start_time': ('chapterTime', {int_or_none}),
                'title': ('chapterTitle', {str}),
            }),
        }),
        **traverse_obj(media_data, ('meta', {
            'title': 'title',
            'description': 'synopsis',
            'timestamp': ('broadcastedOnDateTime', {parse_iso8601}),
            'series': 'seriesTitle',
            'thumbnail': ('images', 0, 'url', {url_or_none}),
            'duration': ('durationSeconds', {int_or_none}),
            'channel': 'clipSourceName',
        })),
        # Guard the splat: the page title may be absent, in which case the
        # helper may have nothing to return
        **(self._extract_episode_info(page_data.get('title')) or {}),
        '_old_archive_ids': archive_ids,
    }
|
||||
|
||||
|
||||
class ARDMediathekCollectionIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/?#]+/)?
|
||||
(?P<playlist>sendung|serie|sammlung)/
|
||||
(?:(?P<display_id>[^?#]+?)/)?
|
||||
(?P<id>[a-zA-Z0-9]+)
|
||||
(?:/(?P<season>\d+)(?:/(?P<version>OV|AD))?)?/?(?:[?#]|$)'''
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ardmediathek.de/serie/quiz/staffel-1-originalversion/Y3JpZDovL3dkci5kZS9vbmUvcXVpeg/1/OV',
|
||||
'info_dict': {
|
||||
'id': 'Y3JpZDovL3dkci5kZS9vbmUvcXVpeg_1_OV',
|
||||
'display_id': 'quiz/staffel-1-originalversion',
|
||||
'title': 'Staffel 1 Originalversion',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-4-mit-audiodeskription/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/4/AD',
|
||||
'info_dict': {
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_4_AD',
|
||||
'display_id': 'babylon-berlin/staffel-4-mit-audiodeskription',
|
||||
'title': 'Staffel 4 mit Audiodeskription',
|
||||
},
|
||||
'playlist_count': 12,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/1/',
|
||||
'info_dict': {
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_1',
|
||||
'display_id': 'babylon-berlin/staffel-1',
|
||||
'title': 'Staffel 1',
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/sendung/tatort/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA',
|
||||
'info_dict': {
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA',
|
||||
'display_id': 'tatort',
|
||||
'title': 'Tatort',
|
||||
},
|
||||
'playlist_mincount': 500,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2',
|
||||
'info_dict': {
|
||||
'id': '5eOHzt8XB2sqeFXbIoJlg2',
|
||||
'display_id': 'die-kirche-bleibt-im-dorf',
|
||||
'title': 'Die Kirche bleibt im Dorf',
|
||||
'description': 'Die Kirche bleibt im Dorf',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}, {
|
||||
# playlist of type 'sendung'
|
||||
'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# playlist of type 'serie'
|
||||
'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# playlist of type 'sammlung'
|
||||
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
    """Query the ARD public gateway for one page of playlist data.

    @param playlist_id  id sent as `showId` (mode 'sendung') or as
                        `compilationId` (any other mode, i.e. 'sammlung')
    @param display_id   only used for the download progress note
    @param client       value of the GraphQL `client` argument
    @param mode         'sendung' selects `showPage`; anything else `morePage`
    @param pageNumber   page index, formatted with `%d`
    @returns            the `showPage` object, or the `widget` object of
                        `morePage`, so both modes share the same shape
                        (`pagination` and `teasers`)
    """
    is_show = mode == 'sendung'

    # Both queries request the same fields; `teasers` is an array
    selection = '''
        pagination {
            pageSize
            totalElements
        }
        teasers {
            mediumTitle
            links { target { id href title } }
            type
        }'''

    # Encode string arguments as quoted, escaped literals so that a `"` or `\`
    # in a URL-derived value cannot terminate the literal and corrupt the query.
    # For plain alphanumeric values this yields the same text as `"%s"` did.
    arguments = 'client: %s %s: %s pageNumber: %d' % (
        json.dumps(client),
        'showId' if is_show else 'compilationId',
        json.dumps(playlist_id),
        pageNumber)

    if is_show:
        query = '{ showPage(%s) {%s\n} }' % (arguments, selection)
    else:  # mode == 'sammlung'
        query = '{ morePage(%s) { widget {%s\n} } }' % (arguments, selection)

    # Resources for ARD graphQL debugging:
    # https://api-test.ardmediathek.de/public-gateway
    data = self._download_json(
        'https://api.ardmediathek.de/public-gateway',
        '[Playlist] %s' % display_id,
        data=json.dumps({'query': query}).encode(),
        headers={'Content-Type': 'application/json'})['data']

    # Align the structure of the returned data
    return data['showPage'] if is_show else data['morePage']['widget']
|
||||
|
||||
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
    """Collect every entry of a playlist and return them as a playlist result.

    Pages are requested through `_ARD_load_playlist_snipped` until
    `pageSize * pages_read` reaches `totalElements`. Teasers whose target
    href contains '/compilation/' are linked as nested 'sammlung' playlists,
    all others as 'video' items.
    """
    entries = []
    page_index = 0
    while True:
        page = self._ARD_load_playlist_snipped(
            playlist_id, display_id, client, mode, page_index)

        for teaser in page['teasers']:
            target = teaser['links']['target']
            # An alternative condition would be teaser['type'] == "compilation".
            # A nested compilation looks like, e.g.:
            # https://www.ardmediathek.de/ard/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2/
            link_mode = 'sammlung' if '/compilation/' in target['href'] else 'video'

            # Build a URL slug from the episode title, similar to ARD:
            # transliterate umlauts, turn runs of special chars into '-',
            # then drop a leading/trailing '-'
            slug = (target['title'].lower()
                    .replace('ä', 'ae').replace('ö', 'oe')
                    .replace('ü', 'ue').replace('ß', 'ss'))
            slug = re.sub('[^a-zA-Z0-9]+', '-', slug)
            slug = re.sub('^-|-$', '', slug)

            item_url = 'https://www.ardmediathek.de/%s/%s/%s/%s/%s' % (
                client, link_mode, display_id, slug, target['id'])
            entries.append(self.url_result(
                item_url, ie=ARDBetaMediathekIE.ie_key()))

        pagination = page['pagination']
        if pagination['pageSize'] * (page_index + 1) >= pagination['totalElements']:
            # Enough pages have been processed to cover all playlist entries
            break
        page_index += 1

    return self.playlist_result(entries, playlist_id, playlist_title=display_id)
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id, playlist_type, client, season_number = self._match_valid_url(url).group(
|
||||
'id', 'display_id', 'playlist', 'client', 'season')
|
||||
display_id, client = display_id or video_id, client or 'ard'
|
||||
playlist_id, display_id, playlist_type, season_number, version = self._match_valid_url(url).group(
|
||||
'id', 'display_id', 'playlist', 'season', 'version')
|
||||
|
||||
if playlist_type:
|
||||
# TODO: Extract only specified season
|
||||
return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type)
|
||||
def call_api(page_num):
|
||||
api_path = 'compilations/ard' if playlist_type == 'sammlung' else 'widgets/ard/asset'
|
||||
return self._download_json(
|
||||
f'https://api.ardmediathek.de/page-gateway/{api_path}/{playlist_id}', playlist_id,
|
||||
f'Downloading playlist page {page_num}', query={
|
||||
'pageNumber': page_num,
|
||||
'pageSize': self._PAGE_SIZE,
|
||||
**({
|
||||
'seasoned': 'true',
|
||||
'seasonNumber': season_number,
|
||||
'withOriginalversion': 'true' if version == 'OV' else 'false',
|
||||
'withAudiodescription': 'true' if version == 'AD' else 'false',
|
||||
} if season_number else {}),
|
||||
})
|
||||
|
||||
player_page = self._download_json(
|
||||
'https://api.ardmediathek.de/public-gateway',
|
||||
display_id, data=json.dumps({
|
||||
'query': '''{
|
||||
playerPage(client:"%s", clipId: "%s") {
|
||||
blockedByFsk
|
||||
broadcastedOn
|
||||
maturityContentRating
|
||||
mediaCollection {
|
||||
_duration
|
||||
_geoblocked
|
||||
_isLive
|
||||
_mediaArray {
|
||||
_mediaStreamArray {
|
||||
_quality
|
||||
_server
|
||||
_stream
|
||||
}
|
||||
}
|
||||
_previewImage
|
||||
_subtitleUrl
|
||||
_type
|
||||
}
|
||||
show {
|
||||
title
|
||||
}
|
||||
image {
|
||||
src
|
||||
}
|
||||
synopsis
|
||||
title
|
||||
tracking {
|
||||
atiCustomVars {
|
||||
contentId
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % (client, video_id),
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json'
|
||||
})['data']['playerPage']
|
||||
title = player_page['title']
|
||||
content_id = str_or_none(try_get(
|
||||
player_page, lambda x: x['tracking']['atiCustomVars']['contentId']))
|
||||
media_collection = player_page.get('mediaCollection') or {}
|
||||
if not media_collection and content_id:
|
||||
media_collection = self._download_json(
|
||||
'https://www.ardmediathek.de/play/media/' + content_id,
|
||||
content_id, fatal=False) or {}
|
||||
info = self._parse_media_info(
|
||||
media_collection, content_id or video_id,
|
||||
player_page.get('blockedByFsk'))
|
||||
age_limit = None
|
||||
description = player_page.get('synopsis')
|
||||
maturity_content_rating = player_page.get('maturityContentRating')
|
||||
if maturity_content_rating:
|
||||
age_limit = int_or_none(maturity_content_rating.lstrip('FSK'))
|
||||
if not age_limit and description:
|
||||
age_limit = int_or_none(self._search_regex(
|
||||
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
|
||||
info.update({
|
||||
'age_limit': age_limit,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
|
||||
'series': try_get(player_page, lambda x: x['show']['title']),
|
||||
'thumbnail': (media_collection.get('_previewImage')
|
||||
or try_get(player_page, lambda x: update_url(x['image']['src'], query=None, fragment=None))
|
||||
or self.get_thumbnail_from_html(display_id, url)),
|
||||
})
|
||||
info.update(self._ARD_extract_episode_info(info['title']))
|
||||
return info
|
||||
def fetch_page(page_num):
|
||||
for item in traverse_obj(call_api(page_num), ('teasers', ..., {dict})):
|
||||
item_id = traverse_obj(item, ('links', 'target', ('urlId', 'id')), 'id', get_all=False)
|
||||
if not item_id or item_id == playlist_id:
|
||||
continue
|
||||
item_mode = 'sammlung' if item.get('type') == 'compilation' else 'video'
|
||||
yield self.url_result(
|
||||
f'https://www.ardmediathek.de/{item_mode}/{item_id}',
|
||||
ie=(ARDMediathekCollectionIE if item_mode == 'sammlung' else ARDBetaMediathekIE),
|
||||
**traverse_obj(item, {
|
||||
'id': ('id', {str}),
|
||||
'title': ('longTitle', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'timestamp': ('broadcastedOn', {parse_iso8601}),
|
||||
}))
|
||||
|
||||
def get_thumbnail_from_html(self, display_id, url):
|
||||
webpage = self._download_webpage(url, display_id, fatal=False) or ''
|
||||
return (
|
||||
self._og_search_thumbnail(webpage, default=None)
|
||||
or self._html_search_meta('thumbnailUrl', webpage, default=None))
|
||||
page_data = call_api(0)
|
||||
full_id = join_nonempty(playlist_id, season_number, version, delim='_')
|
||||
|
||||
return self.playlist_result(
|
||||
OnDemandPagedList(fetch_page, self._PAGE_SIZE), full_id, display_id=display_id,
|
||||
title=page_data.get('title'), description=page_data.get('synopsis'))
|
||||
|
@ -64,7 +64,7 @@ class ArkenaIE(InfoExtractor):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
media = self._download_json(
|
||||
'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id),
|
||||
f'https://video.qbrick.com/api/v1/public/accounts/{account_id}/medias/{video_id}',
|
||||
video_id, query={
|
||||
# https://video.qbrick.com/docs/api/examples/library-api.html
|
||||
'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags',
|
||||
@ -131,8 +131,8 @@ class ArkenaIE(InfoExtractor):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
elif mime_type == 'application/dash+xml':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
href, video_id, mpd_id='dash', fatal=False))
|
||||
elif mime_type == 'application/vnd.ms-sstr+xml':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
href, video_id, ism_id='mss', fatal=False))
|
||||
|
@ -1,11 +1,9 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
@ -35,7 +33,7 @@ class ArnesIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'tags': ['linearna_algebra'],
|
||||
'start_time': 10,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
|
||||
'only_matching': True,
|
||||
@ -93,6 +91,6 @@ class ArnesIE(InfoExtractor):
|
||||
'duration': float_or_none(video.get('duration'), 1000),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'tags': video.get('hashtags'),
|
||||
'start_time': int_or_none(compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
|
||||
'start_time': int_or_none(urllib.parse.parse_qs(
|
||||
urllib.parse.urlparse(url).query).get('t', [None])[0]),
|
||||
}
|
||||
|
303
plugins/youtube_download/yt_dlp/extractor/art19.py
Normal file
303
plugins/youtube_download/yt_dlp/extractor/art19.py
Normal file
@ -0,0 +1,303 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none, int_or_none, parse_iso8601, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class Art19IE(InfoExtractor):
|
||||
_UUID_REGEX = r'[\da-f]{8}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{12}'
|
||||
_VALID_URL = [
|
||||
rf'https?://(?:www\.)?art19\.com/shows/[^/#?]+/episodes/(?P<id>{_UUID_REGEX})',
|
||||
rf'https?://rss\.art19\.com/episodes/(?P<id>{_UUID_REGEX})\.mp3',
|
||||
]
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL[0]})']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://rss.art19.com/episodes/5ba1413c-48b8-472b-9cc3-cfd952340bdb.mp3',
|
||||
'info_dict': {
|
||||
'id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
|
||||
'ext': 'mp3',
|
||||
'title': 'Why Did DeSantis Drop Out?',
|
||||
'series': 'The Daily Briefing',
|
||||
'release_timestamp': 1705941275,
|
||||
'description': 'md5:da38961da4a3f7e419471365e3c6b49f',
|
||||
'episode': 'Episode 582',
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'series_id': 'ed52a0ab-08b1-4def-8afc-549e4d93296d',
|
||||
'upload_date': '20240122',
|
||||
'timestamp': 1705940815,
|
||||
'episode_number': 582,
|
||||
'modified_date': '20240122',
|
||||
'episode_id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
|
||||
'modified_timestamp': 1705941275,
|
||||
'release_date': '20240122',
|
||||
'duration': 527.4,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://art19.com/shows/scamfluencers/episodes/8319b776-4153-4d22-8630-631f204a03dd',
|
||||
'info_dict': {
|
||||
'id': '8319b776-4153-4d22-8630-631f204a03dd',
|
||||
'ext': 'mp3',
|
||||
'title': 'Martha Stewart: The Homemaker Hustler Part 2',
|
||||
'modified_date': '20240116',
|
||||
'upload_date': '20240105',
|
||||
'modified_timestamp': 1705435802,
|
||||
'episode_id': '8319b776-4153-4d22-8630-631f204a03dd',
|
||||
'series_id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'description': 'md5:4aa7cfd1358dc57e729835bc208d7893',
|
||||
'release_timestamp': 1705305660,
|
||||
'release_date': '20240115',
|
||||
'timestamp': 1704481536,
|
||||
'episode_number': 88,
|
||||
'series': 'Scamfluencers',
|
||||
'duration': 2588.37501,
|
||||
'episode': 'Episode 88',
|
||||
},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.nu.nl/formule-1/6291456/verstappen-wordt-een-synoniem-voor-formule-1.html',
|
||||
'info_dict': {
|
||||
'id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
|
||||
'ext': 'mp3',
|
||||
'title': "'Verstappen wordt een synoniem voor Formule 1'",
|
||||
'season': 'Seizoen 6',
|
||||
'description': 'md5:39a7159a31c4cda312b2e893bdd5c071',
|
||||
'episode_id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
|
||||
'duration': 3061.82111,
|
||||
'series_id': '93f4e113-2a60-4609-a564-755058fa40d8',
|
||||
'release_date': '20231126',
|
||||
'modified_timestamp': 1701156004,
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'season_number': 6,
|
||||
'episode_number': 52,
|
||||
'modified_date': '20231128',
|
||||
'upload_date': '20231126',
|
||||
'timestamp': 1701025981,
|
||||
'season_id': '36097c1e-7455-490d-a2fe-e2f10b4d5f26',
|
||||
'series': 'De Boordradio',
|
||||
'release_timestamp': 1701026308,
|
||||
'episode': 'Episode 52',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.wishtv.com/podcast-episode/larry-bucshon-announces-retirement-from-congress/',
|
||||
'info_dict': {
|
||||
'id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
|
||||
'ext': 'mp3',
|
||||
'title': 'Larry Bucshon announces retirement from congress',
|
||||
'upload_date': '20240115',
|
||||
'episode_number': 148,
|
||||
'episode': 'Episode 148',
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'release_date': '20240115',
|
||||
'timestamp': 1705328205,
|
||||
'release_timestamp': 1705329275,
|
||||
'series': 'All INdiana Politics',
|
||||
'modified_date': '20240117',
|
||||
'modified_timestamp': 1705458901,
|
||||
'series_id': 'c4af6c27-b10f-4ff2-9f84-0f407df86ff1',
|
||||
'episode_id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
|
||||
'description': 'md5:53b5239e4d14973a87125c217c255b2a',
|
||||
'duration': 1256.18848,
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Yield URLs of Art19 episodes embedded in *webpage*.

    Everything the parent implementation yields is passed through first.
    After that, each ``<div>`` whose class contains ``art19-web-player``
    and that carries a ``data-episode-id`` attribute matching
    ``_UUID_REGEX`` is turned into the corresponding
    ``rss.art19.com`` ``.mp3`` episode URL.
    """
    yield from super()._extract_embed_urls(url, webpage)

    player_div_re = rf'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-episode-id=[\'"]({cls._UUID_REGEX})[\'"]'
    for player_div in re.finditer(player_div_re, webpage):
        episode_id = player_div.group(1)
        yield f'https://rss.art19.com/episodes/{episode_id}.mp3'
|
||||
|
||||
def _real_extract(self, url):
    """Build the info dict for a single Art19 episode.

    Two JSON documents are requested, both with ``fatal=False``: the
    player metadata from art19.com and the RSS metadata from
    rss.art19.com. The direct ``.mp3`` URL is always offered as a format;
    every entry under ``content.media`` of the RSS metadata (except
    ``waveform_bin``) whose ``url`` passes ``url_or_none`` is added as an
    extra audio-only format.

    Fields taken from the RSS metadata are merged after the player
    metadata, so they win when both provide the same key.
    """
    episode_id = self._match_id(url)

    player_metadata = self._download_json(
        f'https://art19.com/episodes/{episode_id}', episode_id,
        note='Downloading player metadata', fatal=False,
        headers={'Accept': 'application/vnd.art19.v0+json'})
    rss_metadata = self._download_json(
        f'https://rss.art19.com/episodes/{episode_id}.json', episode_id, fatal=False,
        note='Downloading RSS metadata')

    formats = [{
        'format_id': 'direct',
        'url': f'https://rss.art19.com/episodes/{episode_id}.mp3',
        'vcodec': 'none',
        'acodec': 'mp3',
    }]
    media_entries = traverse_obj(rss_metadata, ('content', 'media', {dict.items}, ...))
    for codec_name, media in media_entries:
        # 'waveform_bin' entries are never offered as formats
        if codec_name == 'waveform_bin':
            continue
        media_url = traverse_obj(media, ('url', {url_or_none}))
        if media_url:
            formats.append({
                'format_id': codec_name,
                'url': media_url,
                'vcodec': 'none',
                'acodec': codec_name,
                # 'ogg' is ranked below every other alternative format
                'quality': -2 if codec_name == 'ogg' else -1,
            })

    player_fields = traverse_obj(player_metadata, ('episode', {
        'title': ('title', {str}),
        'description': ('description_plain', {str}),
        'episode_id': ('id', {str}),
        'episode_number': ('episode_number', {int_or_none}),
        'season_id': ('season_id', {str}),
        'series_id': ('series_id', {str}),
        'timestamp': ('created_at', {parse_iso8601}),
        'release_timestamp': ('released_at', {parse_iso8601}),
        'modified_timestamp': ('updated_at', {parse_iso8601}),
    }))
    rss_fields = traverse_obj(rss_metadata, ('content', {
        'title': ('episode_title', {str}),
        'description': ('episode_description_plain', {str}),
        'episode_id': ('episode_id', {str}),
        'episode_number': ('episode_number', {int_or_none}),
        'season': ('season_title', {str}),
        'season_id': ('season_id', {str}),
        'season_number': ('season_number', {int_or_none}),
        'series': ('series_title', {str}),
        'series_id': ('series_id', {str}),
        'thumbnail': ('cover_image', {url_or_none}),
        'duration': ('duration', {float_or_none}),
    }))

    info = {
        'id': episode_id,
        'formats': formats,
    }
    # Order matters: RSS fields are applied last and override player fields
    info.update(player_fields)
    info.update(rss_fields)
    return info
|
||||
|
||||
|
||||
class Art19ShowIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?art19\.com/shows/(?P<id>[\w-]+)(?:/embed)?/?'
|
||||
_VALID_URL = [
|
||||
rf'{_VALID_URL_BASE}(?:$|[#?])',
|
||||
r'https?://rss\.art19\.com/(?P<id>[\w-]+)/?(?:$|[#?])',
|
||||
]
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL_BASE}[^\'"])']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.art19.com/shows/5898c087-a14f-48dc-b6fc-a2280a1ff6e0/',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
|
||||
'display_id': 'echt-gebeurd',
|
||||
'title': 'Echt Gebeurd',
|
||||
'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
|
||||
'timestamp': 1492642167,
|
||||
'upload_date': '20170419',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': 'count:7',
|
||||
},
|
||||
'playlist_mincount': 425,
|
||||
}, {
|
||||
'url': 'https://www.art19.com/shows/echt-gebeurd',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
|
||||
'display_id': 'echt-gebeurd',
|
||||
'title': 'Echt Gebeurd',
|
||||
'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
|
||||
'timestamp': 1492642167,
|
||||
'upload_date': '20170419',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': 'count:7',
|
||||
},
|
||||
'playlist_mincount': 425,
|
||||
}, {
|
||||
'url': 'https://rss.art19.com/scamfluencers',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
|
||||
'display_id': 'scamfluencers',
|
||||
'title': 'Scamfluencers',
|
||||
'description': 'md5:7d239d670c0ced6dadbf71c4caf764b7',
|
||||
'timestamp': 1647368573,
|
||||
'upload_date': '20220315',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': [],
|
||||
},
|
||||
'playlist_mincount': 90,
|
||||
}, {
|
||||
'url': 'https://art19.com/shows/enthuellt/embed',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'e2cacf57-bb8a-4263-aa81-719bcdd4f80c',
|
||||
'display_id': 'enthuellt',
|
||||
'title': 'Enthüllt',
|
||||
'description': 'md5:17752246643414a2fd51744fc9a1c08e',
|
||||
'timestamp': 1601645860,
|
||||
'upload_date': '20201002',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': 'count:10',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://deconstructingyourself.com/deconstructing-yourself-podcast',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'cfbb9b01-c295-4adb-8726-adde7c03cf21',
|
||||
'display_id': 'deconstructing-yourself',
|
||||
'title': 'Deconstructing Yourself',
|
||||
'description': 'md5:dab5082b28b248a35476abf64768854d',
|
||||
'timestamp': 1570581181,
|
||||
'upload_date': '20191009',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': 'count:5',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
}, {
|
||||
'url': 'https://chicagoreader.com/columns-opinion/podcasts/ben-joravsky-show-podcast-episodes/',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': '9dfa2c37-ab87-4c13-8388-4897914313ec',
|
||||
'display_id': 'the-ben-joravsky-show',
|
||||
'title': 'The Ben Joravsky Show',
|
||||
'description': 'md5:c0f3ec0ee0dbea764390e521adc8780a',
|
||||
'timestamp': 1550875095,
|
||||
'upload_date': '20190222',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': ['Chicago Politics', 'chicago', 'Ben Joravsky'],
|
||||
},
|
||||
'playlist_mincount': 1900,
|
||||
}]
|
||||
|
||||
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Yield URLs of Art19 shows embedded in *webpage*.

    Everything the parent implementation yields is passed through first.
    After that, each ``<div>`` whose class contains ``art19-web-player``
    and that carries a ``data-series-id`` attribute is turned into the
    corresponding ``art19.com/shows/`` URL.
    """
    yield from super()._extract_embed_urls(url, webpage)

    player_div_re = r'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-series-id=[\'"]([\w-]+)[\'"]'
    for player_div in re.finditer(player_div_re, webpage):
        series_id = player_div.group(1)
        yield f'https://art19.com/shows/{series_id}'
|
||||
|
||||
def _real_extract(self, url):
    """Return a playlist of every episode of an Art19 show.

    The series metadata is downloaded from art19.com (a fatal request).
    Each string in ``series.episode_ids`` becomes a URL result pointing
    at the episode's ``rss.art19.com`` ``.mp3`` URL, handled by
    ``Art19IE``. Playlist-level fields are read from the ``series``
    object, and tag names from the top-level ``tags`` list.
    """
    series_id = self._match_id(url)
    series_metadata = self._download_json(
        f'https://art19.com/series/{series_id}', series_id, note='Downloading series metadata',
        headers={'Accept': 'application/vnd.art19.v0+json'})

    entries = []
    for episode_id in traverse_obj(series_metadata, ('series', 'episode_ids', ..., {str})):
        entries.append(self.url_result(f'https://rss.art19.com/episodes/{episode_id}.mp3', Art19IE))

    playlist = {
        '_type': 'playlist',
        'entries': entries,
    }
    playlist.update(traverse_obj(series_metadata, ('series', {
        'id': ('id', {str}),
        'display_id': ('slug', {str}),
        'title': ('title', {str}),
        'description': ('description_plain', {str}),
        'timestamp': ('created_at', {parse_iso8601}),
        'modified_timestamp': ('updated_at', {parse_iso8601}),
    })))
    playlist['tags'] = traverse_obj(series_metadata, ('tags', ..., 'name', {str}))
    return playlist
|
@ -5,6 +5,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
strip_or_none,
|
||||
@ -19,46 +20,22 @@ class ArteTVBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class ArteTVIE(ArteTVBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
_VALID_URL = rf'''(?x)
|
||||
(?:https?://
|
||||
(?:
|
||||
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
|
||||
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
|
||||
(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos|
|
||||
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>{ArteTVBaseIE._ARTE_LANGUAGES})
|
||||
)
|
||||
|arte://program)
|
||||
/(?P<id>\d{6}-\d{3}-[AF]|LIVE)
|
||||
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
|
||||
/(?P<id>\d{{6}}-\d{{3}}-[AF]|LIVE)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
|
||||
'info_dict': {
|
||||
'id': '100103-000-A',
|
||||
'title': 'USA: Dyskryminacja na porodówce',
|
||||
'description': 'md5:242017b7cce59ffae340a54baefcafb1',
|
||||
'alt_title': 'ARTE Reportage',
|
||||
'upload_date': '20201103',
|
||||
'duration': 554,
|
||||
'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530',
|
||||
'timestamp': 1604417980,
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'}
|
||||
}, {
|
||||
'note': 'No alt_title',
|
||||
'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
|
||||
'info_dict': {
|
||||
'id': '110371-000-A',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20220718',
|
||||
'duration': 154,
|
||||
'timestamp': 1658162460,
|
||||
'description': 'md5:5890f36fe7dccfadb8b7c0891de54786',
|
||||
'title': 'La chaleur, supplice des arbres de rue',
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/CPE2sQDtD8GLQgt8DuYHLf/940x530',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'}
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||
'only_matching': True,
|
||||
@ -67,19 +44,38 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/fr/videos/109067-000-A/la-loi-de-teheran/',
|
||||
'info_dict': {
|
||||
'id': '110203-006-A',
|
||||
'chapters': 'count:16',
|
||||
'description': 'md5:cf592f1df52fe52007e3f8eac813c084',
|
||||
'alt_title': 'Zaz',
|
||||
'title': 'Baloise Session 2022',
|
||||
'timestamp': 1668445200,
|
||||
'duration': 4054,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/ubQjmVCGyRx3hmBuZEK9QZ/940x530',
|
||||
'upload_date': '20221114',
|
||||
'id': '109067-000-A',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739',
|
||||
'timestamp': 1713927600,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/3rR6PLzfbigSkkeHtkCZNF/940x530',
|
||||
'duration': 7599,
|
||||
'title': 'La loi de Téhéran',
|
||||
'upload_date': '20240424',
|
||||
'subtitles': {
|
||||
'fr': 'mincount:1',
|
||||
'fr-acc': 'mincount:1',
|
||||
'fr-forced': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'note': 'age-restricted',
|
||||
'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/',
|
||||
'info_dict': {
|
||||
'id': '006785-000-A',
|
||||
'description': 'md5:c2f94fdfefc8a280e4dab68ab96ab0ba',
|
||||
'title': 'The Element of Crime',
|
||||
'timestamp': 1696111200,
|
||||
'duration': 5849,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530',
|
||||
'upload_date': '20230930',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'expected_warnings': ['geo restricted']
|
||||
'skip': '404 Not Found',
|
||||
}]
|
||||
|
||||
_GEO_BYPASS = True
|
||||
@ -130,13 +126,27 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
),
|
||||
}
|
||||
|
||||
@staticmethod
def _fix_accessible_subs_locale(subs):
    """Regroup subtitle tracks under language keys that carry a suffix.

    *subs* maps a language to a list of subtitle format dicts. Each
    track is filed under ``join_nonempty(lang, suffix)``, where the
    suffix is chosen from the track URL:

    * ``'acc'`` when the URL ends with ``-MAL.m3u8``;
    * no suffix when the URL contains ``_VO``;
    * ``'forced'`` otherwise (including when the track has no URL).

    Returns a new dict; *subs* itself is not modified.
    """
    regrouped = {}
    for lang, tracks in subs.items():
        for track in tracks:
            track_url = track.get('url') or ''
            if track_url.endswith('-MAL.m3u8'):
                suffix = 'acc'
            elif '_VO' in track_url:
                suffix = None
            else:
                suffix = 'forced'
            locale_key = join_nonempty(lang, suffix)
            regrouped.setdefault(locale_key, []).append(track)
    return regrouped
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
lang = mobj.group('lang') or mobj.group('lang_2')
|
||||
langauge_code = self._LANG_MAP.get(lang)
|
||||
language_code = self._LANG_MAP.get(lang)
|
||||
|
||||
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id)
|
||||
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
|
||||
'x-validated-age': '18',
|
||||
})
|
||||
|
||||
geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {}
|
||||
if geoblocking.get('restrictedArea'):
|
||||
@ -160,16 +170,16 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
m = self._VERSION_CODE_RE.match(stream_version_code)
|
||||
if m:
|
||||
lang_pref = int(''.join('01'[x] for x in (
|
||||
m.group('vlang') == langauge_code, # we prefer voice in the requested language
|
||||
m.group('vlang') == language_code, # we prefer voice in the requested language
|
||||
not m.group('audio_desc'), # and not the audio description version
|
||||
bool(m.group('original_voice')), # but if voice is not in the requested language, at least choose the original voice
|
||||
m.group('sub_lang') == langauge_code, # if subtitles are present, we prefer them in the requested language
|
||||
m.group('sub_lang') == language_code, # if subtitles are present, we prefer them in the requested language
|
||||
not m.group('has_sub'), # but we prefer no subtitles otherwise
|
||||
not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles
|
||||
)))
|
||||
|
||||
short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?')
|
||||
if stream['protocol'].startswith('HLS'):
|
||||
if 'HLS' in stream['protocol']:
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False)
|
||||
for fmt in fmts:
|
||||
@ -181,6 +191,7 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
secondary_formats.extend(fmts)
|
||||
else:
|
||||
formats.extend(fmts)
|
||||
subs = self._fix_accessible_subs_locale(subs)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
elif stream['protocol'] in ('HTTPS', 'RTMP'):
|
||||
@ -236,7 +247,7 @@ class ArteTVEmbedIE(InfoExtractor):
|
||||
'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
|
||||
'upload_date': '20201116',
|
||||
},
|
||||
'skip': 'No video available'
|
||||
'skip': 'No video available',
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||
'only_matching': True,
|
||||
@ -251,7 +262,7 @@ class ArteTVEmbedIE(InfoExtractor):
|
||||
|
||||
|
||||
class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
|
||||
_VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>RC-\d{{6}})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
||||
'only_matching': True,
|
||||
@ -287,7 +298,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
|
||||
|
||||
class ArteTVCategoryIE(ArteTVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
|
||||
_VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/politics-and-society/',
|
||||
'info_dict': {
|
||||
@ -301,7 +312,7 @@ class ArteTVCategoryIE(ArteTVBaseIE):
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (
|
||||
not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
|
||||
not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE))
|
||||
and super().suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -310,12 +321,12 @@ class ArteTVCategoryIE(ArteTVBaseIE):
|
||||
|
||||
items = []
|
||||
for video in re.finditer(
|
||||
r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
|
||||
rf'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/{lang}/videos/[\w/-]+)(?P=q)',
|
||||
webpage):
|
||||
video = video.group('url')
|
||||
if video == url:
|
||||
continue
|
||||
if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
|
||||
if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE)):
|
||||
items.append(video)
|
||||
|
||||
title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None
|
||||
|
@ -1,196 +0,0 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
parse_age_limit,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class AsianCrushBaseIE(InfoExtractor):
    """Shared helpers for the AsianCrush family of sites."""

    _VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
    # Video fields whose URL is searched for Kaltura partner/entry ids, in order
    _KALTURA_KEYS = [
        'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
        'widescreen_thumbnail_url', 'screencap_widescreen',
    ]
    # Per-host suffix inserted after "api" in the API hostname
    _API_SUFFIX = {'retrocrush.tv': '-ott'}

    def _call_api(self, host, endpoint, video_id, query, resource):
        """Query *endpoint* of the API for *host* and return its ``objects`` value."""
        api_url = 'https://api%s.%s/%s' % (self._API_SUFFIX.get(host, ''), host, endpoint)
        response = self._download_json(
            api_url, video_id,
            'Downloading %s JSON metadata' % resource, query=query,
            headers=self.geo_verification_headers())
        return response['objects']

    def _download_object_data(self, host, object_id, resource):
        """Return the first object the ``search`` endpoint reports for *object_id*."""
        found_objects = self._call_api(
            host, 'search', object_id, {'id': object_id}, resource)
        return found_objects[0]

    def _get_object_description(self, obj):
        """Return the stripped long description, falling back to the short one."""
        raw_description = obj.get('long_description') or obj.get('short_description')
        return strip_or_none(raw_description)

    def _parse_video_data(self, video):
        """Turn an API video object into a ``url_transparent`` Kaltura result.

        The Kaltura partner and entry ids are taken from the first field
        listed in ``_KALTURA_KEYS`` whose URL matches the
        ``/p/<partner>/.../entryId/<entry>/`` pattern; both stay ``None``
        if no field matches.
        """
        title = video['name']

        partner_id = entry_id = None
        for key in self._KALTURA_KEYS:
            candidate_url = video.get(key)
            if not candidate_url:
                continue
            found = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', candidate_url)
            if found:
                partner_id, entry_id = found.groups()
                break

        meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or []
        category_names = (category.get('name') for category in meta_categories)
        categories = [name for name in category_names if name]

        show_info = video.get('show_info') or {}

        return {
            '_type': 'url_transparent',
            'url': 'kaltura:%s:%s' % (partner_id, entry_id),
            'ie_key': KalturaIE.ie_key(),
            'id': entry_id,
            'title': title,
            'description': self._get_object_description(video),
            'age_limit': parse_age_limit(video.get('mpaa_rating') or video.get('tv_rating')),
            'categories': categories,
            'series': show_info.get('show_name'),
            'season_number': int_or_none(show_info.get('season_num')),
            'season_id': show_info.get('season_id'),
            'episode_number': int_or_none(show_info.get('episode_num')),
        }
|
||||
|
||||
|
||||
class AsianCrushIE(AsianCrushBaseIE):
|
||||
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
|
||||
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
|
||||
'info_dict': {
|
||||
'id': '1_y4tmjm5r',
|
||||
'ext': 'mp4',
|
||||
'title': 'Women Who Flirt',
|
||||
'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
|
||||
'timestamp': 1496936429,
|
||||
'upload_date': '20170608',
|
||||
'uploader_id': 'craig@crifkin.com',
|
||||
'age_limit': 13,
|
||||
'categories': 'count:5',
|
||||
'duration': 5812,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Resolve a video page URL to its Kaltura-backed result.

    For cocoro.tv the page is downloaded and the ``entry_id`` from its
    ``iEmbedVars`` JSON, when present, replaces the id taken from the
    URL before the video object is looked up.
    """
    host, video_id = self._match_valid_url(url).groups()

    if host == 'cocoro.tv':
        webpage = self._download_webpage(url, video_id)
        raw_embed_vars = self._search_regex(
            r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
            default='{}')
        embed_vars = self._parse_json(raw_embed_vars, video_id, fatal=False) or {}
        # Keep the URL id when the page does not expose an entry id
        video_id = embed_vars.get('entry_id') or video_id

    video_object = self._download_object_data(host, video_id, 'video')
    return self._parse_video_data(video_object)
|
||||
|
||||
|
||||
class AsianCrushPlaylistIE(AsianCrushBaseIE):
|
||||
_VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai',
|
||||
'info_dict': {
|
||||
'id': '6447',
|
||||
'title': 'Fruity Samurai',
|
||||
'description': 'md5:7535174487e4a202d3872a7fc8f2f154',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.retrocrush.tv/series/012355s/true-tears',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_SIZE = 1000000000
|
||||
|
||||
def _fetch_page(self, domain, parent_id, page):
    """Yield parsed video results for one zero-based *page* of a show.

    Requests ``_PAGE_SIZE`` video objects referenced by *parent_id*,
    starting at offset ``page * _PAGE_SIZE``.
    """
    page_query = {
        'max': self._PAGE_SIZE,
        'object_type': 'video',
        'parent_id': parent_id,
        'start': page * self._PAGE_SIZE,
    }
    page_videos = self._call_api(
        domain, 'getreferencedobjects', parent_id, page_query,
        'page %d' % (page + 1))
    yield from map(self._parse_video_data, page_videos)
|
||||
|
||||
def _real_extract(self, url):
    """Return the playlist for a series page.

    Every host except cocoro.tv is served from the API: the show object
    supplies title and description, and entries are fetched on demand
    through ``_fetch_page``. For cocoro.tv the series page itself is
    scraped for links to video pages, and title and description come
    from the page markup and metadata.
    """
    host, playlist_id = self._match_valid_url(url).groups()

    if host != 'cocoro.tv':
        show = self._download_object_data(host, playlist_id, 'show')
        title = show.get('name')
        description = self._get_object_description(show)
        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, host, playlist_id),
            self._PAGE_SIZE)
        return self.playlist_result(entries, playlist_id, title, description)

    webpage = self._download_webpage(url, playlist_id)

    # Only anchors whose class attribute is exactly 'clearfix' become entries
    anchor_re = r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL
    entries = [
        self.url_result(anchor.group('url'), ie=AsianCrushIE.ie_key())
        for anchor in re.finditer(anchor_re, webpage)
        if extract_attributes(anchor.group(0)).get('class') == 'clearfix'
    ]

    # Title sources are tried in order; later ones only run when needed
    title = self._html_search_regex(
        r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
        'title', default=None)
    if not title:
        title = self._og_search_title(webpage, default=None)
    if not title:
        title = self._html_search_meta(
            'twitter:title', webpage, 'title', default=None)
    if not title:
        title = self._html_extract_title(webpage)
    if title:
        # Drop everything from the first '|' separator onwards
        title = re.sub(r'\s*\|\s*.+?$', '', title)

    description = self._og_search_description(webpage, default=None)
    if not description:
        description = self._html_search_meta(
            'twitter:description', webpage, 'description', fatal=False)

    return self.playlist_result(entries, playlist_id, title, description)
|
168
plugins/youtube_download/yt_dlp/extractor/asobichannel.py
Normal file
168
plugins/youtube_download/yt_dlp/extractor/asobichannel.py
Normal file
@ -0,0 +1,168 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AsobiChannelBaseIE(InfoExtractor):
    """Shared constants and metadata mapping for the ASOBI CHANNEL extractors."""

    # Request header carrying the microCMS API key
    _MICROCMS_HEADER = {'X-MICROCMS-API-KEY': 'qRaKehul9AHU8KtL0dnq1OCLKnFec6yrbcz3'}

    def _extract_info(self, metadata):
        """Map a media metadata object onto the common info-dict fields."""
        field_map = {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('body', {clean_html}),
            'thumbnail': ('contents', 'video_thumb', 'url', {url_or_none}),
            'timestamp': ('publishedAt', {parse_iso8601}),
            'modified_timestamp': ('updatedAt', {parse_iso8601}),
            'channel': ('channel', 'name', {str}),
            'channel_id': ('channel', 'id', {str}),
        }
        return traverse_obj(metadata, field_map)
|
||||
|
||||
|
||||
class AsobiChannelIE(AsobiChannelBaseIE):
    """Extractor for individual ASOBI CHANNEL watch pages (VOD and live events)."""

    IE_NAME = 'asobichannel'
    IE_DESC = 'ASOBI CHANNEL'

    _VALID_URL = r'https?://asobichannel\.asobistore\.jp/watch/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://asobichannel.asobistore.jp/watch/1ypp48qd32p',
        'md5': '39df74e872afe032c4eb27b89144fc92',
        'info_dict': {
            'id': '1ypp48qd32p',
            'ext': 'mp4',
            'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
            'description': 'md5:b930bd2199c9b2fd75951ce4aaa7efd2',
            'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/a8e6f84119f54eb9ab4ce16729239905/%E3%82%B5%E3%83%A0%E3%83%8D%20(1).png',
            'timestamp': 1697098247,
            'upload_date': '20231012',
            'modified_timestamp': 1698381162,
            'modified_date': '20231027',
            'channel': 'アイドルマスター',
            'channel_id': 'idolmaster',
        },
    }, {
        'url': 'https://asobichannel.asobistore.jp/watch/redigiwnjzqj',
        'md5': '229fa8fb5c591c75ce8c37a497f113f6',
        'info_dict': {
            'id': 'redigiwnjzqj',
            'ext': 'mp4',
            'title': '【おまけ放送】アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
            'description': 'md5:7d9cd35fb54425a6967822bd564ea2d9',
            'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/20e5c1d6184242eebc2512a5dec59bf0/P1_%E5%8E%9F%E3%81%A3%E3%81%B1%E3%82%B5%E3%83%A0%E3%83%8D.png',
            'modified_timestamp': 1697797125,
            'modified_date': '20231020',
            'timestamp': 1697261769,
            'upload_date': '20231014',
            'channel': 'アイドルマスター',
            'channel_id': 'idolmaster',
        },
    }]

    # Headers sent with every survapi request; filled in by _real_initialize()
    _survapi_header = None

    def _real_initialize(self):
        """Retrieve an API token and store it as a bearer Authorization header."""
        token = self._download_json(
            'https://asobichannel-api.asobistore.jp/api/v1/vspf/token', None,
            note='Retrieving API token')
        self._survapi_header = {'Authorization': f'Bearer {token}'}

    def _process_vod(self, video_id, metadata):
        """Return the HLS formats of a VOD entry.

        The content ID is read from ``metadata['contents']['video_id']`` and
        resolved through the survapi ``get_by_cuid`` endpoint.
        """
        content_id = metadata['contents']['video_id']

        vod_data = self._download_json(
            f'https://survapi.channel.or.jp/proxy/v1/contents/{content_id}/get_by_cuid', video_id,
            headers=self._survapi_header, note='Downloading vod data')

        return {
            'formats': self._extract_m3u8_formats(vod_data['ex_content']['streaming_url'], video_id),
        }

    def _process_live(self, video_id, metadata):
        """Return live status, formats and release timestamp of a LIVE entry.

        Raises ExtractorError when the event reports a player type other
        than 'poster' or 'player'.
        """
        content_id = metadata['contents']['video_id']
        event_data = self._download_json(
            f'https://survapi.channel.or.jp/ex/events/{content_id}?embed=channel', video_id,
            headers=self._survapi_header, note='Downloading event data')

        player_type = traverse_obj(event_data, ('data', 'Player_type', {str}))
        if player_type == 'poster':
            # Only a poster is served, i.e. there is nothing to download yet
            self.raise_no_formats('Live event has not yet started', expected=True)
            live_status = 'is_upcoming'
            formats = []
        elif player_type == 'player':
            live_status = 'is_live'
            formats = self._extract_m3u8_formats(
                event_data['data']['Channel']['Custom_live_url'], video_id, live=True)
        else:
            # f-string so that the offending value is actually interpolated
            raise ExtractorError(f'Unsupported player type {player_type!r}')

        return {
            'release_timestamp': traverse_obj(metadata, ('period', 'start', {parse_iso8601})),
            'live_status': live_status,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Fetch the media metadata and dispatch on its video type (VOD or LIVE)."""
        video_id = self._match_id(url)

        metadata = self._download_json(
            f'https://channel.microcms.io/api/v1/media/{video_id}', video_id,
            headers=self._MICROCMS_HEADER)

        info = self._extract_info(metadata)

        video_type = traverse_obj(metadata, ('contents', 'video_type', 0, {str}))
        if video_type == 'VOD':
            return merge_dicts(info, self._process_vod(video_id, metadata))
        if video_type == 'LIVE':
            return merge_dicts(info, self._process_live(video_id, metadata))

        raise ExtractorError(f'Unexpected video type {video_type!r}')
|
||||
|
||||
|
||||
class AsobiChannelTagURLIE(AsobiChannelBaseIE):
    """Playlist extractor for ASOBI CHANNEL tag pages."""

    IE_NAME = 'asobichannel:tag'
    IE_DESC = 'ASOBI CHANNEL'

    _VALID_URL = r'https?://asobichannel\.asobistore\.jp/tag/(?P<id>[a-z0-9-_]+)'
    _TESTS = [{
        'url': 'https://asobichannel.asobistore.jp/tag/bjhh-nbcja',
        'info_dict': {
            'id': 'bjhh-nbcja',
            'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://asobichannel.asobistore.jp/tag/hvm5qw3c6od',
        'info_dict': {
            'id': 'hvm5qw3c6od',
            'title': 'アイマスMOIW2023ラジオ',
        },
        'playlist_mincount': 13,
    }]

    def _tag_entries(self, media):
        """Yield one url-type entry per item of ``media['contents']`` that has an id."""
        for item in traverse_obj(media, ('contents', lambda _, v: v['id'])):
            watch_url = f'https://asobichannel.asobistore.jp/watch/{item["id"]}'
            yield {
                '_type': 'url',
                'url': watch_url,
                'ie_key': AsobiChannelIE.ie_key(),
                **self._extract_info(item),
            }

    def _real_extract(self, url):
        """Build a playlist from all media items carrying the tag in the URL."""
        tag_id = self._match_id(url)

        # The playlist title comes from the page's Next.js data (non-fatal)
        page = self._download_webpage(url, tag_id)
        nextjs_data = self._search_nextjs_data(page, tag_id, fatal=False)
        playlist_title = traverse_obj(
            nextjs_data, ('props', 'pageProps', 'data', 'name', {str}))

        tagged_media = self._download_json(
            f'https://channel.microcms.io/api/v1/media?limit=999&filters=(tag[contains]{tag_id})',
            tag_id, headers=self._MICROCMS_HEADER)

        return self.playlist_result(self._tag_entries(tagged_media), tag_id, playlist_title)
|
155
plugins/youtube_download/yt_dlp/extractor/asobistage.py
Normal file
155
plugins/youtube_download/yt_dlp/extractor/asobistage.py
Normal file
@ -0,0 +1,155 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import str_or_none, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AsobiStageIE(InfoExtractor):
    """Playlist extractor for ASOBISTAGE event archive and player pages."""

    IE_DESC = 'ASOBISTAGE (アソビステージ)'
    _VALID_URL = r'https?://asobistage\.asobistore\.jp/event/(?P<id>(?P<event>\w+)/(?P<type>archive|player)/(?P<slug>\w+))(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://asobistage.asobistore.jp/event/315passionhour_2022summer/archive/frame',
        'info_dict': {
            'id': '315passionhour_2022summer/archive/frame',
            'title': '315プロダクションプレゼンツ 315パッションアワー!!!',
            'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': 'edff52f2',
                'ext': 'mp4',
                'title': '315passion_FRAME_only',
                'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
            },
        }],
    }, {
        'url': 'https://asobistage.asobistore.jp/event/idolmaster_idolworld2023_goods/archive/live',
        'info_dict': {
            'id': 'idolmaster_idolworld2023_goods/archive/live',
            'title': 'md5:378510b6e830129d505885908bd6c576',
            'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '3aef7110',
                'ext': 'mp4',
                'title': 'asobistore_station_1020_serverREC',
                'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
            },
        }],
    }, {
        'url': 'https://asobistage.asobistore.jp/event/sidem_fclive_bpct/archive/premium_hc',
        'playlist_count': 4,
        'info_dict': {
            'id': 'sidem_fclive_bpct/archive/premium_hc',
            'title': '315 Production presents F@NTASTIC COMBINATION LIVE ~BRAINPOWER!!~/~CONNECTIME!!!!~',
            'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
        },
    }, {
        'url': 'https://asobistage.asobistore.jp/event/ijigenfes_utagassen/player/day1',
        'only_matching': True,
    }]

    _API_HOST = 'https://asobistage-api.asobistore.jp'
    # Headers for survapi requests; the Authorization entry is added in _real_initialize()
    _HEADERS = {}
    _is_logged_in = False

    @functools.cached_property
    def _owned_tickets(self):
        """Set of digital product IDs found in the purchase and redemption histories.

        Empty when not logged in. If the API answers 'notlogin', the login
        flag is cleared and the remaining history requests are not made.
        """
        tickets = set()
        if self._is_logged_in:
            history_endpoints = (
                ('api/v1/purchase_history/list', 'ticket purchase history'),
                ('api/v1/serialcode/list', 'redemption history'),
            )
            for path, name in history_endpoints:
                response = self._download_json(
                    f'{self._API_HOST}/{path}', None, f'Downloading {name}',
                    f'Unable to download {name}', expected_status=400)
                error = traverse_obj(response, ('payload', 'error_message'), 'error')
                if error == 'notlogin':
                    self._is_logged_in = False
                    break
                product_ids = traverse_obj(
                    response, ('payload', 'value', ..., 'digital_product_id', {str_or_none}))
                tickets.update(product_ids)

        return tickets

    def _get_available_channel_id(self, channel):
        """Return the channel's vspf ID if it can be viewed, otherwise None.

        A channel is viewable when it has a view right with rights_type_id 6
        or when one of its ticket/serial code products is among the owned
        tickets. A warning is reported for channels without a matching ticket.
        """
        vspf_id = traverse_obj(channel, ('chennel_vspf_id', {str}))
        if not vspf_id:
            return None

        # if rights_type_id == 6, then 'No conditions (no login required - non-members are OK)'
        unrestricted = traverse_obj(channel, ('viewrights', lambda _, v: v['rights_type_id'] == 6))
        if unrestricted:
            return vspf_id

        accepted_tickets = traverse_obj(channel, (
            'viewrights', ..., ('tickets', 'serialcodes'), ..., 'digital_product_id', {str_or_none}))
        if self._owned_tickets.intersection(accepted_tickets):
            return vspf_id

        self.report_warning(
            f'You are not a ticketholder for "{channel.get("channel_name") or channel_id}"'
            if False else f'You are not a ticketholder for "{channel.get("channel_name") or vspf_id}"')
        return None

    def _real_initialize(self):
        """Mark the session as logged in if API cookies exist and fetch a bearer token."""
        if self._get_cookies(self._API_HOST):
            self._is_logged_in = True

        token_url = f'{self._API_HOST}/api/v1/vspf/token'
        token = self._download_json(token_url, None, 'Getting token', 'Unable to get token')
        self._HEADERS['Authorization'] = f'Bearer {token}'

    def _fetch_channel_data(self, video_type, channel_id):
        """Download one channel's info and return its m3u8_url, title and thumbnail."""
        if video_type == 'archives':
            archive_json = self._download_json(
                f'https://survapi.channel.or.jp/proxy/v1/contents/{channel_id}/get_by_cuid', channel_id,
                'Getting archive channel info', 'Unable to get archive channel info', fatal=False,
                headers=self._HEADERS)
            return traverse_obj(archive_json, ('ex_content', {
                'm3u8_url': 'streaming_url',
                'title': 'title',
                'thumbnail': ('thumbnail', 'url'),
            }))

        # video_type == 'broadcasts'
        live_json = self._download_json(
            f'https://survapi.channel.or.jp/ex/events/{channel_id}', channel_id,
            'Getting live channel info', 'Unable to get live channel info', fatal=False,
            headers=self._HEADERS, query={'embed': 'channel'})
        return traverse_obj(live_json, ('data', {
            'm3u8_url': ('Channel', 'Custom_live_url'),
            'title': 'Name',
            'thumbnail': 'Poster_url',
        }))

    def _real_extract(self, url):
        """Return a playlist with one entry per viewable channel of the event page."""
        webpage, urlh = self._download_webpage_handle(url, self._match_id(url))
        # Match against the final URL of the response rather than the input URL
        video_id, event, type_, slug = self._match_valid_url(urlh.url).group('id', 'event', 'type', 'slug')
        type_to_listing = {'archive': 'archives', 'player': 'broadcasts'}
        video_type = type_to_listing[type_]

        nextjs_data = self._search_nextjs_data(webpage, video_id, default={})
        event_data = traverse_obj(nextjs_data, ('props', 'pageProps', 'eventCMSData', {
            'title': ('event_name', {str}),
            'thumbnail': ('event_thumbnail_image', {url_or_none}),
        }))

        channel_list = self._download_json(
            f'https://asobistage.asobistore.jp/cdn/v101/events/{event}/{video_type}.json',
            video_id, 'Getting channel list', 'Unable to get channel list')
        available_channels = traverse_obj(channel_list, (
            video_type, lambda _, v: v['broadcast_slug'] == slug,
            'channels', lambda _, v: v['chennel_vspf_id'] != '00000'))

        entries = []
        for channel_id in traverse_obj(available_channels, (..., {self._get_available_channel_id})):
            channel_data = self._fetch_channel_data(video_type, channel_id)
            entries.append({
                'id': channel_id,
                'title': channel_data.get('title'),
                'formats': self._extract_m3u8_formats(channel_data.get('m3u8_url'), channel_id, fatal=False),
                'is_live': video_type == 'broadcasts',
                'thumbnail': url_or_none(channel_data.get('thumbnail')),
            })

        if not (self._is_logged_in or entries):
            self.raise_login_required()

        return self.playlist_result(entries, video_id, **event_data)
|
@ -1,104 +1,152 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AtresPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
|
||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/(?:[^/?#]+/){4}(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
|
||||
_NETRC_MACHINE = 'atresplayer'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
|
||||
'info_dict': {
|
||||
'id': '5d4aa2c57ed1a88fc715a615',
|
||||
'ext': 'mp4',
|
||||
'title': 'Capítulo 7: Asuntos pendientes',
|
||||
'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
|
||||
'duration': 3413,
|
||||
},
|
||||
'skip': 'This video is only available for registered users'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.atresplayer.com/lasexta/programas/el-objetivo/clips/mbappe-describe-como-entrenador-a-carlo-ancelotti-sabe-cuando-tiene-que-ser-padre-jefe-amigo-entrenador_67f2dfb2fb6ab0e4c7203849/',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': '67f2dfb2fb6ab0e4c7203849',
|
||||
'display_id': 'md5:c203f8d4e425ed115ba56a1c6e4b3e6c',
|
||||
'title': 'Mbappé describe como entrenador a Carlo Ancelotti: "Sabe cuándo tiene que ser padre, jefe, amigo, entrenador..."',
|
||||
'channel': 'laSexta',
|
||||
'duration': 31,
|
||||
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/06/B02DBE1E-D59B-4683-8404-1A9595D15269/1920x1080.jpg',
|
||||
'tags': ['Entrevista informativa', 'Actualidad', 'Debate informativo', 'Política', 'Economía', 'Sociedad', 'Cara a cara', 'Análisis', 'Más periodismo'],
|
||||
'series': 'El Objetivo',
|
||||
'season': 'Temporada 12',
|
||||
'timestamp': 1743970079,
|
||||
'upload_date': '20250406',
|
||||
},
|
||||
{
|
||||
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.atresplayer.com/antena3/programas/el-hormiguero/clips/revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero_67f836baa4a5b0e4147ca59a/',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': '67f836baa4a5b0e4147ca59a',
|
||||
'display_id': 'revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero',
|
||||
'title': 'Revive la entrevista completa a Miguel Bosé en El Hormiguero',
|
||||
'description': 'md5:c6d2b591408d45a7bc2986dfb938eb72',
|
||||
'channel': 'Antena 3',
|
||||
'duration': 2556,
|
||||
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/10/9076395F-F1FD-48BE-9F18-540DBA10EBAD/1920x1080.jpg',
|
||||
'tags': ['Entrevista', 'Variedades', 'Humor', 'Entretenimiento', 'Te sigo', 'Buen rollo', 'Cara a cara'],
|
||||
'series': 'El Hormiguero ',
|
||||
'season': 'Temporada 14',
|
||||
'timestamp': 1744320111,
|
||||
'upload_date': '20250410',
|
||||
},
|
||||
{
|
||||
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.atresplayer.com/flooxer/series/biara-proyecto-lazarus/temporada-1/capitulo-3-supervivientes_67a6038b64ceca00070f4f69/',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': '67a6038b64ceca00070f4f69',
|
||||
'display_id': 'capitulo-3-supervivientes',
|
||||
'title': 'Capítulo 3: Supervivientes',
|
||||
'description': 'md5:65b231f20302f776c2b0dd24594599a1',
|
||||
'channel': 'Flooxer',
|
||||
'duration': 1196,
|
||||
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages01/2025/02/14/17CF90D3-FE67-40C5-A941-7825B3E13992/1920x1080.jpg',
|
||||
'tags': ['Juvenil', 'Terror', 'Piel de gallina', 'Te sigo', 'Un break', 'Del tirón'],
|
||||
'series': 'BIARA: Proyecto Lázarus',
|
||||
'season': 'Temporada 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 3',
|
||||
'episode_number': 3,
|
||||
'timestamp': 1743095191,
|
||||
'upload_date': '20250327',
|
||||
},
|
||||
]
|
||||
}, {
|
||||
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_BASE = 'https://api.atresplayer.com/'
|
||||
|
||||
def _handle_error(self, e, code):
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == code:
|
||||
error = self._parse_json(e.cause.response.read(), None)
|
||||
if error.get('error') == 'required_registered':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
raise
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._request_webpage(
|
||||
self._API_BASE + 'login', None, 'Downloading login page')
|
||||
|
||||
try:
|
||||
target_url = self._download_json(
|
||||
'https://account.atresmedia.com/api/login', None,
|
||||
'Logging in', headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
}, data=urlencode_postdata({
|
||||
self._download_webpage(
|
||||
'https://account.atresplayer.com/auth/v1/login', None,
|
||||
'Logging in', 'Failed to log in', data=urlencode_postdata({
|
||||
'username': username,
|
||||
'password': password,
|
||||
}))['targetUrl']
|
||||
}))
|
||||
except ExtractorError as e:
|
||||
self._handle_error(e, 400)
|
||||
|
||||
self._request_webpage(target_url, None, 'Following Target URL')
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||
raise ExtractorError('Invalid username and/or password', expected=True)
|
||||
raise
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, video_id = self._match_valid_url(url).groups()
|
||||
|
||||
try:
|
||||
episode = self._download_json(
|
||||
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
|
||||
except ExtractorError as e:
|
||||
self._handle_error(e, 403)
|
||||
metadata_url = self._download_json(
|
||||
self._API_BASE + 'client/v1/url', video_id, 'Downloading API endpoint data',
|
||||
query={'href': urllib.parse.urlparse(url).path})['href']
|
||||
metadata = self._download_json(metadata_url, video_id)
|
||||
|
||||
title = episode['titulo']
|
||||
try:
|
||||
video_data = self._download_json(metadata['urlVideo'], video_id, 'Downloading video data')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
error = self._parse_json(e.cause.response.read(), None)
|
||||
if error.get('error') == 'required_registered':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
raise
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for source in episode.get('sources', []):
|
||||
src = source.get('src')
|
||||
if not src:
|
||||
continue
|
||||
for source in traverse_obj(video_data, ('sources', lambda _, v: url_or_none(v['src']))):
|
||||
src_url = source['src']
|
||||
src_type = source.get('type')
|
||||
if src_type == 'application/vnd.apple.mpegurl':
|
||||
formats, subtitles = self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
elif src_type == 'application/dash+xml':
|
||||
formats, subtitles = self._extract_mpd_formats(
|
||||
src, video_id, mpd_id='dash', fatal=False)
|
||||
|
||||
heartbeat = episode.get('heartbeat') or {}
|
||||
omniture = episode.get('omniture') or {}
|
||||
get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
|
||||
if src_type in ('application/vnd.apple.mpegurl', 'application/hls+legacy', 'application/hls+hevc'):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
src_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
elif src_type in ('application/dash+xml', 'application/dash+hevc'):
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
src_url, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
continue
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'display_id': display_id,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': episode.get('descripcion'),
|
||||
'thumbnail': episode.get('imgPoster'),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
'formats': formats,
|
||||
'channel': get_meta('channel'),
|
||||
'season': get_meta('season'),
|
||||
'episode_number': int_or_none(get_meta('episodeNumber')),
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(video_data, {
|
||||
'title': ('titulo', {str}),
|
||||
'description': ('descripcion', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'thumbnail': ('imgPoster', {url_or_none}, {lambda v: f'{v}1920x1080.jpg'}),
|
||||
'age_limit': ('ageRating', {parse_age_limit}),
|
||||
}),
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'tags': ('tags', ..., 'title', {str}),
|
||||
'age_limit': ('ageRating', {parse_age_limit}),
|
||||
'series': ('format', 'title', {str}),
|
||||
'season': ('currentSeason', 'title', {str}),
|
||||
'season_number': ('currentSeason', 'seasonNumber', {int_or_none}),
|
||||
'episode_number': ('numberOfEpisode', {int_or_none}),
|
||||
'timestamp': ('publicationDate', {int_or_none(scale=1000)}),
|
||||
'channel': ('channel', 'title', {str}),
|
||||
}),
|
||||
}
|
||||
|
@ -12,7 +12,7 @@ class AtScaleConfEventIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'data-scale-spring-2022',
|
||||
'title': 'Data @Scale Spring 2022',
|
||||
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
|
||||
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://atscaleconference.com/events/video-scale-2021/',
|
||||
@ -20,15 +20,15 @@ class AtScaleConfEventIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'video-scale-2021',
|
||||
'title': 'Video @Scale 2021',
|
||||
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
|
||||
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
return self.playlist_from_matches(
|
||||
re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage),
|
||||
ie='Generic', playlist_id=id,
|
||||
ie='Generic', playlist_id=playlist_id,
|
||||
title=self._og_search_title(webpage), description=self._og_search_description(webpage))
|
||||
|
@ -1,53 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class ATTTechChannelIE(InfoExtractor):
    """Extractor for AT&T Tech Channel play-video pages (RTMP streams)."""

    _VALID_URL = r'https?://techchannel\.att\.com/play-video\.cfm/([^/]+/)*(?P<id>.+)'
    _TEST = {
        'url': 'http://techchannel.att.com/play-video.cfm/2014/1/27/ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
        'info_dict': {
            'id': '11316',
            'display_id': 'ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
            'ext': 'flv',
            'title': 'AT&T Archives : The UNIX System: Making Computers Easier to Use',
            'description': 'A 1982 film about UNIX is the foundation for software in use around Bell Labs and AT&T.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20140127',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Scrape the RTMP URL, media ID and OpenGraph metadata from the page."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        # The stream URL is required; the numeric media ID is optional
        rtmp_url = self._search_regex(
            r"url\s*:\s*'(rtmp://[^']+)'", page, 'video URL')
        media_id = self._search_regex(
            r'mediaid\s*=\s*(\d+)', page, 'video id', fatal=False)

        return {
            'id': media_id,
            'display_id': display_id,
            'url': rtmp_url,
            'ext': 'flv',
            'title': self._og_search_title(page),
            'description': self._og_search_description(page),
            'thumbnail': self._og_search_thumbnail(page),
            'upload_date': unified_strdate(self._search_regex(
                r'[Rr]elease\s+date:\s*(\d{1,2}/\d{1,2}/\d{4})',
                page, 'upload date', fatal=False), False),
        }
|
@ -1,11 +1,11 @@
|
||||
import datetime
|
||||
import datetime as dt
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
jwt_encode_hs256,
|
||||
try_get,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@ -19,7 +19,7 @@ class ATVAtIE(InfoExtractor):
|
||||
'id': 'v-ce9cgn1e70n5-1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
|
||||
'only_matching': True,
|
||||
@ -66,14 +66,14 @@ class ATVAtIE(InfoExtractor):
|
||||
video_id=video_id)
|
||||
|
||||
video_title = json_data['views']['default']['page']['title']
|
||||
contentResource = json_data['views']['default']['page']['contentResource']
|
||||
content_id = contentResource[0]['id']
|
||||
content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']}
|
||||
for id, content in enumerate(contentResource)]
|
||||
content_resource = json_data['views']['default']['page']['contentResource']
|
||||
content_id = content_resource[0]['id']
|
||||
content_ids = [{'id': id_, 'subclip_start': content['start'], 'subclip_end': content['end']}
|
||||
for id_, content in enumerate(content_resource)]
|
||||
|
||||
time_of_request = datetime.datetime.now()
|
||||
not_before = time_of_request - datetime.timedelta(minutes=5)
|
||||
expire = time_of_request + datetime.timedelta(minutes=5)
|
||||
time_of_request = dt.datetime.now()
|
||||
not_before = time_of_request - dt.timedelta(minutes=5)
|
||||
expire = time_of_request + dt.timedelta(minutes=5)
|
||||
payload = {
|
||||
'content_ids': {
|
||||
content_id: content_ids,
|
||||
@ -87,17 +87,17 @@ class ATVAtIE(InfoExtractor):
|
||||
videos = self._download_json(
|
||||
'https://vas-v4.p7s1video.net/4.0/getsources',
|
||||
content_id, 'Downloading videos JSON', query={
|
||||
'token': jwt_token.decode('utf-8')
|
||||
'token': jwt_token.decode('utf-8'),
|
||||
})
|
||||
|
||||
video_id, videos_data = list(videos['data'].items())[0]
|
||||
video_id, videos_data = next(iter(videos['data'].items()))
|
||||
error_msg = try_get(videos_data, lambda x: x['error']['title'])
|
||||
if error_msg == 'Geo check failed':
|
||||
self.raise_geo_restricted(error_msg)
|
||||
elif error_msg:
|
||||
raise ExtractorError(error_msg)
|
||||
entries = [
|
||||
self._extract_video_info(url, contentResource[video['id']], video)
|
||||
self._extract_video_info(url, content_resource[video['id']], video)
|
||||
for video in videos_data]
|
||||
|
||||
return {
|
||||
|
@ -19,7 +19,7 @@ class AudiMediaIE(InfoExtractor):
|
||||
'timestamp': 1448354940,
|
||||
'duration': 74022,
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
|
||||
'only_matching': True,
|
||||
@ -73,7 +73,7 @@ class AudiMediaIE(InfoExtractor):
|
||||
bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
|
||||
if bitrate:
|
||||
f.update({
|
||||
'format_id': 'http-%s' % bitrate,
|
||||
'format_id': f'http-{bitrate}',
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
|
@ -15,7 +15,7 @@ class AudioBoomIE(InfoExtractor):
|
||||
'duration': 4000.99,
|
||||
'uploader': 'Sue Perkins: An hour or so with...',
|
||||
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
|
||||
}
|
||||
},
|
||||
}, { # Direct mp3-file link
|
||||
'url': 'https://audioboom.com/posts/8128496.mp3',
|
||||
'md5': 'e329edf304d450def95c7f86a9165ee1',
|
||||
@ -27,7 +27,7 @@ class AudioBoomIE(InfoExtractor):
|
||||
'duration': 1689.7,
|
||||
'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race',
|
||||
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
|
||||
'only_matching': True,
|
||||
|
@ -9,7 +9,7 @@ class AudiodraftBaseIE(InfoExtractor):
|
||||
headers={
|
||||
'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
}, data=f'id={player_entry_id}'.encode('utf-8'))
|
||||
}, data=f'id={player_entry_id}'.encode())
|
||||
|
||||
return {
|
||||
'id': str(data_json['entry_id']),
|
||||
@ -65,9 +65,10 @@ class AudiodraftCustomIE(AudiodraftBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
player_entry_id = self._search_regex(r'playAudio\(\'(player_entry_\d+)\'\);', webpage, id, 'play entry id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_entry_id = self._search_regex(
|
||||
r'playAudio\(\'(player_entry_\d+)\'\);', webpage, video_id, 'play entry id')
|
||||
return self._audiodraft_extract_from_id(player_entry_id)
|
||||
|
||||
|
||||
@ -89,5 +90,5 @@ class AudiodraftGenericIE(AudiodraftBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
return self._audiodraft_extract_from_id(f'player_entry_{id}')
|
||||
video_id = self._match_id(url)
|
||||
return self._audiodraft_extract_from_id(f'player_entry_{video_id}')
|
||||
|
@ -3,7 +3,6 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .soundcloud import SoundcloudIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
url_basename,
|
||||
@ -22,8 +21,8 @@ class AudiomackIE(InfoExtractor):
|
||||
'id': '310086',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'Roosh Williams',
|
||||
'title': 'Extraordinary'
|
||||
}
|
||||
'title': 'Extraordinary',
|
||||
},
|
||||
},
|
||||
# audiomack wrapper around soundcloud song
|
||||
# Needs new test URL.
|
||||
@ -56,7 +55,7 @@ class AudiomackIE(InfoExtractor):
|
||||
|
||||
# API is inconsistent with errors
|
||||
if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
|
||||
raise ExtractorError('Invalid url %s' % url)
|
||||
raise ExtractorError(f'Invalid url {url}')
|
||||
|
||||
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
|
||||
# if so, pass the work off to the soundcloud extractor
|
||||
@ -64,7 +63,7 @@ class AudiomackIE(InfoExtractor):
|
||||
return self.url_result(api_response['url'], SoundcloudIE.ie_key())
|
||||
|
||||
return {
|
||||
'id': compat_str(api_response.get('id', album_url_tag)),
|
||||
'id': str(api_response.get('id', album_url_tag)),
|
||||
'uploader': api_response.get('artist'),
|
||||
'title': api_response.get('title'),
|
||||
'url': api_response['url'],
|
||||
@ -82,8 +81,8 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
'info_dict':
|
||||
{
|
||||
'id': '812251',
|
||||
'title': 'Tha Tour: Part 2 (Official Mixtape)'
|
||||
}
|
||||
'title': 'Tha Tour: Part 2 (Official Mixtape)',
|
||||
},
|
||||
},
|
||||
# Album playlist ripped from fakeshoredrive with no metadata
|
||||
{
|
||||
@ -98,16 +97,16 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
'id': '837576',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'Lil Herb a.k.a. G Herbo',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)',
|
||||
'id': '837580',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'Lil Herb a.k.a. G Herbo',
|
||||
}
|
||||
},
|
||||
}],
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -123,12 +122,12 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
api_response = self._download_json(
|
||||
'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
|
||||
% (album_url_tag, track_no, time.time()), album_url_tag,
|
||||
note='Querying song information (%d)' % (track_no + 1))
|
||||
note=f'Querying song information ({track_no + 1})')
|
||||
|
||||
# Total failure, only occurs when url is totally wrong
|
||||
# Won't happen in middle of valid playlist (next case)
|
||||
if 'url' not in api_response or 'error' in api_response:
|
||||
raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url))
|
||||
raise ExtractorError(f'Invalid url for track {track_no} of album url {url}')
|
||||
# URL is good but song id doesn't exist - usually means end of playlist
|
||||
elif not api_response['url']:
|
||||
break
|
||||
@ -136,10 +135,10 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
# Pull out the album metadata and add to result (if it exists)
|
||||
for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
|
||||
if apikey in api_response and resultkey not in result:
|
||||
result[resultkey] = compat_str(api_response[apikey])
|
||||
result[resultkey] = str(api_response[apikey])
|
||||
song_id = url_basename(api_response['url']).rpartition('.')[0]
|
||||
result['entries'].append({
|
||||
'id': compat_str(api_response.get('id', song_id)),
|
||||
'id': str(api_response.get('id', song_id)),
|
||||
'uploader': api_response.get('artist'),
|
||||
'title': api_response.get('title', song_id),
|
||||
'url': api_response['url'],
|
||||
|
@ -1,7 +1,7 @@
|
||||
import random
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str, compat_urllib_parse_unquote
|
||||
from ..utils import ExtractorError, str_or_none, try_get
|
||||
|
||||
|
||||
@ -15,13 +15,13 @@ class AudiusBaseIE(InfoExtractor):
|
||||
if response_data is not None:
|
||||
return response_data
|
||||
if len(response) == 1 and 'message' in response:
|
||||
raise ExtractorError('API error: %s' % response['message'],
|
||||
raise ExtractorError('API error: {}'.format(response['message']),
|
||||
expected=True)
|
||||
raise ExtractorError('Unexpected API response')
|
||||
|
||||
def _select_api_base(self):
|
||||
"""Selecting one of the currently available API hosts"""
|
||||
response = super(AudiusBaseIE, self)._download_json(
|
||||
response = super()._download_json(
|
||||
'https://api.audius.co/', None,
|
||||
note='Requesting available API hosts',
|
||||
errnote='Unable to request available API hosts')
|
||||
@ -41,8 +41,8 @@ class AudiusBaseIE(InfoExtractor):
|
||||
anything from this link, since the Audius API won't be able to resolve
|
||||
this url
|
||||
"""
|
||||
url = compat_urllib_parse_unquote(url)
|
||||
title = compat_urllib_parse_unquote(title)
|
||||
url = urllib.parse.unquote(url)
|
||||
title = urllib.parse.unquote(title)
|
||||
if '/' in title or '%2F' in title:
|
||||
fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
|
||||
return url.replace(title, fixed_title)
|
||||
@ -54,19 +54,19 @@ class AudiusBaseIE(InfoExtractor):
|
||||
if self._API_BASE is None:
|
||||
self._select_api_base()
|
||||
try:
|
||||
response = super(AudiusBaseIE, self)._download_json(
|
||||
'%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note,
|
||||
response = super()._download_json(
|
||||
f'{self._API_BASE}{self._API_V}{path}', item_id, note=note,
|
||||
errnote=errnote, expected_status=expected_status)
|
||||
except ExtractorError as exc:
|
||||
# some of Audius API hosts may not work as expected and return HTML
|
||||
if 'Failed to parse JSON' in compat_str(exc):
|
||||
if 'Failed to parse JSON' in str(exc):
|
||||
raise ExtractorError('An error occurred while receiving data. Try again',
|
||||
expected=True)
|
||||
raise exc
|
||||
return self._get_response_data(response)
|
||||
|
||||
def _resolve_url(self, url, item_id):
|
||||
return self._api_request('/resolve?url=%s' % url, item_id,
|
||||
return self._api_request(f'/resolve?url={url}', item_id,
|
||||
expected_status=404)
|
||||
|
||||
|
||||
@ -91,7 +91,7 @@ class AudiusIE(AudiusBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
# Regular track
|
||||
@ -109,14 +109,14 @@ class AudiusIE(AudiusBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
}
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
_ARTWORK_MAP = {
|
||||
"150x150": 150,
|
||||
"480x480": 480,
|
||||
"1000x1000": 1000
|
||||
'150x150': 150,
|
||||
'480x480': 480,
|
||||
'1000x1000': 1000,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -130,7 +130,7 @@ class AudiusIE(AudiusBaseIE):
|
||||
else: # API link
|
||||
title = None
|
||||
# uploader = None
|
||||
track_data = self._api_request('/tracks/%s' % track_id, track_id)
|
||||
track_data = self._api_request(f'/tracks/{track_id}', track_id)
|
||||
|
||||
if not isinstance(track_data, dict):
|
||||
raise ExtractorError('Unexpected API response')
|
||||
@ -144,7 +144,7 @@ class AudiusIE(AudiusBaseIE):
|
||||
if isinstance(artworks_data, dict):
|
||||
for quality_key, thumbnail_url in artworks_data.items():
|
||||
thumbnail = {
|
||||
"url": thumbnail_url
|
||||
'url': thumbnail_url,
|
||||
}
|
||||
quality_code = self._ARTWORK_MAP.get(quality_key)
|
||||
if quality_code is not None:
|
||||
@ -154,12 +154,12 @@ class AudiusIE(AudiusBaseIE):
|
||||
return {
|
||||
'id': track_id,
|
||||
'title': track_data.get('title', title),
|
||||
'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id),
|
||||
'url': f'{self._API_BASE}/v1/tracks/{track_id}/stream',
|
||||
'ext': 'mp3',
|
||||
'description': track_data.get('description'),
|
||||
'duration': track_data.get('duration'),
|
||||
'track': track_data.get('title'),
|
||||
'artist': try_get(track_data, lambda x: x['user']['name'], compat_str),
|
||||
'artist': try_get(track_data, lambda x: x['user']['name'], str),
|
||||
'genre': track_data.get('genre'),
|
||||
'thumbnails': thumbnails,
|
||||
'view_count': track_data.get('play_count'),
|
||||
@ -175,11 +175,11 @@ class AudiusTrackIE(AudiusIE): # XXX: Do not subclass from concrete IE
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'audius:9RWlo',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
@ -207,7 +207,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
|
||||
if not track_id:
|
||||
raise ExtractorError('Unable to get track ID from playlist')
|
||||
entries.append(self.url_result(
|
||||
'audius:%s' % track_id,
|
||||
f'audius:{track_id}',
|
||||
ie=AudiusTrackIE.ie_key(), video_id=track_id))
|
||||
return entries
|
||||
|
||||
@ -231,7 +231,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
|
||||
raise ExtractorError('Unable to get playlist ID')
|
||||
|
||||
playlist_tracks = self._api_request(
|
||||
'/playlists/%s/tracks' % playlist_id,
|
||||
f'/playlists/{playlist_id}/tracks',
|
||||
title, note='Downloading playlist tracks metadata',
|
||||
errnote='Unable to download playlist tracks metadata')
|
||||
if not isinstance(playlist_tracks, list):
|
||||
@ -267,5 +267,5 @@ class AudiusProfileIE(AudiusPlaylistIE): # XXX: Do not subclass from concrete I
|
||||
profile_audius_id = _profile_data[0]['id']
|
||||
profile_bio = _profile_data[0].get('bio')
|
||||
|
||||
api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id)
|
||||
api_call = self._api_request(f'/full/users/handle/{profile_id}/tracks', profile_id)
|
||||
return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)
|
||||
|
@ -1,10 +1,7 @@
|
||||
import base64
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
int_or_none,
|
||||
@ -22,14 +19,14 @@ class AWAANIE(InfoExtractor):
|
||||
show_id, video_id, season_id = self._match_valid_url(url).groups()
|
||||
if video_id and int(video_id) > 0:
|
||||
return self.url_result(
|
||||
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo')
|
||||
f'http://awaan.ae/media/{video_id}', 'AWAANVideo')
|
||||
elif season_id and int(season_id) > 0:
|
||||
return self.url_result(smuggle_url(
|
||||
'http://awaan.ae/program/season/%s' % season_id,
|
||||
f'http://awaan.ae/program/season/{season_id}',
|
||||
{'show_id': show_id}), 'AWAANSeason')
|
||||
else:
|
||||
return self.url_result(
|
||||
'http://awaan.ae/program/%s' % show_id, 'AWAANSeason')
|
||||
f'http://awaan.ae/program/{show_id}', 'AWAANSeason')
|
||||
|
||||
|
||||
class AWAANBaseIE(InfoExtractor):
|
||||
@ -75,11 +72,11 @@ class AWAANVideoIE(AWAANBaseIE):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
|
||||
f'http://admin.mangomolo.com/analytics/index.php/plus/video?id={video_id}',
|
||||
video_id, headers={'Origin': 'http://awaan.ae'})
|
||||
info = self._parse_video_data(video_data, video_id, False)
|
||||
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + urllib.parse.urlencode({
|
||||
'id': video_data['id'],
|
||||
'user_id': video_data['user_id'],
|
||||
'signature': video_data['signature'],
|
||||
@ -117,11 +114,11 @@ class AWAANLiveIE(AWAANBaseIE):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
channel_data = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id,
|
||||
f'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id={channel_id}',
|
||||
channel_id, headers={'Origin': 'http://awaan.ae'})
|
||||
info = self._parse_video_data(channel_data, channel_id, True)
|
||||
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + urllib.parse.urlencode({
|
||||
'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
|
||||
'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
|
||||
'signature': channel_data['signature'],
|
||||
@ -159,7 +156,7 @@ class AWAANSeasonIE(InfoExtractor):
|
||||
show_id = smuggled_data.get('show_id')
|
||||
if show_id is None:
|
||||
season = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
|
||||
f'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id={season_id}',
|
||||
season_id, headers={'Origin': 'http://awaan.ae'})
|
||||
show_id = season['id']
|
||||
data['show_id'] = show_id
|
||||
@ -167,7 +164,7 @@ class AWAANSeasonIE(InfoExtractor):
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/show',
|
||||
show_id, data=urlencode_postdata(data), headers={
|
||||
'Origin': 'http://awaan.ae',
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
if not season_id:
|
||||
season_id = show['default_season']
|
||||
@ -177,8 +174,8 @@ class AWAANSeasonIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
for video in show['videos']:
|
||||
video_id = compat_str(video['id'])
|
||||
video_id = str(video['id'])
|
||||
entries.append(self.url_result(
|
||||
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id))
|
||||
f'http://awaan.ae/media/{video_id}', 'AWAANVideo', video_id))
|
||||
|
||||
return self.playlist_result(entries, season_id, title)
|
||||
|
@ -1,9 +1,9 @@
|
||||
import datetime
|
||||
import datetime as dt
|
||||
import hashlib
|
||||
import hmac
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
|
||||
@ -12,26 +12,26 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
|
||||
def _aws_execute_api(self, aws_dict, video_id, query=None):
|
||||
query = query or {}
|
||||
amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
|
||||
amz_date = dt.datetime.now(dt.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
|
||||
date = amz_date[:8]
|
||||
headers = {
|
||||
'Accept': 'application/json',
|
||||
'Host': self._AWS_PROXY_HOST,
|
||||
'X-Amz-Date': amz_date,
|
||||
'X-Api-Key': self._AWS_API_KEY
|
||||
'X-Api-Key': self._AWS_API_KEY,
|
||||
}
|
||||
session_token = aws_dict.get('session_token')
|
||||
if session_token:
|
||||
headers['X-Amz-Security-Token'] = session_token
|
||||
|
||||
def aws_hash(s):
|
||||
return hashlib.sha256(s.encode('utf-8')).hexdigest()
|
||||
return hashlib.sha256(s.encode()).hexdigest()
|
||||
|
||||
# Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
|
||||
canonical_querystring = compat_urllib_parse_urlencode(query)
|
||||
canonical_querystring = urllib.parse.urlencode(query)
|
||||
canonical_headers = ''
|
||||
for header_name, header_value in sorted(headers.items()):
|
||||
canonical_headers += '%s:%s\n' % (header_name.lower(), header_value)
|
||||
canonical_headers += f'{header_name.lower()}:{header_value}\n'
|
||||
signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())])
|
||||
canonical_request = '\n'.join([
|
||||
'GET',
|
||||
@ -39,7 +39,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
canonical_querystring,
|
||||
canonical_headers,
|
||||
signed_headers,
|
||||
aws_hash('')
|
||||
aws_hash(''),
|
||||
])
|
||||
|
||||
# Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
|
||||
@ -49,7 +49,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
|
||||
# Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
|
||||
def aws_hmac(key, msg):
|
||||
return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
|
||||
return hmac.new(key, msg.encode(), hashlib.sha256)
|
||||
|
||||
def aws_hmac_digest(key, msg):
|
||||
return aws_hmac(key, msg).digest()
|
||||
@ -57,7 +57,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
def aws_hmac_hexdigest(key, msg):
|
||||
return aws_hmac(key, msg).hexdigest()
|
||||
|
||||
k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
|
||||
k_signing = ('AWS4' + aws_dict['secret_key']).encode()
|
||||
for value in credential_scope_list:
|
||||
k_signing = aws_hmac_digest(k_signing, value)
|
||||
|
||||
@ -65,11 +65,11 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
|
||||
# Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
|
||||
headers['Authorization'] = ', '.join([
|
||||
'%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
|
||||
'SignedHeaders=%s' % signed_headers,
|
||||
'Signature=%s' % signature,
|
||||
'{} Credential={}/{}'.format(self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
|
||||
f'SignedHeaders={signed_headers}',
|
||||
f'Signature={signature}',
|
||||
])
|
||||
|
||||
return self._download_json(
|
||||
'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
|
||||
'https://{}{}{}'.format(self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
|
||||
video_id, headers=headers)
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user