Upgrade yt_dlp and download script
@@ -8,12 +8,29 @@
 
 
 function main() {
-    cd "$(dirname "")"
-    echo "Working Dir: " $(pwd)
+    _STARGET="${1}"
+    _SPATH="${HOME}/.config/solarfm/plugins/youtube_download"
     LINK=`xclip -selection clipboard -o`
 
-    python "${HOME}/.config/solarfm/plugins/youtube_download/yt_dlp/__main__.py" \
-            --write-sub --embed-sub --sub-langs en \
-            -o "${1}/%(title)s.%(ext)s" "${LINK}"
+    cd "${_SPATH}"
+    echo "Working Dir: " $(pwd)
+
+    rm "${_SPATH}/../../cookies.txt"
+
+    # Note: Export cookies to file
+    python "${_SPATH}/yt_dlp/__main__.py" \
+            --cookies-from-browser firefox --cookies "${_SPATH}/../../cookies.txt"
+
+    # Note: Use cookies from browser directly
+    # python "${_SPATH}/yt_dlp/__main__.py" \
+    #         --cookies-from-browser firefox --write-sub --embed-sub --sub-langs en \
+    #         -o "${_STARGET}/%(title)s.%(ext)s" "${LINK}"
+
+    # Note: Download video
+    python "${_SPATH}/yt_dlp/__main__.py" \
+            -f "bestvideo[height<=1080][ext=mp4][vcodec^=avc]+bestaudio[ext=m4a]/best[ext=mp4]/best" \
+            --cookies "${_SPATH}/../../cookies.txt" --write-sub --embed-sub --sub-langs en \
+            -o "${_STARGET}/%(title)s.%(ext)s" "${LINK}"
 
 }
 main "$@";
										
											
[File diff suppressed because it is too large]
@@ -1,10 +1,10 @@
-try:
-    import contextvars  # noqa: F401
-except Exception:
-    raise Exception(
-        f'You are using an unsupported version of Python. Only Python versions 3.7 and above are supported by yt-dlp')  # noqa: F541
+import sys
 
-__license__ = 'Public Domain'
+if sys.version_info < (3, 9):
+    raise ImportError(
+        f'You are using an unsupported version of Python. Only Python versions 3.9 and above are supported by yt-dlp')  # noqa: F541
 
+__license__ = 'The Unlicense'
+
 import collections
 import getpass
@@ -12,15 +12,16 @@ import itertools
 import optparse
 import os
 import re
-import sys
 import traceback
 
-from .compat import compat_shlex_quote
-from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
+from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError
 from .downloader.external import get_external_downloader
 from .extractor import list_extractor_classes
 from .extractor.adobepass import MSO_INFO
+from .networking.impersonate import ImpersonateTarget
+from .globals import IN_CLI, plugin_dirs
 from .options import parseOpts
+from .plugins import load_all_plugins as _load_all_plugins
 from .postprocessor import (
     FFmpegExtractAudioPP,
     FFmpegMergerPP,
@@ -43,12 +44,12 @@ from .utils import (
     GeoUtils,
     PlaylistEntries,
     SameFileError,
-    decodeOption,
     download_range_func,
     expand_path,
     float_or_none,
     format_field,
     int_or_none,
+    join_nonempty,
     match_filter_func,
     parse_bytes,
     parse_duration,
@@ -57,15 +58,15 @@ from .utils import (
     read_stdin,
     render_table,
     setproctitle,
+    shell_quote,
     traverse_obj,
     variadic,
     write_string,
 )
 from .utils.networking import std_headers
+from .utils._utils import _UnsafeExtensionError
 from .YoutubeDL import YoutubeDL
 
-_IN_CLI = False
-
 
 def _exit(status=0, *args):
     for msg in args:
@@ -74,14 +75,16 @@ def _exit(status=0, *args):
 
 
 def get_urls(urls, batchfile, verbose):
-    # Batch file verification
+    """
+    @param verbose      -1: quiet, 0: normal, 1: verbose
+    """
     batch_urls = []
     if batchfile is not None:
         try:
             batch_urls = read_batch_urls(
-                read_stdin('URLs') if batchfile == '-'
+                read_stdin(None if verbose == -1 else 'URLs') if batchfile == '-'
                 else open(expand_path(batchfile), encoding='utf-8', errors='ignore'))
-            if verbose:
+            if verbose == 1:
                 write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
         except OSError:
             _exit(f'ERROR: batch file {batchfile} could not be read')
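Note: get_urls now takes a tri-state verbosity instead of a boolean. As the docstring above states, -1 means quiet (read_stdin is given no prompt label), 0 normal, and 1 verbose (the batch URLs are dumped to the debug log). An illustrative collapse of the caller's flags, matching the parse_options() call site later in this diff:

# -1: quiet (no stdin prompt), 0: normal, 1: verbose (debug dump of batch URLs)
verbose = -1 if quiet and not is_verbose else (1 if is_verbose else 0)  # names illustrative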
@@ -112,9 +115,9 @@ def print_extractor_information(opts, urls):
             ie.description(markdown=False, search_examples=_SEARCHES)
             for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False)
     elif opts.ap_list_mso:
-        out = 'Supported TV Providers:\n%s\n' % render_table(
+        out = 'Supported TV Providers:\n{}\n'.format(render_table(
             ['mso', 'mso name'],
-            [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()])
+            [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]))
     else:
         return False
     write_string(out, out=sys.stdout)
@@ -126,7 +129,7 @@ def set_compat_opts(opts):
         if name not in opts.compat_opts:
             return False
         opts.compat_opts.discard(name)
-        opts.compat_opts.update(['*%s' % name])
+        opts.compat_opts.update([f'*{name}'])
         return True
 
     def set_default_compat(compat_name, opt_name, default=True, remove_compat=True):
@@ -153,6 +156,9 @@ def set_compat_opts(opts):
             opts.embed_infojson = False
     if 'format-sort' in opts.compat_opts:
         opts.format_sort.extend(FormatSorter.ytdl_default)
+    elif 'prefer-vp9-sort' in opts.compat_opts:
+        opts.format_sort.extend(FormatSorter._prefer_vp9_sort)
+
     _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
     _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False)
     if _video_multistreams_set is False and _audio_multistreams_set is False:
@@ -219,7 +225,7 @@ def validate_options(opts):
         validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval')
 
     if opts.wait_for_video is not None:
-        min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None])
+        min_wait, max_wait, *_ = map(parse_duration, [*opts.wait_for_video.split('-', 1), None])
         validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video),
                  'time range to wait for video', opts.wait_for_video)
         validate_minmax(min_wait, max_wait, 'time range to wait for video')
@@ -230,6 +236,11 @@ def validate_options(opts):
         validate_regex('format sorting', f, FormatSorter.regex)
 
     # Postprocessor formats
+    if opts.convertsubtitles == 'none':
+        opts.convertsubtitles = None
+    if opts.convertthumbnails == 'none':
+        opts.convertthumbnails = None
+
     validate_regex('merge output format', opts.merge_output_format,
                    r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS))))
     validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)
@@ -249,9 +260,11 @@ def validate_options(opts):
         elif value in ('inf', 'infinite'):
             return float('inf')
         try:
-            return int(value)
+            int_value = int(value)
         except (TypeError, ValueError):
             validate(False, f'{name} retry count', value)
+        validate_positive(f'{name} retry count', int_value)
+        return int_value
 
     opts.retries = parse_retries('download', opts.retries)
     opts.fragment_retries = parse_retries('fragment', opts.fragment_retries)
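Note: parse_retries previously returned the converted int unchecked; with the hunk above it also rejects negative counts. Illustrative behaviour (hedged; validate_positive is defined elsewhere inside validate_options):

parse_retries('download', 'infinite')  # -> float('inf')
parse_retries('download', '3')         # -> 3
parse_retries('download', '-1')        # now fails validate_positive
parse_retries('download', 'n/a')       # still fails via the int() conversion path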
@@ -261,9 +274,9 @@ def validate_options(opts):
     # Retry sleep function
     def parse_sleep_func(expr):
         NUMBER_RE = r'\d+(?:\.\d+)?'
-        op, start, limit, step, *_ = tuple(re.fullmatch(
+        op, start, limit, step, *_ = (*tuple(re.fullmatch(
             rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?',
-            expr.strip()).groups()) + (None, None)
+            expr.strip()).groups()), None, None)
 
         if op == 'exp':
             return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf'))
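Note: the unpacking change only pads re.fullmatch's groups with trailing Nones so limit and step stay optional; the expression grammar itself is unchanged. For the exponential branch shown above:

backoff = parse_sleep_func('exp=1:120')  # start=1, limit=120, step defaults to 2
[backoff(n) for n in range(9)]           # 1, 2, 4, ..., 64, then capped at 120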
@@ -281,18 +294,20 @@ def validate_options(opts):
             raise ValueError(f'invalid {key} retry sleep expression {expr!r}')
 
     # Bytes
-    def validate_bytes(name, value):
+    def validate_bytes(name, value, strict_positive=False):
         if value is None:
             return None
         numeric_limit = parse_bytes(value)
-        validate(numeric_limit is not None, 'rate limit', value)
+        validate(numeric_limit is not None, name, value)
+        if strict_positive:
+            validate_positive(name, numeric_limit, True)
         return numeric_limit
 
-    opts.ratelimit = validate_bytes('rate limit', opts.ratelimit)
+    opts.ratelimit = validate_bytes('rate limit', opts.ratelimit, True)
     opts.throttledratelimit = validate_bytes('throttled rate limit', opts.throttledratelimit)
     opts.min_filesize = validate_bytes('min filesize', opts.min_filesize)
     opts.max_filesize = validate_bytes('max filesize', opts.max_filesize)
-    opts.buffersize = validate_bytes('buffer size', opts.buffersize)
+    opts.buffersize = validate_bytes('buffer size', opts.buffersize, True)
     opts.http_chunk_size = validate_bytes('http chunk size', opts.http_chunk_size)
 
     # Output templates
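Note: validate_bytes now reports the actual option name (the old code always said 'rate limit') and can demand a strictly positive value, which rate limit and buffer size opt into. parse_bytes accepts the usual suffixed forms, roughly:

parse_bytes('50K')    # -> 51200
parse_bytes('1.5M')   # -> 1572864
parse_bytes('junk')   # -> None, which validate(...) turns into an error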
@@ -387,16 +402,19 @@ def validate_options(opts):
                                  f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}')
         opts.cookiesfrombrowser = (browser_name, profile, keyring, container)
 
+    if opts.impersonate is not None:
+        opts.impersonate = ImpersonateTarget.from_str(opts.impersonate.lower())
+
     # MetadataParser
     def metadataparser_actions(f):
         if isinstance(f, str):
-            cmd = '--parse-metadata %s' % compat_shlex_quote(f)
+            cmd = f'--parse-metadata {shell_quote(f)}'
             try:
                 actions = [MetadataFromFieldPP.to_action(f)]
             except Exception as err:
                 raise ValueError(f'{cmd} is invalid; {err}')
         else:
-            cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f))
+            cmd = f'--replace-in-metadata {shell_quote(f)}'
             actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(','))
 
         for action in actions:
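Note: ImpersonateTarget.from_str parses the lowered --impersonate string into client/version/OS fields; as I understand the target syntax (CLIENT[-VERSION][:OS[-OS_VERSION]]), for example:

target = ImpersonateTarget.from_str('chrome-110:windows-10')
# target.client == 'chrome', target.version == '110',
# target.os == 'windows', target.os_version == '10'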
@@ -407,13 +425,17 @@ def validate_options(opts):
             yield action
 
     if opts.metafromtitle is not None:
-        opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle)
+        opts.parse_metadata.setdefault('pre_process', []).append(f'title:{opts.metafromtitle}')
     opts.parse_metadata = {
         k: list(itertools.chain(*map(metadataparser_actions, v)))
         for k, v in opts.parse_metadata.items()
     }
 
     # Other options
+    opts.plugin_dirs = opts.plugin_dirs
+    if opts.plugin_dirs is None:
+        opts.plugin_dirs = ['default']
+
     if opts.playlist_items is not None:
         try:
             tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items))
@@ -460,7 +482,7 @@ def validate_options(opts):
             default_downloader = ed.get_basename()
 
     for policy in opts.color.values():
-        if policy not in ('always', 'auto', 'no_color', 'never'):
+        if policy not in ('always', 'auto', 'auto-tty', 'no_color', 'no_color-tty', 'never'):
             raise ValueError(f'"{policy}" is not a valid color policy')
 
     warnings, deprecation_warnings = [], []
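Note: the two additions suffix -tty onto existing policies; my reading is that a *-tty policy only takes effect when the stream is attached to a terminal. Hypothetical invocations exercising the widened validator (the optional STREAM: prefix for --color is assumed here):

# yt-dlp --color auto-tty 'https://...'
# yt-dlp --color stderr:no_color-tty 'https://...'
assert 'auto-tty' not in ('always', 'auto', 'no_color', 'never')  # the old tuple rejected it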
@@ -586,6 +608,13 @@ def validate_options(opts):
     if opts.ap_username is not None and opts.ap_password is None:
         opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ')
 
+    # compat option changes global state destructively; only allow from cli
+    if 'allow-unsafe-ext' in opts.compat_opts:
+        warnings.append(
+            'Using allow-unsafe-ext opens you up to potential attacks. '
+            'Use with great care!')
+        _UnsafeExtensionError.sanitize_extension = lambda x, prepend=False: x
+
     return warnings, deprecation_warnings
@@ -596,7 +625,7 @@ def get_postprocessors(opts):
         yield {
             'key': 'MetadataParser',
             'actions': actions,
-            'when': when
+            'when': when,
         }
     sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
     if sponsorblock_query:
@@ -604,19 +633,19 @@ def get_postprocessors(opts):
             'key': 'SponsorBlock',
             'categories': sponsorblock_query,
             'api': opts.sponsorblock_api,
-            'when': 'after_filter'
+            'when': 'after_filter',
         }
     if opts.convertsubtitles:
         yield {
             'key': 'FFmpegSubtitlesConvertor',
             'format': opts.convertsubtitles,
-            'when': 'before_dl'
+            'when': 'before_dl',
         }
     if opts.convertthumbnails:
         yield {
             'key': 'FFmpegThumbnailsConvertor',
             'format': opts.convertthumbnails,
-            'when': 'before_dl'
+            'when': 'before_dl',
         }
     if opts.extractaudio:
         yield {
@@ -641,7 +670,7 @@ def get_postprocessors(opts):
         yield {
             'key': 'FFmpegEmbedSubtitle',
             # already_have_subtitle = True prevents the file from being deleted after embedding
-            'already_have_subtitle': opts.writesubtitles and keep_subs
+            'already_have_subtitle': opts.writesubtitles and keep_subs,
         }
         if not opts.writeautomaticsub and keep_subs:
             opts.writesubtitles = True
@@ -654,7 +683,7 @@ def get_postprocessors(opts):
             'remove_sponsor_segments': opts.sponsorblock_remove,
             'remove_ranges': opts.remove_ranges,
             'sponsorblock_chapter_title': opts.sponsorblock_chapter_title,
-            'force_keyframes': opts.force_keyframes_at_cuts
+            'force_keyframes': opts.force_keyframes_at_cuts,
         }
     # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
     # FFmpegExtractAudioPP as containers before conversion may not support
@@ -688,7 +717,7 @@ def get_postprocessors(opts):
         yield {
             'key': 'EmbedThumbnail',
             # already_have_thumbnail = True prevents the file from being deleted after embedding
-            'already_have_thumbnail': opts.writethumbnail
+            'already_have_thumbnail': opts.writethumbnail,
         }
         if not opts.writethumbnail:
             opts.writethumbnail = True
@@ -722,7 +751,7 @@ ParsedOptions = collections.namedtuple('ParsedOptions', ('parser', 'options', 'urls', 'ydl_opts'))
 def parse_options(argv=None):
     """@returns ParsedOptions(parser, opts, urls, ydl_opts)"""
     parser, opts, urls = parseOpts(argv)
-    urls = get_urls(urls, opts.batchfile, opts.verbose)
+    urls = get_urls(urls, opts.batchfile, -1 if opts.quiet and not opts.verbose else opts.verbose)
 
     set_compat_opts(opts)
     try:
@@ -735,7 +764,7 @@ def parse_options(argv=None):
     print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:])
     any_getting = any(getattr(opts, k) for k in (
         'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename',
-        'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
+        'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl',
     ))
     if opts.quiet is None:
         opts.quiet = any_getting or opts.print_json or bool(opts.forceprint)
@@ -830,6 +859,7 @@ def parse_options(argv=None):
         'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress,
         'progress_with_newline': opts.progress_with_newline,
         'progress_template': opts.progress_template,
+        'progress_delta': opts.progress_delta,
         'playliststart': opts.playliststart,
         'playlistend': opts.playlistend,
         'playlistreverse': opts.playlist_reverse,
@@ -858,8 +888,8 @@ def parse_options(argv=None):
         'listsubtitles': opts.listsubtitles,
         'subtitlesformat': opts.subtitlesformat,
         'subtitleslangs': opts.subtitleslangs,
-        'matchtitle': decodeOption(opts.matchtitle),
-        'rejecttitle': decodeOption(opts.rejecttitle),
+        'matchtitle': opts.matchtitle,
+        'rejecttitle': opts.rejecttitle,
         'max_downloads': opts.max_downloads,
         'prefer_free_formats': opts.prefer_free_formats,
         'trim_file_name': opts.trim_file_name,
@@ -910,6 +940,7 @@ def parse_options(argv=None):
         'postprocessors': postprocessors,
         'fixup': opts.fixup,
         'source_address': opts.source_address,
+        'impersonate': opts.impersonate,
         'call_home': opts.call_home,
         'sleep_interval_requests': opts.sleep_interval_requests,
         'sleep_interval': opts.sleep_interval,
@@ -959,6 +990,11 @@ def _real_main(argv=None):
     if opts.ffmpeg_location:
         FFmpegPostProcessor._ffmpeg_location.set(opts.ffmpeg_location)
 
+    # load all plugins into the global lookup
+    plugin_dirs.value = opts.plugin_dirs
+    if plugin_dirs.value:
+        _load_all_plugins()
+
     with YoutubeDL(ydl_opts) as ydl:
         pre_process = opts.update_self or opts.rm_cachedir
         actual_use = all_urls or opts.load_info_filename
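Note: plugin_dirs here is the global container imported from .globals; the CLI seeds its .value before any YoutubeDL instance is built, so plugins are registered exactly once. A sketch of the intent (the extra path is illustrative):

plugin_dirs.value = ['default', '/home/user/.yt-dlp-plugins']  # 'default' = standard search locations
if plugin_dirs.value:
    _load_all_plugins()  # populates the global plugin lookups; an empty list disables plugins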
@@ -979,11 +1015,68 @@ def _real_main(argv=None):
             traceback.print_exc()
             ydl._download_retcode = 100
 
+        if opts.list_impersonate_targets:
+
+            known_targets = [
+                # List of simplified targets we know are supported,
+                # to help users know what dependencies may be required.
+                (ImpersonateTarget('chrome'), 'curl_cffi'),
+                (ImpersonateTarget('safari'), 'curl_cffi'),
+                (ImpersonateTarget('firefox'), 'curl_cffi>=0.10'),
+                (ImpersonateTarget('edge'), 'curl_cffi'),
+            ]
+
+            available_targets = ydl._get_available_impersonate_targets()
+
+            def make_row(target, handler):
+                return [
+                    join_nonempty(target.client.title(), target.version, delim='-') or '-',
+                    join_nonempty((target.os or '').title(), target.os_version, delim='-') or '-',
+                    handler,
+                ]
+
+            rows = [make_row(target, handler) for target, handler in available_targets]
+
+            for known_target, known_handler in known_targets:
+                if not any(
+                    known_target in target and known_handler.startswith(handler)
+                    for target, handler in available_targets
+                ):
+                    rows.insert(0, [
+                        ydl._format_out(text, ydl.Styles.SUPPRESS)
+                        for text in make_row(known_target, f'{known_handler} (unavailable)')
+                    ])
+
+            ydl.to_screen('[info] Available impersonate targets')
+            ydl.to_stdout(render_table(['Client', 'OS', 'Source'], rows, extra_gap=2, delim='-'))
+            return
+
         if not actual_use:
             if pre_process:
                 return ydl._download_retcode
 
-            ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
+            args = sys.argv[1:] if argv is None else argv
+            ydl.warn_if_short_id(args)
+
+            # Show a useful error message and wait for keypress if not launched from shell on Windows
+            if not args and os.name == 'nt' and getattr(sys, 'frozen', False):
+                import ctypes.wintypes
+                import msvcrt
+
+                kernel32 = ctypes.WinDLL('Kernel32')
+
+                buffer = (1 * ctypes.wintypes.DWORD)()
+                attached_processes = kernel32.GetConsoleProcessList(buffer, 1)
+                # If we only have a single process attached, then the executable was double clicked
+                # When using `pyinstaller` with `--onefile`, two processes get attached
+                is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
+                if attached_processes == 1 or (is_onefile and attached_processes == 2):
+                    print(parser._generate_error_message(
+                        'Do not double-click the executable, instead call it from a command line.\n'
+                        'Please read the README for further information on how to use yt-dlp: '
+                        'https://github.com/yt-dlp/yt-dlp#readme'))
+                    msvcrt.getch()
+                    _exit(2)
             parser.error(
                 'You must provide at least one URL.\n'
                 'Type yt-dlp --help to see a list of all options.')
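Note: the double-click detection above leans on the Win32 call GetConsoleProcessList: if the console was created just for this process, the attached count is 1 (or 2 for a PyInstaller --onefile bootloader plus its child). A standalone sketch of the same probe:

import ctypes.wintypes

kernel32 = ctypes.WinDLL('Kernel32')
buffer = (1 * ctypes.wintypes.DWORD)()             # room for one PID is enough here
count = kernel32.GetConsoleProcessList(buffer, 1)  # returns the total attached-process count
print('double-clicked' if count == 1 else 'launched from a shell')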
@@ -1002,11 +1095,10 @@ def _real_main(argv=None):
 
 
 def main(argv=None):
-    global _IN_CLI
-    _IN_CLI = True
+    IN_CLI.value = True
     try:
         _exit(*variadic(_real_main(argv)))
-    except DownloadError:
+    except (CookieLoadError, DownloadError):
         _exit(1)
     except SameFileError as e:
         _exit(f'ERROR: {e}')
@@ -1024,9 +1116,9 @@ def main(argv=None):
 from .extractor import gen_extractors, list_extractors
 
 __all__ = [
-    'main',
     'YoutubeDL',
-    'parse_options',
     'gen_extractors',
     'list_extractors',
+    'main',
+    'parse_options',
 ]
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 # Execute with
-# $ python -m yt_dlp
+# $ python3 -m yt_dlp
 
 import sys
@@ -1,6 +1,6 @@
 import sys
 
-from PyInstaller.utils.hooks import collect_submodules
+from PyInstaller.utils.hooks import collect_submodules, collect_data_files
 
 
 def pycryptodome_module():
@@ -10,7 +10,7 @@ def pycryptodome_module():
         try:
             import Crypto  # noqa: F401
             print('WARNING: Using Crypto since Cryptodome is not available. '
-                  'Install with: pip install pycryptodomex', file=sys.stderr)
+                  'Install with: python3 -m pip install pycryptodomex', file=sys.stderr)
             return 'Crypto'
         except ImportError:
             pass
@@ -21,12 +21,16 @@ def get_hidden_imports():
     yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated')
     yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated')
     yield pycryptodome_module()
-    yield from collect_submodules('websockets')
+    # Only `websockets` is required, others are collected just in case
+    for module in ('websockets', 'requests', 'urllib3'):
+        yield from collect_submodules(module)
     # These are auto-detected, but explicitly add them just in case
-    yield from ('mutagen', 'brotli', 'certifi')
+    yield from ('mutagen', 'brotli', 'certifi', 'secretstorage', 'curl_cffi')
 
 
 hiddenimports = list(get_hidden_imports())
 print(f'Adding imports: {hiddenimports}')
 
-excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts']
+excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle']
+
+datas = collect_data_files('curl_cffi', includes=['cacert.pem'])
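Note: collect_data_files yields (source, dest_dir) pairs that PyInstaller copies into the frozen bundle; here it ensures curl_cffi's CA bundle ships alongside the impersonation support. Roughly:

from PyInstaller.utils.hooks import collect_data_files

datas = collect_data_files('curl_cffi', includes=['cacert.pem'])
# e.g. [('.../site-packages/curl_cffi/cacert.pem', 'curl_cffi')]  -- path illustrative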
@@ -3,7 +3,6 @@ from math import ceil
 
 from .compat import compat_ord
 from .dependencies import Cryptodome
-from .utils import bytes_to_intlist, intlist_to_bytes
 
 if Cryptodome.AES:
     def aes_cbc_decrypt_bytes(data, key, iv):
@@ -17,15 +16,15 @@ if Cryptodome.AES:
 else:
     def aes_cbc_decrypt_bytes(data, key, iv):
         """ Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """
-        return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv))))
+        return bytes(aes_cbc_decrypt(*map(list, (data, key, iv))))
 
     def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
         """ Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """
-        return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce))))
+        return bytes(aes_gcm_decrypt_and_verify(*map(list, (data, key, tag, nonce))))
 
 
 def aes_cbc_encrypt_bytes(data, key, iv, **kwargs):
-    return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs))
+    return bytes(aes_cbc_encrypt(*map(list, (data, key, iv)), **kwargs))
 
 
 BLOCK_SIZE_BYTES = 16
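Note: the dropped helpers were thin wrappers around behaviour the builtins already provide, since iterating bytes yields ints and bytes() accepts an iterable of ints:

assert list(b'\x01\xff') == [1, 255]   # replaces bytes_to_intlist
assert bytes([1, 255]) == b'\x01\xff'  # replaces intlist_to_bytes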
@@ -68,7 +67,7 @@ def pad_block(block, padding_mode):
         raise NotImplementedError(f'Padding mode {padding_mode} is not implemented')
 
     if padding_mode == 'iso7816' and padding_size:
-        block = block + [0x80]  # NB: += mutates list
+        block = [*block, 0x80]  # NB: += mutates list
         padding_size -= 1
 
     return block + [PADDING_BYTE[padding_mode]] * padding_size
@@ -84,7 +83,7 @@ def aes_ecb_encrypt(data, key, iv=None):
     @returns {int[]}           encrypted data
     """
     expanded_key = key_expansion(key)
-    block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+    block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
 
     encrypted_data = []
     for i in range(block_count):
@@ -104,15 +103,13 @@ def aes_ecb_decrypt(data, key, iv=None):
     @returns {int[]}           decrypted data
     """
     expanded_key = key_expansion(key)
-    block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+    block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
 
     encrypted_data = []
     for i in range(block_count):
         block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
         encrypted_data += aes_decrypt(block, expanded_key)
-    encrypted_data = encrypted_data[:len(data)]
-
-    return encrypted_data
+    return encrypted_data[:len(data)]
 
 
 def aes_ctr_decrypt(data, key, iv):
@@ -137,7 +134,7 @@ def aes_ctr_encrypt(data, key, iv):
     @returns {int[]}           encrypted data
     """
     expanded_key = key_expansion(key)
-    block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+    block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
     counter = iter_vector(iv)
 
     encrypted_data = []
@@ -148,9 +145,7 @@ def aes_ctr_encrypt(data, key, iv):
 
         cipher_counter_block = aes_encrypt(counter_block, expanded_key)
         encrypted_data += xor(block, cipher_counter_block)
-    encrypted_data = encrypted_data[:len(data)]
-
-    return encrypted_data
+    return encrypted_data[:len(data)]
 
 
 def aes_cbc_decrypt(data, key, iv):
@@ -163,7 +158,7 @@ def aes_cbc_decrypt(data, key, iv):
     @returns {int[]}           decrypted data
     """
     expanded_key = key_expansion(key)
-    block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+    block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
 
     decrypted_data = []
     previous_cipher_block = iv
@@ -174,9 +169,7 @@ def aes_cbc_decrypt(data, key, iv):
         decrypted_block = aes_decrypt(block, expanded_key)
         decrypted_data += xor(decrypted_block, previous_cipher_block)
         previous_cipher_block = block
-    decrypted_data = decrypted_data[:len(data)]
-
-    return decrypted_data
+    return decrypted_data[:len(data)]
 
 
 def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'):
@@ -190,7 +183,7 @@ def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'):
     @returns {int[]}           encrypted data
     """
     expanded_key = key_expansion(key)
-    block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+    block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
 
     encrypted_data = []
     previous_cipher_block = iv
@@ -224,10 +217,10 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
     hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key))
 
     if len(nonce) == 12:
-        j0 = nonce + [0, 0, 0, 1]
+        j0 = [*nonce, 0, 0, 0, 1]
     else:
         fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8
-        ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big'))
+        ghash_in = nonce + [0] * fill + list((8 * len(nonce)).to_bytes(8, 'big'))
         j0 = ghash(hash_subkey, ghash_in)
 
     # TODO: add nonce support to aes_ctr_decrypt
@@ -236,17 +229,17 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
     iv_ctr = inc(j0)
 
     decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr)))
-    pad_len = len(data) // 16 * 16
+    pad_len = (BLOCK_SIZE_BYTES - (len(data) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES
     s_tag = ghash(
         hash_subkey,
         data
-        + [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len)        # pad
-        + bytes_to_intlist((0 * 8).to_bytes(8, 'big')           # length of associated data
-                           + ((len(data) * 8).to_bytes(8, 'big')))  # length of data
+        + [0] * pad_len                                  # pad
+        + list((0 * 8).to_bytes(8, 'big')                # length of associated data
+               + ((len(data) * 8).to_bytes(8, 'big'))),  # length of data
     )
 
     if tag != aes_ctr_encrypt(s_tag, key, j0):
-        raise ValueError("Mismatching authentication tag")
+        raise ValueError('Mismatching authentication tag')
 
     return decrypted_data
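Note: the pad_len rewrite fixes the padding of the GHASH input when the ciphertext is already block-aligned. A quick worked check with BLOCK_SIZE_BYTES = 16:

for n in (20, 16):
    old = 16 - n + (n // 16 * 16)  # zeros appended by the old expression
    new = (16 - (n % 16)) % 16     # zeros appended by the new expression
    print(n, old, new)             # 20 -> 12, 12 (agree); 16 -> 16, 0 (old over-padded)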
@@ -288,9 +281,7 @@ def aes_decrypt(data, expanded_key):
             data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV))
         data = shift_rows_inv(data)
         data = sub_bytes_inv(data)
-    data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
-
-    return data
+    return xor(data, expanded_key[:BLOCK_SIZE_BYTES])
 
 
 def aes_decrypt_text(data, password, key_size_bytes):
@@ -308,8 +299,8 @@ def aes_decrypt_text(data, password, key_size_bytes):
     """
     NONCE_LENGTH_BYTES = 8
 
-    data = bytes_to_intlist(base64.b64decode(data))
-    password = bytes_to_intlist(password.encode())
+    data = list(base64.b64decode(data))
+    password = list(password.encode())
 
     key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
     key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
@@ -318,9 +309,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
     cipher = data[NONCE_LENGTH_BYTES:]
 
     decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES))
-    plaintext = intlist_to_bytes(decrypted_data)
-
-    return plaintext
+    return bytes(decrypted_data)
 
 
 RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
@@ -428,9 +417,7 @@ def key_expansion(data):
         for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
             temp = data[-4:]
             data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
-    data = data[:expanded_key_size_bytes]
-
-    return data
+    return data[:expanded_key_size_bytes]
 
 
 def iter_vector(iv):
@@ -511,7 +498,7 @@ def block_product(block_x, block_y):
     # NIST SP 800-38D, Algorithm 1
 
     if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES:
-        raise ValueError("Length of blocks need to be %d bytes" % BLOCK_SIZE_BYTES)
+        raise ValueError(f'Length of blocks need to be {BLOCK_SIZE_BYTES} bytes')
 
     block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1)
     block_v = block_y[:]
@@ -534,7 +521,7 @@ def ghash(subkey, data):
     # NIST SP 800-38D, Algorithm 2
 
     if len(data) % BLOCK_SIZE_BYTES:
-        raise ValueError("Length of data should be %d bytes" % BLOCK_SIZE_BYTES)
+        raise ValueError(f'Length of data should be {BLOCK_SIZE_BYTES} bytes')
 
     last_y = [0] * BLOCK_SIZE_BYTES
     for i in range(0, len(data), BLOCK_SIZE_BYTES):
@@ -547,19 +534,17 @@ def ghash(subkey, data):
 __all__ = [
     'aes_cbc_decrypt',
     'aes_cbc_decrypt_bytes',
-    'aes_ctr_decrypt',
-    'aes_decrypt_text',
-    'aes_decrypt',
-    'aes_ecb_decrypt',
-    'aes_gcm_decrypt_and_verify',
-    'aes_gcm_decrypt_and_verify_bytes',
-
     'aes_cbc_encrypt',
     'aes_cbc_encrypt_bytes',
+    'aes_ctr_decrypt',
     'aes_ctr_encrypt',
+    'aes_decrypt',
+    'aes_decrypt_text',
+    'aes_ecb_decrypt',
     'aes_ecb_encrypt',
     'aes_encrypt',
+    'aes_gcm_decrypt_and_verify',
+    'aes_gcm_decrypt_and_verify_bytes',
     'key_expansion',
     'pad_block',
    'pkcs7_padding',
@@ -81,10 +81,10 @@ class Cache:
 
         cachedir = self._get_root_dir()
         if not any((term in cachedir) for term in ('cache', 'tmp')):
-            raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
+            raise Exception(f'Not removing directory {cachedir} - this does not look like a cache dir')
 
         self._ydl.to_screen(
-            'Removing cache dir %s .' % cachedir, skip_eol=True)
+            f'Removing cache dir {cachedir} .', skip_eol=True)
         if os.path.exists(cachedir):
             self._ydl.to_screen('.', skip_eol=True)
             shutil.rmtree(cachedir)
@@ -1,5 +0,0 @@
-import warnings
-
-warnings.warn(DeprecationWarning(f'{__name__} is deprecated'))
-
-casefold = str.casefold
@@ -1,5 +1,4 @@
 import os
-import sys
 import xml.etree.ElementTree as etree
 
 from .compat_utils import passthrough_module
@@ -24,36 +23,14 @@ def compat_etree_fromstring(text):
     return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
 
 
-compat_os_name = os._name if os.name == 'java' else os.name
-
-
-if compat_os_name == 'nt':
-    def compat_shlex_quote(s):
-        import re
-        return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
-else:
-    from shlex import quote as compat_shlex_quote  # noqa: F401
-
-
 def compat_ord(c):
     return c if isinstance(c, int) else ord(c)
 
 
-if compat_os_name == 'nt' and sys.version_info < (3, 8):
-    # os.path.realpath on Windows does not follow symbolic links
-    # prior to Python 3.8 (see https://bugs.python.org/issue9949)
-    def compat_realpath(path):
-        while os.path.islink(path):
-            path = os.path.abspath(os.readlink(path))
-        return os.path.realpath(path)
-else:
-    compat_realpath = os.path.realpath
-
-
 # Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl
 # See https://github.com/yt-dlp/yt-dlp/issues/792
 # https://docs.python.org/3/library/os.path.html#os.path.expanduser
-if compat_os_name in ('nt', 'ce'):
+if os.name in ('nt', 'ce'):
     def compat_expanduser(path):
         HOME = os.environ.get('HOME')
         if not HOME:
| @@ -8,16 +8,14 @@ passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn( | |||||||
|     DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6)) |     DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6)) | ||||||
| del passthrough_module | del passthrough_module | ||||||
|  |  | ||||||
| import base64 | import functools  # noqa: F401 | ||||||
| import urllib.error | import os | ||||||
| import urllib.parse |  | ||||||
|  |  | ||||||
| compat_str = str |  | ||||||
|  |  | ||||||
| compat_b64decode = base64.b64decode | compat_os_name = os.name | ||||||
|  | compat_realpath = os.path.realpath | ||||||
|  |  | ||||||
| compat_urlparse = urllib.parse |  | ||||||
| compat_parse_qs = urllib.parse.parse_qs | def compat_shlex_quote(s): | ||||||
| compat_urllib_parse_unquote = urllib.parse.unquote |     from ..utils import shell_quote | ||||||
| compat_urllib_parse_urlencode = urllib.parse.urlencode |     return shell_quote(s) | ||||||
| compat_urllib_parse_urlparse = urllib.parse.urlparse |  | ||||||
|   | |||||||
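
Editor's note: the deprecated-compat module above now carries compat_os_name, compat_realpath and a compat_shlex_quote that defers to yt-dlp's own shell_quote, instead of switching between a Windows regex and shlex.quote at import time. For the POSIX half, the standard library behaves like this:

    import shlex

    # shlex.quote escapes strings for safe interpolation into a POSIX shell
    print(shlex.quote('plain-name.mp4'))  # plain-name.mp4  (safe chars, left as-is)
    print(shlex.quote('file name.mp4'))   # 'file name.mp4' (space forces quoting)
    print(shlex.quote("it's"))            # 'it'"'"'s'      (embedded quote escaped)
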
| @@ -30,11 +30,12 @@ from asyncio import run as compat_asyncio_run  # noqa: F401 | |||||||
| from re import Pattern as compat_Pattern  # noqa: F401 | from re import Pattern as compat_Pattern  # noqa: F401 | ||||||
| from re import match as compat_Match  # noqa: F401 | from re import match as compat_Match  # noqa: F401 | ||||||
|  |  | ||||||
| from . import compat_expanduser, compat_HTMLParseError, compat_realpath | from . import compat_expanduser, compat_HTMLParseError | ||||||
| from .compat_utils import passthrough_module | from .compat_utils import passthrough_module | ||||||
| from ..dependencies import brotli as compat_brotli  # noqa: F401 | from ..dependencies import brotli as compat_brotli  # noqa: F401 | ||||||
| from ..dependencies import websockets as compat_websockets  # noqa: F401 | from ..dependencies import websockets as compat_websockets  # noqa: F401 | ||||||
| from ..dependencies.Cryptodome import AES as compat_pycrypto_AES  # noqa: F401 | from ..dependencies.Cryptodome import AES as compat_pycrypto_AES  # noqa: F401 | ||||||
|  | from ..networking.exceptions import HTTPError as compat_HTTPError | ||||||
|  |  | ||||||
| passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) | passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) | ||||||
|  |  | ||||||
| @@ -70,7 +71,6 @@ compat_html_parser_HTMLParseError = compat_HTMLParseError | |||||||
| compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser | compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser | ||||||
| compat_http_client = http.client | compat_http_client = http.client | ||||||
| compat_http_server = http.server | compat_http_server = http.server | ||||||
| compat_HTTPError = urllib.error.HTTPError |  | ||||||
| compat_input = input | compat_input = input | ||||||
| compat_integer_types = (int, ) | compat_integer_types = (int, ) | ||||||
| compat_itertools_count = itertools.count | compat_itertools_count = itertools.count | ||||||
| @@ -78,7 +78,7 @@ compat_kwargs = lambda kwargs: kwargs | |||||||
| compat_map = map | compat_map = map | ||||||
| compat_numeric_types = (int, float, complex) | compat_numeric_types = (int, float, complex) | ||||||
| compat_os_path_expanduser = compat_expanduser | compat_os_path_expanduser = compat_expanduser | ||||||
| compat_os_path_realpath = compat_realpath | compat_os_path_realpath = os.path.realpath | ||||||
| compat_print = print | compat_print = print | ||||||
| compat_shlex_split = shlex.split | compat_shlex_split = shlex.split | ||||||
| compat_socket_create_connection = socket.create_connection | compat_socket_create_connection = socket.create_connection | ||||||
| @@ -88,7 +88,7 @@ compat_struct_unpack = struct.unpack | |||||||
| compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL | compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL | ||||||
| compat_tokenize_tokenize = tokenize.tokenize | compat_tokenize_tokenize = tokenize.tokenize | ||||||
| compat_urllib_error = urllib.error | compat_urllib_error = urllib.error | ||||||
| compat_urllib_HTTPError = urllib.error.HTTPError | compat_urllib_HTTPError = compat_HTTPError | ||||||
| compat_urllib_parse = urllib.parse | compat_urllib_parse = urllib.parse | ||||||
| compat_urllib_parse_parse_qs = urllib.parse.parse_qs | compat_urllib_parse_parse_qs = urllib.parse.parse_qs | ||||||
| compat_urllib_parse_quote = urllib.parse.quote | compat_urllib_parse_quote = urllib.parse.quote | ||||||
| @@ -104,5 +104,12 @@ compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseEr | |||||||
| compat_xpath = lambda xpath: xpath | compat_xpath = lambda xpath: xpath | ||||||
| compat_zip = zip | compat_zip = zip | ||||||
| workaround_optparse_bug9161 = lambda: None | workaround_optparse_bug9161 = lambda: None | ||||||
|  | compat_str = str | ||||||
|  | compat_b64decode = base64.b64decode | ||||||
|  | compat_urlparse = urllib.parse | ||||||
|  | compat_parse_qs = urllib.parse.parse_qs | ||||||
|  | compat_urllib_parse_unquote = urllib.parse.unquote | ||||||
|  | compat_urllib_parse_urlencode = urllib.parse.urlencode | ||||||
|  | compat_urllib_parse_urlparse = urllib.parse.urlparse | ||||||
|  |  | ||||||
| legacy = [] | legacy = [] | ||||||
|   | |||||||
| @@ -15,7 +15,7 @@ def get_package_info(module): | |||||||
|         name=getattr(module, '_yt_dlp__identifier', module.__name__), |         name=getattr(module, '_yt_dlp__identifier', module.__name__), | ||||||
|         version=str(next(filter(None, ( |         version=str(next(filter(None, ( | ||||||
|             getattr(module, attr, None) |             getattr(module, attr, None) | ||||||
|             for attr in ('__version__', 'version_string', 'version') |             for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version') | ||||||
|         )), None))) |         )), None))) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -57,7 +57,7 @@ def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=la | |||||||
|         callback(attr) |         callback(attr) | ||||||
|         return ret |         return ret | ||||||
|  |  | ||||||
|     @functools.lru_cache(maxsize=None) |     @functools.cache | ||||||
|     def from_child(attr): |     def from_child(attr): | ||||||
|         nonlocal child |         nonlocal child | ||||||
|         if attr not in allowed_attributes: |         if attr not in allowed_attributes: | ||||||
|   | |||||||
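
Editor's note: in the compat_utils hunk, @functools.lru_cache(maxsize=None) becomes @functools.cache. The two are equivalent — cache is defined as lru_cache(maxsize=None) — but cache requires Python 3.9, which this commit now guarantees:

    import functools

    @functools.cache  # Python 3.9+; same as @functools.lru_cache(maxsize=None)
    def fib(n):
        return n if n < 2 else fib(n - 1) + fib(n - 2)

    print(fib(90))                    # fast: each n is computed once
    print(fib.cache_info().currsize)  # 91 cached entries (n = 0..90)
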
| @@ -1,26 +0,0 @@ | |||||||
| # flake8: noqa: F405 |  | ||||||
| from functools import *  # noqa: F403 |  | ||||||
|  |  | ||||||
| from .compat_utils import passthrough_module |  | ||||||
|  |  | ||||||
| passthrough_module(__name__, 'functools') |  | ||||||
| del passthrough_module |  | ||||||
|  |  | ||||||
| try: |  | ||||||
|     cache  # >= 3.9 |  | ||||||
| except NameError: |  | ||||||
|     cache = lru_cache(maxsize=None) |  | ||||||
|  |  | ||||||
| try: |  | ||||||
|     cached_property  # >= 3.8 |  | ||||||
| except NameError: |  | ||||||
|     class cached_property: |  | ||||||
|         def __init__(self, func): |  | ||||||
|             update_wrapper(self, func) |  | ||||||
|             self.func = func |  | ||||||
|  |  | ||||||
|         def __get__(self, instance, _): |  | ||||||
|             if instance is None: |  | ||||||
|                 return self |  | ||||||
|             setattr(instance, self.func.__name__, self.func(instance)) |  | ||||||
|             return getattr(instance, self.func.__name__) |  | ||||||
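
Editor's note: the deleted compat module above only backfilled functools.cache (added in 3.9) and functools.cached_property (added in 3.8); with the new 3.9 floor, both come from the standard library. The stdlib cached_property behaves like the removed polyfill:

    import functools

    class Download:
        def __init__(self, url):
            self.url = url

        @functools.cached_property
        def filename(self):
            print('computing...')  # runs once; the result is stored on the instance
            return self.url.rsplit('/', 1)[-1]

    d = Download('https://example.com/video.mp4')
    print(d.filename)  # computing... then 'video.mp4'
    print(d.filename)  # 'video.mp4' (cached, no recompute)
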
| @@ -1,16 +1,22 @@ | |||||||
| tests = { |  | ||||||
|     'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP', |  | ||||||
|     'png': lambda h: h[:8] == b'\211PNG\r\n\032\n', |  | ||||||
|     'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'), |  | ||||||
|     'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'), |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def what(file=None, h=None): | def what(file=None, h=None): | ||||||
|     """Detect format of image (Currently supports jpeg, png, webp, gif only) |     """Detect format of image (Currently supports jpeg, png, webp, gif only) | ||||||
|     Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py |     Ref: https://github.com/python/cpython/blob/3.11/Lib/imghdr.py | ||||||
|  |     Ref: https://www.w3.org/Graphics/JPEG/itu-t81.pdf | ||||||
|     """ |     """ | ||||||
|     if h is None: |     if h is None: | ||||||
|         with open(file, 'rb') as f: |         with open(file, 'rb') as f: | ||||||
|             h = f.read(12) |             h = f.read(12) | ||||||
|     return next((type_ for type_, test in tests.items() if test(h)), None) |  | ||||||
|  |     if h.startswith(b'RIFF') and h.startswith(b'WEBP', 8): | ||||||
|  |         return 'webp' | ||||||
|  |  | ||||||
|  |     if h.startswith(b'\x89PNG'): | ||||||
|  |         return 'png' | ||||||
|  |  | ||||||
|  |     if h.startswith(b'\xFF\xD8\xFF'): | ||||||
|  |         return 'jpeg' | ||||||
|  |  | ||||||
|  |     if h.startswith(b'GIF'): | ||||||
|  |         return 'gif' | ||||||
|  |  | ||||||
|  |     return None | ||||||
|   | |||||||
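
Editor's note: the rewritten what() replaces the lambda table with direct startswith checks against each format's magic bytes, and the JPEG test now keys on the FF D8 FF start-of-image marker rather than a JFIF/Exif tag at offset 6 (which missed JPEGs without those tags). Usage sketch, assuming the module lives at yt_dlp/compat/imghdr.py as in this repo:

    from yt_dlp.compat.imghdr import what  # import path assumed from the repo layout

    print(what(h=b'\x89PNG\r\n\x1a\n' + b'\x00' * 4))  # 'png'  (8-byte PNG signature)
    print(what(h=b'\xff\xd8\xff\xe0' + b'\x00' * 8))   # 'jpeg' (FF D8 FF marker)
    print(what(h=b'RIFF\x00\x00\x00\x00WEBP'))         # 'webp' (RIFF container + WEBP tag)
    print(what(h=b'\x00' * 12))                        # None  (unrecognized)
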
| @@ -1,7 +1,7 @@ | |||||||
| # flake8: noqa: F405 | # flake8: noqa: F405 | ||||||
| from urllib import *  # noqa: F403 | from urllib import *  # noqa: F403 | ||||||
|  |  | ||||||
| del request | del request  # noqa: F821 | ||||||
| from . import request  # noqa: F401 | from . import request  # noqa: F401 | ||||||
|  |  | ||||||
| from ..compat_utils import passthrough_module | from ..compat_utils import passthrough_module | ||||||
|   | |||||||
| @@ -7,13 +7,13 @@ passthrough_module(__name__, 'urllib.request') | |||||||
| del passthrough_module | del passthrough_module | ||||||
|  |  | ||||||
|  |  | ||||||
| from .. import compat_os_name | import os | ||||||
|  |  | ||||||
| if compat_os_name == 'nt': | if os.name == 'nt': | ||||||
|     # On older python versions, proxies are extracted from Windows registry erroneously. [1] |     # On older Python versions, proxies are extracted from Windows registry erroneously. [1] | ||||||
|     # If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2] |     # If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2] | ||||||
|     # It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade |     # It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade | ||||||
|     # it to http on these older python versions to avoid issues |     # it to http on these older Python versions to avoid issues | ||||||
|     # This also applies for ftp proxy type, as ftp:// proxy scheme is not supported. |     # This also applies for ftp proxy type, as ftp:// proxy scheme is not supported. | ||||||
|     # 1: https://github.com/python/cpython/issues/86793 |     # 1: https://github.com/python/cpython/issues/86793 | ||||||
|     # 2: https://github.com/python/cpython/blob/51f1ae5ceb0673316c4e4b0175384e892e33cc6e/Lib/urllib/request.py#L2683-L2698 |     # 2: https://github.com/python/cpython/blob/51f1ae5ceb0673316c4e4b0175384e892e33cc6e/Lib/urllib/request.py#L2683-L2698 | ||||||
| @@ -37,4 +37,4 @@ if compat_os_name == 'nt': | |||||||
|     def getproxies(): |     def getproxies(): | ||||||
|         return getproxies_environment() or getproxies_registry_patched() |         return getproxies_environment() or getproxies_registry_patched() | ||||||
|  |  | ||||||
| del compat_os_name | del os | ||||||
|   | |||||||
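
Editor's note: the urllib/request patch above is unchanged in substance — environment proxies first, then the registry with schemeless https/ftp entries downgraded to http — it just reads os.name directly instead of the removed compat_os_name. The precedence half can be seen with the standard library alone:

    import os
    import urllib.request

    os.environ['http_proxy'] = 'http://127.0.0.1:8080'
    # getproxies_environment() is what the patched getproxies() consults first
    print(urllib.request.getproxies_environment())  # {'http': 'http://127.0.0.1:8080'}
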
| @@ -1,6 +1,10 @@ | |||||||
| import base64 | import base64 | ||||||
| import collections | import collections | ||||||
| import contextlib | import contextlib | ||||||
|  | import datetime as dt | ||||||
|  | import functools | ||||||
|  | import glob | ||||||
|  | import hashlib | ||||||
| import http.cookiejar | import http.cookiejar | ||||||
| import http.cookies | import http.cookies | ||||||
| import io | import io | ||||||
| @@ -14,16 +18,13 @@ import sys | |||||||
| import tempfile | import tempfile | ||||||
| import time | import time | ||||||
| import urllib.request | import urllib.request | ||||||
| from datetime import datetime, timedelta, timezone |  | ||||||
| from enum import Enum, auto | from enum import Enum, auto | ||||||
| from hashlib import pbkdf2_hmac |  | ||||||
|  |  | ||||||
| from .aes import ( | from .aes import ( | ||||||
|     aes_cbc_decrypt_bytes, |     aes_cbc_decrypt_bytes, | ||||||
|     aes_gcm_decrypt_and_verify_bytes, |     aes_gcm_decrypt_and_verify_bytes, | ||||||
|     unpad_pkcs7, |     unpad_pkcs7, | ||||||
| ) | ) | ||||||
| from .compat import functools |  | ||||||
| from .dependencies import ( | from .dependencies import ( | ||||||
|     _SECRETSTORAGE_UNAVAILABLE_REASON, |     _SECRETSTORAGE_UNAVAILABLE_REASON, | ||||||
|     secretstorage, |     secretstorage, | ||||||
| @@ -31,6 +32,8 @@ from .dependencies import ( | |||||||
| ) | ) | ||||||
| from .minicurses import MultilinePrinter, QuietMultilinePrinter | from .minicurses import MultilinePrinter, QuietMultilinePrinter | ||||||
| from .utils import ( | from .utils import ( | ||||||
|  |     DownloadError, | ||||||
|  |     YoutubeDLError, | ||||||
|     Popen, |     Popen, | ||||||
|     error_to_str, |     error_to_str, | ||||||
|     expand_path, |     expand_path, | ||||||
| @@ -43,7 +46,7 @@ from .utils import ( | |||||||
| from .utils._utils import _YDLLogger | from .utils._utils import _YDLLogger | ||||||
| from .utils.networking import normalize_url | from .utils.networking import normalize_url | ||||||
|  |  | ||||||
| CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'} | CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'} | ||||||
| SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'} | SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'} | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -83,24 +86,31 @@ def _create_progress_bar(logger): | |||||||
|     return printer |     return printer | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class CookieLoadError(YoutubeDLError): | ||||||
|  |     pass | ||||||
|  |  | ||||||
|  |  | ||||||
| def load_cookies(cookie_file, browser_specification, ydl): | def load_cookies(cookie_file, browser_specification, ydl): | ||||||
|     cookie_jars = [] |     try: | ||||||
|     if browser_specification is not None: |         cookie_jars = [] | ||||||
|         browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification) |         if browser_specification is not None: | ||||||
|         cookie_jars.append( |             browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification) | ||||||
|             extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)) |             cookie_jars.append( | ||||||
|  |                 extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)) | ||||||
|  |  | ||||||
|     if cookie_file is not None: |         if cookie_file is not None: | ||||||
|         is_filename = is_path_like(cookie_file) |             is_filename = is_path_like(cookie_file) | ||||||
|         if is_filename: |             if is_filename: | ||||||
|             cookie_file = expand_path(cookie_file) |                 cookie_file = expand_path(cookie_file) | ||||||
|  |  | ||||||
|         jar = YoutubeDLCookieJar(cookie_file) |             jar = YoutubeDLCookieJar(cookie_file) | ||||||
|         if not is_filename or os.access(cookie_file, os.R_OK): |             if not is_filename or os.access(cookie_file, os.R_OK): | ||||||
|             jar.load() |                 jar.load() | ||||||
|         cookie_jars.append(jar) |             cookie_jars.append(jar) | ||||||
|  |  | ||||||
|     return _merge_cookie_jars(cookie_jars) |         return _merge_cookie_jars(cookie_jars) | ||||||
|  |     except Exception: | ||||||
|  |         raise CookieLoadError('failed to load cookies') | ||||||
|  |  | ||||||
|  |  | ||||||
| def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None): | def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None): | ||||||
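
Editor's note: load_cookies is now wrapped so that any failure — an unreadable cookies file, a locked browser database, keyring trouble — surfaces as CookieLoadError, a YoutubeDLError subclass that callers can catch specifically. A hedged usage sketch (the file path and URL are placeholders):

    from yt_dlp import YoutubeDL
    from yt_dlp.cookies import CookieLoadError

    try:
        with YoutubeDL({'cookiefile': 'cookies.txt'}) as ydl:  # placeholder path
            ydl.download(['https://example.com/some-video'])   # placeholder URL
    except CookieLoadError as error:
        print(f'cookie loading failed: {error}')
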
| @@ -118,17 +128,18 @@ def _extract_firefox_cookies(profile, container, logger): | |||||||
|     logger.info('Extracting cookies from firefox') |     logger.info('Extracting cookies from firefox') | ||||||
|     if not sqlite3: |     if not sqlite3: | ||||||
|         logger.warning('Cannot extract cookies from firefox without sqlite3 support. ' |         logger.warning('Cannot extract cookies from firefox without sqlite3 support. ' | ||||||
|                        'Please use a python interpreter compiled with sqlite3 support') |                        'Please use a Python interpreter compiled with sqlite3 support') | ||||||
|         return YoutubeDLCookieJar() |         return YoutubeDLCookieJar() | ||||||
|  |  | ||||||
|     if profile is None: |     if profile is None: | ||||||
|         search_root = _firefox_browser_dir() |         search_roots = list(_firefox_browser_dirs()) | ||||||
|     elif _is_path(profile): |     elif _is_path(profile): | ||||||
|         search_root = profile |         search_roots = [profile] | ||||||
|     else: |     else: | ||||||
|         search_root = os.path.join(_firefox_browser_dir(), profile) |         search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()] | ||||||
|  |     search_root = ', '.join(map(repr, search_roots)) | ||||||
|  |  | ||||||
|     cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger) |     cookie_database_path = _newest(_firefox_cookie_dbs(search_roots)) | ||||||
|     if cookie_database_path is None: |     if cookie_database_path is None: | ||||||
|         raise FileNotFoundError(f'could not find firefox cookies database in {search_root}') |         raise FileNotFoundError(f'could not find firefox cookies database in {search_root}') | ||||||
|     logger.debug(f'Extracting cookies from: "{cookie_database_path}"') |     logger.debug(f'Extracting cookies from: "{cookie_database_path}"') | ||||||
| @@ -142,7 +153,7 @@ def _extract_firefox_cookies(profile, container, logger): | |||||||
|             identities = json.load(containers).get('identities', []) |             identities = json.load(containers).get('identities', []) | ||||||
|         container_id = next((context.get('userContextId') for context in identities if container in ( |         container_id = next((context.get('userContextId') for context in identities if container in ( | ||||||
|             context.get('name'), |             context.get('name'), | ||||||
|             try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()) |             try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()), | ||||||
|         )), None) |         )), None) | ||||||
|         if not isinstance(container_id, int): |         if not isinstance(container_id, int): | ||||||
|             raise ValueError(f'could not find firefox container "{container}" in containers.json') |             raise ValueError(f'could not find firefox container "{container}" in containers.json') | ||||||
| @@ -182,12 +193,28 @@ def _extract_firefox_cookies(profile, container, logger): | |||||||
|                 cursor.connection.close() |                 cursor.connection.close() | ||||||
|  |  | ||||||
|  |  | ||||||
| def _firefox_browser_dir(): | def _firefox_browser_dirs(): | ||||||
|     if sys.platform in ('cygwin', 'win32'): |     if sys.platform in ('cygwin', 'win32'): | ||||||
|         return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles') |         yield from map(os.path.expandvars, ( | ||||||
|  |             R'%APPDATA%\Mozilla\Firefox\Profiles', | ||||||
|  |             R'%LOCALAPPDATA%\Packages\Mozilla.Firefox_n80bbvh6b1yt2\LocalCache\Roaming\Mozilla\Firefox\Profiles', | ||||||
|  |         )) | ||||||
|  |  | ||||||
|     elif sys.platform == 'darwin': |     elif sys.platform == 'darwin': | ||||||
|         return os.path.expanduser('~/Library/Application Support/Firefox') |         yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles') | ||||||
|     return os.path.expanduser('~/.mozilla/firefox') |  | ||||||
|  |     else: | ||||||
|  |         yield from map(os.path.expanduser, ( | ||||||
|  |             '~/.mozilla/firefox', | ||||||
|  |             '~/snap/firefox/common/.mozilla/firefox', | ||||||
|  |             '~/.var/app/org.mozilla.firefox/.mozilla/firefox', | ||||||
|  |         )) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def _firefox_cookie_dbs(roots): | ||||||
|  |     for root in map(os.path.abspath, roots): | ||||||
|  |         for pattern in ('', '*/', 'Profiles/*/'): | ||||||
|  |             yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite')) | ||||||
|  |  | ||||||
|  |  | ||||||
| def _get_chromium_based_browser_settings(browser_name): | def _get_chromium_based_browser_settings(browser_name): | ||||||
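
Editor's note: Firefox profile discovery now scans several roots (including the Snap and Flatpak locations on Linux and the Microsoft Store path on Windows) and globs for cookies.sqlite at three depths instead of walking the whole tree. Combined with the _newest helper further down, the selection amounts to this sketch:

    import glob
    import os

    def find_firefox_cookie_dbs(roots):
        # check each root itself, one directory down, and under Profiles/*/
        for root in map(os.path.abspath, roots):
            for pattern in ('', '*/', 'Profiles/*/'):
                yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite'))

    dbs = find_firefox_cookie_dbs([os.path.expanduser('~/.mozilla/firefox')])
    newest = max(dbs, key=lambda p: os.lstat(p).st_mtime, default=None)
    print(newest)  # the most recently used profile's database, or None
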
| @@ -202,6 +229,7 @@ def _get_chromium_based_browser_settings(browser_name): | |||||||
|             'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'), |             'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'), | ||||||
|             'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'), |             'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'), | ||||||
|             'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'), |             'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'), | ||||||
|  |             'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'), | ||||||
|         }[browser_name] |         }[browser_name] | ||||||
|  |  | ||||||
|     elif sys.platform == 'darwin': |     elif sys.platform == 'darwin': | ||||||
| @@ -213,6 +241,7 @@ def _get_chromium_based_browser_settings(browser_name): | |||||||
|             'edge': os.path.join(appdata, 'Microsoft Edge'), |             'edge': os.path.join(appdata, 'Microsoft Edge'), | ||||||
|             'opera': os.path.join(appdata, 'com.operasoftware.Opera'), |             'opera': os.path.join(appdata, 'com.operasoftware.Opera'), | ||||||
|             'vivaldi': os.path.join(appdata, 'Vivaldi'), |             'vivaldi': os.path.join(appdata, 'Vivaldi'), | ||||||
|  |             'whale': os.path.join(appdata, 'Naver/Whale'), | ||||||
|         }[browser_name] |         }[browser_name] | ||||||
|  |  | ||||||
|     else: |     else: | ||||||
| @@ -224,6 +253,7 @@ def _get_chromium_based_browser_settings(browser_name): | |||||||
|             'edge': os.path.join(config, 'microsoft-edge'), |             'edge': os.path.join(config, 'microsoft-edge'), | ||||||
|             'opera': os.path.join(config, 'opera'), |             'opera': os.path.join(config, 'opera'), | ||||||
|             'vivaldi': os.path.join(config, 'vivaldi'), |             'vivaldi': os.path.join(config, 'vivaldi'), | ||||||
|  |             'whale': os.path.join(config, 'naver-whale'), | ||||||
|         }[browser_name] |         }[browser_name] | ||||||
|  |  | ||||||
|     # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE: |     # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE: | ||||||
| @@ -235,6 +265,7 @@ def _get_chromium_based_browser_settings(browser_name): | |||||||
|         'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium', |         'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium', | ||||||
|         'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium', |         'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium', | ||||||
|         'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome', |         'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome', | ||||||
|  |         'whale': 'Whale', | ||||||
|     }[browser_name] |     }[browser_name] | ||||||
|  |  | ||||||
|     browsers_without_profiles = {'opera'} |     browsers_without_profiles = {'opera'} | ||||||
| @@ -242,7 +273,7 @@ def _get_chromium_based_browser_settings(browser_name): | |||||||
|     return { |     return { | ||||||
|         'browser_dir': browser_dir, |         'browser_dir': browser_dir, | ||||||
|         'keyring_name': keyring_name, |         'keyring_name': keyring_name, | ||||||
|         'supports_profiles': browser_name not in browsers_without_profiles |         'supports_profiles': browser_name not in browsers_without_profiles, | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -251,7 +282,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): | |||||||
|  |  | ||||||
|     if not sqlite3: |     if not sqlite3: | ||||||
|         logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. ' |         logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. ' | ||||||
|                        'Please use a python interpreter compiled with sqlite3 support') |                        'Please use a Python interpreter compiled with sqlite3 support') | ||||||
|         return YoutubeDLCookieJar() |         return YoutubeDLCookieJar() | ||||||
|  |  | ||||||
|     config = _get_chromium_based_browser_settings(browser_name) |     config = _get_chromium_based_browser_settings(browser_name) | ||||||
| @@ -268,17 +299,23 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): | |||||||
|             logger.error(f'{browser_name} does not support profiles') |             logger.error(f'{browser_name} does not support profiles') | ||||||
|             search_root = config['browser_dir'] |             search_root = config['browser_dir'] | ||||||
|  |  | ||||||
|     cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger) |     cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger)) | ||||||
|     if cookie_database_path is None: |     if cookie_database_path is None: | ||||||
|         raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"') |         raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"') | ||||||
|     logger.debug(f'Extracting cookies from: "{cookie_database_path}"') |     logger.debug(f'Extracting cookies from: "{cookie_database_path}"') | ||||||
|  |  | ||||||
|     decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring) |  | ||||||
|  |  | ||||||
|     with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: |     with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: | ||||||
|         cursor = None |         cursor = None | ||||||
|         try: |         try: | ||||||
|             cursor = _open_database_copy(cookie_database_path, tmpdir) |             cursor = _open_database_copy(cookie_database_path, tmpdir) | ||||||
|  |  | ||||||
|  |             # meta_version is necessary to determine if we need to trim the hash prefix from the cookies | ||||||
|  |             # Ref: https://chromium.googlesource.com/chromium/src/+/b02dcebd7cafab92770734dc2bc317bd07f1d891/net/extras/sqlite/sqlite_persistent_cookie_store.cc#223 | ||||||
|  |             meta_version = int(cursor.execute('SELECT value FROM meta WHERE key = "version"').fetchone()[0]) | ||||||
|  |             decryptor = get_cookie_decryptor( | ||||||
|  |                 config['browser_dir'], config['keyring_name'], logger, | ||||||
|  |                 keyring=keyring, meta_version=meta_version) | ||||||
|  |  | ||||||
|             cursor.connection.text_factory = bytes |             cursor.connection.text_factory = bytes | ||||||
|             column_names = _get_column_names(cursor, 'cookies') |             column_names = _get_column_names(cursor, 'cookies') | ||||||
|             secure_column = 'is_secure' if 'is_secure' in column_names else 'secure' |             secure_column = 'is_secure' if 'is_secure' in column_names else 'secure' | ||||||
| @@ -307,6 +344,12 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): | |||||||
|             counts['unencrypted'] = unencrypted_cookies |             counts['unencrypted'] = unencrypted_cookies | ||||||
|             logger.debug(f'cookie version breakdown: {counts}') |             logger.debug(f'cookie version breakdown: {counts}') | ||||||
|             return jar |             return jar | ||||||
|  |         except PermissionError as error: | ||||||
|  |             if os.name == 'nt' and error.errno == 13: | ||||||
|  |                 message = 'Could not copy Chrome cookie database. See  https://github.com/yt-dlp/yt-dlp/issues/7271  for more info' | ||||||
|  |                 logger.error(message) | ||||||
|  |                 raise DownloadError(message)  # force exit | ||||||
|  |             raise | ||||||
|         finally: |         finally: | ||||||
|             if cursor is not None: |             if cursor is not None: | ||||||
|                 cursor.connection.close() |                 cursor.connection.close() | ||||||
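
Editor's note: the meta_version lookup matters because Chromium bumped its cookie store's meta version to 24 when it started prepending a hash of the cookie's domain to every encrypted value; reading the version up front tells the decryptor whether 32 prefix bytes must be stripped after decryption. The query itself is plain sqlite3 (the path below is a placeholder for the temporary copy):

    import sqlite3

    con = sqlite3.connect('/tmp/yt_dlp_cookies/Cookies')  # placeholder: copied database
    meta_version = int(con.execute('SELECT value FROM meta WHERE key = "version"').fetchone()[0])
    con.close()

    print(meta_version, '-> trim hash prefix' if meta_version >= 24 else '-> plain value')
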
| @@ -324,6 +367,11 @@ def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, pa | |||||||
|         if value is None: |         if value is None: | ||||||
|             return is_encrypted, None |             return is_encrypted, None | ||||||
|  |  | ||||||
|  |     # In chrome, session cookies have expires_utc set to 0 | ||||||
|  |     # In our cookie-store, cookies that do not expire should have expires set to None | ||||||
|  |     if not expires_utc: | ||||||
|  |         expires_utc = None | ||||||
|  |  | ||||||
|     return is_encrypted, http.cookiejar.Cookie( |     return is_encrypted, http.cookiejar.Cookie( | ||||||
|         version=0, name=name, value=value, port=None, port_specified=False, |         version=0, name=name, value=value, port=None, port_specified=False, | ||||||
|         domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'), |         domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'), | ||||||
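
Editor's note: Chrome marks session cookies with expires_utc == 0, while http.cookiejar uses expires=None for cookies that should not outlive the session; mapping 0 to None keeps exported cookie files honest. In cookiejar terms:

    import http.cookiejar

    session_cookie = http.cookiejar.Cookie(
        version=0, name='sid', value='abc', port=None, port_specified=False,
        domain='.example.com', domain_specified=True, domain_initial_dot=True,
        path='/', path_specified=True, secure=False, expires=None,  # None = session cookie
        discard=True, comment=None, comment_url=None, rest={})

    print(session_cookie.is_expired())  # False - no expiry means never expired
    print(session_cookie.discard)       # True - dropped when the session ends
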
| @@ -365,22 +413,23 @@ class ChromeCookieDecryptor: | |||||||
|         raise NotImplementedError('Must be implemented by sub classes') |         raise NotImplementedError('Must be implemented by sub classes') | ||||||
|  |  | ||||||
|  |  | ||||||
| def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None): | def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None, meta_version=None): | ||||||
|     if sys.platform == 'darwin': |     if sys.platform == 'darwin': | ||||||
|         return MacChromeCookieDecryptor(browser_keyring_name, logger) |         return MacChromeCookieDecryptor(browser_keyring_name, logger, meta_version=meta_version) | ||||||
|     elif sys.platform in ('win32', 'cygwin'): |     elif sys.platform in ('win32', 'cygwin'): | ||||||
|         return WindowsChromeCookieDecryptor(browser_root, logger) |         return WindowsChromeCookieDecryptor(browser_root, logger, meta_version=meta_version) | ||||||
|     return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring) |     return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring, meta_version=meta_version) | ||||||
|  |  | ||||||
|  |  | ||||||
| class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): | class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): | ||||||
|     def __init__(self, browser_keyring_name, logger, *, keyring=None): |     def __init__(self, browser_keyring_name, logger, *, keyring=None, meta_version=None): | ||||||
|         self._logger = logger |         self._logger = logger | ||||||
|         self._v10_key = self.derive_key(b'peanuts') |         self._v10_key = self.derive_key(b'peanuts') | ||||||
|         self._empty_key = self.derive_key(b'') |         self._empty_key = self.derive_key(b'') | ||||||
|         self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0} |         self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0} | ||||||
|         self._browser_keyring_name = browser_keyring_name |         self._browser_keyring_name = browser_keyring_name | ||||||
|         self._keyring = keyring |         self._keyring = keyring | ||||||
|  |         self._meta_version = meta_version or 0 | ||||||
|  |  | ||||||
|     @functools.cached_property |     @functools.cached_property | ||||||
|     def _v11_key(self): |     def _v11_key(self): | ||||||
| @@ -409,14 +458,18 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): | |||||||
|  |  | ||||||
|         if version == b'v10': |         if version == b'v10': | ||||||
|             self._cookie_counts['v10'] += 1 |             self._cookie_counts['v10'] += 1 | ||||||
|             return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger) |             return _decrypt_aes_cbc_multi( | ||||||
|  |                 ciphertext, (self._v10_key, self._empty_key), self._logger, | ||||||
|  |                 hash_prefix=self._meta_version >= 24) | ||||||
|  |  | ||||||
|         elif version == b'v11': |         elif version == b'v11': | ||||||
|             self._cookie_counts['v11'] += 1 |             self._cookie_counts['v11'] += 1 | ||||||
|             if self._v11_key is None: |             if self._v11_key is None: | ||||||
|                 self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True) |                 self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True) | ||||||
|                 return None |                 return None | ||||||
|             return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger) |             return _decrypt_aes_cbc_multi( | ||||||
|  |                 ciphertext, (self._v11_key, self._empty_key), self._logger, | ||||||
|  |                 hash_prefix=self._meta_version >= 24) | ||||||
|  |  | ||||||
|         else: |         else: | ||||||
|             self._logger.warning(f'unknown cookie version: "{version}"', only_once=True) |             self._logger.warning(f'unknown cookie version: "{version}"', only_once=True) | ||||||
| @@ -425,11 +478,12 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): | |||||||
|  |  | ||||||
|  |  | ||||||
| class MacChromeCookieDecryptor(ChromeCookieDecryptor): | class MacChromeCookieDecryptor(ChromeCookieDecryptor): | ||||||
|     def __init__(self, browser_keyring_name, logger): |     def __init__(self, browser_keyring_name, logger, meta_version=None): | ||||||
|         self._logger = logger |         self._logger = logger | ||||||
|         password = _get_mac_keyring_password(browser_keyring_name, logger) |         password = _get_mac_keyring_password(browser_keyring_name, logger) | ||||||
|         self._v10_key = None if password is None else self.derive_key(password) |         self._v10_key = None if password is None else self.derive_key(password) | ||||||
|         self._cookie_counts = {'v10': 0, 'other': 0} |         self._cookie_counts = {'v10': 0, 'other': 0} | ||||||
|  |         self._meta_version = meta_version or 0 | ||||||
|  |  | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def derive_key(password): |     def derive_key(password): | ||||||
| @@ -447,7 +501,8 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor): | |||||||
|                 self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) |                 self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) | ||||||
|                 return None |                 return None | ||||||
|  |  | ||||||
|             return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger) |             return _decrypt_aes_cbc_multi( | ||||||
|  |                 ciphertext, (self._v10_key,), self._logger, hash_prefix=self._meta_version >= 24) | ||||||
|  |  | ||||||
|         else: |         else: | ||||||
|             self._cookie_counts['other'] += 1 |             self._cookie_counts['other'] += 1 | ||||||
| @@ -457,10 +512,11 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor): | |||||||
|  |  | ||||||
|  |  | ||||||
| class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): | class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): | ||||||
|     def __init__(self, browser_root, logger): |     def __init__(self, browser_root, logger, meta_version=None): | ||||||
|         self._logger = logger |         self._logger = logger | ||||||
|         self._v10_key = _get_windows_v10_key(browser_root, logger) |         self._v10_key = _get_windows_v10_key(browser_root, logger) | ||||||
|         self._cookie_counts = {'v10': 0, 'other': 0} |         self._cookie_counts = {'v10': 0, 'other': 0} | ||||||
|  |         self._meta_version = meta_version or 0 | ||||||
|  |  | ||||||
|     def decrypt(self, encrypted_value): |     def decrypt(self, encrypted_value): | ||||||
|         version = encrypted_value[:3] |         version = encrypted_value[:3] | ||||||
| @@ -484,7 +540,9 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): | |||||||
|             ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length] |             ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length] | ||||||
|             authentication_tag = raw_ciphertext[-authentication_tag_length:] |             authentication_tag = raw_ciphertext[-authentication_tag_length:] | ||||||
|  |  | ||||||
|             return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger) |             return _decrypt_aes_gcm( | ||||||
|  |                 ciphertext, self._v10_key, nonce, authentication_tag, self._logger, | ||||||
|  |                 hash_prefix=self._meta_version >= 24) | ||||||
|  |  | ||||||
|         else: |         else: | ||||||
|             self._cookie_counts['other'] += 1 |             self._cookie_counts['other'] += 1 | ||||||
| @@ -575,7 +633,7 @@ class DataParser: | |||||||
|  |  | ||||||
|  |  | ||||||
| def _mac_absolute_time_to_posix(timestamp): | def _mac_absolute_time_to_posix(timestamp): | ||||||
|     return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp()) |     return int((dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc) + dt.timedelta(seconds=timestamp)).timestamp()) | ||||||
|  |  | ||||||
|  |  | ||||||
| def _parse_safari_cookies_header(data, logger): | def _parse_safari_cookies_header(data, logger): | ||||||
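
Editor's note: _mac_absolute_time_to_posix is unchanged apart from the `import datetime as dt` style. Safari's binary cookies store timestamps as seconds since 2001-01-01 UTC ("Mac absolute time"), so the conversion is just an epoch shift. Worked example:

    import datetime as dt

    def mac_absolute_time_to_posix(timestamp):
        return int((dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc)
                    + dt.timedelta(seconds=timestamp)).timestamp())

    print(mac_absolute_time_to_posix(0))          # 978307200 - the 2001 epoch in Unix time
    print(mac_absolute_time_to_posix(700000000))  # 1678307200 - i.e. 2023-03-08 UTC
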
| @@ -708,40 +766,38 @@ def _get_linux_desktop_environment(env, logger): | |||||||
|     xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None) |     xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None) | ||||||
|     desktop_session = env.get('DESKTOP_SESSION', None) |     desktop_session = env.get('DESKTOP_SESSION', None) | ||||||
|     if xdg_current_desktop is not None: |     if xdg_current_desktop is not None: | ||||||
|         xdg_current_desktop = xdg_current_desktop.split(':')[0].strip() |         for part in map(str.strip, xdg_current_desktop.split(':')): | ||||||
|  |             if part == 'Unity': | ||||||
|         if xdg_current_desktop == 'Unity': |                 if desktop_session is not None and 'gnome-fallback' in desktop_session: | ||||||
|             if desktop_session is not None and 'gnome-fallback' in desktop_session: |                     return _LinuxDesktopEnvironment.GNOME | ||||||
|  |                 else: | ||||||
|  |                     return _LinuxDesktopEnvironment.UNITY | ||||||
|  |             elif part == 'Deepin': | ||||||
|  |                 return _LinuxDesktopEnvironment.DEEPIN | ||||||
|  |             elif part == 'GNOME': | ||||||
|                 return _LinuxDesktopEnvironment.GNOME |                 return _LinuxDesktopEnvironment.GNOME | ||||||
|             else: |             elif part == 'X-Cinnamon': | ||||||
|                 return _LinuxDesktopEnvironment.UNITY |                 return _LinuxDesktopEnvironment.CINNAMON | ||||||
|         elif xdg_current_desktop == 'Deepin': |             elif part == 'KDE': | ||||||
|             return _LinuxDesktopEnvironment.DEEPIN |                 kde_version = env.get('KDE_SESSION_VERSION', None) | ||||||
|         elif xdg_current_desktop == 'GNOME': |                 if kde_version == '5': | ||||||
|             return _LinuxDesktopEnvironment.GNOME |                     return _LinuxDesktopEnvironment.KDE5 | ||||||
|         elif xdg_current_desktop == 'X-Cinnamon': |                 elif kde_version == '6': | ||||||
|             return _LinuxDesktopEnvironment.CINNAMON |                     return _LinuxDesktopEnvironment.KDE6 | ||||||
|         elif xdg_current_desktop == 'KDE': |                 elif kde_version == '4': | ||||||
|             kde_version = env.get('KDE_SESSION_VERSION', None) |                     return _LinuxDesktopEnvironment.KDE4 | ||||||
|             if kde_version == '5': |                 else: | ||||||
|                 return _LinuxDesktopEnvironment.KDE5 |                     logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4') | ||||||
|             elif kde_version == '6': |                     return _LinuxDesktopEnvironment.KDE4 | ||||||
|                 return _LinuxDesktopEnvironment.KDE6 |             elif part == 'Pantheon': | ||||||
|             elif kde_version == '4': |                 return _LinuxDesktopEnvironment.PANTHEON | ||||||
|                 return _LinuxDesktopEnvironment.KDE4 |             elif part == 'XFCE': | ||||||
|             else: |                 return _LinuxDesktopEnvironment.XFCE | ||||||
|                 logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4') |             elif part == 'UKUI': | ||||||
|                 return _LinuxDesktopEnvironment.KDE4 |                 return _LinuxDesktopEnvironment.UKUI | ||||||
|         elif xdg_current_desktop == 'Pantheon': |             elif part == 'LXQt': | ||||||
|             return _LinuxDesktopEnvironment.PANTHEON |                 return _LinuxDesktopEnvironment.LXQT | ||||||
|         elif xdg_current_desktop == 'XFCE': |         logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"') | ||||||
|             return _LinuxDesktopEnvironment.XFCE |  | ||||||
|         elif xdg_current_desktop == 'UKUI': |  | ||||||
|             return _LinuxDesktopEnvironment.UKUI |  | ||||||
|         elif xdg_current_desktop == 'LXQt': |  | ||||||
|             return _LinuxDesktopEnvironment.LXQT |  | ||||||
|         else: |  | ||||||
|             logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"') |  | ||||||
|  |  | ||||||
|     elif desktop_session is not None: |     elif desktop_session is not None: | ||||||
|         if desktop_session == 'deepin': |         if desktop_session == 'deepin': | ||||||
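
Editor's note: the rewritten _get_linux_desktop_environment walks every colon-separated entry of XDG_CURRENT_DESKTOP rather than only the first, so values like `ubuntu:GNOME` (common on stock Ubuntu) now resolve to GNOME instead of falling through as unknown. Reduced to its essence:

    def detect_desktop(xdg_current_desktop):
        known = {'GNOME', 'KDE', 'XFCE', 'X-Cinnamon', 'Unity', 'Deepin', 'Pantheon', 'UKUI', 'LXQt'}
        for part in map(str.strip, xdg_current_desktop.split(':')):
            if part in known:
                return part
        return None

    print(detect_desktop('ubuntu:GNOME'))  # 'GNOME' - the old split(':')[0] saw only 'ubuntu'
    print(detect_desktop('Budgie:fancy'))  # None   - still unknown, logged as such
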
| @@ -794,7 +850,7 @@ def _choose_linux_keyring(logger): | |||||||
|     elif desktop_environment == _LinuxDesktopEnvironment.KDE6: |     elif desktop_environment == _LinuxDesktopEnvironment.KDE6: | ||||||
|         linux_keyring = _LinuxKeyring.KWALLET6 |         linux_keyring = _LinuxKeyring.KWALLET6 | ||||||
|     elif desktop_environment in ( |     elif desktop_environment in ( | ||||||
|         _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER |         _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER, | ||||||
|     ): |     ): | ||||||
|         linux_keyring = _LinuxKeyring.BASICTEXT |         linux_keyring = _LinuxKeyring.BASICTEXT | ||||||
|     else: |     else: | ||||||
| @@ -829,7 +885,7 @@ def _get_kwallet_network_wallet(keyring, logger): | |||||||
|             'dbus-send', '--session', '--print-reply=literal', |             'dbus-send', '--session', '--print-reply=literal', | ||||||
|             f'--dest={service_name}', |             f'--dest={service_name}', | ||||||
|             wallet_path, |             wallet_path, | ||||||
|             'org.kde.KWallet.networkWallet' |             'org.kde.KWallet.networkWallet', | ||||||
|         ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) |         ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) | ||||||
|  |  | ||||||
|         if returncode: |         if returncode: | ||||||
| @@ -859,7 +915,7 @@ def _get_kwallet_password(browser_keyring_name, keyring, logger): | |||||||
|             'kwallet-query', |             'kwallet-query', | ||||||
|             '--read-password', f'{browser_keyring_name} Safe Storage', |             '--read-password', f'{browser_keyring_name} Safe Storage', | ||||||
|             '--folder', f'{browser_keyring_name} Keys', |             '--folder', f'{browser_keyring_name} Keys', | ||||||
|             network_wallet |             network_wallet, | ||||||
|         ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) |         ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) | ||||||
|  |  | ||||||
|         if returncode: |         if returncode: | ||||||
| @@ -899,9 +955,8 @@ def _get_gnome_keyring_password(browser_keyring_name, logger): | |||||||
|         for item in col.get_all_items(): |         for item in col.get_all_items(): | ||||||
|             if item.get_label() == f'{browser_keyring_name} Safe Storage': |             if item.get_label() == f'{browser_keyring_name} Safe Storage': | ||||||
|                 return item.get_secret() |                 return item.get_secret() | ||||||
|         else: |         logger.error('failed to read from keyring') | ||||||
|             logger.error('failed to read from keyring') |         return b'' | ||||||
|             return b'' |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def _get_linux_keyring_password(browser_keyring_name, keyring, logger): | def _get_linux_keyring_password(browser_keyring_name, keyring, logger): | ||||||
| @@ -947,7 +1002,7 @@ def _get_windows_v10_key(browser_root, logger): | |||||||
|     References: |     References: | ||||||
|         - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc |         - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc | ||||||
|     """ |     """ | ||||||
|     path = _find_most_recently_used_file(browser_root, 'Local State', logger) |     path = _newest(_find_files(browser_root, 'Local State', logger)) | ||||||
|     if path is None: |     if path is None: | ||||||
|         logger.error('could not find local state file') |         logger.error('could not find local state file') | ||||||
|         return None |         return None | ||||||
| @@ -970,13 +1025,15 @@ def _get_windows_v10_key(browser_root, logger): | |||||||
|  |  | ||||||
|  |  | ||||||
| def pbkdf2_sha1(password, salt, iterations, key_length): | def pbkdf2_sha1(password, salt, iterations, key_length): | ||||||
|     return pbkdf2_hmac('sha1', password, salt, iterations, key_length) |     return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length) | ||||||
|  |  | ||||||
|  |  | ||||||
| def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16): | def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16, hash_prefix=False): | ||||||
|     for key in keys: |     for key in keys: | ||||||
|         plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)) |         plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)) | ||||||
|         try: |         try: | ||||||
|  |             if hash_prefix: | ||||||
|  |                 return plaintext[32:].decode() | ||||||
|             return plaintext.decode() |             return plaintext.decode() | ||||||
|         except UnicodeDecodeError: |         except UnicodeDecodeError: | ||||||
|             pass |             pass | ||||||
| @@ -984,7 +1041,7 @@ def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' | |||||||
|     return None |     return None | ||||||
|  |  | ||||||
|  |  | ||||||
| def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): | def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger, hash_prefix=False): | ||||||
|     try: |     try: | ||||||
|         plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce) |         plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce) | ||||||
|     except ValueError: |     except ValueError: | ||||||
| @@ -992,6 +1049,8 @@ def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): | |||||||
|         return None |         return None | ||||||
|  |  | ||||||
|     try: |     try: | ||||||
|  |         if hash_prefix: | ||||||
|  |             return plaintext[32:].decode() | ||||||
|         return plaintext.decode() |         return plaintext.decode() | ||||||
|     except UnicodeDecodeError: |     except UnicodeDecodeError: | ||||||
|         logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) |         logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) | ||||||
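
Editor's note: both decrypt helpers gain the same hash_prefix handling: with cookie-store meta version >= 24, the decrypted plaintext is a 32-byte SHA-256 digest of the cookie's host key followed by the value, so the helpers return plaintext[32:] instead. Assuming that digest layout, the trim looks like:

    import hashlib

    def strip_domain_hash(plaintext, host_key):
        # Chromium (cookie store meta version >= 24) prepends sha256(host_key)
        if plaintext[:32] != hashlib.sha256(host_key).digest():
            raise ValueError('unexpected hash prefix')
        return plaintext[32:].decode()

    decrypted = hashlib.sha256(b'.example.com').digest() + b'cookie-value'
    print(strip_domain_hash(decrypted, b'.example.com'))  # 'cookie-value'
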
| @@ -1021,11 +1080,12 @@ def _decrypt_windows_dpapi(ciphertext, logger): | |||||||
|         None,  # pvReserved: must be NULL |         None,  # pvReserved: must be NULL | ||||||
|         None,  # pPromptStruct: information about prompts to display |         None,  # pPromptStruct: information about prompts to display | ||||||
|         0,  # dwFlags |         0,  # dwFlags | ||||||
|         ctypes.byref(blob_out)  # pDataOut |         ctypes.byref(blob_out),  # pDataOut | ||||||
|     ) |     ) | ||||||
|     if not ret: |     if not ret: | ||||||
|         logger.warning('failed to decrypt with DPAPI', only_once=True) |         message = 'Failed to decrypt with DPAPI. See  https://github.com/yt-dlp/yt-dlp/issues/10927  for more info' | ||||||
|         return None |         logger.error(message) | ||||||
|  |         raise DownloadError(message)  # force exit | ||||||
|  |  | ||||||
|     result = ctypes.string_at(blob_out.pbData, blob_out.cbData) |     result = ctypes.string_at(blob_out.pbData, blob_out.cbData) | ||||||
|     ctypes.windll.kernel32.LocalFree(blob_out.pbData) |     ctypes.windll.kernel32.LocalFree(blob_out.pbData) | ||||||
| @@ -1049,17 +1109,20 @@ def _get_column_names(cursor, table_name): | |||||||
|     return [row[1].decode() for row in table_info] |     return [row[1].decode() for row in table_info] | ||||||
|  |  | ||||||
|  |  | ||||||
| def _find_most_recently_used_file(root, filename, logger): | def _newest(files): | ||||||
|  |     return max(files, key=lambda path: os.lstat(path).st_mtime, default=None) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def _find_files(root, filename, logger): | ||||||
|     # if there are multiple browser profiles, take the most recently used one |     # if there are multiple browser profiles, take the most recently used one | ||||||
|     i, paths = 0, [] |     i = 0 | ||||||
|     with _create_progress_bar(logger) as progress_bar: |     with _create_progress_bar(logger) as progress_bar: | ||||||
|         for curr_root, dirs, files in os.walk(root): |         for curr_root, _, files in os.walk(root): | ||||||
|             for file in files: |             for file in files: | ||||||
|                 i += 1 |                 i += 1 | ||||||
|                 progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched') |                 progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched') | ||||||
|                 if file == filename: |                 if file == filename: | ||||||
|                     paths.append(os.path.join(curr_root, file)) |                     yield os.path.join(curr_root, file) | ||||||
|     return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def _merge_cookie_jars(jars): | def _merge_cookie_jars(jars): | ||||||
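
Editor's note: _find_most_recently_used_file is split in two — _find_files is a generator that just yields matches while the progress bar ticks, and _newest reduces any iterable of paths with max(..., default=None), replacing the old empty-list special case. The reduction on its own:

    import os
    import tempfile

    def newest(files):
        # None for an empty iterable, else the most recently modified path
        return max(files, key=lambda path: os.lstat(path).st_mtime, default=None)

    print(newest([]))  # None - no candidates found

    with tempfile.TemporaryDirectory() as tmp:
        a, b = os.path.join(tmp, 'a'), os.path.join(tmp, 'b')
        for path, mtime in ((a, 1000), (b, 2000)):
            open(path, 'w').close()
            os.utime(path, (mtime, mtime))  # set atime/mtime explicitly
        print(newest([a, b]))               # .../b - larger mtime wins
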
| @@ -1073,7 +1136,7 @@ def _merge_cookie_jars(jars): | |||||||
|  |  | ||||||
|  |  | ||||||
| def _is_path(value): | def _is_path(value): | ||||||
|     return os.path.sep in value |     return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep) | ||||||
|  |  | ||||||
|  |  | ||||||
| def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None): | def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None): | ||||||
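
Editor's note: _is_path decides whether the profile part of a --cookies-from-browser specification is a filesystem path or a profile name. Checking os.path.altsep as well means forward slashes now count on Windows too (altsep is '/' there and None on POSIX, which the `if sep` filter skips). Behavior sketch:

    import os

    def is_path(value):
        return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)

    print(is_path('default-release'))   # False -> treated as a profile name
    print(is_path('Profiles/default'))  # True on POSIX; now also True on Windows
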
| @@ -1094,24 +1157,24 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): | |||||||
|     _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}') |     _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}') | ||||||
|  |  | ||||||
|     _RESERVED = { |     _RESERVED = { | ||||||
|         "expires", |         'expires', | ||||||
|         "path", |         'path', | ||||||
|         "comment", |         'comment', | ||||||
|         "domain", |         'domain', | ||||||
|         "max-age", |         'max-age', | ||||||
|         "secure", |         'secure', | ||||||
|         "httponly", |         'httponly', | ||||||
|         "version", |         'version', | ||||||
|         "samesite", |         'samesite', | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     _FLAGS = {"secure", "httponly"} |     _FLAGS = {'secure', 'httponly'} | ||||||
|  |  | ||||||
|     # Added 'bad' group to catch the remaining value |     # Added 'bad' group to catch the remaining value | ||||||
|     _COOKIE_PATTERN = re.compile(r""" |     _COOKIE_PATTERN = re.compile(r''' | ||||||
|         \s*                            # Optional whitespace at start of cookie |         \s*                            # Optional whitespace at start of cookie | ||||||
|         (?P<key>                       # Start of group 'key' |         (?P<key>                       # Start of group 'key' | ||||||
|         [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter |         [''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter | ||||||
|         )                              # End of group 'key' |         )                              # End of group 'key' | ||||||
|         (                              # Optional group: there may not be a value. |         (                              # Optional group: there may not be a value. | ||||||
|         \s*=\s*                          # Equal Sign |         \s*=\s*                          # Equal Sign | ||||||
| @@ -1121,7 +1184,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): | |||||||
|         |                                    # or |         |                                    # or | ||||||
|         \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr |         \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr | ||||||
|         |                                    # or |         |                                    # or | ||||||
|         [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string |         [''' + _LEGAL_VALUE_CHARS + r''']*     # Any word or empty string | ||||||
|         )                                  # End of group 'val' |         )                                  # End of group 'val' | ||||||
|         |                                  # or |         |                                  # or | ||||||
|         (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values |         (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values | ||||||
| @@ -1129,7 +1192,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): | |||||||
|         )?                             # End of optional value group |         )?                             # End of optional value group | ||||||
|         \s*                            # Any number of spaces. |         \s*                            # Any number of spaces. | ||||||
|         (\s+|;|$)                      # Ending either at space, semicolon, or EOS. |         (\s+|;|$)                      # Ending either at space, semicolon, or EOS. | ||||||
|         """, re.ASCII | re.VERBOSE) |         ''', re.ASCII | re.VERBOSE) | ||||||
|  |  | ||||||
|     def load(self, data): |     def load(self, data): | ||||||
|         # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776 |         # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776 | ||||||
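Reviewer note: the added 'bad' group soaks up any run that neither the key nor the value charsets accept, so a single malformed morsel can be skipped instead of aborting the whole cookie parse. A sketch of the intended lenient behavior (hypothetical cookie string):

    jar = LenientSimpleCookie()
    jar.load('good=1; bad stuff,here; other=2')
    sorted(jar)  # ['good', 'other'] -- the invalid morsel is dropped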
| @@ -1216,8 +1279,8 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): | |||||||
|     def _really_save(self, f, ignore_discard, ignore_expires): |     def _really_save(self, f, ignore_discard, ignore_expires): | ||||||
|         now = time.time() |         now = time.time() | ||||||
|         for cookie in self: |         for cookie in self: | ||||||
|             if (not ignore_discard and cookie.discard |             if ((not ignore_discard and cookie.discard) | ||||||
|                     or not ignore_expires and cookie.is_expired(now)): |                     or (not ignore_expires and cookie.is_expired(now))): | ||||||
|                 continue |                 continue | ||||||
|             name, value = cookie.name, cookie.value |             name, value = cookie.name, cookie.value | ||||||
|             if value is None: |             if value is None: | ||||||
| @@ -1225,14 +1288,14 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): | |||||||
|                 # with no name, whereas http.cookiejar regards it as a |                 # with no name, whereas http.cookiejar regards it as a | ||||||
|                 # cookie with no value. |                 # cookie with no value. | ||||||
|                 name, value = '', name |                 name, value = '', name | ||||||
|             f.write('%s\n' % '\t'.join(( |             f.write('{}\n'.format('\t'.join(( | ||||||
|                 cookie.domain, |                 cookie.domain, | ||||||
|                 self._true_or_false(cookie.domain.startswith('.')), |                 self._true_or_false(cookie.domain.startswith('.')), | ||||||
|                 cookie.path, |                 cookie.path, | ||||||
|                 self._true_or_false(cookie.secure), |                 self._true_or_false(cookie.secure), | ||||||
|                 str_or_none(cookie.expires, default=''), |                 str_or_none(cookie.expires, default=''), | ||||||
|                 name, value |                 name, value, | ||||||
|             ))) |             )))) | ||||||
|  |  | ||||||
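Reviewer note: _really_save() emits the Netscape cookies.txt layout -- seven tab-separated fields per cookie. An illustrative row (made-up values):

    # domain        incl.subdomains  path  secure  expires     name       value
    # .example.com  TRUE             /     FALSE   1735689600  SESSIONID  abc123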
|     def save(self, filename=None, ignore_discard=True, ignore_expires=True): |     def save(self, filename=None, ignore_discard=True, ignore_expires=True): | ||||||
|         """ |         """ | ||||||
| @@ -1271,10 +1334,10 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): | |||||||
|                 return line |                 return line | ||||||
|             cookie_list = line.split('\t') |             cookie_list = line.split('\t') | ||||||
|             if len(cookie_list) != self._ENTRY_LEN: |             if len(cookie_list) != self._ENTRY_LEN: | ||||||
|                 raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list)) |                 raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}') | ||||||
|             cookie = self._CookieFileEntry(*cookie_list) |             cookie = self._CookieFileEntry(*cookie_list) | ||||||
|             if cookie.expires_at and not cookie.expires_at.isdigit(): |             if cookie.expires_at and not cookie.expires_at.isdigit(): | ||||||
|                 raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) |                 raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}') | ||||||
|             return line |             return line | ||||||
|  |  | ||||||
|         cf = io.StringIO() |         cf = io.StringIO() | ||||||
|   | |||||||
| @@ -24,7 +24,7 @@ try: | |||||||
|         from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5  # noqa: F401 |         from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5  # noqa: F401 | ||||||
|         from Crypto.Hash import CMAC, SHA1  # noqa: F401 |         from Crypto.Hash import CMAC, SHA1  # noqa: F401 | ||||||
|         from Crypto.PublicKey import RSA  # noqa: F401 |         from Crypto.PublicKey import RSA  # noqa: F401 | ||||||
| except ImportError: | except (ImportError, OSError): | ||||||
|     __version__ = f'broken {__version__}'.strip() |     __version__ = f'broken {__version__}'.strip() | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -43,19 +43,28 @@ except Exception as _err: | |||||||
|  |  | ||||||
| try: | try: | ||||||
|     import sqlite3 |     import sqlite3 | ||||||
|  |     # We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152 | ||||||
|  |     sqlite3._yt_dlp__version = sqlite3.sqlite_version | ||||||
| except ImportError: | except ImportError: | ||||||
|     # although sqlite3 is part of the standard library, it is possible to compile python without |     # although sqlite3 is part of the standard library, it is possible to compile Python without | ||||||
|     # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544 |     # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544 | ||||||
|     sqlite3 = None |     sqlite3 = None | ||||||
|  |  | ||||||
|  |  | ||||||
| try: | try: | ||||||
|     import websockets |     import websockets | ||||||
| except (ImportError, SyntaxError): | except ImportError: | ||||||
|     # websockets 3.10 on python 3.6 causes SyntaxError |  | ||||||
|     # See https://github.com/yt-dlp/yt-dlp/issues/2633 |  | ||||||
|     websockets = None |     websockets = None | ||||||
|  |  | ||||||
|  | try: | ||||||
|  |     import urllib3 | ||||||
|  | except ImportError: | ||||||
|  |     urllib3 = None | ||||||
|  |  | ||||||
|  | try: | ||||||
|  |     import requests | ||||||
|  | except ImportError: | ||||||
|  |     requests = None | ||||||
|  |  | ||||||
| try: | try: | ||||||
|     import xattr  # xattr or pyxattr |     import xattr  # xattr or pyxattr | ||||||
| @@ -65,6 +74,10 @@ else: | |||||||
|     if hasattr(xattr, 'set'):  # pyxattr |     if hasattr(xattr, 'set'):  # pyxattr | ||||||
|         xattr._yt_dlp__identifier = 'pyxattr' |         xattr._yt_dlp__identifier = 'pyxattr' | ||||||
|  |  | ||||||
|  | try: | ||||||
|  |     import curl_cffi | ||||||
|  | except ImportError: | ||||||
|  |     curl_cffi = None | ||||||
|  |  | ||||||
| from . import Cryptodome | from . import Cryptodome | ||||||
|  |  | ||||||
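Reviewer note: all of these optional backends follow the same import-or-None pattern, and the sqlite3 shim records the version of the underlying SQLite library rather than the sqlite3 module. A sketch of how a consumer can use it:

    # Report the real SQLite build when the module is available
    if sqlite3 is not None:
        print('SQLite', sqlite3._yt_dlp__version)  # e.g. '3.45.1'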
|   | |||||||
| @@ -30,11 +30,12 @@ from .hls import HlsFD | |||||||
| from .http import HttpFD | from .http import HttpFD | ||||||
| from .ism import IsmFD | from .ism import IsmFD | ||||||
| from .mhtml import MhtmlFD | from .mhtml import MhtmlFD | ||||||
| from .niconico import NiconicoDmcFD, NiconicoLiveFD | from .niconico import NiconicoLiveFD | ||||||
| from .rtmp import RtmpFD | from .rtmp import RtmpFD | ||||||
| from .rtsp import RtspFD | from .rtsp import RtspFD | ||||||
| from .websocket import WebSocketFragmentFD | from .websocket import WebSocketFragmentFD | ||||||
| from .youtube_live_chat import YoutubeLiveChatFD | from .youtube_live_chat import YoutubeLiveChatFD | ||||||
|  | from .bunnycdn import BunnyCdnFD | ||||||
|  |  | ||||||
| PROTOCOL_MAP = { | PROTOCOL_MAP = { | ||||||
|     'rtmp': RtmpFD, |     'rtmp': RtmpFD, | ||||||
| @@ -49,12 +50,12 @@ PROTOCOL_MAP = { | |||||||
|     'http_dash_segments_generator': DashSegmentsFD, |     'http_dash_segments_generator': DashSegmentsFD, | ||||||
|     'ism': IsmFD, |     'ism': IsmFD, | ||||||
|     'mhtml': MhtmlFD, |     'mhtml': MhtmlFD, | ||||||
|     'niconico_dmc': NiconicoDmcFD, |  | ||||||
|     'niconico_live': NiconicoLiveFD, |     'niconico_live': NiconicoLiveFD, | ||||||
|     'fc2_live': FC2LiveFD, |     'fc2_live': FC2LiveFD, | ||||||
|     'websocket_frag': WebSocketFragmentFD, |     'websocket_frag': WebSocketFragmentFD, | ||||||
|     'youtube_live_chat': YoutubeLiveChatFD, |     'youtube_live_chat': YoutubeLiveChatFD, | ||||||
|     'youtube_live_chat_replay': YoutubeLiveChatFD, |     'youtube_live_chat_replay': YoutubeLiveChatFD, | ||||||
|  |     'bunnycdn': BunnyCdnFD, | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
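Reviewer note: PROTOCOL_MAP is a plain registry keyed by the info dict's protocol, so the new 'bunnycdn' entry is all that is needed to route those downloads. A sketch of the lookup (assumed caller shape; the HttpFD fallback mirrors the usual default):

    fd_cls = PROTOCOL_MAP.get(protocol, HttpFD)  # e.g. 'bunnycdn' -> BunnyCdnFD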
| @@ -65,7 +66,6 @@ def shorten_protocol_name(proto, simplify=False): | |||||||
|         'rtmp_ffmpeg': 'rtmpF', |         'rtmp_ffmpeg': 'rtmpF', | ||||||
|         'http_dash_segments': 'dash', |         'http_dash_segments': 'dash', | ||||||
|         'http_dash_segments_generator': 'dashG', |         'http_dash_segments_generator': 'dashG', | ||||||
|         'niconico_dmc': 'dmc', |  | ||||||
|         'websocket_frag': 'WSfrag', |         'websocket_frag': 'WSfrag', | ||||||
|     } |     } | ||||||
|     if simplify: |     if simplify: | ||||||
|   | |||||||
plugins/youtube_download/yt_dlp/downloader/bunnycdn.py (new file, 50 lines)
							| @@ -0,0 +1,50 @@ | |||||||
|  | import hashlib | ||||||
|  | import random | ||||||
|  | import threading | ||||||
|  |  | ||||||
|  | from .common import FileDownloader | ||||||
|  | from . import HlsFD | ||||||
|  | from ..networking import Request | ||||||
|  | from ..networking.exceptions import network_exceptions | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class BunnyCdnFD(FileDownloader): | ||||||
|  |     """ | ||||||
|  |     Downloads from BunnyCDN with required pings | ||||||
|  |     Note: this is not part of the public API and will be removed without notice. | ||||||
|  |     DO NOT USE | ||||||
|  |     """ | ||||||
|  |  | ||||||
|  |     def real_download(self, filename, info_dict): | ||||||
|  |         self.to_screen(f'[{self.FD_NAME}] Downloading from BunnyCDN') | ||||||
|  |  | ||||||
|  |         fd = HlsFD(self.ydl, self.params) | ||||||
|  |  | ||||||
|  |         stop_event = threading.Event() | ||||||
|  |         ping_thread = threading.Thread(target=self.ping_thread, args=(stop_event,), kwargs=info_dict['_bunnycdn_ping_data']) | ||||||
|  |         ping_thread.start() | ||||||
|  |  | ||||||
|  |         try: | ||||||
|  |             return fd.real_download(filename, info_dict) | ||||||
|  |         finally: | ||||||
|  |             stop_event.set() | ||||||
|  |  | ||||||
|  |     def ping_thread(self, stop_event, url, headers, secret, context_id): | ||||||
|  |         # Site sends ping every 4 seconds, but this throttles the download. Pinging every 2 seconds seems to work. | ||||||
|  |         ping_interval = 2 | ||||||
|  |         # Hard coded resolution as it doesn't seem to matter | ||||||
|  |         res = 1080 | ||||||
|  |         paused = 'false' | ||||||
|  |         current_time = 0 | ||||||
|  |  | ||||||
|  |         while not stop_event.wait(ping_interval): | ||||||
|  |             current_time += ping_interval | ||||||
|  |  | ||||||
|  |             time = current_time + round(random.random(), 6) | ||||||
|  |             md5_hash = hashlib.md5(f'{secret}_{context_id}_{time}_{paused}_{res}'.encode()).hexdigest() | ||||||
|  |             ping_url = f'{url}?hash={md5_hash}&time={time}&paused={paused}&resolution={res}' | ||||||
|  |  | ||||||
|  |             try: | ||||||
|  |                 self.ydl.urlopen(Request(ping_url, headers=headers)).read() | ||||||
|  |             except network_exceptions as e: | ||||||
|  |                 self.to_screen(f'[{self.FD_NAME}] Ping failed: {e}') | ||||||
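Reviewer note: the downloader itself is a thin wrapper around HlsFD; the only BunnyCDN-specific part is the keep-alive ping, whose hash is md5(f'{secret}_{context_id}_{time}_{paused}_{res}'). A sketch of how an extractor would opt in (hypothetical values, key names from this file):

    info_dict.update({
        'protocol': 'bunnycdn',
        '_bunnycdn_ping_data': {
            'url': ping_url,        # assumed to come from the embed page
            'headers': headers,
            'secret': secret,
            'context_id': context_id,
        },
    })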
| @@ -4,6 +4,7 @@ import functools | |||||||
| import os | import os | ||||||
| import random | import random | ||||||
| import re | import re | ||||||
|  | import threading | ||||||
| import time | import time | ||||||
|  |  | ||||||
| from ..minicurses import ( | from ..minicurses import ( | ||||||
| @@ -19,9 +20,7 @@ from ..utils import ( | |||||||
|     Namespace, |     Namespace, | ||||||
|     RetryManager, |     RetryManager, | ||||||
|     classproperty, |     classproperty, | ||||||
|     decodeArgument, |  | ||||||
|     deprecation_warning, |     deprecation_warning, | ||||||
|     encodeFilename, |  | ||||||
|     format_bytes, |     format_bytes, | ||||||
|     join_nonempty, |     join_nonempty, | ||||||
|     parse_bytes, |     parse_bytes, | ||||||
| @@ -32,6 +31,7 @@ from ..utils import ( | |||||||
|     timetuple_from_msec, |     timetuple_from_msec, | ||||||
|     try_call, |     try_call, | ||||||
| ) | ) | ||||||
|  | from ..utils._utils import _ProgressState | ||||||
|  |  | ||||||
|  |  | ||||||
| class FileDownloader: | class FileDownloader: | ||||||
| @@ -63,6 +63,7 @@ class FileDownloader: | |||||||
|     min_filesize:       Skip files smaller than this size |     min_filesize:       Skip files smaller than this size | ||||||
|     max_filesize:       Skip files larger than this size |     max_filesize:       Skip files larger than this size | ||||||
|     xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. |     xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. | ||||||
|  |     progress_delta:     The minimum time between progress output, in seconds | ||||||
|     external_downloader_args:  A dictionary of downloader keys (in lower case) |     external_downloader_args:  A dictionary of downloader keys (in lower case) | ||||||
|                         and a list of additional command-line arguments for the |                         and a list of additional command-line arguments for the | ||||||
|                         executable. Use 'default' as the name for arguments to be |                         executable. Use 'default' as the name for arguments to be | ||||||
| @@ -88,6 +89,9 @@ class FileDownloader: | |||||||
|         self.params = params |         self.params = params | ||||||
|         self._prepare_multiline_status() |         self._prepare_multiline_status() | ||||||
|         self.add_progress_hook(self.report_progress) |         self.add_progress_hook(self.report_progress) | ||||||
|  |         if self.params.get('progress_delta'): | ||||||
|  |             self._progress_delta_lock = threading.Lock() | ||||||
|  |             self._progress_delta_time = time.monotonic() | ||||||
|  |  | ||||||
|     def _set_ydl(self, ydl): |     def _set_ydl(self, ydl): | ||||||
|         self.ydl = ydl |         self.ydl = ydl | ||||||
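Reviewer note: progress_delta throttles how often report_progress() emits a status line, and the lock makes the deadline check safe across fragment threads. A sketch of enabling it (option name taken from the docstring above):

    ydl = YoutubeDL({'progress_delta': 1.0})  # roughly one status update per second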
| @@ -214,7 +218,7 @@ class FileDownloader: | |||||||
|     def temp_name(self, filename): |     def temp_name(self, filename): | ||||||
|         """Returns a temporary filename for the given filename.""" |         """Returns a temporary filename for the given filename.""" | ||||||
|         if self.params.get('nopart', False) or filename == '-' or \ |         if self.params.get('nopart', False) or filename == '-' or \ | ||||||
|                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): |                 (os.path.exists(filename) and not os.path.isfile(filename)): | ||||||
|             return filename |             return filename | ||||||
|         return filename + '.part' |         return filename + '.part' | ||||||
|  |  | ||||||
| @@ -268,7 +272,7 @@ class FileDownloader: | |||||||
|         """Try to set the last-modified time of the given file.""" |         """Try to set the last-modified time of the given file.""" | ||||||
|         if last_modified_hdr is None: |         if last_modified_hdr is None: | ||||||
|             return |             return | ||||||
|         if not os.path.isfile(encodeFilename(filename)): |         if not os.path.isfile(filename): | ||||||
|             return |             return | ||||||
|         timestr = last_modified_hdr |         timestr = last_modified_hdr | ||||||
|         if timestr is None: |         if timestr is None: | ||||||
| @@ -330,7 +334,7 @@ class FileDownloader: | |||||||
|             progress_dict), s.get('progress_idx') or 0) |             progress_dict), s.get('progress_idx') or 0) | ||||||
|         self.to_console_title(self.ydl.evaluate_outtmpl( |         self.to_console_title(self.ydl.evaluate_outtmpl( | ||||||
|             progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s', |             progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s', | ||||||
|             progress_dict)) |             progress_dict), _ProgressState.from_dict(s), s.get('_percent')) | ||||||
|  |  | ||||||
|     def _format_progress(self, *args, **kwargs): |     def _format_progress(self, *args, **kwargs): | ||||||
|         return self.ydl._format_text( |         return self.ydl._format_text( | ||||||
| @@ -354,6 +358,7 @@ class FileDownloader: | |||||||
|                 '_speed_str': self.format_speed(speed).strip(), |                 '_speed_str': self.format_speed(speed).strip(), | ||||||
|                 '_total_bytes_str': _format_bytes('total_bytes'), |                 '_total_bytes_str': _format_bytes('total_bytes'), | ||||||
|                 '_elapsed_str': self.format_seconds(s.get('elapsed')), |                 '_elapsed_str': self.format_seconds(s.get('elapsed')), | ||||||
|  |                 '_percent': 100.0, | ||||||
|                 '_percent_str': self.format_percent(100), |                 '_percent_str': self.format_percent(100), | ||||||
|             }) |             }) | ||||||
|             self._report_progress_status(s, join_nonempty( |             self._report_progress_status(s, join_nonempty( | ||||||
| @@ -366,13 +371,21 @@ class FileDownloader: | |||||||
|         if s['status'] != 'downloading': |         if s['status'] != 'downloading': | ||||||
|             return |             return | ||||||
|  |  | ||||||
|  |         if update_delta := self.params.get('progress_delta'): | ||||||
|  |             with self._progress_delta_lock: | ||||||
|  |                 if time.monotonic() < self._progress_delta_time: | ||||||
|  |                     return | ||||||
|  |                 self._progress_delta_time += update_delta | ||||||
|  |  | ||||||
|  |         progress = try_call( | ||||||
|  |             lambda: 100 * s['downloaded_bytes'] / s['total_bytes'], | ||||||
|  |             lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'], | ||||||
|  |             lambda: s['downloaded_bytes'] == 0 and 0) | ||||||
|         s.update({ |         s.update({ | ||||||
|             '_eta_str': self.format_eta(s.get('eta')).strip(), |             '_eta_str': self.format_eta(s.get('eta')).strip(), | ||||||
|             '_speed_str': self.format_speed(s.get('speed')), |             '_speed_str': self.format_speed(s.get('speed')), | ||||||
|             '_percent_str': self.format_percent(try_call( |             '_percent': progress, | ||||||
|                 lambda: 100 * s['downloaded_bytes'] / s['total_bytes'], |             '_percent_str': self.format_percent(progress), | ||||||
|                 lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'], |  | ||||||
|                 lambda: s['downloaded_bytes'] == 0 and 0)), |  | ||||||
|             '_total_bytes_str': _format_bytes('total_bytes'), |             '_total_bytes_str': _format_bytes('total_bytes'), | ||||||
|             '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'), |             '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'), | ||||||
|             '_downloaded_bytes_str': _format_bytes('downloaded_bytes'), |             '_downloaded_bytes_str': _format_bytes('downloaded_bytes'), | ||||||
| @@ -393,7 +406,7 @@ class FileDownloader: | |||||||
|  |  | ||||||
|     def report_resuming_byte(self, resume_len): |     def report_resuming_byte(self, resume_len): | ||||||
|         """Report attempt to resume at given byte.""" |         """Report attempt to resume at given byte.""" | ||||||
|         self.to_screen('[download] Resuming download at byte %s' % resume_len) |         self.to_screen(f'[download] Resuming download at byte {resume_len}') | ||||||
|  |  | ||||||
|     def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True): |     def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True): | ||||||
|         """Report retry""" |         """Report retry""" | ||||||
| @@ -421,13 +434,13 @@ class FileDownloader: | |||||||
|         """ |         """ | ||||||
|         nooverwrites_and_exists = ( |         nooverwrites_and_exists = ( | ||||||
|             not self.params.get('overwrites', True) |             not self.params.get('overwrites', True) | ||||||
|             and os.path.exists(encodeFilename(filename)) |             and os.path.exists(filename) | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
|         if not hasattr(filename, 'write'): |         if not hasattr(filename, 'write'): | ||||||
|             continuedl_and_exists = ( |             continuedl_and_exists = ( | ||||||
|                 self.params.get('continuedl', True) |                 self.params.get('continuedl', True) | ||||||
|                 and os.path.isfile(encodeFilename(filename)) |                 and os.path.isfile(filename) | ||||||
|                 and not self.params.get('nopart', False) |                 and not self.params.get('nopart', False) | ||||||
|             ) |             ) | ||||||
|  |  | ||||||
| @@ -437,7 +450,7 @@ class FileDownloader: | |||||||
|                 self._hook_progress({ |                 self._hook_progress({ | ||||||
|                     'filename': filename, |                     'filename': filename, | ||||||
|                     'status': 'finished', |                     'status': 'finished', | ||||||
|                     'total_bytes': os.path.getsize(encodeFilename(filename)), |                     'total_bytes': os.path.getsize(filename), | ||||||
|                 }, info_dict) |                 }, info_dict) | ||||||
|                 self._finish_multiline_status() |                 self._finish_multiline_status() | ||||||
|                 return True, False |                 return True, False | ||||||
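Reviewer note: the encodeFilename()/decodeArgument() removals in this file and below drop a Python 2-era indirection; on Python 3 the os.path functions accept str paths directly, so the change is behavior-preserving:

    # before: os.path.exists(encodeFilename(filename))
    # after:  os.path.exists(filename)   -- same result, one less wrapper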
| @@ -478,9 +491,7 @@ class FileDownloader: | |||||||
|         if not self.params.get('verbose', False): |         if not self.params.get('verbose', False): | ||||||
|             return |             return | ||||||
|  |  | ||||||
|         str_args = [decodeArgument(a) for a in args] |  | ||||||
|  |  | ||||||
|         if exe is None: |         if exe is None: | ||||||
|             exe = os.path.basename(str_args[0]) |             exe = os.path.basename(args[0]) | ||||||
|  |  | ||||||
|         self.write_debug(f'{exe} command line: {shell_quote(str_args)}') |         self.write_debug(f'{exe} command line: {shell_quote(args)}') | ||||||
|   | |||||||
| @@ -15,12 +15,15 @@ class DashSegmentsFD(FragmentFD): | |||||||
|     FD_NAME = 'dashsegments' |     FD_NAME = 'dashsegments' | ||||||
|  |  | ||||||
|     def real_download(self, filename, info_dict): |     def real_download(self, filename, info_dict): | ||||||
|         if info_dict.get('is_live') and set(info_dict['protocol'].split('+')) != {'http_dash_segments_generator'}: |         if 'http_dash_segments_generator' in info_dict['protocol'].split('+'): | ||||||
|             self.report_error('Live DASH videos are not supported') |             real_downloader = None  # No external FD can support --live-from-start | ||||||
|  |         else: | ||||||
|  |             if info_dict.get('is_live'): | ||||||
|  |                 self.report_error('Live DASH videos are not supported') | ||||||
|  |             real_downloader = get_suitable_downloader( | ||||||
|  |                 info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-')) | ||||||
|  |  | ||||||
|         real_start = time.time() |         real_start = time.time() | ||||||
|         real_downloader = get_suitable_downloader( |  | ||||||
|             info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-')) |  | ||||||
|  |  | ||||||
|         requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])] |         requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])] | ||||||
|         args = [] |         args = [] | ||||||
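Reviewer note: the reordering means --live-from-start DASH streams (protocol 'http_dash_segments_generator') are never handed to an external downloader, since, per the comment above, no external FD can consume a segment generator. Effectively:

    # protocol 'http_dash_segments_generator' -> native fragment downloader
    # protocol 'http_dash_segments' + is_live -> error, as before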
|   | |||||||
| @@ -1,4 +1,5 @@ | |||||||
| import enum | import enum | ||||||
|  | import functools | ||||||
| import json | import json | ||||||
| import os | import os | ||||||
| import re | import re | ||||||
| @@ -9,7 +10,6 @@ import time | |||||||
| import uuid | import uuid | ||||||
|  |  | ||||||
| from .fragment import FragmentFD | from .fragment import FragmentFD | ||||||
| from ..compat import functools |  | ||||||
| from ..networking import Request | from ..networking import Request | ||||||
| from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor | from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
| @@ -23,7 +23,6 @@ from ..utils import ( | |||||||
|     cli_valueless_option, |     cli_valueless_option, | ||||||
|     determine_ext, |     determine_ext, | ||||||
|     encodeArgument, |     encodeArgument, | ||||||
|     encodeFilename, |  | ||||||
|     find_available_port, |     find_available_port, | ||||||
|     remove_end, |     remove_end, | ||||||
|     traverse_obj, |     traverse_obj, | ||||||
| @@ -55,7 +54,7 @@ class ExternalFD(FragmentFD): | |||||||
|             # correct and expected termination, thus all postprocessing |             # correct and expected termination, thus all postprocessing | ||||||
|             # should take place |             # should take place | ||||||
|             retval = 0 |             retval = 0 | ||||||
|             self.to_screen('[%s] Interrupted by user' % self.get_basename()) |             self.to_screen(f'[{self.get_basename()}] Interrupted by user') | ||||||
|         finally: |         finally: | ||||||
|             if self._cookies_tempfile: |             if self._cookies_tempfile: | ||||||
|                 self.try_remove(self._cookies_tempfile) |                 self.try_remove(self._cookies_tempfile) | ||||||
| @@ -67,7 +66,7 @@ class ExternalFD(FragmentFD): | |||||||
|                 'elapsed': time.time() - started, |                 'elapsed': time.time() - started, | ||||||
|             } |             } | ||||||
|             if filename != '-': |             if filename != '-': | ||||||
|                 fsize = os.path.getsize(encodeFilename(tmpfilename)) |                 fsize = os.path.getsize(tmpfilename) | ||||||
|                 self.try_rename(tmpfilename, filename) |                 self.try_rename(tmpfilename, filename) | ||||||
|                 status.update({ |                 status.update({ | ||||||
|                     'downloaded_bytes': fsize, |                     'downloaded_bytes': fsize, | ||||||
| @@ -108,7 +107,7 @@ class ExternalFD(FragmentFD): | |||||||
|         return all(( |         return all(( | ||||||
|             not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES, |             not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES, | ||||||
|             '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES, |             '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES, | ||||||
|             not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'), |             not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url', 'extra_param_to_key_url'), | ||||||
|             all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')), |             all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')), | ||||||
|         )) |         )) | ||||||
|  |  | ||||||
| @@ -172,7 +171,7 @@ class ExternalFD(FragmentFD): | |||||||
|         decrypt_fragment = self.decrypter(info_dict) |         decrypt_fragment = self.decrypter(info_dict) | ||||||
|         dest, _ = self.sanitize_open(tmpfilename, 'wb') |         dest, _ = self.sanitize_open(tmpfilename, 'wb') | ||||||
|         for frag_index, fragment in enumerate(info_dict['fragments']): |         for frag_index, fragment in enumerate(info_dict['fragments']): | ||||||
|             fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) |             fragment_filename = f'{tmpfilename}-Frag{frag_index}' | ||||||
|             try: |             try: | ||||||
|                 src, _ = self.sanitize_open(fragment_filename, 'rb') |                 src, _ = self.sanitize_open(fragment_filename, 'rb') | ||||||
|             except OSError as err: |             except OSError as err: | ||||||
| @@ -184,9 +183,9 @@ class ExternalFD(FragmentFD): | |||||||
|             dest.write(decrypt_fragment(fragment, src.read())) |             dest.write(decrypt_fragment(fragment, src.read())) | ||||||
|             src.close() |             src.close() | ||||||
|             if not self.params.get('keep_fragments', False): |             if not self.params.get('keep_fragments', False): | ||||||
|                 self.try_remove(encodeFilename(fragment_filename)) |                 self.try_remove(fragment_filename) | ||||||
|         dest.close() |         dest.close() | ||||||
|         self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) |         self.try_remove(f'{tmpfilename}.frag.urls') | ||||||
|         return 0 |         return 0 | ||||||
|  |  | ||||||
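Reviewer note: this loop reassembles the file from the pieces an external tool left behind: each '<tmpname>-FragN' part is decrypted and appended in index order, then removed unless keep_fragments is set. For example (hypothetical name), tmpfilename 'clip.mp4.part' yields parts 'clip.mp4.part-Frag0', 'clip.mp4.part-Frag1', ... plus a 'clip.mp4.part.frag.urls' manifest.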
|     def _call_process(self, cmd, info_dict): |     def _call_process(self, cmd, info_dict): | ||||||
| @@ -335,12 +334,12 @@ class Aria2cFD(ExternalFD): | |||||||
|         cmd += ['--auto-file-renaming=false'] |         cmd += ['--auto-file-renaming=false'] | ||||||
|  |  | ||||||
|         if 'fragments' in info_dict: |         if 'fragments' in info_dict: | ||||||
|             cmd += ['--file-allocation=none', '--uri-selector=inorder'] |             cmd += ['--uri-selector=inorder'] | ||||||
|             url_list_file = '%s.frag.urls' % tmpfilename |             url_list_file = f'{tmpfilename}.frag.urls' | ||||||
|             url_list = [] |             url_list = [] | ||||||
|             for frag_index, fragment in enumerate(info_dict['fragments']): |             for frag_index, fragment in enumerate(info_dict['fragments']): | ||||||
|                 fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) |                 fragment_filename = f'{os.path.basename(tmpfilename)}-Frag{frag_index}' | ||||||
|                 url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename))) |                 url_list.append('{}\n\tout={}'.format(fragment['url'], self._aria2c_filename(fragment_filename))) | ||||||
|             stream, _ = self.sanitize_open(url_list_file, 'wb') |             stream, _ = self.sanitize_open(url_list_file, 'wb') | ||||||
|             stream.write('\n'.join(url_list).encode()) |             stream.write('\n'.join(url_list).encode()) | ||||||
|             stream.close() |             stream.close() | ||||||
| @@ -357,7 +356,7 @@ class Aria2cFD(ExternalFD): | |||||||
|             'id': sanitycheck, |             'id': sanitycheck, | ||||||
|             'method': method, |             'method': method, | ||||||
|             'params': [f'token:{rpc_secret}', *params], |             'params': [f'token:{rpc_secret}', *params], | ||||||
|         }).encode('utf-8') |         }).encode() | ||||||
|         request = Request( |         request = Request( | ||||||
|             f'http://localhost:{rpc_port}/jsonrpc', |             f'http://localhost:{rpc_port}/jsonrpc', | ||||||
|             data=d, headers={ |             data=d, headers={ | ||||||
| @@ -416,7 +415,7 @@ class Aria2cFD(ExternalFD): | |||||||
|                     'total_bytes_estimate': total, |                     'total_bytes_estimate': total, | ||||||
|                     'eta': (total - downloaded) / (speed or 1), |                     'eta': (total - downloaded) / (speed or 1), | ||||||
|                     'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None, |                     'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None, | ||||||
|                     'elapsed': time.time() - started |                     'elapsed': time.time() - started, | ||||||
|                 }) |                 }) | ||||||
|                 self._hook_progress(status, info_dict) |                 self._hook_progress(status, info_dict) | ||||||
|  |  | ||||||
| @@ -458,8 +457,6 @@ class FFmpegFD(ExternalFD): | |||||||
|  |  | ||||||
|     @classmethod |     @classmethod | ||||||
|     def available(cls, path=None): |     def available(cls, path=None): | ||||||
|         # TODO: Fix path for ffmpeg |  | ||||||
|         # Fixme: This may be wrong when --ffmpeg-location is used |  | ||||||
|         return FFmpegPostProcessor().available |         return FFmpegPostProcessor().available | ||||||
|  |  | ||||||
|     def on_process_started(self, proc, stdin): |     def on_process_started(self, proc, stdin): | ||||||
| @@ -491,7 +488,7 @@ class FFmpegFD(ExternalFD): | |||||||
|         if not self.params.get('verbose'): |         if not self.params.get('verbose'): | ||||||
|             args += ['-hide_banner'] |             args += ['-hide_banner'] | ||||||
|  |  | ||||||
|         args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[]) |         args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args', ...)) | ||||||
|  |  | ||||||
|         # These exist only for compatibility. Extractors should use |         # These exist only for compatibility. Extractors should use | ||||||
|         # info_dict['downloader_options']['ffmpeg_args'] instead |         # info_dict['downloader_options']['ffmpeg_args'] instead | ||||||
| @@ -508,13 +505,13 @@ class FFmpegFD(ExternalFD): | |||||||
|         env = None |         env = None | ||||||
|         proxy = self.params.get('proxy') |         proxy = self.params.get('proxy') | ||||||
|         if proxy: |         if proxy: | ||||||
|             if not re.match(r'^[\da-zA-Z]+://', proxy): |             if not re.match(r'[\da-zA-Z]+://', proxy): | ||||||
|                 proxy = 'http://%s' % proxy |                 proxy = f'http://{proxy}' | ||||||
|  |  | ||||||
|             if proxy.startswith('socks'): |             if proxy.startswith('socks'): | ||||||
|                 self.report_warning( |                 self.report_warning( | ||||||
|                     '%s does not support SOCKS proxies. Downloading is likely to fail. ' |                     f'{self.get_basename()} does not support SOCKS proxies. Downloading is likely to fail. ' | ||||||
|                     'Consider adding --hls-prefer-native to your command.' % self.get_basename()) |                     'Consider adding --hls-prefer-native to your command.') | ||||||
|  |  | ||||||
|             # Since December 2015 ffmpeg supports -http_proxy option (see |             # Since December 2015 ffmpeg supports -http_proxy option (see | ||||||
|             # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) |             # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) | ||||||
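Reviewer note: proxy normalization in a sketch (hypothetical proxy values):

    # '127.0.0.1:3128'          -> 'http://127.0.0.1:3128' (scheme added)
    # 'socks5://127.0.0.1:1080' -> passed through, but triggers the SOCKS
    #                              warning above, since ffmpeg cannot use it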
| @@ -559,7 +556,7 @@ class FFmpegFD(ExternalFD): | |||||||
|  |  | ||||||
|         selected_formats = info_dict.get('requested_formats') or [info_dict] |         selected_formats = info_dict.get('requested_formats') or [info_dict] | ||||||
|         for i, fmt in enumerate(selected_formats): |         for i, fmt in enumerate(selected_formats): | ||||||
|             is_http = re.match(r'^https?://', fmt['url']) |             is_http = re.match(r'https?://', fmt['url']) | ||||||
|             cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else [] |             cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else [] | ||||||
|             if cookies: |             if cookies: | ||||||
|                 args.extend(['-cookies', ''.join( |                 args.extend(['-cookies', ''.join( | ||||||
| @@ -575,7 +572,7 @@ class FFmpegFD(ExternalFD): | |||||||
|             if end_time: |             if end_time: | ||||||
|                 args += ['-t', str(end_time - start_time)] |                 args += ['-t', str(end_time - start_time)] | ||||||
|  |  | ||||||
|             args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']] |             args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']] | ||||||
|  |  | ||||||
|         if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): |         if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): | ||||||
|             args += ['-c', 'copy'] |             args += ['-c', 'copy'] | ||||||
| @@ -615,10 +612,12 @@ class FFmpegFD(ExternalFD): | |||||||
|         else: |         else: | ||||||
|             args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)] |             args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)] | ||||||
|  |  | ||||||
|  |         args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args_out', ...)) | ||||||
|  |  | ||||||
|         args += self._configuration_args(('_o1', '_o', '')) |         args += self._configuration_args(('_o1', '_o', '')) | ||||||
|  |  | ||||||
|         args = [encodeArgument(opt) for opt in args] |         args = [encodeArgument(opt) for opt in args] | ||||||
|         args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) |         args.append(ffpp._ffmpeg_filename_argument(tmpfilename)) | ||||||
|         self._debug_cmd(args) |         self._debug_cmd(args) | ||||||
|  |  | ||||||
|         piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats) |         piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats) | ||||||
|   | |||||||
| @@ -67,12 +67,12 @@ class FlvReader(io.BytesIO): | |||||||
|         self.read_bytes(3) |         self.read_bytes(3) | ||||||
|         quality_entry_count = self.read_unsigned_char() |         quality_entry_count = self.read_unsigned_char() | ||||||
|         # QualityEntryCount |         # QualityEntryCount | ||||||
|         for i in range(quality_entry_count): |         for _ in range(quality_entry_count): | ||||||
|             self.read_string() |             self.read_string() | ||||||
|  |  | ||||||
|         segment_run_count = self.read_unsigned_int() |         segment_run_count = self.read_unsigned_int() | ||||||
|         segments = [] |         segments = [] | ||||||
|         for i in range(segment_run_count): |         for _ in range(segment_run_count): | ||||||
|             first_segment = self.read_unsigned_int() |             first_segment = self.read_unsigned_int() | ||||||
|             fragments_per_segment = self.read_unsigned_int() |             fragments_per_segment = self.read_unsigned_int() | ||||||
|             segments.append((first_segment, fragments_per_segment)) |             segments.append((first_segment, fragments_per_segment)) | ||||||
| @@ -91,12 +91,12 @@ class FlvReader(io.BytesIO): | |||||||
|  |  | ||||||
|         quality_entry_count = self.read_unsigned_char() |         quality_entry_count = self.read_unsigned_char() | ||||||
|         # QualitySegmentUrlModifiers |         # QualitySegmentUrlModifiers | ||||||
|         for i in range(quality_entry_count): |         for _ in range(quality_entry_count): | ||||||
|             self.read_string() |             self.read_string() | ||||||
|  |  | ||||||
|         fragments_count = self.read_unsigned_int() |         fragments_count = self.read_unsigned_int() | ||||||
|         fragments = [] |         fragments = [] | ||||||
|         for i in range(fragments_count): |         for _ in range(fragments_count): | ||||||
|             first = self.read_unsigned_int() |             first = self.read_unsigned_int() | ||||||
|             first_ts = self.read_unsigned_long_long() |             first_ts = self.read_unsigned_long_long() | ||||||
|             duration = self.read_unsigned_int() |             duration = self.read_unsigned_int() | ||||||
| @@ -135,11 +135,11 @@ class FlvReader(io.BytesIO): | |||||||
|         self.read_string()  # MovieIdentifier |         self.read_string()  # MovieIdentifier | ||||||
|         server_count = self.read_unsigned_char() |         server_count = self.read_unsigned_char() | ||||||
|         # ServerEntryTable |         # ServerEntryTable | ||||||
|         for i in range(server_count): |         for _ in range(server_count): | ||||||
|             self.read_string() |             self.read_string() | ||||||
|         quality_count = self.read_unsigned_char() |         quality_count = self.read_unsigned_char() | ||||||
|         # QualityEntryTable |         # QualityEntryTable | ||||||
|         for i in range(quality_count): |         for _ in range(quality_count): | ||||||
|             self.read_string() |             self.read_string() | ||||||
|         # DrmData |         # DrmData | ||||||
|         self.read_string() |         self.read_string() | ||||||
| @@ -148,14 +148,14 @@ class FlvReader(io.BytesIO): | |||||||
|  |  | ||||||
|         segments_count = self.read_unsigned_char() |         segments_count = self.read_unsigned_char() | ||||||
|         segments = [] |         segments = [] | ||||||
|         for i in range(segments_count): |         for _ in range(segments_count): | ||||||
|             box_size, box_type, box_data = self.read_box_info() |             box_size, box_type, box_data = self.read_box_info() | ||||||
|             assert box_type == b'asrt' |             assert box_type == b'asrt' | ||||||
|             segment = FlvReader(box_data).read_asrt() |             segment = FlvReader(box_data).read_asrt() | ||||||
|             segments.append(segment) |             segments.append(segment) | ||||||
|         fragments_run_count = self.read_unsigned_char() |         fragments_run_count = self.read_unsigned_char() | ||||||
|         fragments = [] |         fragments = [] | ||||||
|         for i in range(fragments_run_count): |         for _ in range(fragments_run_count): | ||||||
|             box_size, box_type, box_data = self.read_box_info() |             box_size, box_type, box_data = self.read_box_info() | ||||||
|             assert box_type == b'afrt' |             assert box_type == b'afrt' | ||||||
|             fragments.append(FlvReader(box_data).read_afrt()) |             fragments.append(FlvReader(box_data).read_afrt()) | ||||||
| @@ -309,7 +309,7 @@ class F4mFD(FragmentFD): | |||||||
|     def real_download(self, filename, info_dict): |     def real_download(self, filename, info_dict): | ||||||
|         man_url = info_dict['url'] |         man_url = info_dict['url'] | ||||||
|         requested_bitrate = info_dict.get('tbr') |         requested_bitrate = info_dict.get('tbr') | ||||||
|         self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) |         self.to_screen(f'[{self.FD_NAME}] Downloading f4m manifest') | ||||||
|  |  | ||||||
|         urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) |         urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) | ||||||
|         man_url = urlh.url |         man_url = urlh.url | ||||||
| @@ -326,8 +326,8 @@ class F4mFD(FragmentFD): | |||||||
|             formats = sorted(formats, key=lambda f: f[0]) |             formats = sorted(formats, key=lambda f: f[0]) | ||||||
|             rate, media = formats[-1] |             rate, media = formats[-1] | ||||||
|         else: |         else: | ||||||
|             rate, media = list(filter( |             rate, media = next(filter( | ||||||
|                 lambda f: int(f[0]) == requested_bitrate, formats))[0] |                 lambda f: int(f[0]) == requested_bitrate, formats)) | ||||||
|  |  | ||||||
|         # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. |         # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. | ||||||
|         man_base_url = get_base_url(doc) or man_url |         man_base_url = get_base_url(doc) or man_url | ||||||
|   | |||||||
| @@ -9,11 +9,11 @@ import time | |||||||
| from .common import FileDownloader | from .common import FileDownloader | ||||||
| from .http import HttpFD | from .http import HttpFD | ||||||
| from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 | from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 | ||||||
| from ..compat import compat_os_name |  | ||||||
| from ..networking import Request | from ..networking import Request | ||||||
| from ..networking.exceptions import HTTPError, IncompleteRead | from ..networking.exceptions import HTTPError, IncompleteRead | ||||||
| from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj | from ..utils import DownloadError, RetryManager, traverse_obj | ||||||
| from ..utils.networking import HTTPHeaderDict | from ..utils.networking import HTTPHeaderDict | ||||||
|  | from ..utils.progress import ProgressCalculator | ||||||
|  |  | ||||||
|  |  | ||||||
| class HttpQuietDownloader(HttpFD): | class HttpQuietDownloader(HttpFD): | ||||||
| @@ -151,7 +151,7 @@ class FragmentFD(FileDownloader): | |||||||
|             if self.__do_ytdl_file(ctx): |             if self.__do_ytdl_file(ctx): | ||||||
|                 self._write_ytdl_file(ctx) |                 self._write_ytdl_file(ctx) | ||||||
|             if not self.params.get('keep_fragments', False): |             if not self.params.get('keep_fragments', False): | ||||||
|                 self.try_remove(encodeFilename(ctx['fragment_filename_sanitized'])) |                 self.try_remove(ctx['fragment_filename_sanitized']) | ||||||
|             del ctx['fragment_filename_sanitized'] |             del ctx['fragment_filename_sanitized'] | ||||||
|  |  | ||||||
|     def _prepare_frag_download(self, ctx): |     def _prepare_frag_download(self, ctx): | ||||||
| @@ -187,7 +187,7 @@ class FragmentFD(FileDownloader): | |||||||
|         }) |         }) | ||||||
|  |  | ||||||
|         if self.__do_ytdl_file(ctx): |         if self.__do_ytdl_file(ctx): | ||||||
|             ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))) |             ytdl_file_exists = os.path.isfile(self.ytdl_filename(ctx['filename'])) | ||||||
|             continuedl = self.params.get('continuedl', True) |             continuedl = self.params.get('continuedl', True) | ||||||
|             if continuedl and ytdl_file_exists: |             if continuedl and ytdl_file_exists: | ||||||
|                 self._read_ytdl_file(ctx) |                 self._read_ytdl_file(ctx) | ||||||
| @@ -198,7 +198,7 @@ class FragmentFD(FileDownloader): | |||||||
|                         '.ytdl file is corrupt' if is_corrupt else |                         '.ytdl file is corrupt' if is_corrupt else | ||||||
|                         'Inconsistent state of incomplete fragment download') |                         'Inconsistent state of incomplete fragment download') | ||||||
|                     self.report_warning( |                     self.report_warning( | ||||||
|                         '%s. Restarting from the beginning ...' % message) |                         f'{message}. Restarting from the beginning ...') | ||||||
|                     ctx['fragment_index'] = resume_len = 0 |                     ctx['fragment_index'] = resume_len = 0 | ||||||
|                     if 'ytdl_corrupt' in ctx: |                     if 'ytdl_corrupt' in ctx: | ||||||
|                         del ctx['ytdl_corrupt'] |                         del ctx['ytdl_corrupt'] | ||||||
| @@ -226,8 +226,7 @@ class FragmentFD(FileDownloader): | |||||||
|         resume_len = ctx['complete_frags_downloaded_bytes'] |         resume_len = ctx['complete_frags_downloaded_bytes'] | ||||||
|         total_frags = ctx['total_frags'] |         total_frags = ctx['total_frags'] | ||||||
|         ctx_id = ctx.get('ctx_id') |         ctx_id = ctx.get('ctx_id') | ||||||
|         # This dict stores the download progress, it's updated by the progress |         # Stores the download progress, updated by the progress hook | ||||||
|         # hook |  | ||||||
|         state = { |         state = { | ||||||
|             'status': 'downloading', |             'status': 'downloading', | ||||||
|             'downloaded_bytes': resume_len, |             'downloaded_bytes': resume_len, | ||||||
| @@ -237,14 +236,8 @@ class FragmentFD(FileDownloader): | |||||||
|             'tmpfilename': ctx['tmpfilename'], |             'tmpfilename': ctx['tmpfilename'], | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         start = time.time() |         ctx['started'] = time.time() | ||||||
|         ctx.update({ |         progress = ProgressCalculator(resume_len) | ||||||
|             'started': start, |  | ||||||
|             'fragment_started': start, |  | ||||||
|             # Amount of fragment's bytes downloaded by the time of the previous |  | ||||||
|             # frag progress hook invocation |  | ||||||
|             'prev_frag_downloaded_bytes': 0, |  | ||||||
|         }) |  | ||||||
|  |  | ||||||
|         def frag_progress_hook(s): |         def frag_progress_hook(s): | ||||||
|             if s['status'] not in ('downloading', 'finished'): |             if s['status'] not in ('downloading', 'finished'): | ||||||
| @@ -259,38 +252,35 @@ class FragmentFD(FileDownloader): | |||||||
|             state['max_progress'] = ctx.get('max_progress') |             state['max_progress'] = ctx.get('max_progress') | ||||||
|             state['progress_idx'] = ctx.get('progress_idx') |             state['progress_idx'] = ctx.get('progress_idx') | ||||||
|  |  | ||||||
|             time_now = time.time() |             state['elapsed'] = progress.elapsed | ||||||
|             state['elapsed'] = time_now - start |  | ||||||
|             frag_total_bytes = s.get('total_bytes') or 0 |             frag_total_bytes = s.get('total_bytes') or 0 | ||||||
|             s['fragment_info_dict'] = s.pop('info_dict', {}) |             s['fragment_info_dict'] = s.pop('info_dict', {}) | ||||||
|  |  | ||||||
|  |             # XXX: Fragment resume is not accounted for here | ||||||
|             if not ctx['live']: |             if not ctx['live']: | ||||||
|                 estimated_size = ( |                 estimated_size = ( | ||||||
|                     (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) |                     (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) | ||||||
|                     / (state['fragment_index'] + 1) * total_frags) |                     / (state['fragment_index'] + 1) * total_frags) | ||||||
|                 state['total_bytes_estimate'] = estimated_size |                 progress.total = estimated_size | ||||||
|  |                 progress.update(s.get('downloaded_bytes')) | ||||||
|  |                 state['total_bytes_estimate'] = progress.total | ||||||
|  |             else: | ||||||
|  |                 progress.update(s.get('downloaded_bytes')) | ||||||
|  |  | ||||||
|             if s['status'] == 'finished': |             if s['status'] == 'finished': | ||||||
|                 state['fragment_index'] += 1 |                 state['fragment_index'] += 1 | ||||||
|                 ctx['fragment_index'] = state['fragment_index'] |                 ctx['fragment_index'] = state['fragment_index'] | ||||||
|                 state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes'] |                 progress.thread_reset() | ||||||
|                 ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes'] |  | ||||||
|                 ctx['speed'] = state['speed'] = self.calc_speed( |             state['downloaded_bytes'] = ctx['complete_frags_downloaded_bytes'] = progress.downloaded | ||||||
|                     ctx['fragment_started'], time_now, frag_total_bytes) |             state['speed'] = ctx['speed'] = progress.speed.smooth | ||||||
|                 ctx['fragment_started'] = time.time() |             state['eta'] = progress.eta.smooth | ||||||
|                 ctx['prev_frag_downloaded_bytes'] = 0 |  | ||||||
|             else: |  | ||||||
|                 frag_downloaded_bytes = s['downloaded_bytes'] |  | ||||||
|                 state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes'] |  | ||||||
|                 ctx['speed'] = state['speed'] = self.calc_speed( |  | ||||||
|                     ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx.get('frag_resume_len', 0)) |  | ||||||
|                 if not ctx['live']: |  | ||||||
|                     state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes']) |  | ||||||
|                 ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes |  | ||||||
|             self._hook_progress(state, info_dict) |             self._hook_progress(state, info_dict) | ||||||
|  |  | ||||||
|         ctx['dl'].add_progress_hook(frag_progress_hook) |         ctx['dl'].add_progress_hook(frag_progress_hook) | ||||||
|  |  | ||||||
|         return start |         return ctx['started'] | ||||||
|  |  | ||||||
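Reviewer note: the hand-rolled per-fragment bookkeeping ('fragment_started', 'prev_frag_downloaded_bytes', calc_speed/calc_eta) is replaced by a single ProgressCalculator that tracks per-thread deltas -- which is also why the "speed shown is only of one thread" warning disappears further down. A sketch of the API as used here (signatures inferred from this diff only):

    progress = ProgressCalculator(resume_len)   # seed with already-downloaded bytes
    progress.total = estimated_size             # refined as fragments arrive
    progress.update(frag_downloaded_bytes)      # safe to call from worker threads
    speed, eta = progress.speed.smooth, progress.eta.smooth
    progress.thread_reset()                     # current thread starts a new fragment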
|     def _finish_frag_download(self, ctx, info_dict): |     def _finish_frag_download(self, ctx, info_dict): | ||||||
|         ctx['dest_stream'].close() |         ctx['dest_stream'].close() | ||||||
| @@ -375,10 +365,10 @@ class FragmentFD(FileDownloader): | |||||||
|         return decrypt_fragment |         return decrypt_fragment | ||||||
|  |  | ||||||
|     def download_and_append_fragments_multiple(self, *args, **kwargs): |     def download_and_append_fragments_multiple(self, *args, **kwargs): | ||||||
|         ''' |         """ | ||||||
|         @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... |         @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... | ||||||
|                 all args must be either tuple or list |                 all args must be either tuple or list | ||||||
|         ''' |         """ | ||||||
|         interrupt_trigger = [True] |         interrupt_trigger = [True] | ||||||
|         max_progress = len(args) |         max_progress = len(args) | ||||||
|         if max_progress == 1: |         if max_progress == 1: | ||||||
| @@ -399,7 +389,7 @@ class FragmentFD(FileDownloader): | |||||||
|             def __exit__(self, exc_type, exc_val, exc_tb): |             def __exit__(self, exc_type, exc_val, exc_tb): | ||||||
|                 pass |                 pass | ||||||
|  |  | ||||||
|         if compat_os_name == 'nt': |         if os.name == 'nt': | ||||||
|             def future_result(future): |             def future_result(future): | ||||||
|                 while True: |                 while True: | ||||||
|                     try: |                     try: | ||||||
| @@ -433,7 +423,7 @@ class FragmentFD(FileDownloader): | |||||||
|             finally: |             finally: | ||||||
|                 tpe.shutdown(wait=True) |                 tpe.shutdown(wait=True) | ||||||
|         if not interrupt_trigger[0] and not is_live: |         if not interrupt_trigger[0] and not is_live: | ||||||
|             raise KeyboardInterrupt() |             raise KeyboardInterrupt | ||||||
|         # we expect the user wants to stop and DOES WANT the preceding postprocessors to run; |         # we expect the user wants to stop and DOES WANT the preceding postprocessors to run; | ||||||
|         # so we return an intermediate result here instead of raising KeyboardInterrupt on live |         # so we return an intermediate result here instead of raising KeyboardInterrupt on live | ||||||
|         return result |         return result | ||||||
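On Windows a bare `future.result()` blocks in a way that keeps Ctrl+C from being delivered, which is why the `os.name == 'nt'` branch above polls with a short timeout instead. The pattern in isolation (a sketch; the poll interval is an assumption):

    import concurrent.futures

    def future_result(future, poll=0.1):
        # Wake up periodically so KeyboardInterrupt can be raised promptly.
        while True:
            try:
                return future.result(poll)
            except concurrent.futures.TimeoutError:
                continue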
| @@ -500,7 +490,6 @@ class FragmentFD(FileDownloader): | |||||||
|                 download_fragment(fragment, ctx_copy) |                 download_fragment(fragment, ctx_copy) | ||||||
|                 return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized') |                 return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized') | ||||||
|  |  | ||||||
|             self.report_warning('The download speed shown is only of one thread. This is a known issue') |  | ||||||
|             with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: |             with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: | ||||||
|                 try: |                 try: | ||||||
|                     for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments): |                     for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments): | ||||||
|   | |||||||
| @@ -16,6 +16,7 @@ from ..utils import ( | |||||||
|     update_url_query, |     update_url_query, | ||||||
|     urljoin, |     urljoin, | ||||||
| ) | ) | ||||||
|  | from ..utils._utils import _request_dump_filename | ||||||
|  |  | ||||||
|  |  | ||||||
| class HlsFD(FragmentFD): | class HlsFD(FragmentFD): | ||||||
| @@ -72,11 +73,23 @@ class HlsFD(FragmentFD): | |||||||
|  |  | ||||||
|     def real_download(self, filename, info_dict): |     def real_download(self, filename, info_dict): | ||||||
|         man_url = info_dict['url'] |         man_url = info_dict['url'] | ||||||
|         self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) |  | ||||||
|  |  | ||||||
|         urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) |         s = info_dict.get('hls_media_playlist_data') | ||||||
|         man_url = urlh.url |         if s: | ||||||
|         s = urlh.read().decode('utf-8', 'ignore') |             self.to_screen(f'[{self.FD_NAME}] Using m3u8 manifest from extracted info') | ||||||
|  |         else: | ||||||
|  |             self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest') | ||||||
|  |             urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) | ||||||
|  |             man_url = urlh.url | ||||||
|  |             s_bytes = urlh.read() | ||||||
|  |             if self.params.get('write_pages'): | ||||||
|  |                 dump_filename = _request_dump_filename( | ||||||
|  |                     man_url, info_dict['id'], None, | ||||||
|  |                     trim_length=self.params.get('trim_file_name')) | ||||||
|  |                 self.to_screen(f'[{self.FD_NAME}] Saving request to {dump_filename}') | ||||||
|  |                 with open(dump_filename, 'wb') as outf: | ||||||
|  |                     outf.write(s_bytes) | ||||||
|  |             s = s_bytes.decode('utf-8', 'ignore') | ||||||
|  |  | ||||||
|         can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None |         can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None | ||||||
|         if can_download: |         if can_download: | ||||||
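With this hunk the HLS downloader honours the same `write_pages` debugging option that extractors use, dumping the fetched m3u8 to disk before parsing it. A hedged usage sketch through the embedding API (URL illustrative):

    from yt_dlp import YoutubeDL

    # Save every fetched manifest/page next to the download for inspection.
    with YoutubeDL({'write_pages': True}) as ydl:
        ydl.download(['https://example.com/stream.m3u8'])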
| @@ -119,12 +132,12 @@ class HlsFD(FragmentFD): | |||||||
|             self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}') |             self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}') | ||||||
|  |  | ||||||
|         def is_ad_fragment_start(s): |         def is_ad_fragment_start(s): | ||||||
|             return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s |             return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s) | ||||||
|                     or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) |                     or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))) | ||||||
|  |  | ||||||
|         def is_ad_fragment_end(s): |         def is_ad_fragment_end(s): | ||||||
|             return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s |             return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s) | ||||||
|                     or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) |                     or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))) | ||||||
|  |  | ||||||
|         fragments = [] |         fragments = [] | ||||||
|  |  | ||||||
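The added parentheses in the two ad-marker helpers are purely cosmetic: `and` already binds tighter than `or` in Python, so the grouping is unchanged. For instance:

    # Both expressions parse identically; the parentheses only spell it out.
    assert (False and True or True and True) == ((False and True) or (True and True))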
| @@ -160,10 +173,12 @@ class HlsFD(FragmentFD): | |||||||
|         extra_state = ctx.setdefault('extra_state', {}) |         extra_state = ctx.setdefault('extra_state', {}) | ||||||
|  |  | ||||||
|         format_index = info_dict.get('format_index') |         format_index = info_dict.get('format_index') | ||||||
|         extra_query = None |         extra_segment_query = None | ||||||
|         extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') |         if extra_param_to_segment_url := info_dict.get('extra_param_to_segment_url'): | ||||||
|         if extra_param_to_segment_url: |             extra_segment_query = urllib.parse.parse_qs(extra_param_to_segment_url) | ||||||
|             extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) |         extra_key_query = None | ||||||
|  |         if extra_param_to_key_url := info_dict.get('extra_param_to_key_url'): | ||||||
|  |             extra_key_query = urllib.parse.parse_qs(extra_param_to_key_url) | ||||||
|         i = 0 |         i = 0 | ||||||
|         media_sequence = 0 |         media_sequence = 0 | ||||||
|         decrypt_info = {'METHOD': 'NONE'} |         decrypt_info = {'METHOD': 'NONE'} | ||||||
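`extra_param_to_segment_url` and the new `extra_param_to_key_url` are raw query strings; `urllib.parse.parse_qs` turns them into the mapping that `update_url_query` later merges into each segment or key URL. For instance (parameter names illustrative):

    import urllib.parse

    extra = 'wmsAuthSign=abc123&ttl=3600'
    print(urllib.parse.parse_qs(extra))
    # {'wmsAuthSign': ['abc123'], 'ttl': ['3600']}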
| @@ -175,6 +190,7 @@ class HlsFD(FragmentFD): | |||||||
|         if external_aes_iv: |         if external_aes_iv: | ||||||
|             external_aes_iv = binascii.unhexlify(remove_start(external_aes_iv, '0x').zfill(32)) |             external_aes_iv = binascii.unhexlify(remove_start(external_aes_iv, '0x').zfill(32)) | ||||||
|         byte_range = {} |         byte_range = {} | ||||||
|  |         byte_range_offset = 0 | ||||||
|         discontinuity_count = 0 |         discontinuity_count = 0 | ||||||
|         frag_index = 0 |         frag_index = 0 | ||||||
|         ad_frag_next = False |         ad_frag_next = False | ||||||
| @@ -190,8 +206,8 @@ class HlsFD(FragmentFD): | |||||||
|                     if frag_index <= ctx['fragment_index']: |                     if frag_index <= ctx['fragment_index']: | ||||||
|                         continue |                         continue | ||||||
|                     frag_url = urljoin(man_url, line) |                     frag_url = urljoin(man_url, line) | ||||||
|                     if extra_query: |                     if extra_segment_query: | ||||||
|                         frag_url = update_url_query(frag_url, extra_query) |                         frag_url = update_url_query(frag_url, extra_segment_query) | ||||||
|  |  | ||||||
|                     fragments.append({ |                     fragments.append({ | ||||||
|                         'frag_index': frag_index, |                         'frag_index': frag_index, | ||||||
| @@ -202,6 +218,11 @@ class HlsFD(FragmentFD): | |||||||
|                     }) |                     }) | ||||||
|                     media_sequence += 1 |                     media_sequence += 1 | ||||||
|  |  | ||||||
|  |                     # If the byte_range is truthy, reset it after appending a fragment that uses it | ||||||
|  |                     if byte_range: | ||||||
|  |                         byte_range_offset = byte_range['end'] | ||||||
|  |                         byte_range = {} | ||||||
|  |  | ||||||
|                 elif line.startswith('#EXT-X-MAP'): |                 elif line.startswith('#EXT-X-MAP'): | ||||||
|                     if format_index and discontinuity_count != format_index: |                     if format_index and discontinuity_count != format_index: | ||||||
|                         continue |                         continue | ||||||
| @@ -212,13 +233,15 @@ class HlsFD(FragmentFD): | |||||||
|                     frag_index += 1 |                     frag_index += 1 | ||||||
|                     map_info = parse_m3u8_attributes(line[11:]) |                     map_info = parse_m3u8_attributes(line[11:]) | ||||||
|                     frag_url = urljoin(man_url, map_info.get('URI')) |                     frag_url = urljoin(man_url, map_info.get('URI')) | ||||||
|                     if extra_query: |                     if extra_segment_query: | ||||||
|                         frag_url = update_url_query(frag_url, extra_query) |                         frag_url = update_url_query(frag_url, extra_segment_query) | ||||||
|  |  | ||||||
|  |                     map_byte_range = {} | ||||||
|  |  | ||||||
|                     if map_info.get('BYTERANGE'): |                     if map_info.get('BYTERANGE'): | ||||||
|                         splitted_byte_range = map_info.get('BYTERANGE').split('@') |                         splitted_byte_range = map_info.get('BYTERANGE').split('@') | ||||||
|                         sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] |                         sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else 0 | ||||||
|                         byte_range = { |                         map_byte_range = { | ||||||
|                             'start': sub_range_start, |                             'start': sub_range_start, | ||||||
|                             'end': sub_range_start + int(splitted_byte_range[0]), |                             'end': sub_range_start + int(splitted_byte_range[0]), | ||||||
|                         } |                         } | ||||||
| @@ -227,8 +250,8 @@ class HlsFD(FragmentFD): | |||||||
|                         'frag_index': frag_index, |                         'frag_index': frag_index, | ||||||
|                         'url': frag_url, |                         'url': frag_url, | ||||||
|                         'decrypt_info': decrypt_info, |                         'decrypt_info': decrypt_info, | ||||||
|                         'byte_range': byte_range, |                         'byte_range': map_byte_range, | ||||||
|                         'media_sequence': media_sequence |                         'media_sequence': media_sequence, | ||||||
|                     }) |                     }) | ||||||
|                     media_sequence += 1 |                     media_sequence += 1 | ||||||
|  |  | ||||||
| @@ -244,8 +267,10 @@ class HlsFD(FragmentFD): | |||||||
|                             decrypt_info['KEY'] = external_aes_key |                             decrypt_info['KEY'] = external_aes_key | ||||||
|                         else: |                         else: | ||||||
|                             decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI']) |                             decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI']) | ||||||
|                             if extra_query: |                             if extra_key_query or extra_segment_query: | ||||||
|                                 decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) |                                 # Fall back to extra_segment_query for the key URL, for backwards compat | ||||||
|  |                                 decrypt_info['URI'] = update_url_query( | ||||||
|  |                                     decrypt_info['URI'], extra_key_query or extra_segment_query) | ||||||
|                             if decrypt_url != decrypt_info['URI']: |                             if decrypt_url != decrypt_info['URI']: | ||||||
|                                 decrypt_info['KEY'] = None |                                 decrypt_info['KEY'] = None | ||||||
|  |  | ||||||
| @@ -253,7 +278,7 @@ class HlsFD(FragmentFD): | |||||||
|                     media_sequence = int(line[22:]) |                     media_sequence = int(line[22:]) | ||||||
|                 elif line.startswith('#EXT-X-BYTERANGE'): |                 elif line.startswith('#EXT-X-BYTERANGE'): | ||||||
|                     splitted_byte_range = line[17:].split('@') |                     splitted_byte_range = line[17:].split('@') | ||||||
|                     sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] |                     sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range_offset | ||||||
|                     byte_range = { |                     byte_range = { | ||||||
|                         'start': sub_range_start, |                         'start': sub_range_start, | ||||||
|                         'end': sub_range_start + int(splitted_byte_range[0]), |                         'end': sub_range_start + int(splitted_byte_range[0]), | ||||||
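An `#EXT-X-BYTERANGE:<length>[@<offset>]` tag with no explicit offset continues from the end of the previous sub-range, which is the rule the new `byte_range_offset` bookkeeping preserves across fragments. The rule in isolation (a sketch):

    def parse_byte_range(tag_value, prev_end):
        # tag_value is '<length>[@<offset>]'; without an offset the range
        # starts where the previous one ended.
        length, _, offset = tag_value.partition('@')
        start = int(offset) if offset else prev_end
        return {'start': start, 'end': start + int(length)}

    r1 = parse_byte_range('1000@0', 0)       # {'start': 0, 'end': 1000}
    r2 = parse_byte_range('500', r1['end'])  # {'start': 1000, 'end': 1500}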
| @@ -350,9 +375,8 @@ class HlsFD(FragmentFD): | |||||||
|                             # XXX: this should probably be silent as well |                             # XXX: this should probably be silent as well | ||||||
|                             # or verify that all segments contain the same data |                             # or verify that all segments contain the same data | ||||||
|                             self.report_warning(bug_reports_message( |                             self.report_warning(bug_reports_message( | ||||||
|                                 'Discarding a %s block found in the middle of the stream; ' |                                 f'Discarding a {type(block).__name__} block found in the middle of the stream; ' | ||||||
|                                 'if the subtitles display incorrectly,' |                                 'if the subtitles display incorrectly,')) | ||||||
|                                 % (type(block).__name__))) |  | ||||||
|                             continue |                             continue | ||||||
|                     block.write_into(output) |                     block.write_into(output) | ||||||
|  |  | ||||||
| @@ -369,7 +393,10 @@ class HlsFD(FragmentFD): | |||||||
|  |  | ||||||
|                 return output.getvalue().encode() |                 return output.getvalue().encode() | ||||||
|  |  | ||||||
|             self.download_and_append_fragments( |             if len(fragments) == 1: | ||||||
|                 ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments) |                 self.download_and_append_fragments(ctx, fragments, info_dict) | ||||||
|  |             else: | ||||||
|  |                 self.download_and_append_fragments( | ||||||
|  |                     ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments) | ||||||
|         else: |         else: | ||||||
|             return self.download_and_append_fragments(ctx, fragments, info_dict) |             return self.download_and_append_fragments(ctx, fragments, info_dict) | ||||||
|   | |||||||
| @@ -15,7 +15,6 @@ from ..utils import ( | |||||||
|     ThrottledDownload, |     ThrottledDownload, | ||||||
|     XAttrMetadataError, |     XAttrMetadataError, | ||||||
|     XAttrUnavailableError, |     XAttrUnavailableError, | ||||||
|     encodeFilename, |  | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     parse_http_range, |     parse_http_range, | ||||||
|     try_call, |     try_call, | ||||||
| @@ -58,9 +57,8 @@ class HttpFD(FileDownloader): | |||||||
|  |  | ||||||
|         if self.params.get('continuedl', True): |         if self.params.get('continuedl', True): | ||||||
|             # Establish possible resume length |             # Establish possible resume length | ||||||
|             if os.path.isfile(encodeFilename(ctx.tmpfilename)): |             if os.path.isfile(ctx.tmpfilename): | ||||||
|                 ctx.resume_len = os.path.getsize( |                 ctx.resume_len = os.path.getsize(ctx.tmpfilename) | ||||||
|                     encodeFilename(ctx.tmpfilename)) |  | ||||||
|  |  | ||||||
|         ctx.is_resume = ctx.resume_len > 0 |         ctx.is_resume = ctx.resume_len > 0 | ||||||
|  |  | ||||||
| @@ -176,7 +174,7 @@ class HttpFD(FileDownloader): | |||||||
|                                 'downloaded_bytes': ctx.resume_len, |                                 'downloaded_bytes': ctx.resume_len, | ||||||
|                                 'total_bytes': ctx.resume_len, |                                 'total_bytes': ctx.resume_len, | ||||||
|                             }, info_dict) |                             }, info_dict) | ||||||
|                             raise SucceedDownload() |                             raise SucceedDownload | ||||||
|                         else: |                         else: | ||||||
|                             # The length does not match, we start the download over |                             # The length does not match, we start the download over | ||||||
|                             self.report_unable_to_resume() |                             self.report_unable_to_resume() | ||||||
| @@ -194,7 +192,7 @@ class HttpFD(FileDownloader): | |||||||
|  |  | ||||||
|         def close_stream(): |         def close_stream(): | ||||||
|             if ctx.stream is not None: |             if ctx.stream is not None: | ||||||
|                 if not ctx.tmpfilename == '-': |                 if ctx.tmpfilename != '-': | ||||||
|                     ctx.stream.close() |                     ctx.stream.close() | ||||||
|                 ctx.stream = None |                 ctx.stream = None | ||||||
|  |  | ||||||
| @@ -237,8 +235,13 @@ class HttpFD(FileDownloader): | |||||||
|  |  | ||||||
|             def retry(e): |             def retry(e): | ||||||
|                 close_stream() |                 close_stream() | ||||||
|                 ctx.resume_len = (byte_counter if ctx.tmpfilename == '-' |                 if ctx.tmpfilename == '-': | ||||||
|                                   else os.path.getsize(encodeFilename(ctx.tmpfilename))) |                     ctx.resume_len = byte_counter | ||||||
|  |                 else: | ||||||
|  |                     try: | ||||||
|  |                         ctx.resume_len = os.path.getsize(ctx.tmpfilename) | ||||||
|  |                     except FileNotFoundError: | ||||||
|  |                         ctx.resume_len = 0 | ||||||
|                 raise RetryDownload(e) |                 raise RetryDownload(e) | ||||||
|  |  | ||||||
|             while True: |             while True: | ||||||
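`ctx.resume_len` feeds the `Range` header of the next attempt, and the new `FileNotFoundError` guard means a temp file that vanished mid-retry restarts the download from zero instead of crashing. A standalone sketch of that resume logic (not yt-dlp's actual request layer):

    import os
    import urllib.request

    def resume_request(url, tmpfilename):
        try:
            resume_len = os.path.getsize(tmpfilename)
        except FileNotFoundError:
            resume_len = 0  # temp file disappeared: start over
        headers = {'Range': f'bytes={resume_len}-'} if resume_len else {}
        return urllib.request.Request(url, headers=headers), resume_len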
| @@ -263,20 +266,20 @@ class HttpFD(FileDownloader): | |||||||
|                         ctx.filename = self.undo_temp_name(ctx.tmpfilename) |                         ctx.filename = self.undo_temp_name(ctx.tmpfilename) | ||||||
|                         self.report_destination(ctx.filename) |                         self.report_destination(ctx.filename) | ||||||
|                     except OSError as err: |                     except OSError as err: | ||||||
|                         self.report_error('unable to open for writing: %s' % str(err)) |                         self.report_error(f'unable to open for writing: {err}') | ||||||
|                         return False |                         return False | ||||||
|  |  | ||||||
|                     if self.params.get('xattr_set_filesize', False) and data_len is not None: |                     if self.params.get('xattr_set_filesize', False) and data_len is not None: | ||||||
|                         try: |                         try: | ||||||
|                             write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode()) |                             write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode()) | ||||||
|                         except (XAttrUnavailableError, XAttrMetadataError) as err: |                         except (XAttrUnavailableError, XAttrMetadataError) as err: | ||||||
|                             self.report_error('unable to set filesize xattr: %s' % str(err)) |                             self.report_error(f'unable to set filesize xattr: {err}') | ||||||
|  |  | ||||||
|                 try: |                 try: | ||||||
|                     ctx.stream.write(data_block) |                     ctx.stream.write(data_block) | ||||||
|                 except OSError as err: |                 except OSError as err: | ||||||
|                     self.to_stderr('\n') |                     self.to_stderr('\n') | ||||||
|                     self.report_error('unable to write data: %s' % str(err)) |                     self.report_error(f'unable to write data: {err}') | ||||||
|                     return False |                     return False | ||||||
|  |  | ||||||
|                 # Apply rate limit |                 # Apply rate limit | ||||||
| @@ -322,7 +325,7 @@ class HttpFD(FileDownloader): | |||||||
|                     elif now - ctx.throttle_start > 3: |                     elif now - ctx.throttle_start > 3: | ||||||
|                         if ctx.stream is not None and ctx.tmpfilename != '-': |                         if ctx.stream is not None and ctx.tmpfilename != '-': | ||||||
|                             ctx.stream.close() |                             ctx.stream.close() | ||||||
|                         raise ThrottledDownload() |                         raise ThrottledDownload | ||||||
|                 elif speed: |                 elif speed: | ||||||
|                     ctx.throttle_start = None |                     ctx.throttle_start = None | ||||||
|  |  | ||||||
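The throttling check above only trips once the measured speed has stayed below the limit for more than three seconds, so a momentary dip does not kill the connection. A sketch of the same rule, with `min_rate` standing in for the configured `--throttled-rate`:

    import time

    class ThrottledDownload(Exception):
        pass

    def check_throttled(state, speed, min_rate):
        # Raise only after speed stays below min_rate for > 3 seconds.
        if speed and speed < min_rate:
            state.setdefault('throttle_start', time.monotonic())
            if time.monotonic() - state['throttle_start'] > 3:
                raise ThrottledDownload
        elif speed:
            state.pop('throttle_start', None)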
| @@ -333,7 +336,7 @@ class HttpFD(FileDownloader): | |||||||
|  |  | ||||||
|             if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len: |             if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len: | ||||||
|                 ctx.resume_len = byte_counter |                 ctx.resume_len = byte_counter | ||||||
|                 raise NextFragment() |                 raise NextFragment | ||||||
|  |  | ||||||
|             if ctx.tmpfilename != '-': |             if ctx.tmpfilename != '-': | ||||||
|                 ctx.stream.close() |                 ctx.stream.close() | ||||||
|   | |||||||
| @@ -251,7 +251,7 @@ class IsmFD(FragmentFD): | |||||||
|         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) |         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) | ||||||
|  |  | ||||||
|         frag_index = 0 |         frag_index = 0 | ||||||
|         for i, segment in enumerate(segments): |         for segment in segments: | ||||||
|             frag_index += 1 |             frag_index += 1 | ||||||
|             if frag_index <= ctx['fragment_index']: |             if frag_index <= ctx['fragment_index']: | ||||||
|                 continue |                 continue | ||||||
|   | |||||||
| @@ -10,7 +10,7 @@ from ..version import __version__ as YT_DLP_VERSION | |||||||
|  |  | ||||||
|  |  | ||||||
| class MhtmlFD(FragmentFD): | class MhtmlFD(FragmentFD): | ||||||
|     _STYLESHEET = """\ |     _STYLESHEET = '''\ | ||||||
| html, body { | html, body { | ||||||
|     margin: 0; |     margin: 0; | ||||||
|     padding: 0; |     padding: 0; | ||||||
| @@ -45,7 +45,7 @@ body > figure > img { | |||||||
|     max-width: 100%; |     max-width: 100%; | ||||||
|     max-height: calc(100vh - 5em); |     max-height: calc(100vh - 5em); | ||||||
| } | } | ||||||
| """ | ''' | ||||||
|     _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET) |     _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET) | ||||||
|     _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET) |     _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET) | ||||||
|  |  | ||||||
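The two `re.sub` passes under `_STYLESHEET` act as a tiny CSS minifier: the first collapses every whitespace run to a single space, the second deletes any space that is not needed between two word characters. Applied to a fragment of the stylesheet:

    import re

    css = 'html, body {\n    margin: 0;\n    padding: 0;\n}\n'
    css = re.sub(r'\s+', ' ', css)
    css = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', css)
    print(css)  # html,body{margin:0;padding:0;}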
| @@ -57,24 +57,19 @@ body > figure > img { | |||||||
|         )).decode('us-ascii') + '?=' |         )).decode('us-ascii') + '?=' | ||||||
|  |  | ||||||
|     def _gen_cid(self, i, fragment, frag_boundary): |     def _gen_cid(self, i, fragment, frag_boundary): | ||||||
|         return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary) |         return f'{i}.{frag_boundary}@yt-dlp.github.io.invalid' | ||||||
|  |  | ||||||
|     def _gen_stub(self, *, fragments, frag_boundary, title): |     def _gen_stub(self, *, fragments, frag_boundary, title): | ||||||
|         output = io.StringIO() |         output = io.StringIO() | ||||||
|  |  | ||||||
|         output.write(( |         output.write( | ||||||
|             '<!DOCTYPE html>' |             '<!DOCTYPE html>' | ||||||
|             '<html>' |             '<html>' | ||||||
|             '<head>' |             '<head>' | ||||||
|             ''  '<meta name="generator" content="yt-dlp {version}">' |             f'<meta name="generator" content="yt-dlp {escapeHTML(YT_DLP_VERSION)}">' | ||||||
|             ''  '<title>{title}</title>' |             f'<title>{escapeHTML(title)}</title>' | ||||||
|             ''  '<style>{styles}</style>' |             f'<style>{self._STYLESHEET}</style>' | ||||||
|             '<body>' |             '<body>') | ||||||
|         ).format( |  | ||||||
|             version=escapeHTML(YT_DLP_VERSION), |  | ||||||
|             styles=self._STYLESHEET, |  | ||||||
|             title=escapeHTML(title) |  | ||||||
|         )) |  | ||||||
|  |  | ||||||
|         t0 = 0 |         t0 = 0 | ||||||
|         for i, frag in enumerate(fragments): |         for i, frag in enumerate(fragments): | ||||||
| @@ -87,15 +82,12 @@ body > figure > img { | |||||||
|                     num=i + 1, |                     num=i + 1, | ||||||
|                     t0=srt_subtitles_timecode(t0), |                     t0=srt_subtitles_timecode(t0), | ||||||
|                     t1=srt_subtitles_timecode(t1), |                     t1=srt_subtitles_timecode(t1), | ||||||
|                     duration=formatSeconds(frag['duration'], msec=True) |                     duration=formatSeconds(frag['duration'], msec=True), | ||||||
|                 )) |                 )) | ||||||
|             except (KeyError, ValueError, TypeError): |             except (KeyError, ValueError, TypeError): | ||||||
|                 t1 = None |                 t1 = None | ||||||
|                 output.write(( |                 output.write(f'<figcaption>Slide #{i + 1}</figcaption>') | ||||||
|                     '<figcaption>Slide #{num}</figcaption>' |             output.write(f'<img src="cid:{self._gen_cid(i, frag, frag_boundary)}">') | ||||||
|                 ).format(num=i + 1)) |  | ||||||
|             output.write('<img src="cid:{cid}">'.format( |  | ||||||
|                 cid=self._gen_cid(i, frag, frag_boundary))) |  | ||||||
|             output.write('</figure>') |             output.write('</figure>') | ||||||
|             t0 = t1 |             t0 = t1 | ||||||
|  |  | ||||||
| @@ -126,31 +118,24 @@ body > figure > img { | |||||||
|             stub = self._gen_stub( |             stub = self._gen_stub( | ||||||
|                 fragments=fragments, |                 fragments=fragments, | ||||||
|                 frag_boundary=frag_boundary, |                 frag_boundary=frag_boundary, | ||||||
|                 title=title |                 title=title, | ||||||
|             ) |             ) | ||||||
|  |  | ||||||
|             ctx['dest_stream'].write(( |             ctx['dest_stream'].write(( | ||||||
|                 'MIME-Version: 1.0\r\n' |                 'MIME-Version: 1.0\r\n' | ||||||
|                 'From: <nowhere@yt-dlp.github.io.invalid>\r\n' |                 'From: <nowhere@yt-dlp.github.io.invalid>\r\n' | ||||||
|                 'To: <nowhere@yt-dlp.github.io.invalid>\r\n' |                 'To: <nowhere@yt-dlp.github.io.invalid>\r\n' | ||||||
|                 'Subject: {title}\r\n' |                 f'Subject: {self._escape_mime(title)}\r\n' | ||||||
|                 'Content-type: multipart/related; ' |                 'Content-type: multipart/related; ' | ||||||
|                 ''  'boundary="{boundary}"; ' |                 f'boundary="{frag_boundary}"; ' | ||||||
|                 ''  'type="text/html"\r\n' |                 'type="text/html"\r\n' | ||||||
|                 'X.yt-dlp.Origin: {origin}\r\n' |                 f'X.yt-dlp.Origin: {origin}\r\n' | ||||||
|                 '\r\n' |                 '\r\n' | ||||||
|                 '--{boundary}\r\n' |                 f'--{frag_boundary}\r\n' | ||||||
|                 'Content-Type: text/html; charset=utf-8\r\n' |                 'Content-Type: text/html; charset=utf-8\r\n' | ||||||
|                 'Content-Length: {length}\r\n' |                 f'Content-Length: {len(stub)}\r\n' | ||||||
|                 '\r\n' |                 '\r\n' | ||||||
|                 '{stub}\r\n' |                 f'{stub}\r\n').encode()) | ||||||
|             ).format( |  | ||||||
|                 origin=origin, |  | ||||||
|                 boundary=frag_boundary, |  | ||||||
|                 length=len(stub), |  | ||||||
|                 title=self._escape_mime(title), |  | ||||||
|                 stub=stub |  | ||||||
|             ).encode()) |  | ||||||
|             extra_state['header_written'] = True |             extra_state['header_written'] = True | ||||||
|  |  | ||||||
|         for i, fragment in enumerate(fragments): |         for i, fragment in enumerate(fragments): | ||||||
|   | |||||||
| @@ -2,58 +2,10 @@ import json | |||||||
| import threading | import threading | ||||||
| import time | import time | ||||||
|  |  | ||||||
| from . import get_suitable_downloader |  | ||||||
| from .common import FileDownloader | from .common import FileDownloader | ||||||
| from .external import FFmpegFD | from .external import FFmpegFD | ||||||
| from ..networking import Request | from ..networking import Request | ||||||
| from ..utils import DownloadError, WebSocketsWrapper, str_or_none, try_get | from ..utils import DownloadError, str_or_none, try_get | ||||||
|  |  | ||||||
|  |  | ||||||
| class NiconicoDmcFD(FileDownloader): |  | ||||||
|     """ Downloading niconico douga from DMC with heartbeat """ |  | ||||||
|  |  | ||||||
|     def real_download(self, filename, info_dict): |  | ||||||
|         from ..extractor.niconico import NiconicoIE |  | ||||||
|  |  | ||||||
|         self.to_screen('[%s] Downloading from DMC' % self.FD_NAME) |  | ||||||
|         ie = NiconicoIE(self.ydl) |  | ||||||
|         info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict) |  | ||||||
|  |  | ||||||
|         fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params) |  | ||||||
|  |  | ||||||
|         success = download_complete = False |  | ||||||
|         timer = [None] |  | ||||||
|         heartbeat_lock = threading.Lock() |  | ||||||
|         heartbeat_url = heartbeat_info_dict['url'] |  | ||||||
|         heartbeat_data = heartbeat_info_dict['data'].encode() |  | ||||||
|         heartbeat_interval = heartbeat_info_dict.get('interval', 30) |  | ||||||
|  |  | ||||||
|         request = Request(heartbeat_url, heartbeat_data) |  | ||||||
|  |  | ||||||
|         def heartbeat(): |  | ||||||
|             try: |  | ||||||
|                 self.ydl.urlopen(request).read() |  | ||||||
|             except Exception: |  | ||||||
|                 self.to_screen('[%s] Heartbeat failed' % self.FD_NAME) |  | ||||||
|  |  | ||||||
|             with heartbeat_lock: |  | ||||||
|                 if not download_complete: |  | ||||||
|                     timer[0] = threading.Timer(heartbeat_interval, heartbeat) |  | ||||||
|                     timer[0].start() |  | ||||||
|  |  | ||||||
|         heartbeat_info_dict['ping']() |  | ||||||
|         self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval)) |  | ||||||
|         try: |  | ||||||
|             heartbeat() |  | ||||||
|             if type(fd).__name__ == 'HlsFD': |  | ||||||
|                 info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0]) |  | ||||||
|             success = fd.real_download(filename, info_dict) |  | ||||||
|         finally: |  | ||||||
|             if heartbeat_lock: |  | ||||||
|                 with heartbeat_lock: |  | ||||||
|                     timer[0].cancel() |  | ||||||
|                     download_complete = True |  | ||||||
|         return success |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class NiconicoLiveFD(FileDownloader): | class NiconicoLiveFD(FileDownloader): | ||||||
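The deleted `NiconicoDmcFD` above kept its DMC session alive by re-arming a `threading.Timer` after every heartbeat request until the download finished. The pattern in isolation (a sketch; `ping` stands in for the HTTP call):

    import threading

    def start_heartbeat(ping, interval=30.0):
        stopped = threading.Event()
        timer = [None]

        def beat():
            if stopped.is_set():
                return
            ping()
            # Re-arm: each beat schedules the next one.
            timer[0] = threading.Timer(interval, beat)
            timer[0].daemon = True
            timer[0].start()

        beat()

        def stop():
            stopped.set()
            if timer[0] is not None:
                timer[0].cancel()
        return stop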
| @@ -64,7 +16,6 @@ class NiconicoLiveFD(FileDownloader): | |||||||
|         ws_url = info_dict['url'] |         ws_url = info_dict['url'] | ||||||
|         ws_extractor = info_dict['ws'] |         ws_extractor = info_dict['ws'] | ||||||
|         ws_origin_host = info_dict['origin'] |         ws_origin_host = info_dict['origin'] | ||||||
|         cookies = info_dict.get('cookies') |  | ||||||
|         live_quality = info_dict.get('live_quality', 'high') |         live_quality = info_dict.get('live_quality', 'high') | ||||||
|         live_latency = info_dict.get('live_latency', 'high') |         live_latency = info_dict.get('live_latency', 'high') | ||||||
|         dl = FFmpegFD(self.ydl, self.params or {}) |         dl = FFmpegFD(self.ydl, self.params or {}) | ||||||
| @@ -76,12 +27,7 @@ class NiconicoLiveFD(FileDownloader): | |||||||
|  |  | ||||||
|         def communicate_ws(reconnect): |         def communicate_ws(reconnect): | ||||||
|             if reconnect: |             if reconnect: | ||||||
|                 ws = WebSocketsWrapper(ws_url, { |                 ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'})) | ||||||
|                     'Cookies': str_or_none(cookies) or '', |  | ||||||
|                     'Origin': f'https://{ws_origin_host}', |  | ||||||
|                     'Accept': '*/*', |  | ||||||
|                     'User-Agent': self.params['http_headers']['User-Agent'], |  | ||||||
|                 }) |  | ||||||
|                 if self.ydl.params.get('verbose', False): |                 if self.ydl.params.get('verbose', False): | ||||||
|                     self.to_screen('[debug] Sending startWatching request') |                     self.to_screen('[debug] Sending startWatching request') | ||||||
|                 ws.send(json.dumps({ |                 ws.send(json.dumps({ | ||||||
| @@ -91,14 +37,15 @@ class NiconicoLiveFD(FileDownloader): | |||||||
|                             'quality': live_quality, |                             'quality': live_quality, | ||||||
|                             'protocol': 'hls+fmp4', |                             'protocol': 'hls+fmp4', | ||||||
|                             'latency': live_latency, |                             'latency': live_latency, | ||||||
|                             'chasePlay': False |                             'accessRightMethod': 'single_cookie', | ||||||
|  |                             'chasePlay': False, | ||||||
|                         }, |                         }, | ||||||
|                         'room': { |                         'room': { | ||||||
|                             'protocol': 'webSocket', |                             'protocol': 'webSocket', | ||||||
|                             'commentable': True |                             'commentable': True, | ||||||
|                         }, |                         }, | ||||||
|                         'reconnect': True, |                         'reconnect': True, | ||||||
|                     } |                     }, | ||||||
|                 })) |                 })) | ||||||
|             else: |             else: | ||||||
|                 ws = ws_extractor |                 ws = ws_extractor | ||||||
| @@ -124,7 +71,7 @@ class NiconicoLiveFD(FileDownloader): | |||||||
|                     elif self.ydl.params.get('verbose', False): |                     elif self.ydl.params.get('verbose', False): | ||||||
|                         if len(recv) > 100: |                         if len(recv) > 100: | ||||||
|                             recv = recv[:100] + '...' |                             recv = recv[:100] + '...' | ||||||
|                         self.to_screen('[debug] Server said: %s' % recv) |                         self.to_screen(f'[debug] Server said: {recv}') | ||||||
|  |  | ||||||
|         def ws_main(): |         def ws_main(): | ||||||
|             reconnect = False |             reconnect = False | ||||||
| @@ -134,7 +81,7 @@ class NiconicoLiveFD(FileDownloader): | |||||||
|                     if ret is True: |                     if ret is True: | ||||||
|                         return |                         return | ||||||
|                 except BaseException as e: |                 except BaseException as e: | ||||||
|                     self.to_screen('[%s] %s: Connection error occurred, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e))) |                     self.to_screen('[{}] {}: Connection error occurred, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e))) | ||||||
|                     time.sleep(10) |                     time.sleep(10) | ||||||
|                     continue |                     continue | ||||||
|                 finally: |                 finally: | ||||||
|   | |||||||
| @@ -8,7 +8,6 @@ from ..utils import ( | |||||||
|     Popen, |     Popen, | ||||||
|     check_executable, |     check_executable, | ||||||
|     encodeArgument, |     encodeArgument, | ||||||
|     encodeFilename, |  | ||||||
|     get_exe_version, |     get_exe_version, | ||||||
| ) | ) | ||||||
|  |  | ||||||
| @@ -179,15 +178,15 @@ class RtmpFD(FileDownloader): | |||||||
|             return False |             return False | ||||||
|  |  | ||||||
|         while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: |         while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: | ||||||
|             prevsize = os.path.getsize(encodeFilename(tmpfilename)) |             prevsize = os.path.getsize(tmpfilename) | ||||||
|             self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize) |             self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes') | ||||||
|             time.sleep(5.0)  # This seems to be needed |             time.sleep(5.0)  # This seems to be needed | ||||||
|             args = basic_args + ['--resume'] |             args = [*basic_args, '--resume'] | ||||||
|             if retval == RD_FAILED: |             if retval == RD_FAILED: | ||||||
|                 args += ['--skip', '1'] |                 args += ['--skip', '1'] | ||||||
|             args = [encodeArgument(a) for a in args] |             args = [encodeArgument(a) for a in args] | ||||||
|             retval = run_rtmpdump(args) |             retval = run_rtmpdump(args) | ||||||
|             cursize = os.path.getsize(encodeFilename(tmpfilename)) |             cursize = os.path.getsize(tmpfilename) | ||||||
|             if prevsize == cursize and retval == RD_FAILED: |             if prevsize == cursize and retval == RD_FAILED: | ||||||
|                 break |                 break | ||||||
|             # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those |             # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those | ||||||
| @@ -196,8 +195,8 @@ class RtmpFD(FileDownloader): | |||||||
|                 retval = RD_SUCCESS |                 retval = RD_SUCCESS | ||||||
|                 break |                 break | ||||||
|         if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): |         if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): | ||||||
|             fsize = os.path.getsize(encodeFilename(tmpfilename)) |             fsize = os.path.getsize(tmpfilename) | ||||||
|             self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize) |             self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes') | ||||||
|             self.try_rename(tmpfilename, filename) |             self.try_rename(tmpfilename, filename) | ||||||
|             self._hook_progress({ |             self._hook_progress({ | ||||||
|                 'downloaded_bytes': fsize, |                 'downloaded_bytes': fsize, | ||||||
|   | |||||||
| @@ -2,7 +2,7 @@ import os | |||||||
| import subprocess | import subprocess | ||||||
|  |  | ||||||
| from .common import FileDownloader | from .common import FileDownloader | ||||||
| from ..utils import check_executable, encodeFilename | from ..utils import check_executable | ||||||
|  |  | ||||||
|  |  | ||||||
| class RtspFD(FileDownloader): | class RtspFD(FileDownloader): | ||||||
| @@ -26,7 +26,7 @@ class RtspFD(FileDownloader): | |||||||
|  |  | ||||||
|         retval = subprocess.call(args) |         retval = subprocess.call(args) | ||||||
|         if retval == 0: |         if retval == 0: | ||||||
|             fsize = os.path.getsize(encodeFilename(tmpfilename)) |             fsize = os.path.getsize(tmpfilename) | ||||||
|             self.to_screen(f'\r[{args[0]}] {fsize} bytes') |             self.to_screen(f'\r[{args[0]}] {fsize} bytes') | ||||||
|             self.try_rename(tmpfilename, filename) |             self.try_rename(tmpfilename, filename) | ||||||
|             self._hook_progress({ |             self._hook_progress({ | ||||||
|   | |||||||
| @@ -18,7 +18,7 @@ class YoutubeLiveChatFD(FragmentFD): | |||||||
|  |  | ||||||
|     def real_download(self, filename, info_dict): |     def real_download(self, filename, info_dict): | ||||||
|         video_id = info_dict['video_id'] |         video_id = info_dict['video_id'] | ||||||
|         self.to_screen('[%s] Downloading live chat' % self.FD_NAME) |         self.to_screen(f'[{self.FD_NAME}] Downloading live chat') | ||||||
|         if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat': |         if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat': | ||||||
|             self.report_warning('Live chat download runs until the livestream ends. ' |             self.report_warning('Live chat download runs until the livestream ends. ' | ||||||
|                                 'If you wish to download the video simultaneously, run a separate yt-dlp instance') |                                 'If you wish to download the video simultaneously, run a separate yt-dlp instance') | ||||||
| @@ -123,8 +123,8 @@ class YoutubeLiveChatFD(FragmentFD): | |||||||
|                         data, |                         data, | ||||||
|                         lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} |                         lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} | ||||||
|  |  | ||||||
|                     func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live |                     func = ((info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live) | ||||||
|                             or frag_index == 1 and try_refresh_replay_beginning |                             or (frag_index == 1 and try_refresh_replay_beginning) | ||||||
|                             or parse_actions_replay) |                             or parse_actions_replay) | ||||||
|                     return (True, *func(live_chat_continuation)) |                     return (True, *func(live_chat_continuation)) | ||||||
|                 except HTTPError as err: |                 except HTTPError as err: | ||||||
|   | |||||||
| @@ -1,16 +1,25 @@ | |||||||
| from ..compat.compat_utils import passthrough_module | from ..compat.compat_utils import passthrough_module | ||||||
|  | from ..globals import extractors as _extractors_context | ||||||
|  | from ..globals import plugin_ies as _plugin_ies_context | ||||||
|  | from ..plugins import PluginSpec, register_plugin_spec | ||||||
|  |  | ||||||
| passthrough_module(__name__, '.extractors') | passthrough_module(__name__, '.extractors') | ||||||
| del passthrough_module | del passthrough_module | ||||||
|  |  | ||||||
|  | register_plugin_spec(PluginSpec( | ||||||
|  |     module_name='extractor', | ||||||
|  |     suffix='IE', | ||||||
|  |     destination=_extractors_context, | ||||||
|  |     plugin_destination=_plugin_ies_context, | ||||||
|  | )) | ||||||
|  |  | ||||||
|  |  | ||||||
| def gen_extractor_classes(): | def gen_extractor_classes(): | ||||||
|     """ Return a list of supported extractors. |     """ Return a list of supported extractors. | ||||||
|     The order does matter; the first extractor matched is the one handling the URL. |     The order does matter; the first extractor matched is the one handling the URL. | ||||||
|     """ |     """ | ||||||
|     from .extractors import _ALL_CLASSES |     import_extractors() | ||||||
|  |     return list(_extractors_context.value.values()) | ||||||
|     return _ALL_CLASSES |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def gen_extractors(): | def gen_extractors(): | ||||||
| @@ -37,6 +46,9 @@ def list_extractors(age_limit=None): | |||||||
|  |  | ||||||
| def get_info_extractor(ie_name): | def get_info_extractor(ie_name): | ||||||
|     """Returns the info extractor class with the given ie_name""" |     """Returns the info extractor class with the given ie_name""" | ||||||
|     from . import extractors |     import_extractors() | ||||||
|  |     return _extractors_context.value[f'{ie_name}IE'] | ||||||
|  |  | ||||||
|     return getattr(extractors, f'{ie_name}IE') |  | ||||||
|  | def import_extractors(): | ||||||
|  |     from . import extractors  # noqa: F401 | ||||||
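`gen_extractor_classes()` and `get_info_extractor()` now read from a shared registry that importing `.extractors` populates, rather than reflecting over module attributes; plugins register through the same `PluginSpec` mechanism. A minimal sketch of the registry pattern (names illustrative, not yt-dlp's real globals):

    _registry = {}

    def register(cls):
        _registry[cls.__name__] = cls
        return cls

    @register
    class ExampleIE:
        pass

    def get_info_extractor(ie_name):
        return _registry[f'{ie_name}IE']

    assert get_info_extractor('Example') is ExampleIE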
|   | |||||||
										
											
File diff suppressed because it is too large
							| @@ -4,18 +4,18 @@ import re | |||||||
| import time | import time | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import compat_str |  | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     dict_get, |  | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     js_to_json, |     dict_get, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|  |     js_to_json, | ||||||
|     parse_iso8601, |     parse_iso8601, | ||||||
|     str_or_none, |     str_or_none, | ||||||
|     traverse_obj, |     traverse_obj, | ||||||
|     try_get, |     try_get, | ||||||
|     unescapeHTML, |     unescapeHTML, | ||||||
|     update_url_query, |     update_url_query, | ||||||
|  |     url_or_none, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -66,7 +66,7 @@ class ABCIE(InfoExtractor): | |||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'WWI Centenary', |             'title': 'WWI Centenary', | ||||||
|             'description': 'md5:c2379ec0ca84072e86b446e536954546', |             'description': 'md5:c2379ec0ca84072e86b446e536954546', | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074', |         'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
| @@ -74,7 +74,7 @@ class ABCIE(InfoExtractor): | |||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia', |             'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia', | ||||||
|             'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f', |             'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f', | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476', |         'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
| @@ -85,7 +85,7 @@ class ABCIE(InfoExtractor): | |||||||
|             'upload_date': '20200813', |             'upload_date': '20200813', | ||||||
|             'uploader': 'Behind the News', |             'uploader': 'Behind the News', | ||||||
|             'uploader_id': 'behindthenews', |             'uploader_id': 'behindthenews', | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540', |         'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
| @@ -94,7 +94,7 @@ class ABCIE(InfoExtractor): | |||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.', |             'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.', | ||||||
|             'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485', |             'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485', | ||||||
|         } |         }, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
| @@ -125,7 +125,7 @@ class ABCIE(InfoExtractor): | |||||||
|                 if mobj is None: |                 if mobj is None: | ||||||
|                     expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None) |                     expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None) | ||||||
|                     if expired: |                     if expired: | ||||||
|                         raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True) |                         raise ExtractorError(f'{self.IE_NAME} said: {expired}', expected=True) | ||||||
|                     raise ExtractorError('Unable to extract video urls') |                     raise ExtractorError('Unable to extract video urls') | ||||||
|  |  | ||||||
|             urls_info = self._parse_json( |             urls_info = self._parse_json( | ||||||
| @@ -163,7 +163,7 @@ class ABCIE(InfoExtractor): | |||||||
|                 'height': height, |                 'height': height, | ||||||
|                 'tbr': bitrate, |                 'tbr': bitrate, | ||||||
|                 'filesize': int_or_none(url_info.get('filesize')), |                 'filesize': int_or_none(url_info.get('filesize')), | ||||||
|                 'format_id': format_id |                 'format_id': format_id, | ||||||
|             }) |             }) | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
| @@ -180,20 +180,100 @@ class ABCIViewIE(InfoExtractor): | |||||||
|     _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)' |     _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)' | ||||||
|     _GEO_COUNTRIES = ['AU'] |     _GEO_COUNTRIES = ['AU'] | ||||||
|  |  | ||||||
|     # ABC iview programs are normally available for 14 days only. |  | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|  |         'url': 'https://iview.abc.net.au/show/utopia/series/1/video/CO1211V001S00', | ||||||
|  |         'md5': '52a942bfd7a0b79a6bfe9b4ce6c9d0ed', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'CO1211V001S00', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'Series 1 Ep 1 Wood For The Trees', | ||||||
|  |             'series': 'Utopia', | ||||||
|  |             'description': 'md5:0cfb2c183c1b952d1548fd65c8a95c00', | ||||||
|  |             'upload_date': '20230726', | ||||||
|  |             'uploader_id': 'abc1', | ||||||
|  |             'series_id': 'CO1211V', | ||||||
|  |             'episode_id': 'CO1211V001S00', | ||||||
|  |             'season_number': 1, | ||||||
|  |             'season': 'Season 1', | ||||||
|  |             'episode_number': 1, | ||||||
|  |             'episode': 'Wood For The Trees', | ||||||
|  |             'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/co/CO1211V001S00_5ad8353f4df09_1280.jpg', | ||||||
|  |             'timestamp': 1690403700, | ||||||
|  |         }, | ||||||
|  |         'params': { | ||||||
|  |             'skip_download': True, | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'note': 'No episode name', | ||||||
|         'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00', |         'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00', | ||||||
|         'md5': '67715ce3c78426b11ba167d875ac6abf', |         'md5': '67715ce3c78426b11ba167d875ac6abf', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': 'LE1927H001S00', |             'id': 'LE1927H001S00', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': "Series 11 Ep 1", |             'title': 'Series 11 Ep 1', | ||||||
|             'series': "Gruen", |             'series': 'Gruen', | ||||||
|             'description': 'md5:52cc744ad35045baf6aded2ce7287f67', |             'description': 'md5:52cc744ad35045baf6aded2ce7287f67', | ||||||
|             'upload_date': '20190925', |             'upload_date': '20190925', | ||||||
|             'uploader_id': 'abc1', |             'uploader_id': 'abc1', | ||||||
|  |             'series_id': 'LE1927H', | ||||||
|  |             'episode_id': 'LE1927H001S00', | ||||||
|  |             'season_number': 11, | ||||||
|  |             'season': 'Season 11', | ||||||
|  |             'episode_number': 1, | ||||||
|  |             'episode': 'Episode 1', | ||||||
|  |             'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/le/LE1927H001S00_5d954fbd79e25_1280.jpg', | ||||||
|             'timestamp': 1569445289, |             'timestamp': 1569445289, | ||||||
|         }, |         }, | ||||||
|  |         'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], | ||||||
|  |         'params': { | ||||||
|  |             'skip_download': True, | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'note': 'No episode number', | ||||||
|  |         'url': 'https://iview.abc.net.au/show/four-corners/series/2022/video/NC2203H039S00', | ||||||
|  |         'md5': '77cb7d8434440e3b28fbebe331c2456a', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'NC2203H039S00', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'Series 2022 Locking Up Kids', | ||||||
|  |             'series': 'Four Corners', | ||||||
|  |             'description': 'md5:54829ca108846d1a70e1fcce2853e720', | ||||||
|  |             'upload_date': '20221114', | ||||||
|  |             'uploader_id': 'abc1', | ||||||
|  |             'series_id': 'NC2203H', | ||||||
|  |             'episode_id': 'NC2203H039S00', | ||||||
|  |             'season_number': 2022, | ||||||
|  |             'season': 'Season 2022', | ||||||
|  |             'episode': 'Locking Up Kids', | ||||||
|  |             'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg', | ||||||
|  |             'timestamp': 1668460497, | ||||||
|  |         }, | ||||||
|  |         'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], | ||||||
|  |         'params': { | ||||||
|  |             'skip_download': True, | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'note': 'No episode name or number', | ||||||
|  |         'url': 'https://iview.abc.net.au/show/landline/series/2021/video/RF2004Q043S00', | ||||||
|  |         'md5': '2e17dec06b13cc81dc119d2565289396', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'RF2004Q043S00', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'Series 2021', | ||||||
|  |             'series': 'Landline', | ||||||
|  |             'description': 'md5:c9f30d9c0c914a7fd23842f6240be014', | ||||||
|  |             'upload_date': '20211205', | ||||||
|  |             'uploader_id': 'abc1', | ||||||
|  |             'series_id': 'RF2004Q', | ||||||
|  |             'episode_id': 'RF2004Q043S00', | ||||||
|  |             'season_number': 2021, | ||||||
|  |             'season': 'Season 2021', | ||||||
|  |             'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg', | ||||||
|  |             'timestamp': 1638710705, | ||||||
|  |         }, | ||||||
|  |         'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], | ||||||
|         'params': { |         'params': { | ||||||
|             'skip_download': True, |             'skip_download': True, | ||||||
|         }, |         }, | ||||||
| @@ -207,13 +287,12 @@ class ABCIViewIE(InfoExtractor): | |||||||
|         stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) |         stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) | ||||||
|  |  | ||||||
|         house_number = video_params.get('episodeHouseNumber') or video_id |         house_number = video_params.get('episodeHouseNumber') or video_id | ||||||
|         path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format( |         path = f'/auth/hls/sign?ts={int(time.time())}&hn={house_number}&d=android-tablet' | ||||||
|             int(time.time()), house_number) |  | ||||||
|         sig = hmac.new( |         sig = hmac.new( | ||||||
|             b'android.content.res.Resources', |             b'android.content.res.Resources', | ||||||
|             path.encode('utf-8'), hashlib.sha256).hexdigest() |             path.encode(), hashlib.sha256).hexdigest() | ||||||
|         token = self._download_webpage( |         token = self._download_webpage( | ||||||
|             'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id) |             f'http://iview.abc.net.au{path}&sig={sig}', video_id) | ||||||
|  |  | ||||||
|         def tokenize_url(url, token): |         def tokenize_url(url, token): | ||||||
|             return update_url_query(url, { |             return update_url_query(url, { | ||||||
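Aside: the rewritten signing lines are self-contained enough to check in isolation. A minimal stdlib-only sketch — the HMAC key, path layout, and host are taken verbatim from the hunk above, while sign_iview_path is only an illustrative name:

import hashlib
import hmac
import time

def sign_iview_path(house_number):
    # Same scheme as the extractor: HMAC-SHA256 over the request path,
    # keyed with the literal bytes b'android.content.res.Resources'.
    path = f'/auth/hls/sign?ts={int(time.time())}&hn={house_number}&d=android-tablet'
    sig = hmac.new(b'android.content.res.Resources', path.encode(), hashlib.sha256).hexdigest()
    return f'http://iview.abc.net.au{path}&sig={sig}'

print(sign_iview_path('CO1211V001S00'))  # house number borrowed from the first test above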
| @@ -222,7 +301,7 @@ class ABCIViewIE(InfoExtractor): | |||||||
|  |  | ||||||
|         for sd in ('1080', '720', 'sd', 'sd-low'): |         for sd in ('1080', '720', 'sd', 'sd-low'): | ||||||
|             sd_url = try_get( |             sd_url = try_get( | ||||||
|                 stream, lambda x: x['streams']['hls'][sd], compat_str) |                 stream, lambda x: x['streams']['hls'][sd], str) | ||||||
|             if not sd_url: |             if not sd_url: | ||||||
|                 continue |                 continue | ||||||
|             formats = self._extract_m3u8_formats( |             formats = self._extract_m3u8_formats( | ||||||
| @@ -255,6 +334,8 @@ class ABCIViewIE(InfoExtractor): | |||||||
|             'episode_number': int_or_none(self._search_regex( |             'episode_number': int_or_none(self._search_regex( | ||||||
|                 r'\bEp\s+(\d+)\b', title, 'episode number', default=None)), |                 r'\bEp\s+(\d+)\b', title, 'episode number', default=None)), | ||||||
|             'episode_id': house_number, |             'episode_id': house_number, | ||||||
|  |             'episode': self._search_regex( | ||||||
|  |                 r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title, 'episode', default='') or None, | ||||||
|             'uploader_id': video_params.get('channel'), |             'uploader_id': video_params.get('channel'), | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|             'subtitles': subtitles, |             'subtitles': subtitles, | ||||||
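The episode regex added here composes with the existing episode-number regex. A quick sketch against the titles that appear in the new tests (parse_title is an illustrative helper, not extractor code):

import re

def parse_title(title):
    # Episode number comes from 'Ep N'; the episode name is whatever survives
    # after stripping the optional 'Series X' and 'Ep N' prefixes, as above.
    num = re.search(r'\bEp\s+(\d+)\b', title)
    name = re.search(r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title).group(1)
    return (int(num.group(1)) if num else None), (name or None)

assert parse_title('Series 1 Ep 1 Wood For The Trees') == (1, 'Wood For The Trees')
assert parse_title('Series 11 Ep 1') == (1, None)
assert parse_title('Series 2022 Locking Up Kids') == (None, 'Locking Up Kids')
assert parse_title('Series 2021') == (None, None)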
| @@ -275,7 +356,7 @@ class ABCIViewShowSeriesIE(InfoExtractor): | |||||||
|             'description': 'md5:93119346c24a7c322d446d8eece430ff', |             'description': 'md5:93119346c24a7c322d446d8eece430ff', | ||||||
|             'series': 'Upper Middle Bogan', |             'series': 'Upper Middle Bogan', | ||||||
|             'season': 'Series 1', |             'season': 'Series 1', | ||||||
|             'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$' |             'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$', | ||||||
|         }, |         }, | ||||||
|         'playlist_count': 8, |         'playlist_count': 8, | ||||||
|     }, { |     }, { | ||||||
| @@ -294,17 +375,39 @@ class ABCIViewShowSeriesIE(InfoExtractor): | |||||||
|             'noplaylist': True, |             'noplaylist': True, | ||||||
|             'skip_download': 'm3u8', |             'skip_download': 'm3u8', | ||||||
|         }, |         }, | ||||||
|  |     }, { | ||||||
|  |         # 'videoEpisodes' is a dict with `items` key | ||||||
|  |         'url': 'https://iview.abc.net.au/show/7-30-mark-humphries-satire', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '178458-0', | ||||||
|  |             'title': 'Episodes', | ||||||
|  |             'description': 'Satirist Mark Humphries brings his unique perspective on current political events for 7.30.', | ||||||
|  |             'series': '7.30 Mark Humphries Satire', | ||||||
|  |             'season': 'Episodes', | ||||||
|  |             'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$', | ||||||
|  |         }, | ||||||
|  |         'playlist_count': 15, | ||||||
|  |         'skip': 'This program is not currently available in ABC iview', | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://iview.abc.net.au/show/inbestigators', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '175343-1', | ||||||
|  |             'title': 'Series 1', | ||||||
|  |             'description': 'md5:b9976935a6450e5b78ce2a940a755685', | ||||||
|  |             'series': 'The Inbestigators', | ||||||
|  |             'season': 'Series 1', | ||||||
|  |             'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.+\.jpg', | ||||||
|  |         }, | ||||||
|  |         'playlist_count': 17, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         show_id = self._match_id(url) |         show_id = self._match_id(url) | ||||||
|         webpage = self._download_webpage(url, show_id) |         webpage = self._download_webpage(url, show_id) | ||||||
|         webpage_data = self._search_regex( |         video_data = self._search_json( | ||||||
|             r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;', |             r'window\.__INITIAL_STATE__\s*=\s*[\'"]', webpage, 'initial state', show_id, | ||||||
|             webpage, 'initial state') |             transform_source=lambda x: x.encode().decode('unicode_escape'), | ||||||
|         video_data = self._parse_json( |             end_pattern=r'[\'"]\s*;')['route']['pageData']['_embedded'] | ||||||
|             unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id) |  | ||||||
|         video_data = video_data['route']['pageData']['_embedded'] |  | ||||||
|  |  | ||||||
|         highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl']) |         highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl']) | ||||||
|         if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'): |         if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'): | ||||||
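The switch from _search_regex plus _parse_json to _search_json keeps the same decoding steps: grab the quoted blob, undo the backslash escaping, then parse JSON. A stdlib-only sketch on a toy page (the real page embeds the full iview state object the same way):

import json
import re

page = 'window.__INITIAL_STATE__ = "{\\"route\\":{\\"pageData\\":{\\"_embedded\\":{}}}}";'
raw = re.search(r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;', page).group(1)
state = json.loads(raw.encode().decode('unicode_escape'))
print(state['route']['pageData'])  # -> {'_embedded': {}}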
| @@ -313,12 +416,14 @@ class ABCIViewShowSeriesIE(InfoExtractor): | |||||||
|         series = video_data['selectedSeries'] |         series = video_data['selectedSeries'] | ||||||
|         return { |         return { | ||||||
|             '_type': 'playlist', |             '_type': 'playlist', | ||||||
|             'entries': [self.url_result(episode['shareUrl']) |             'entries': [self.url_result(episode_url, ABCIViewIE) | ||||||
|                         for episode in series['_embedded']['videoEpisodes']], |                         for episode_url in traverse_obj(series, ( | ||||||
|  |                             '_embedded', 'videoEpisodes', (None, 'items'), ..., 'shareUrl', {url_or_none}))], | ||||||
|             'id': series.get('id'), |             'id': series.get('id'), | ||||||
|             'title': dict_get(series, ('title', 'displaySubtitle')), |             'title': dict_get(series, ('title', 'displaySubtitle')), | ||||||
|             'description': series.get('description'), |             'description': series.get('description'), | ||||||
|             'series': dict_get(series, ('showTitle', 'displayTitle')), |             'series': dict_get(series, ('showTitle', 'displayTitle')), | ||||||
|             'season': dict_get(series, ('title', 'displaySubtitle')), |             'season': dict_get(series, ('title', 'displaySubtitle')), | ||||||
|             'thumbnail': series.get('thumbnail'), |             'thumbnail': traverse_obj( | ||||||
|  |                 series, 'thumbnail', ('images', lambda _, v: v['name'] == 'seriesThumbnail', 'url'), get_all=False), | ||||||
|         } |         } | ||||||
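On the new (None, 'items') branch: a sketch of how the same traverse_obj path accepts both shapes of 'videoEpisodes' — the bare list and the dict noted in the new test. This assumes yt-dlp itself is importable, and the share URLs are made up for illustration:

from yt_dlp.utils import traverse_obj, url_or_none

as_list = {'_embedded': {'videoEpisodes': [{'shareUrl': 'https://iview.abc.net.au/video/a'}]}}
as_dict = {'_embedded': {'videoEpisodes': {'items': [{'shareUrl': 'https://iview.abc.net.au/video/b'}]}}}
for series in (as_list, as_dict):
    # (None, 'items') branches over "the object itself" and its 'items' key,
    # so exactly one branch yields episode dicts for either shape.
    print(traverse_obj(series, (
        '_embedded', 'videoEpisodes', (None, 'items'), ..., 'shareUrl', {url_or_none})))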
|   | |||||||
| @@ -58,7 +58,7 @@ class AbcNewsVideoIE(AMPIE): | |||||||
|         display_id = mobj.group('display_id') |         display_id = mobj.group('display_id') | ||||||
|         video_id = mobj.group('id') |         video_id = mobj.group('id') | ||||||
|         info_dict = self._extract_feed_info( |         info_dict = self._extract_feed_info( | ||||||
|             'http://abcnews.go.com/video/itemfeed?id=%s' % video_id) |             f'http://abcnews.go.com/video/itemfeed?id={video_id}') | ||||||
|         info_dict.update({ |         info_dict.update({ | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'display_id': display_id, |             'display_id': display_id, | ||||||
|   | |||||||
| @@ -1,5 +1,4 @@ | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import compat_str |  | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     dict_get, |     dict_get, | ||||||
|     int_or_none, |     int_or_none, | ||||||
| @@ -57,11 +56,11 @@ class ABCOTVSIE(InfoExtractor): | |||||||
|         data = self._download_json( |         data = self._download_json( | ||||||
|             'https://api.abcotvs.com/v2/content', display_id, query={ |             'https://api.abcotvs.com/v2/content', display_id, query={ | ||||||
|                 'id': video_id, |                 'id': video_id, | ||||||
|                 'key': 'otv.web.%s.story' % station, |                 'key': f'otv.web.{station}.story', | ||||||
|                 'station': station, |                 'station': station, | ||||||
|             })['data'] |             })['data'] | ||||||
|         video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data |         video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data | ||||||
|         video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id)) |         video_id = str(dict_get(video, ('id', 'publishedKey'), video_id)) | ||||||
|         title = video.get('title') or video['linkText'] |         title = video.get('title') or video['linkText'] | ||||||
|  |  | ||||||
|         formats = [] |         formats = [] | ||||||
|   | |||||||
| @@ -6,53 +6,54 @@ import hmac | |||||||
| import io | import io | ||||||
| import json | import json | ||||||
| import re | import re | ||||||
| import struct |  | ||||||
| import time | import time | ||||||
| import urllib.parse | import urllib.parse | ||||||
| import urllib.request |  | ||||||
| import urllib.response |  | ||||||
| import uuid | import uuid | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..aes import aes_ecb_decrypt | from ..aes import aes_ecb_decrypt | ||||||
|  | from ..networking import RequestHandler, Response | ||||||
|  | from ..networking.exceptions import TransportError | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     bytes_to_intlist, |     OnDemandPagedList, | ||||||
|     decode_base_n, |     decode_base_n, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     intlist_to_bytes, |  | ||||||
|     OnDemandPagedList, |  | ||||||
|     time_seconds, |     time_seconds, | ||||||
|     traverse_obj, |     traverse_obj, | ||||||
|  |     update_url, | ||||||
|     update_url_query, |     update_url_query, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| def add_opener(ydl, handler):  # FIXME: Create proper API in .networking | class AbemaLicenseRH(RequestHandler): | ||||||
|     """Add a handler for opening URLs, like _download_webpage""" |     _SUPPORTED_URL_SCHEMES = ('abematv-license',) | ||||||
|     # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426 |     _SUPPORTED_PROXY_SCHEMES = None | ||||||
|     # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605 |     _SUPPORTED_FEATURES = None | ||||||
|     rh = ydl._request_director.handlers['Urllib'] |     RH_NAME = 'abematv_license' | ||||||
|     if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES: |  | ||||||
|         return |  | ||||||
|     opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies) |  | ||||||
|     assert isinstance(opener, urllib.request.OpenerDirector) |  | ||||||
|     opener.add_handler(handler) |  | ||||||
|     rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license') |  | ||||||
|  |  | ||||||
|  |     _STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' | ||||||
|  |     _HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E' | ||||||
|  |  | ||||||
| class AbemaLicenseHandler(urllib.request.BaseHandler): |     def __init__(self, *, ie: 'AbemaTVIE', **kwargs): | ||||||
|     handler_order = 499 |         super().__init__(**kwargs) | ||||||
|     STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' |  | ||||||
|     HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E' |  | ||||||
|  |  | ||||||
|     def __init__(self, ie: 'AbemaTVIE'): |  | ||||||
|         # the protocol that this should really handle is 'abematv-license://' |  | ||||||
|         # abematv_license_open is just a placeholder for development purposes |  | ||||||
|         # ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510 |  | ||||||
|         setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open')) |  | ||||||
|         self.ie = ie |         self.ie = ie | ||||||
|  |  | ||||||
|  |     def _send(self, request): | ||||||
|  |         url = request.url | ||||||
|  |         ticket = urllib.parse.urlparse(url).netloc | ||||||
|  |  | ||||||
|  |         try: | ||||||
|  |             response_data = self._get_videokey_from_ticket(ticket) | ||||||
|  |         except ExtractorError as e: | ||||||
|  |             raise TransportError(cause=e.cause) from e | ||||||
|  |         except (IndexError, KeyError, TypeError) as e: | ||||||
|  |             raise TransportError(cause=repr(e)) from e | ||||||
|  |  | ||||||
|  |         return Response( | ||||||
|  |             io.BytesIO(response_data), url, | ||||||
|  |             headers={'Content-Length': str(len(response_data))}) | ||||||
|  |  | ||||||
|     def _get_videokey_from_ticket(self, ticket): |     def _get_videokey_from_ticket(self, ticket): | ||||||
|         to_show = self.ie.get_param('verbose', False) |         to_show = self.ie.get_param('verbose', False) | ||||||
|         media_token = self.ie._get_media_token(to_show=to_show) |         media_token = self.ie._get_media_token(to_show=to_show) | ||||||
| @@ -62,33 +63,27 @@ class AbemaLicenseHandler(urllib.request.BaseHandler): | |||||||
|             query={'t': media_token}, |             query={'t': media_token}, | ||||||
|             data=json.dumps({ |             data=json.dumps({ | ||||||
|                 'kv': 'a', |                 'kv': 'a', | ||||||
|                 'lt': ticket |                 'lt': ticket, | ||||||
|             }).encode('utf-8'), |             }).encode(), | ||||||
|             headers={ |             headers={ | ||||||
|                 'Content-Type': 'application/json', |                 'Content-Type': 'application/json', | ||||||
|             }) |             }) | ||||||
|  |  | ||||||
|         res = decode_base_n(license_response['k'], table=self.STRTABLE) |         res = decode_base_n(license_response['k'], table=self._STRTABLE) | ||||||
|         encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff)) |         encvideokey = list(res.to_bytes(16, 'big')) | ||||||
|  |  | ||||||
|         h = hmac.new( |         h = hmac.new( | ||||||
|             binascii.unhexlify(self.HKEY), |             binascii.unhexlify(self._HKEY), | ||||||
|             (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'), |             (license_response['cid'] + self.ie._DEVICE_ID).encode(), | ||||||
|             digestmod=hashlib.sha256) |             digestmod=hashlib.sha256) | ||||||
|         enckey = bytes_to_intlist(h.digest()) |         enckey = list(h.digest()) | ||||||
|  |  | ||||||
|         return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey)) |         return bytes(aes_ecb_decrypt(encvideokey, enckey)) | ||||||
|  |  | ||||||
|     def abematv_license_open(self, url): |  | ||||||
|         url = url.get_full_url() if isinstance(url, urllib.request.Request) else url |  | ||||||
|         ticket = urllib.parse.urlparse(url).netloc |  | ||||||
|         response_data = self._get_videokey_from_ticket(ticket) |  | ||||||
|         return urllib.response.addinfourl(io.BytesIO(response_data), headers={ |  | ||||||
|             'Content-Length': str(len(response_data)), |  | ||||||
|         }, url=url, code=200) |  | ||||||
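The handler above replaces the urllib opener hack with a first-class networking RequestHandler: the ticket still travels in the netloc of an abematv-license:// URL, and the key derivation itself is unchanged. A sketch of that derivation alone, assuming yt-dlp is importable; the ticket string, cid, and device id below are placeholders, not real license data:

import binascii
import hashlib
import hmac

from yt_dlp.aes import aes_ecb_decrypt
from yt_dlp.utils import decode_base_n

_STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
_HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'

def derive_videokey(k, cid, device_id):
    # 'k' is a base-59 number over _STRTABLE holding the 16-byte encrypted key
    encvideokey = list(decode_base_n(k, table=_STRTABLE).to_bytes(16, 'big'))
    # the AES key is HMAC-SHA256(unhexlify(_HKEY), cid + device_id)
    enckey = list(hmac.new(binascii.unhexlify(_HKEY), (cid + device_id).encode(),
                           digestmod=hashlib.sha256).digest())
    return bytes(aes_ecb_decrypt(encvideokey, enckey))

print(derive_videokey('2Ymn9BWVSSJZ4eAGv2rK', 'placeholder-cid', 'placeholder-device').hex())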
|  |  | ||||||
|  |  | ||||||
| class AbemaTVBaseIE(InfoExtractor): | class AbemaTVBaseIE(InfoExtractor): | ||||||
|  |     _NETRC_MACHINE = 'abematv' | ||||||
|  |  | ||||||
|     _USERTOKEN = None |     _USERTOKEN = None | ||||||
|     _DEVICE_ID = None |     _DEVICE_ID = None | ||||||
|     _MEDIATOKEN = None |     _MEDIATOKEN = None | ||||||
| @@ -97,11 +92,11 @@ class AbemaTVBaseIE(InfoExtractor): | |||||||
|  |  | ||||||
|     @classmethod |     @classmethod | ||||||
|     def _generate_aks(cls, deviceid): |     def _generate_aks(cls, deviceid): | ||||||
|         deviceid = deviceid.encode('utf-8') |         deviceid = deviceid.encode() | ||||||
|         # add 1 hour and then drop minutes and seconds |         # add 1 hour and then drop minutes and seconds | ||||||
|         ts_1hour = int((time_seconds() // 3600 + 1) * 3600) |         ts_1hour = int((time_seconds() // 3600 + 1) * 3600) | ||||||
|         time_struct = time.gmtime(ts_1hour) |         time_struct = time.gmtime(ts_1hour) | ||||||
|         ts_1hour_str = str(ts_1hour).encode('utf-8') |         ts_1hour_str = str(ts_1hour).encode() | ||||||
|  |  | ||||||
|         tmp = None |         tmp = None | ||||||
|  |  | ||||||
| @@ -113,7 +108,7 @@ class AbemaTVBaseIE(InfoExtractor): | |||||||
|  |  | ||||||
|         def mix_tmp(count): |         def mix_tmp(count): | ||||||
|             nonlocal tmp |             nonlocal tmp | ||||||
|             for i in range(count): |             for _ in range(count): | ||||||
|                 mix_once(tmp) |                 mix_once(tmp) | ||||||
|  |  | ||||||
|         def mix_twist(nonce): |         def mix_twist(nonce): | ||||||
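For reference, the hour-bucket timestamp above reduces to plain integer arithmetic; with time.time() standing in for yt-dlp's time_seconds() helper (sketch):

import time

now = int(time.time())
ts_1hour = (now // 3600 + 1) * 3600  # floor to the hour, then step one hour forward
assert ts_1hour % 3600 == 0 and 0 < ts_1hour - now <= 3600
print(str(ts_1hour).encode())        # this is the ts_1hour_str fed into the mixer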
| @@ -133,11 +128,15 @@ class AbemaTVBaseIE(InfoExtractor): | |||||||
|         if self._USERTOKEN: |         if self._USERTOKEN: | ||||||
|             return self._USERTOKEN |             return self._USERTOKEN | ||||||
|  |  | ||||||
|  |         self._downloader._request_director.add_handler(AbemaLicenseRH(ie=self, logger=None)) | ||||||
|  |  | ||||||
|         username, _ = self._get_login_info() |         username, _ = self._get_login_info() | ||||||
|         AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username) |         auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19') | ||||||
|  |         AbemaTVBaseIE._USERTOKEN = auth_cache and auth_cache.get('usertoken') | ||||||
|         if AbemaTVBaseIE._USERTOKEN: |         if AbemaTVBaseIE._USERTOKEN: | ||||||
|             # try authentication with locally stored token |             # try authentication with locally stored token | ||||||
|             try: |             try: | ||||||
|  |                 AbemaTVBaseIE._DEVICE_ID = auth_cache.get('device_id') | ||||||
|                 self._get_media_token(True) |                 self._get_media_token(True) | ||||||
|                 return |                 return | ||||||
|             except ExtractorError as e: |             except ExtractorError as e: | ||||||
| @@ -150,13 +149,12 @@ class AbemaTVBaseIE(InfoExtractor): | |||||||
|             data=json.dumps({ |             data=json.dumps({ | ||||||
|                 'deviceId': self._DEVICE_ID, |                 'deviceId': self._DEVICE_ID, | ||||||
|                 'applicationKeySecret': aks, |                 'applicationKeySecret': aks, | ||||||
|             }).encode('utf-8'), |             }).encode(), | ||||||
|             headers={ |             headers={ | ||||||
|                 'Content-Type': 'application/json', |                 'Content-Type': 'application/json', | ||||||
|             }) |             }) | ||||||
|         AbemaTVBaseIE._USERTOKEN = user_data['token'] |         AbemaTVBaseIE._USERTOKEN = user_data['token'] | ||||||
|  |  | ||||||
|         add_opener(self._downloader, AbemaLicenseHandler(self)) |  | ||||||
|         return self._USERTOKEN |         return self._USERTOKEN | ||||||
|  |  | ||||||
|     def _get_media_token(self, invalidate=False, to_show=True): |     def _get_media_token(self, invalidate=False, to_show=True): | ||||||
| @@ -171,13 +169,44 @@ class AbemaTVBaseIE(InfoExtractor): | |||||||
|                 'osLang': 'ja_JP', |                 'osLang': 'ja_JP', | ||||||
|                 'osTimezone': 'Asia/Tokyo', |                 'osTimezone': 'Asia/Tokyo', | ||||||
|                 'appId': 'tv.abema', |                 'appId': 'tv.abema', | ||||||
|                 'appVersion': '3.27.1' |                 'appVersion': '3.27.1', | ||||||
|             }, headers={ |             }, headers={ | ||||||
|                 'Authorization': f'bearer {self._get_device_token()}', |                 'Authorization': f'bearer {self._get_device_token()}', | ||||||
|             })['token'] |             })['token'] | ||||||
|  |  | ||||||
|         return self._MEDIATOKEN |         return self._MEDIATOKEN | ||||||
|  |  | ||||||
|  |     def _perform_login(self, username, password): | ||||||
|  |         self._get_device_token() | ||||||
|  |         if self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19') and self._get_media_token(): | ||||||
|  |             self.write_debug('Skipping logging in') | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |         if '@' in username:  # don't strictly check whether it's an email address | ||||||
|  |             ep, method = 'user/email', 'email' | ||||||
|  |         else: | ||||||
|  |             ep, method = 'oneTimePassword', 'userId' | ||||||
|  |  | ||||||
|  |         login_response = self._download_json( | ||||||
|  |             f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in', | ||||||
|  |             data=json.dumps({ | ||||||
|  |                 method: username, | ||||||
|  |                 'password': password, | ||||||
|  |             }).encode(), headers={ | ||||||
|  |                 'Authorization': f'bearer {self._get_device_token()}', | ||||||
|  |                 'Origin': 'https://abema.tv', | ||||||
|  |                 'Referer': 'https://abema.tv/', | ||||||
|  |                 'Content-Type': 'application/json', | ||||||
|  |             }) | ||||||
|  |  | ||||||
|  |         AbemaTVBaseIE._USERTOKEN = login_response['token'] | ||||||
|  |         self._get_media_token(True) | ||||||
|  |         auth_cache = { | ||||||
|  |             'device_id': AbemaTVBaseIE._DEVICE_ID, | ||||||
|  |             'usertoken': AbemaTVBaseIE._USERTOKEN, | ||||||
|  |         } | ||||||
|  |         self.cache.store(self._NETRC_MACHINE, username, auth_cache) | ||||||
|  |  | ||||||
|     def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'): |     def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'): | ||||||
|         return self._download_json( |         return self._download_json( | ||||||
|             f'https://api.abema.io/{endpoint}', video_id, query=query or {}, |             f'https://api.abema.io/{endpoint}', video_id, query=query or {}, | ||||||
| @@ -201,14 +230,14 @@ class AbemaTVBaseIE(InfoExtractor): | |||||||
|  |  | ||||||
| class AbemaTVIE(AbemaTVBaseIE): | class AbemaTVIE(AbemaTVBaseIE): | ||||||
|     _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)' |     _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)' | ||||||
|     _NETRC_MACHINE = 'abematv' |  | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://abema.tv/video/episode/194-25_s2_p1', |         'url': 'https://abema.tv/video/episode/194-25_s2_p1', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '194-25_s2_p1', |             'id': '194-25_s2_p1', | ||||||
|             'title': '第1話 「チーズケーキ」 「モーニング再び」', |             'title': '第1話 「チーズケーキ」 「モーニング再び」', | ||||||
|             'series': '異世界食堂2', |             'series': '異世界食堂2', | ||||||
|             'series_number': 2, |             'season': 'シーズン2', | ||||||
|  |             'season_number': 2, | ||||||
|             'episode': '第1話 「チーズケーキ」 「モーニング再び」', |             'episode': '第1話 「チーズケーキ」 「モーニング再び」', | ||||||
|             'episode_number': 1, |             'episode_number': 1, | ||||||
|         }, |         }, | ||||||
| @@ -220,7 +249,7 @@ class AbemaTVIE(AbemaTVBaseIE): | |||||||
|             'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】', |             'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】', | ||||||
|             'series': 'ゆるキャン△ SEASON2', |             'series': 'ゆるキャン△ SEASON2', | ||||||
|             'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】', |             'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】', | ||||||
|             'series_number': 2, |             'season_number': 2, | ||||||
|             'episode_number': 1, |             'episode_number': 1, | ||||||
|             'description': 'md5:9c5a3172ae763278f9303922f0ea5b17', |             'description': 'md5:9c5a3172ae763278f9303922f0ea5b17', | ||||||
|         }, |         }, | ||||||
| @@ -249,33 +278,6 @@ class AbemaTVIE(AbemaTVBaseIE): | |||||||
|     }] |     }] | ||||||
|     _TIMETABLE = None |     _TIMETABLE = None | ||||||
|  |  | ||||||
|     def _perform_login(self, username, password): |  | ||||||
|         self._get_device_token() |  | ||||||
|         if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token(): |  | ||||||
|             self.write_debug('Skipping logging in') |  | ||||||
|             return |  | ||||||
|  |  | ||||||
|         if '@' in username:  # don't strictly check if it's email address or not |  | ||||||
|             ep, method = 'user/email', 'email' |  | ||||||
|         else: |  | ||||||
|             ep, method = 'oneTimePassword', 'userId' |  | ||||||
|  |  | ||||||
|         login_response = self._download_json( |  | ||||||
|             f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in', |  | ||||||
|             data=json.dumps({ |  | ||||||
|                 method: username, |  | ||||||
|                 'password': password |  | ||||||
|             }).encode('utf-8'), headers={ |  | ||||||
|                 'Authorization': f'bearer {self._get_device_token()}', |  | ||||||
|                 'Origin': 'https://abema.tv', |  | ||||||
|                 'Referer': 'https://abema.tv/', |  | ||||||
|                 'Content-Type': 'application/json', |  | ||||||
|             }) |  | ||||||
|  |  | ||||||
|         AbemaTVBaseIE._USERTOKEN = login_response['token'] |  | ||||||
|         self._get_media_token(True) |  | ||||||
|         self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN) |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         # starting a download using the infojson from this extractor is undefined behavior, |         # starting a download using the infojson from this extractor is undefined behavior, | ||||||
|         # and will never be fixed; you must trigger downloads by directly specifying the URL. |         # and will never be fixed; you must trigger downloads by directly specifying the URL. | ||||||
| @@ -331,7 +333,7 @@ class AbemaTVIE(AbemaTVBaseIE): | |||||||
|  |  | ||||||
|         description = self._html_search_regex( |         description = self._html_search_regex( | ||||||
|             (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div', |             (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div', | ||||||
|              r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',), |              r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div'), | ||||||
|             webpage, 'description', default=None, group=1) |             webpage, 'description', default=None, group=1) | ||||||
|         if not description: |         if not description: | ||||||
|             og_desc = self._html_search_meta( |             og_desc = self._html_search_meta( | ||||||
| @@ -344,17 +346,18 @@ class AbemaTVIE(AbemaTVBaseIE): | |||||||
|                     )? |                     )? | ||||||
|                 ''', r'\1', og_desc) |                 ''', r'\1', og_desc) | ||||||
|  |  | ||||||
|         # canonical URL may contain series and episode number |         # canonical URL may contain season and episode number | ||||||
|         mobj = re.search(r's(\d+)_p(\d+)$', canonical_url) |         mobj = re.search(r's(\d+)_p(\d+)$', canonical_url) | ||||||
|         if mobj: |         if mobj: | ||||||
|             seri = int_or_none(mobj.group(1), default=float('inf')) |             seri = int_or_none(mobj.group(1), default=float('inf')) | ||||||
|             epis = int_or_none(mobj.group(2), default=float('inf')) |             epis = int_or_none(mobj.group(2), default=float('inf')) | ||||||
|             info['series_number'] = seri if seri < 100 else None |             info['season_number'] = seri if seri < 100 else None | ||||||
|             # some anime like Detective Conan (though not available in AbemaTV) |             # some anime like Detective Conan (though not available in AbemaTV) | ||||||
|             # has more than 1000 episodes (1026 as of 2021/11/15) |             # has more than 1000 episodes (1026 as of 2021/11/15) | ||||||
|             info['episode_number'] = epis if epis < 2000 else None |             info['episode_number'] = epis if epis < 2000 else None | ||||||
|  |  | ||||||
|         is_live, m3u8_url = False, None |         is_live, m3u8_url = False, None | ||||||
|  |         availability = 'public' | ||||||
|         if video_type == 'now-on-air': |         if video_type == 'now-on-air': | ||||||
|             is_live = True |             is_live = True | ||||||
|             channel_url = 'https://api.abema.io/v1/channels' |             channel_url = 'https://api.abema.io/v1/channels' | ||||||
| @@ -372,13 +375,13 @@ class AbemaTVIE(AbemaTVBaseIE): | |||||||
|                 f'https://api.abema.io/v1/video/programs/{video_id}', video_id, |                 f'https://api.abema.io/v1/video/programs/{video_id}', video_id, | ||||||
|                 note='Checking playability', |                 note='Checking playability', | ||||||
|                 headers=headers) |                 headers=headers) | ||||||
|             ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType')) |             if not traverse_obj(api_response, ('label', 'free', {bool})): | ||||||
|             if 3 not in ondemand_types: |  | ||||||
|                 # cannot acquire decryption key for these streams |                 # cannot acquire decryption key for these streams | ||||||
|                 self.report_warning('This is a premium-only stream') |                 self.report_warning('This is a premium-only stream') | ||||||
|  |                 availability = 'premium_only' | ||||||
|             info.update(traverse_obj(api_response, { |             info.update(traverse_obj(api_response, { | ||||||
|                 'series': ('series', 'title'), |                 'series': ('series', 'title'), | ||||||
|                 'season': ('season', 'title'), |                 'season': ('season', 'name'), | ||||||
|                 'season_number': ('season', 'sequence'), |                 'season_number': ('season', 'sequence'), | ||||||
|                 'episode_number': ('episode', 'number'), |                 'episode_number': ('episode', 'number'), | ||||||
|             })) |             })) | ||||||
| @@ -395,6 +398,7 @@ class AbemaTVIE(AbemaTVBaseIE): | |||||||
|                 headers=headers) |                 headers=headers) | ||||||
|             if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False): |             if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False): | ||||||
|                 self.report_warning('This is a premium-only stream') |                 self.report_warning('This is a premium-only stream') | ||||||
|  |                 availability = 'premium_only' | ||||||
|  |  | ||||||
|             m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8' |             m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8' | ||||||
|         else: |         else: | ||||||
| @@ -412,19 +416,25 @@ class AbemaTVIE(AbemaTVBaseIE): | |||||||
|             'description': description, |             'description': description, | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|             'is_live': is_live, |             'is_live': is_live, | ||||||
|  |             'availability': availability, | ||||||
|         }) |         }) | ||||||
|  |  | ||||||
|  |         if thumbnail := update_url(self._og_search_thumbnail(webpage, default=''), query=None): | ||||||
|  |             info['thumbnails'] = [{'url': thumbnail}] | ||||||
|  |  | ||||||
|         return info |         return info | ||||||
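The new thumbnail guard keeps the og:image URL only after dropping its query string; update_url(..., query=None) is yt-dlp's helper for that, and the stdlib equivalent is (sketch, with a made-up URL):

from urllib.parse import urlsplit, urlunsplit

def strip_query(url):
    # Same effect as update_url(url, query=None): keep scheme/host/path, drop '?...'
    return urlunsplit(urlsplit(url)._replace(query=''))

print(strip_query('https://example.com/thumbs/episode.jpg?width=600'))  # made-up URL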
|  |  | ||||||
|  |  | ||||||
| class AbemaTVTitleIE(AbemaTVBaseIE): | class AbemaTVTitleIE(AbemaTVBaseIE): | ||||||
|     _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)' |     _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/#]+)/?(?:\?(?:[^#]+&)?s=(?P<season>[^&#]+))?' | ||||||
|     _PAGE_SIZE = 25 |     _PAGE_SIZE = 25 | ||||||
|  |  | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://abema.tv/video/title/90-1597', |         'url': 'https://abema.tv/video/title/90-1887', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '90-1597', |             'id': '90-1887', | ||||||
|             'title': 'シャッフルアイランド', |             'title': 'シャッフルアイランド', | ||||||
|  |             'description': 'md5:61b2425308f41a5282a926edda66f178', | ||||||
|         }, |         }, | ||||||
|         'playlist_mincount': 2, |         'playlist_mincount': 2, | ||||||
|     }, { |     }, { | ||||||
| @@ -432,41 +442,54 @@ class AbemaTVTitleIE(AbemaTVBaseIE): | |||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '193-132', |             'id': '193-132', | ||||||
|             'title': '真心が届く~僕とスターのオフィス・ラブ!?~', |             'title': '真心が届く~僕とスターのオフィス・ラブ!?~', | ||||||
|  |             'description': 'md5:9b59493d1f3a792bafbc7319258e7af8', | ||||||
|         }, |         }, | ||||||
|         'playlist_mincount': 16, |         'playlist_mincount': 16, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://abema.tv/video/title/25-102', |         'url': 'https://abema.tv/video/title/25-1nzan-whrxe', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '25-102', |             'id': '25-1nzan-whrxe', | ||||||
|             'title': 'ソードアート・オンライン アリシゼーション', |             'title': 'ソードアート・オンライン', | ||||||
|  |             'description': 'md5:c094904052322e6978495532bdbf06e6', | ||||||
|         }, |         }, | ||||||
|         'playlist_mincount': 24, |         'playlist_mincount': 25, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://abema.tv/video/title/26-2mzbynr-cph?s=26-2mzbynr-cph_s40', | ||||||
|  |         'info_dict': { | ||||||
|  |             'title': '〈物語〉シリーズ', | ||||||
|  |             'id': '26-2mzbynr-cph', | ||||||
|  |             'description': 'md5:e67873de1c88f360af1f0a4b84847a52', | ||||||
|  |         }, | ||||||
|  |         'playlist_count': 59, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _fetch_page(self, playlist_id, series_version, page): |     def _fetch_page(self, playlist_id, series_version, season_id, page): | ||||||
|  |         query = { | ||||||
|  |             'seriesVersion': series_version, | ||||||
|  |             'offset': str(page * self._PAGE_SIZE), | ||||||
|  |             'order': 'seq', | ||||||
|  |             'limit': str(self._PAGE_SIZE), | ||||||
|  |         } | ||||||
|  |         if season_id: | ||||||
|  |             query['seasonId'] = season_id | ||||||
|         programs = self._call_api( |         programs = self._call_api( | ||||||
|             f'v1/video/series/{playlist_id}/programs', playlist_id, |             f'v1/video/series/{playlist_id}/programs', playlist_id, | ||||||
|             note=f'Downloading page {page + 1}', |             note=f'Downloading page {page + 1}', | ||||||
|             query={ |             query=query) | ||||||
|                 'seriesVersion': series_version, |  | ||||||
|                 'offset': str(page * self._PAGE_SIZE), |  | ||||||
|                 'order': 'seq', |  | ||||||
|                 'limit': str(self._PAGE_SIZE), |  | ||||||
|             }) |  | ||||||
|         yield from ( |         yield from ( | ||||||
|             self.url_result(f'https://abema.tv/video/episode/{x}') |             self.url_result(f'https://abema.tv/video/episode/{x}') | ||||||
|             for x in traverse_obj(programs, ('programs', ..., 'id'))) |             for x in traverse_obj(programs, ('programs', ..., 'id'))) | ||||||
|  |  | ||||||
|     def _entries(self, playlist_id, series_version): |     def _entries(self, playlist_id, series_version, season_id): | ||||||
|         return OnDemandPagedList( |         return OnDemandPagedList( | ||||||
|             functools.partial(self._fetch_page, playlist_id, series_version), |             functools.partial(self._fetch_page, playlist_id, series_version, season_id), | ||||||
|             self._PAGE_SIZE) |             self._PAGE_SIZE) | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         playlist_id = self._match_id(url) |         playlist_id, season_id = self._match_valid_url(url).group('id', 'season') | ||||||
|         series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id) |         series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id) | ||||||
|  |  | ||||||
|         return self.playlist_result( |         return self.playlist_result( | ||||||
|             self._entries(playlist_id, series_info['version']), playlist_id=playlist_id, |             self._entries(playlist_id, series_info['version'], season_id), playlist_id=playlist_id, | ||||||
|             playlist_title=series_info.get('title'), |             playlist_title=series_info.get('title'), | ||||||
|             playlist_description=series_info.get('content')) |             playlist_description=series_info.get('content')) | ||||||
|   | |||||||
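The season-aware pagination keeps the OnDemandPagedList shape intact: a page function partially applied over everything except the page number. A self-contained sketch of that pattern, assuming yt-dlp is importable; the in-memory EPISODE_IDS list stands in for the paged API:

import functools

from yt_dlp.utils import OnDemandPagedList

PAGE_SIZE = 25
EPISODE_IDS = [f'ep-{i}' for i in range(59)]  # fake data; 59 mirrors the new playlist_count

def fetch_page(season_id, page):
    # Mirrors _fetch_page: offset/limit windowing, one URL per episode.
    offset = page * PAGE_SIZE
    for ep in EPISODE_IDS[offset:offset + PAGE_SIZE]:
        yield f'https://abema.tv/video/episode/{ep}'

entries = OnDemandPagedList(functools.partial(fetch_page, None), PAGE_SIZE)
print([entries[i] for i in range(3)])  # only page 0 is actually fetched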
| @@ -4,7 +4,7 @@ from .common import InfoExtractor | |||||||
|  |  | ||||||
|  |  | ||||||
| class AcademicEarthCourseIE(InfoExtractor): | class AcademicEarthCourseIE(InfoExtractor): | ||||||
|     _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)' |     _VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)' | ||||||
|     IE_NAME = 'AcademicEarth:Course' |     IE_NAME = 'AcademicEarth:Course' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://academicearth.org/playlists/laws-of-nature/', |         'url': 'http://academicearth.org/playlists/laws-of-nature/', | ||||||
|   | |||||||
| @@ -43,14 +43,14 @@ class ACastIE(ACastBaseIE): | |||||||
|     _VALID_URL = r'''(?x: |     _VALID_URL = r'''(?x: | ||||||
|                     https?:// |                     https?:// | ||||||
|                         (?: |                         (?: | ||||||
|                             (?:(?:embed|www)\.)?acast\.com/| |                             (?:(?:embed|www|shows)\.)?acast\.com/| | ||||||
|                             play\.acast\.com/s/ |                             play\.acast\.com/s/ | ||||||
|                         ) |                         ) | ||||||
|                         (?P<channel>[^/]+)/(?P<id>[^/#?"]+) |                         (?P<channel>[^/?#]+)/(?:episodes/)?(?P<id>[^/#?"]+) | ||||||
|                     )''' |                     )''' | ||||||
|     _EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})'] |     _EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})'] | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna', |         'url': 'https://shows.acast.com/sparpodcast/episodes/2.raggarmordet-rosterurdetforflutna', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '2a92b283-1a75-4ad8-8396-499c641de0d9', |             'id': '2a92b283-1a75-4ad8-8396-499c641de0d9', | ||||||
|             'ext': 'mp3', |             'ext': 'mp3', | ||||||
| @@ -59,7 +59,7 @@ class ACastIE(ACastBaseIE): | |||||||
|             'timestamp': 1477346700, |             'timestamp': 1477346700, | ||||||
|             'upload_date': '20161024', |             'upload_date': '20161024', | ||||||
|             'duration': 2766, |             'duration': 2766, | ||||||
|             'creator': 'Third Ear Studio', |             'creators': ['Third Ear Studio'], | ||||||
|             'series': 'Spår', |             'series': 'Spår', | ||||||
|             'episode': '2. Raggarmordet - Röster ur det förflutna', |             'episode': '2. Raggarmordet - Röster ur det förflutna', | ||||||
|             'thumbnail': 'https://assets.pippa.io/shows/616ebe1886d7b1398620b943/616ebe33c7e6e70013cae7da.jpg', |             'thumbnail': 'https://assets.pippa.io/shows/616ebe1886d7b1398620b943/616ebe33c7e6e70013cae7da.jpg', | ||||||
| @@ -67,13 +67,16 @@ class ACastIE(ACastBaseIE): | |||||||
|             'display_id': '2.raggarmordet-rosterurdetforflutna', |             'display_id': '2.raggarmordet-rosterurdetforflutna', | ||||||
|             'season_number': 4, |             'season_number': 4, | ||||||
|             'season': 'Season 4', |             'season': 'Season 4', | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015', |         'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2', |         'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna', | ||||||
|  |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9', |         'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
| @@ -93,13 +96,13 @@ class ACastIE(ACastBaseIE): | |||||||
|             'series': 'Democracy Sausage with Mark Kenny', |             'series': 'Democracy Sausage with Mark Kenny', | ||||||
|             'timestamp': 1684826362, |             'timestamp': 1684826362, | ||||||
|             'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16', |             'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16', | ||||||
|         } |         }, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         channel, display_id = self._match_valid_url(url).groups() |         channel, display_id = self._match_valid_url(url).groups() | ||||||
|         episode = self._call_api( |         episode = self._call_api( | ||||||
|             '%s/episodes/%s' % (channel, display_id), |             f'{channel}/episodes/{display_id}', | ||||||
|             display_id, {'showInfo': 'true'}) |             display_id, {'showInfo': 'true'}) | ||||||
|         return self._extract_episode( |         return self._extract_episode( | ||||||
|             episode, self._extract_show_info(episode.get('show') or {})) |             episode, self._extract_show_info(episode.get('show') or {})) | ||||||
| @@ -110,7 +113,7 @@ class ACastChannelIE(ACastBaseIE): | |||||||
|     _VALID_URL = r'''(?x) |     _VALID_URL = r'''(?x) | ||||||
|                     https?:// |                     https?:// | ||||||
|                         (?: |                         (?: | ||||||
|                             (?:www\.)?acast\.com/| |                             (?:(?:www|shows)\.)?acast\.com/| | ||||||
|                             play\.acast\.com/s/ |                             play\.acast\.com/s/ | ||||||
|                         ) |                         ) | ||||||
|                         (?P<id>[^/#?]+) |                         (?P<id>[^/#?]+) | ||||||
| @@ -120,17 +123,20 @@ class ACastChannelIE(ACastBaseIE): | |||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '4efc5294-5385-4847-98bd-519799ce5786', |             'id': '4efc5294-5385-4847-98bd-519799ce5786', | ||||||
|             'title': 'Today in Focus', |             'title': 'Today in Focus', | ||||||
|             'description': 'md5:c09ce28c91002ce4ffce71d6504abaae', |             'description': 'md5:feca253de9947634605080cd9eeea2bf', | ||||||
|         }, |         }, | ||||||
|         'playlist_mincount': 200, |         'playlist_mincount': 200, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://play.acast.com/s/ft-banking-weekly', |         'url': 'http://play.acast.com/s/ft-banking-weekly', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://shows.acast.com/sparpodcast', | ||||||
|  |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     @classmethod |     @classmethod | ||||||
|     def suitable(cls, url): |     def suitable(cls, url): | ||||||
|         return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url) |         return False if ACastIE.suitable(url) else super().suitable(url) | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         show_slug = self._match_id(url) |         show_slug = self._match_id(url) | ||||||
|   | |||||||
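The widened _VALID_URL above can be sanity-checked on its own; a sketch matching both the new shows.acast.com/.../episodes/... layout and the legacy www.acast.com layout from the tests:

import re

ACAST_VALID_URL = r'''(?x:
    https?://
        (?:
            (?:(?:embed|www|shows)\.)?acast\.com/|
            play\.acast\.com/s/
        )
        (?P<channel>[^/?#]+)/(?:episodes/)?(?P<id>[^/#?"]+)
    )'''

for url in ('https://shows.acast.com/sparpodcast/episodes/2.raggarmordet-rosterurdetforflutna',
            'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna'):
    m = re.match(ACAST_VALID_URL, url)
    print(m.group('channel'), m.group('id'))  # both -> sparpodcast 2.raggarmordet-...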
| @@ -3,9 +3,10 @@ from ..utils import ( | |||||||
|     float_or_none, |     float_or_none, | ||||||
|     format_field, |     format_field, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     traverse_obj, |  | ||||||
|     parse_codecs, |     parse_codecs, | ||||||
|     parse_qs, |     parse_qs, | ||||||
|  |     str_or_none, | ||||||
|  |     traverse_obj, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -24,7 +25,7 @@ class AcFunVideoBaseIE(InfoExtractor): | |||||||
|                     'width': int_or_none(video.get('width')), |                     'width': int_or_none(video.get('width')), | ||||||
|                     'height': int_or_none(video.get('height')), |                     'height': int_or_none(video.get('height')), | ||||||
|                     'tbr': float_or_none(video.get('avgBitrate')), |                     'tbr': float_or_none(video.get('avgBitrate')), | ||||||
|                     **parse_codecs(video.get('codecs', '')) |                     **parse_codecs(video.get('codecs', '')), | ||||||
|                 }) |                 }) | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
| @@ -76,7 +77,7 @@ class AcFunVideoIE(AcFunVideoBaseIE): | |||||||
|             'comment_count': int, |             'comment_count': int, | ||||||
|             'thumbnail': r're:^https?://.*\.(jpg|jpeg)', |             'thumbnail': r're:^https?://.*\.(jpg|jpeg)', | ||||||
|             'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17', |             'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17', | ||||||
|         } |         }, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
| @@ -129,7 +130,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE): | |||||||
|             'title': '红孩儿之趴趴蛙寻石记 第5话 ', |             'title': '红孩儿之趴趴蛙寻石记 第5话 ', | ||||||
|             'duration': 760.0, |             'duration': 760.0, | ||||||
|             'season': '红孩儿之趴趴蛙寻石记', |             'season': '红孩儿之趴趴蛙寻石记', | ||||||
|             'season_id': 5023171, |             'season_id': '5023171', | ||||||
|             'season_number': 1,  # series has only 1 season |             'season_number': 1,  # series has only 1 season | ||||||
|             'episode': 'Episode 5', |             'episode': 'Episode 5', | ||||||
|             'episode_number': 5, |             'episode_number': 5, | ||||||
| @@ -146,7 +147,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE): | |||||||
|             'title': '叽歪老表(第二季) 第5话 坚不可摧', |             'title': '叽歪老表(第二季) 第5话 坚不可摧', | ||||||
|             'season': '叽歪老表(第二季)', |             'season': '叽歪老表(第二季)', | ||||||
|             'season_number': 2, |             'season_number': 2, | ||||||
|             'season_id': 6065485, |             'season_id': '6065485', | ||||||
|             'episode': '坚不可摧', |             'episode': '坚不可摧', | ||||||
|             'episode_number': 5, |             'episode_number': 5, | ||||||
|             'upload_date': '20220324', |             'upload_date': '20220324', | ||||||
| @@ -191,7 +192,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE): | |||||||
|             'title': json_bangumi_data.get('showTitle'), |             'title': json_bangumi_data.get('showTitle'), | ||||||
|             'thumbnail': json_bangumi_data.get('image'), |             'thumbnail': json_bangumi_data.get('image'), | ||||||
|             'season': json_bangumi_data.get('bangumiTitle'), |             'season': json_bangumi_data.get('bangumiTitle'), | ||||||
|             'season_id': season_id, |             'season_id': str_or_none(season_id), | ||||||
|             'season_number': season_number, |             'season_number': season_number, | ||||||
|             'episode': json_bangumi_data.get('title'), |             'episode': json_bangumi_data.get('title'), | ||||||
|             'episode_number': episode_number, |             'episode_number': episode_number, | ||||||
|   | |||||||
| @@ -3,33 +3,53 @@ import binascii | |||||||
| import json | import json | ||||||
| import os | import os | ||||||
| import random | import random | ||||||
|  | import time | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 | from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 | ||||||
| from ..compat import compat_b64decode |  | ||||||
| from ..networking.exceptions import HTTPError | from ..networking.exceptions import HTTPError | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     ass_subtitles_timecode, |  | ||||||
|     bytes_to_intlist, |  | ||||||
|     bytes_to_long, |  | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|  |     ass_subtitles_timecode, | ||||||
|  |     bytes_to_long, | ||||||
|     float_or_none, |     float_or_none, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     intlist_to_bytes, |     join_nonempty, | ||||||
|     long_to_bytes, |     long_to_bytes, | ||||||
|  |     parse_iso8601, | ||||||
|     pkcs1pad, |     pkcs1pad, | ||||||
|  |     str_or_none, | ||||||
|     strip_or_none, |     strip_or_none, | ||||||
|     try_get, |     try_get, | ||||||
|     unified_strdate, |     unified_strdate, | ||||||
|     urlencode_postdata, |     urlencode_postdata, | ||||||
| ) | ) | ||||||
|  | from ..utils.traversal import traverse_obj | ||||||
|  |  | ||||||
|  |  | ||||||
| class ADNIE(InfoExtractor): | class ADNBaseIE(InfoExtractor): | ||||||
|     IE_DESC = 'Animation Digital Network' |     IE_DESC = 'Animation Digital Network' | ||||||
|     _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)' |     _NETRC_MACHINE = 'animationdigitalnetwork' | ||||||
|  |     _BASE = 'animationdigitalnetwork.fr' | ||||||
|  |     _API_BASE_URL = f'https://gw.api.{_BASE}/' | ||||||
|  |     _PLAYER_BASE_URL = f'{_API_BASE_URL}player/' | ||||||
|  |     _HEADERS = {} | ||||||
|  |     _LOGIN_ERR_MESSAGE = 'Unable to log in' | ||||||
|  |     _RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537) | ||||||
|  |     _POS_ALIGN_MAP = { | ||||||
|  |         'start': 1, | ||||||
|  |         'end': 3, | ||||||
|  |     } | ||||||
|  |     _LINE_ALIGN_MAP = { | ||||||
|  |         'middle': 8, | ||||||
|  |         'end': 4, | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class ADNIE(ADNBaseIE): | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir', |         'url': 'https://animationdigitalnetwork.com/video/558-fruits-basket/9841-episode-1-a-ce-soir', | ||||||
|         'md5': '1c9ef066ceb302c86f80c2b371615261', |         'md5': '1c9ef066ceb302c86f80c2b371615261', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '9841', |             'id': '9841', | ||||||
| @@ -44,29 +64,32 @@ class ADNIE(InfoExtractor): | |||||||
|             'season_number': 1, |             'season_number': 1, | ||||||
|             'episode': 'À ce soir !', |             'episode': 'À ce soir !', | ||||||
|             'episode_number': 1, |             'episode_number': 1, | ||||||
|  |             'thumbnail': str, | ||||||
|  |             'season': 'Season 1', | ||||||
|         }, |         }, | ||||||
|         'skip': 'Only available in region (FR, ...)', |         'skip': 'Only available in French and German speaking Europe', | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', |         'url': 'https://animationdigitalnetwork.com/de/video/973-the-eminence-in-shadow/23550-folge-1', | ||||||
|         'only_matching': True, |         'md5': '5c5651bf5791fa6fcd7906012b9d94e8', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '23550', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'episode_number': 1, | ||||||
|  |             'duration': 1417, | ||||||
|  |             'release_date': '20231004', | ||||||
|  |             'series': 'The Eminence in Shadow', | ||||||
|  |             'season_number': 2, | ||||||
|  |             'episode': str, | ||||||
|  |             'title': str, | ||||||
|  |             'thumbnail': str, | ||||||
|  |             'season': 'Season 2', | ||||||
|  |             'comment_count': int, | ||||||
|  |             'average_rating': float, | ||||||
|  |             'description': str, | ||||||
|  |         }, | ||||||
|  |         # 'skip': 'Only available in French and German speaking Europe', | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     _NETRC_MACHINE = 'animationdigitalnetwork' |  | ||||||
|     _BASE = 'animationdigitalnetwork.fr' |  | ||||||
|     _API_BASE_URL = 'https://gw.api.' + _BASE + '/' |  | ||||||
|     _PLAYER_BASE_URL = _API_BASE_URL + 'player/' |  | ||||||
|     _HEADERS = {} |  | ||||||
|     _LOGIN_ERR_MESSAGE = 'Unable to log in' |  | ||||||
|     _RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537) |  | ||||||
|     _POS_ALIGN_MAP = { |  | ||||||
|         'start': 1, |  | ||||||
|         'end': 3, |  | ||||||
|     } |  | ||||||
|     _LINE_ALIGN_MAP = { |  | ||||||
|         'middle': 8, |  | ||||||
|         'end': 4, |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     def _get_subtitles(self, sub_url, video_id): |     def _get_subtitles(self, sub_url, video_id): | ||||||
|         if not sub_url: |         if not sub_url: | ||||||
|             return None |             return None | ||||||
| @@ -83,9 +106,9 @@ class ADNIE(InfoExtractor): | |||||||
|  |  | ||||||
|         # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js |         # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js | ||||||
|         dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes( |         dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes( | ||||||
|             compat_b64decode(enc_subtitles[24:]), |             base64.b64decode(enc_subtitles[24:]), | ||||||
|             binascii.unhexlify(self._K + '7fac1178830cfe0c'), |             binascii.unhexlify(self._K + '7fac1178830cfe0c'), | ||||||
|             compat_b64decode(enc_subtitles[:24]))) |             base64.b64decode(enc_subtitles[:24]))) | ||||||
|         subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False) |         subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False) | ||||||
|         if not subtitles_json: |         if not subtitles_json: | ||||||
|             return None |             return None | ||||||
| @@ -108,7 +131,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' | |||||||
|                 if start is None or end is None or text is None: |                 if start is None or end is None or text is None: | ||||||
|                     continue |                     continue | ||||||
|                 alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0) |                 alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0) | ||||||
|                 ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % ( |                 ssa += os.linesep + 'Dialogue: Marked=0,{},{},Default,,0,0,0,,{}{}'.format( | ||||||
|                     ass_subtitles_timecode(start), |                     ass_subtitles_timecode(start), | ||||||
|                     ass_subtitles_timecode(end), |                     ass_subtitles_timecode(end), | ||||||
|                     '{\\a%d}' % alignment if alignment != 2 else '', |                     '{\\a%d}' % alignment if alignment != 2 else '', | ||||||
| @@ -116,6 +139,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' | |||||||
|  |  | ||||||
|             if sub_lang == 'vostf': |             if sub_lang == 'vostf': | ||||||
|                 sub_lang = 'fr' |                 sub_lang = 'fr' | ||||||
|  |             elif sub_lang == 'vostde': | ||||||
|  |                 sub_lang = 'de' | ||||||
|             subtitles.setdefault(sub_lang, []).extend([{ |             subtitles.setdefault(sub_lang, []).extend([{ | ||||||
|                 'ext': 'json', |                 'ext': 'json', | ||||||
|                 'data': json.dumps(sub), |                 'data': json.dumps(sub), | ||||||
| @@ -137,7 +162,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' | |||||||
|                     'username': username, |                     'username': username, | ||||||
|                 })) or {}).get('accessToken') |                 })) or {}).get('accessToken') | ||||||
|             if access_token: |             if access_token: | ||||||
|                 self._HEADERS = {'authorization': 'Bearer ' + access_token} |                 self._HEADERS['Authorization'] = f'Bearer {access_token}' | ||||||
|         except ExtractorError as e: |         except ExtractorError as e: | ||||||
|             message = None |             message = None | ||||||
|             if isinstance(e.cause, HTTPError) and e.cause.status == 401: |             if isinstance(e.cause, HTTPError) and e.cause.status == 401: | ||||||
| @@ -147,8 +172,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' | |||||||
|             self.report_warning(message or self._LOGIN_ERR_MESSAGE) |             self.report_warning(message or self._LOGIN_ERR_MESSAGE) | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         lang, video_id = self._match_valid_url(url).group('lang', 'id') | ||||||
|         video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id |         self._HEADERS['X-Target-Distribution'] = lang or 'fr' | ||||||
|  |         video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/' | ||||||
|         player = self._download_json( |         player = self._download_json( | ||||||
|             video_base_url + 'configuration', video_id, |             video_base_url + 'configuration', video_id, | ||||||
|             'Downloading player config JSON metadata', |             'Downloading player config JSON metadata', | ||||||
| @@ -157,26 +183,29 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' | |||||||
|  |  | ||||||
|         user = options['user'] |         user = options['user'] | ||||||
|         if not user.get('hasAccess'): |         if not user.get('hasAccess'): | ||||||
|             self.raise_login_required() |             start_date = traverse_obj(options, ('video', 'startDate', {str})) | ||||||
|  |             if (parse_iso8601(start_date) or 0) > time.time(): | ||||||
|  |                 raise ExtractorError(f'This video is not available yet. Release date: {start_date}', expected=True) | ||||||
|  |             self.raise_login_required('This video requires a subscription', method='password') | ||||||
|  |  | ||||||
|         token = self._download_json( |         token = self._download_json( | ||||||
|             user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'), |             user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'), | ||||||
|             video_id, 'Downloading access token', headers={ |             video_id, 'Downloading access token', headers={ | ||||||
|                 'x-player-refresh-token': user['refreshToken'] |                 'X-Player-Refresh-Token': user['refreshToken'], | ||||||
|             }, data=b'')['token'] |             }, data=b'')['token'] | ||||||
|  |  | ||||||
|         links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link') |         links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link') | ||||||
|         self._K = ''.join(random.choices('0123456789abcdef', k=16)) |         self._K = ''.join(random.choices('0123456789abcdef', k=16)) | ||||||
|         message = bytes_to_intlist(json.dumps({ |         message = list(json.dumps({ | ||||||
|             'k': self._K, |             'k': self._K, | ||||||
|             't': token, |             't': token, | ||||||
|         })) |         }).encode()) | ||||||
|  |  | ||||||
|         # Sometimes authentication fails for no good reason, retry with |         # Sometimes authentication fails for no good reason, retry with | ||||||
|         # a different random padding |         # a different random padding | ||||||
|         links_data = None |         links_data = None | ||||||
|         for _ in range(3): |         for _ in range(3): | ||||||
|             padded_message = intlist_to_bytes(pkcs1pad(message, 128)) |             padded_message = bytes(pkcs1pad(message, 128)) | ||||||
|             n, e = self._RSA_KEY |             n, e = self._RSA_KEY | ||||||
|             encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n)) |             encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n)) | ||||||
|             authorization = base64.b64encode(encrypted_message).decode() |             authorization = base64.b64encode(encrypted_message).decode() | ||||||
| @@ -184,12 +213,13 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' | |||||||
|             try: |             try: | ||||||
|                 links_data = self._download_json( |                 links_data = self._download_json( | ||||||
|                     links_url, video_id, 'Downloading links JSON metadata', headers={ |                     links_url, video_id, 'Downloading links JSON metadata', headers={ | ||||||
|                         'X-Player-Token': authorization |                         'X-Player-Token': authorization, | ||||||
|  |                         **self._HEADERS, | ||||||
|                     }, query={ |                     }, query={ | ||||||
|                         'freeWithAds': 'true', |                         'freeWithAds': 'true', | ||||||
|                         'adaptive': 'false', |                         'adaptive': 'false', | ||||||
|                         'withMetadata': 'true', |                         'withMetadata': 'true', | ||||||
|                         'source': 'Web' |                         'source': 'Web', | ||||||
|                     }) |                     }) | ||||||
|                 break |                 break | ||||||
|             except ExtractorError as e: |             except ExtractorError as e: | ||||||
| @@ -202,7 +232,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' | |||||||
|  |  | ||||||
|                 error = self._parse_json(e.cause.response.read(), video_id) |                 error = self._parse_json(e.cause.response.read(), video_id) | ||||||
|                 message = error.get('message') |                 message = error.get('message') | ||||||
|                 if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country': |                 if e.cause.status == 403 and error.get('code') == 'player-bad-geolocation-country': | ||||||
|                     self.raise_geo_restricted(msg=message) |                     self.raise_geo_restricted(msg=message) | ||||||
|                 raise ExtractorError(message) |                 raise ExtractorError(message) | ||||||
|         else: |         else: | ||||||
| @@ -221,7 +251,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' | |||||||
|             for quality, load_balancer_url in qualities.items(): |             for quality, load_balancer_url in qualities.items(): | ||||||
|                 load_balancer_data = self._download_json( |                 load_balancer_data = self._download_json( | ||||||
|                     load_balancer_url, video_id, |                     load_balancer_url, video_id, | ||||||
|                     'Downloading %s %s JSON metadata' % (format_id, quality), |                     f'Downloading {format_id} {quality} JSON metadata', | ||||||
|  |                     headers=self._HEADERS, | ||||||
|                     fatal=False) or {} |                     fatal=False) or {} | ||||||
|                 m3u8_url = load_balancer_data.get('location') |                 m3u8_url = load_balancer_data.get('location') | ||||||
|                 if not m3u8_url: |                 if not m3u8_url: | ||||||
| @@ -232,11 +263,17 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' | |||||||
|                 if format_id == 'vf': |                 if format_id == 'vf': | ||||||
|                     for f in m3u8_formats: |                     for f in m3u8_formats: | ||||||
|                         f['language'] = 'fr' |                         f['language'] = 'fr' | ||||||
|  |                 elif format_id == 'vde': | ||||||
|  |                     for f in m3u8_formats: | ||||||
|  |                         f['language'] = 'de' | ||||||
|                 formats.extend(m3u8_formats) |                 formats.extend(m3u8_formats) | ||||||
|  |  | ||||||
|  |         if not formats: | ||||||
|  |             self.raise_login_required('This video requires a subscription', method='password') | ||||||
|  |  | ||||||
|         video = (self._download_json( |         video = (self._download_json( | ||||||
|             self._API_BASE_URL + 'video/%s' % video_id, video_id, |             self._API_BASE_URL + f'video/{video_id}', video_id, | ||||||
|             'Downloading additional video metadata', fatal=False) or {}).get('video') or {} |             'Downloading additional video metadata', fatal=False, headers=self._HEADERS) or {}).get('video') or {} | ||||||
|         show = video.get('show') or {} |         show = video.get('show') or {} | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
| @@ -255,3 +292,38 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' | |||||||
|             'average_rating': float_or_none(video.get('rating') or metas.get('rating')), |             'average_rating': float_or_none(video.get('rating') or metas.get('rating')), | ||||||
|             'comment_count': int_or_none(video.get('commentsCount')), |             'comment_count': int_or_none(video.get('commentsCount')), | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class ADNSeasonIE(ADNBaseIE): | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>\d+)[^/?#]*/?(?:$|[#?])' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://animationdigitalnetwork.com/video/911-tokyo-mew-mew-new', | ||||||
|  |         'playlist_count': 12, | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '911', | ||||||
|  |             'title': 'Tokyo Mew Mew New', | ||||||
|  |         }, | ||||||
|  |         # 'skip': 'Only available in French and German speaking Europe', | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         lang, video_show_slug = self._match_valid_url(url).group('lang', 'id') | ||||||
|  |         self._HEADERS['X-Target-Distribution'] = lang or 'fr' | ||||||
|  |         show = self._download_json( | ||||||
|  |             f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug, | ||||||
|  |             'Downloading show JSON metadata', headers=self._HEADERS)['show'] | ||||||
|  |         show_id = str(show['id']) | ||||||
|  |         episodes = self._download_json( | ||||||
|  |             f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug, | ||||||
|  |             'Downloading episode list', headers=self._HEADERS, query={ | ||||||
|  |                 'order': 'asc', | ||||||
|  |                 'limit': '-1', | ||||||
|  |             }) | ||||||
|  |  | ||||||
|  |         def entries(): | ||||||
|  |             for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})): | ||||||
|  |                 yield self.url_result(join_nonempty( | ||||||
|  |                     'https://animationdigitalnetwork.com', lang, 'video', | ||||||
|  |                     video_show_slug, episode_id, delim='/'), ADNIE, episode_id) | ||||||
|  |  | ||||||
|  |         return self.playlist_result(entries(), show_id, show.get('title')) | ||||||
|   | |||||||
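Note: in the adn.py _real_extract hunks above, the payload {'k': ..., 't': token} is PKCS#1-padded and RSA-encrypted with the hardcoded _RSA_KEY before being sent as X-Player-Token. The refactor only swaps bytes_to_intlist/intlist_to_bytes for the built-in list()/bytes(), which are equivalent for byte values. A hedged sketch of the flow, assuming the updated module layout and a dummy token:

    import base64
    import json

    from yt_dlp.extractor.adn import ADNBaseIE
    from yt_dlp.utils import bytes_to_long, long_to_bytes, pkcs1pad

    n, e = ADNBaseIE._RSA_KEY
    message = list(json.dumps({'k': '0' * 16, 't': 'dummy-token'}).encode())
    padded = bytes(pkcs1pad(message, 128))  # was: intlist_to_bytes(pkcs1pad(message, 128))
    encrypted = long_to_bytes(pow(bytes_to_long(padded), e, n))
    authorization = base64.b64encode(encrypted).decode()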
| @@ -1,8 +1,6 @@ | |||||||
|  | import urllib.parse | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import ( |  | ||||||
|     compat_parse_qs, |  | ||||||
|     compat_urlparse, |  | ||||||
| ) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class AdobeConnectIE(InfoExtractor): | class AdobeConnectIE(InfoExtractor): | ||||||
| @@ -12,13 +10,13 @@ class AdobeConnectIE(InfoExtractor): | |||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|         title = self._html_extract_title(webpage) |         title = self._html_extract_title(webpage) | ||||||
|         qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1]) |         qs = urllib.parse.parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1]) | ||||||
|         is_live = qs.get('isLive', ['false'])[0] == 'true' |         is_live = qs.get('isLive', ['false'])[0] == 'true' | ||||||
|         formats = [] |         formats = [] | ||||||
|         for con_string in qs['conStrings'][0].split(','): |         for con_string in qs['conStrings'][0].split(','): | ||||||
|             formats.append({ |             formats.append({ | ||||||
|                 'format_id': con_string.split('://')[0], |                 'format_id': con_string.split('://')[0], | ||||||
|                 'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]), |                 'app': urllib.parse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]), | ||||||
|                 'ext': 'flv', |                 'ext': 'flv', | ||||||
|                 'play_path': 'mp4:' + qs['streamName'][0], |                 'play_path': 'mp4:' + qs['streamName'][0], | ||||||
|                 'rtmp_conn': 'S:' + qs['ticket'][0], |                 'rtmp_conn': 'S:' + qs['ticket'][0], | ||||||
|   | |||||||
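Note: adobeconnect.py drops the compat_parse_qs/compat_urlparse shims for their stdlib equivalents, which are drop-in replacements. A minimal sketch with an illustrative query string; parse_qs returns a dict of lists, matching the [0] indexing used above:

    import urllib.parse

    qs = urllib.parse.parse_qs('isLive=true&conStrings=rtmp://a,rtmps://b')
    assert qs['isLive'] == ['true']
    assert qs['conStrings'][0].split(',') == ['rtmp://a', 'rtmps://b']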
File diff suppressed because it is too large
							| @@ -2,13 +2,12 @@ import functools | |||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import compat_str |  | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|  |     ISO639Utils, | ||||||
|  |     OnDemandPagedList, | ||||||
|     float_or_none, |     float_or_none, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     ISO639Utils, |  | ||||||
|     join_nonempty, |     join_nonempty, | ||||||
|     OnDemandPagedList, |  | ||||||
|     parse_duration, |     parse_duration, | ||||||
|     str_or_none, |     str_or_none, | ||||||
|     str_to_int, |     str_to_int, | ||||||
| @@ -36,7 +35,7 @@ class AdobeTVBaseIE(InfoExtractor): | |||||||
|         return subtitles |         return subtitles | ||||||
|  |  | ||||||
|     def _parse_video_data(self, video_data): |     def _parse_video_data(self, video_data): | ||||||
|         video_id = compat_str(video_data['id']) |         video_id = str(video_data['id']) | ||||||
|         title = video_data['title'] |         title = video_data['title'] | ||||||
|  |  | ||||||
|         s3_extracted = False |         s3_extracted = False | ||||||
| @@ -151,7 +150,7 @@ class AdobeTVPlaylistBaseIE(AdobeTVBaseIE): | |||||||
|         page += 1 |         page += 1 | ||||||
|         query['page'] = page |         query['page'] = page | ||||||
|         for element_data in self._call_api( |         for element_data in self._call_api( | ||||||
|                 self._RESOURCE, display_id, query, 'Download Page %d' % page): |                 self._RESOURCE, display_id, query, f'Download Page {page}'): | ||||||
|             yield self._process_data(element_data) |             yield self._process_data(element_data) | ||||||
|  |  | ||||||
|     def _extract_playlist_entries(self, display_id, query): |     def _extract_playlist_entries(self, display_id, query): | ||||||
|   | |||||||
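Note: adobetv.py keeps its OnDemandPagedList-based pagination and only modernizes the progress note to an f-string. A hedged sketch of the pagination pattern, with fetch_page as a hypothetical stand-in for the extractor's page callback:

    from yt_dlp.utils import OnDemandPagedList

    def fetch_page(page):  # pages are 0-indexed; yield this page's items
        yield from range(page * 10, (page + 1) * 10)

    entries = OnDemandPagedList(fetch_page, 10)
    assert entries.getslice(0, 3) == [0, 1, 2]  # only the first page is fetched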
| @@ -91,7 +91,7 @@ class AdultSwimIE(TurnerBaseIE): | |||||||
|   getShowBySlug(slug:"%s") { |   getShowBySlug(slug:"%s") { | ||||||
|     %%s |     %%s | ||||||
|   } |   } | ||||||
| }''' % show_path | }''' % show_path  # noqa: UP031 | ||||||
|         if episode_path: |         if episode_path: | ||||||
|             query = query % '''title |             query = query % '''title | ||||||
|     getVideoBySlug(slug:"%s") { |     getVideoBySlug(slug:"%s") { | ||||||
| @@ -107,7 +107,6 @@ class AdultSwimIE(TurnerBaseIE): | |||||||
|       title |       title | ||||||
|       tvRating |       tvRating | ||||||
|     }''' % episode_path |     }''' % episode_path | ||||||
|             ['getVideoBySlug'] |  | ||||||
|         else: |         else: | ||||||
|             query = query % '''metaDescription |             query = query % '''metaDescription | ||||||
|     title |     title | ||||||
| @@ -129,7 +128,7 @@ class AdultSwimIE(TurnerBaseIE): | |||||||
|             episode_title = title = video_data['title'] |             episode_title = title = video_data['title'] | ||||||
|             series = show_data.get('title') |             series = show_data.get('title') | ||||||
|             if series: |             if series: | ||||||
|                 title = '%s - %s' % (series, title) |                 title = f'{series} - {title}' | ||||||
|             info = { |             info = { | ||||||
|                 'id': video_id, |                 'id': video_id, | ||||||
|                 'title': title, |                 'title': title, | ||||||
| @@ -192,7 +191,7 @@ class AdultSwimIE(TurnerBaseIE): | |||||||
|                 if not slug: |                 if not slug: | ||||||
|                     continue |                     continue | ||||||
|                 entries.append(self.url_result( |                 entries.append(self.url_result( | ||||||
|                     'http://adultswim.com/videos/%s/%s' % (show_path, slug), |                     f'http://adultswim.com/videos/{show_path}/{slug}', | ||||||
|                     'AdultSwim', video.get('_id'))) |                     'AdultSwim', video.get('_id'))) | ||||||
|             return self.playlist_result( |             return self.playlist_result( | ||||||
|                 entries, show_path, show_data.get('title'), |                 entries, show_path, show_data.get('title'), | ||||||
|   | |||||||
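Note: the adultswim.py GraphQL template above is filled in two %-formatting stages, which is why a noqa: UP031 suppression is added rather than converting to f-strings: the first substitution fills the slug and collapses the escaped %%s into a new %s slot for the second one. A minimal sketch with a hypothetical slug:

    template = '''query {
      getShowBySlug(slug:"%s") {
        %%s
      }
    }''' % 'rick-and-morty'     # stage 1: slug; %%s becomes %s

    query = template % 'title'  # stage 2: field selection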
| @@ -73,8 +73,8 @@ class AENetworksBaseIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE | |||||||
|     def _extract_aetn_info(self, domain, filter_key, filter_value, url): |     def _extract_aetn_info(self, domain, filter_key, filter_value, url): | ||||||
|         requestor_id, brand = self._DOMAIN_MAP[domain] |         requestor_id, brand = self._DOMAIN_MAP[domain] | ||||||
|         result = self._download_json( |         result = self._download_json( | ||||||
|             'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand, |             f'https://feeds.video.aetnd.com/api/v2/{brand}/videos', | ||||||
|             filter_value, query={'filter[%s]' % filter_key: filter_value}) |             filter_value, query={f'filter[{filter_key}]': filter_value}) | ||||||
|         result = traverse_obj( |         result = traverse_obj( | ||||||
|             result, ('results', |             result, ('results', | ||||||
|                      lambda k, v: k == 0 and v[filter_key] == filter_value), |                      lambda k, v: k == 0 and v[filter_key] == filter_value), | ||||||
| @@ -93,7 +93,7 @@ class AENetworksBaseIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE | |||||||
|             resource = self._get_mvpd_resource( |             resource = self._get_mvpd_resource( | ||||||
|                 requestor_id, theplatform_metadata['title'], |                 requestor_id, theplatform_metadata['title'], | ||||||
|                 theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'), |                 theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'), | ||||||
|                 theplatform_metadata['ratings'][0]['rating']) |                 traverse_obj(theplatform_metadata, ('ratings', 0, 'rating'))) | ||||||
|             auth = self._extract_mvpd_auth( |             auth = self._extract_mvpd_auth( | ||||||
|                 url, video_id, requestor_id, resource) |                 url, video_id, requestor_id, resource) | ||||||
|         info.update(self._extract_aen_smil(media_url, video_id, auth)) |         info.update(self._extract_aen_smil(media_url, video_id, auth)) | ||||||
| @@ -121,18 +121,28 @@ class AENetworksIE(AENetworksBaseIE): | |||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '22253814', |             'id': '22253814', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Winter is Coming', |             'title': 'Winter Is Coming', | ||||||
|             'description': 'md5:641f424b7a19d8e24f26dea22cf59d74', |             'description': 'md5:a40e370925074260b1c8a633c632c63a', | ||||||
|             'timestamp': 1338306241, |             'timestamp': 1338306241, | ||||||
|             'upload_date': '20120529', |             'upload_date': '20120529', | ||||||
|             'uploader': 'AENE-NEW', |             'uploader': 'AENE-NEW', | ||||||
|  |             'duration': 2592.0, | ||||||
|  |             'thumbnail': r're:^https?://.*\.jpe?g$', | ||||||
|  |             'chapters': 'count:5', | ||||||
|  |             'tags': 'count:14', | ||||||
|  |             'categories': ['Mountain Men'], | ||||||
|  |             'episode_number': 1, | ||||||
|  |             'episode': 'Episode 1', | ||||||
|  |             'season': 'Season 1', | ||||||
|  |             'season_number': 1, | ||||||
|  |             'series': 'Mountain Men', | ||||||
|         }, |         }, | ||||||
|         'params': { |         'params': { | ||||||
|             # m3u8 download |             # m3u8 download | ||||||
|             'skip_download': True, |             'skip_download': True, | ||||||
|         }, |         }, | ||||||
|         'add_ie': ['ThePlatform'], |         'add_ie': ['ThePlatform'], | ||||||
|         'skip': 'Geo-restricted - This content is not available in your location.' |         'skip': 'Geo-restricted - This content is not available in your location.', | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1', |         'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
| @@ -143,6 +153,15 @@ class AENetworksIE(AENetworksBaseIE): | |||||||
|             'timestamp': 1452634428, |             'timestamp': 1452634428, | ||||||
|             'upload_date': '20160112', |             'upload_date': '20160112', | ||||||
|             'uploader': 'AENE-NEW', |             'uploader': 'AENE-NEW', | ||||||
|  |             'duration': 1277.695, | ||||||
|  |             'thumbnail': r're:^https?://.*\.jpe?g$', | ||||||
|  |             'chapters': 'count:4', | ||||||
|  |             'tags': 'count:23', | ||||||
|  |             'episode': 'Episode 1', | ||||||
|  |             'episode_number': 1, | ||||||
|  |             'season': 'Season 9', | ||||||
|  |             'season_number': 9, | ||||||
|  |             'series': 'Duck Dynasty', | ||||||
|         }, |         }, | ||||||
|         'params': { |         'params': { | ||||||
|             # m3u8 download |             # m3u8 download | ||||||
| @@ -152,28 +171,28 @@ class AENetworksIE(AENetworksBaseIE): | |||||||
|         'skip': 'This video is only available for users of participating TV providers.', |         'skip': 'This video is only available for users of participating TV providers.', | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', |         'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', | ||||||
|         'only_matching': True |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6', |         'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6', | ||||||
|         'only_matching': True |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie', |         'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie', | ||||||
|         'only_matching': True |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie', |         'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie', | ||||||
|         'only_matching': True |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special', |         'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special', | ||||||
|         'only_matching': True |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story', |         'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story', | ||||||
|         'only_matching': True |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://www.history.com/videos/history-of-valentines-day', |         'url': 'http://www.history.com/videos/history-of-valentines-day', | ||||||
|         'only_matching': True |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape', |         'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape', | ||||||
|         'only_matching': True |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
| @@ -190,14 +209,14 @@ class AENetworksListBaseIE(AENetworksBaseIE): | |||||||
|   %s(slug: "%s") { |   %s(slug: "%s") { | ||||||
|     %s |     %s | ||||||
|   } |   } | ||||||
| }''' % (resource, slug, fields), | }''' % (resource, slug, fields),  # noqa: UP031 | ||||||
|             }))['data'][resource] |             }))['data'][resource] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         domain, slug = self._match_valid_url(url).groups() |         domain, slug = self._match_valid_url(url).groups() | ||||||
|         _, brand = self._DOMAIN_MAP[domain] |         _, brand = self._DOMAIN_MAP[domain] | ||||||
|         playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS) |         playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS) | ||||||
|         base_url = 'http://watch.%s' % domain |         base_url = f'http://watch.{domain}' | ||||||
|  |  | ||||||
|         entries = [] |         entries = [] | ||||||
|         for item in (playlist.get(self._ITEMS_KEY) or []): |         for item in (playlist.get(self._ITEMS_KEY) or []): | ||||||
| @@ -229,10 +248,10 @@ class AENetworksCollectionIE(AENetworksListBaseIE): | |||||||
|         'playlist_mincount': 12, |         'playlist_mincount': 12, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us', |         'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us', | ||||||
|         'only_matching': True |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.historyvault.com/collections/mysteryquest', |         'url': 'https://www.historyvault.com/collections/mysteryquest', | ||||||
|         'only_matching': True |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|     _RESOURCE = 'list' |     _RESOURCE = 'list' | ||||||
|     _ITEMS_KEY = 'items' |     _ITEMS_KEY = 'items' | ||||||
| @@ -290,7 +309,7 @@ class HistoryTopicIE(AENetworksBaseIE): | |||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '40700995724', |             'id': '40700995724', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': "History of Valentine’s Day", |             'title': 'History of Valentine’s Day', | ||||||
|             'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', |             'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', | ||||||
|             'timestamp': 1375819729, |             'timestamp': 1375819729, | ||||||
|             'upload_date': '20130806', |             'upload_date': '20130806', | ||||||
| @@ -338,12 +357,13 @@ class BiographyIE(AENetworksBaseIE): | |||||||
|             'skip_download': True, |             'skip_download': True, | ||||||
|         }, |         }, | ||||||
|         'add_ie': ['ThePlatform'], |         'add_ie': ['ThePlatform'], | ||||||
|  |         'skip': '404 Not Found', | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         display_id = self._match_id(url) |         display_id = self._match_id(url) | ||||||
|         webpage = self._download_webpage(url, display_id) |         webpage = self._download_webpage(url, display_id) | ||||||
|         player_url = self._search_regex( |         player_url = self._search_regex( | ||||||
|             r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL, |             rf'<phoenix-iframe[^>]+src="({HistoryPlayerIE._VALID_URL})', | ||||||
|             webpage, 'player URL') |             webpage, 'player URL') | ||||||
|         return self.url_result(player_url, HistoryPlayerIE.ie_key()) |         return self.url_result(player_url, HistoryPlayerIE.ie_key()) | ||||||
|   | |||||||
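Note: aenetworks.py replaces the hard ['ratings'][0]['rating'] lookup, which raises KeyError/IndexError when no ratings are present, with traverse_obj, which returns None on any missing step. A minimal sketch, assuming yt_dlp's traversal helper and hypothetical metadata:

    from yt_dlp.utils.traversal import traverse_obj

    meta = {'title': 'Winter Is Coming'}  # no 'ratings' key
    assert traverse_obj(meta, ('ratings', 0, 'rating')) is None

    meta['ratings'] = [{'rating': 'TV-14'}]
    assert traverse_obj(meta, ('ratings', 0, 'rating')) == 'TV-14'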
| @@ -16,8 +16,8 @@ class AeonCoIE(InfoExtractor): | |||||||
|             'uploader': 'Semiconductor', |             'uploader': 'Semiconductor', | ||||||
|             'uploader_id': 'semiconductor', |             'uploader_id': 'semiconductor', | ||||||
|             'uploader_url': 'https://vimeo.com/semiconductor', |             'uploader_url': 'https://vimeo.com/semiconductor', | ||||||
|             'duration': 348 |             'duration': 348, | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it', |         'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it', | ||||||
|         'md5': '03582d795382e49f2fd0b427b55de409', |         'md5': '03582d795382e49f2fd0b427b55de409', | ||||||
| @@ -29,8 +29,8 @@ class AeonCoIE(InfoExtractor): | |||||||
|             'uploader': 'Aeon Video', |             'uploader': 'Aeon Video', | ||||||
|             'uploader_id': 'aeonvideo', |             'uploader_id': 'aeonvideo', | ||||||
|             'uploader_url': 'https://vimeo.com/aeonvideo', |             'uploader_url': 'https://vimeo.com/aeonvideo', | ||||||
|             'duration': 1344 |             'duration': 1344, | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out', |         'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out', | ||||||
|         'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b', |         'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b', | ||||||
|   | |||||||
| @@ -1,142 +1,26 @@ | |||||||
|  | import datetime as dt | ||||||
| import functools | import functools | ||||||
| import re |  | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from ..networking import Request | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     OnDemandPagedList, |     OnDemandPagedList, | ||||||
|     date_from_str, |     UserNotLive, | ||||||
|     determine_ext, |     determine_ext, | ||||||
|  |     filter_dict, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     qualities, |     orderedSet, | ||||||
|     traverse_obj, |     parse_iso8601, | ||||||
|     unified_strdate, |  | ||||||
|     unified_timestamp, |  | ||||||
|     update_url_query, |  | ||||||
|     url_or_none, |     url_or_none, | ||||||
|     urlencode_postdata, |     urlencode_postdata, | ||||||
|     xpath_text, |     urljoin, | ||||||
| ) | ) | ||||||
|  | from ..utils.traversal import traverse_obj | ||||||
|  |  | ||||||
|  |  | ||||||
| class AfreecaTVIE(InfoExtractor): | class AfreecaTVBaseIE(InfoExtractor): | ||||||
|     IE_NAME = 'afreecatv' |  | ||||||
|     IE_DESC = 'afreecatv.com' |  | ||||||
|     _VALID_URL = r'''(?x) |  | ||||||
|                     https?:// |  | ||||||
|                         (?: |  | ||||||
|                             (?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)? |  | ||||||
|                             (?: |  | ||||||
|                                 /app/(?:index|read_ucc_bbs)\.cgi| |  | ||||||
|                                 /player/[Pp]layer\.(?:swf|html) |  | ||||||
|                             )\?.*?\bnTitleNo=| |  | ||||||
|                             vod\.afreecatv\.com/(PLAYER/STATION|player)/ |  | ||||||
|                         ) |  | ||||||
|                         (?P<id>\d+) |  | ||||||
|                     ''' |  | ||||||
|     _NETRC_MACHINE = 'afreecatv' |     _NETRC_MACHINE = 'afreecatv' | ||||||
|     _TESTS = [{ |  | ||||||
|         'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', |  | ||||||
|         'md5': 'f72c89fe7ecc14c1b5ce506c4996046e', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '36164052', |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'title': '데일리 에이프릴 요정들의 시상식!', |  | ||||||
|             'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', |  | ||||||
|             'uploader': 'dailyapril', |  | ||||||
|             'uploader_id': 'dailyapril', |  | ||||||
|             'upload_date': '20160503', |  | ||||||
|         }, |  | ||||||
|         'skip': 'Video is gone', |  | ||||||
|     }, { |  | ||||||
|         'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '36153164', |  | ||||||
|             'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", |  | ||||||
|             'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', |  | ||||||
|             'uploader': 'dailyapril', |  | ||||||
|             'uploader_id': 'dailyapril', |  | ||||||
|         }, |  | ||||||
|         'playlist_count': 2, |  | ||||||
|         'playlist': [{ |  | ||||||
|             'md5': 'd8b7c174568da61d774ef0203159bf97', |  | ||||||
|             'info_dict': { |  | ||||||
|                 'id': '36153164_1', |  | ||||||
|                 'ext': 'mp4', |  | ||||||
|                 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", |  | ||||||
|                 'upload_date': '20160502', |  | ||||||
|             }, |  | ||||||
|         }, { |  | ||||||
|             'md5': '58f2ce7f6044e34439ab2d50612ab02b', |  | ||||||
|             'info_dict': { |  | ||||||
|                 'id': '36153164_2', |  | ||||||
|                 'ext': 'mp4', |  | ||||||
|                 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", |  | ||||||
|                 'upload_date': '20160502', |  | ||||||
|             }, |  | ||||||
|         }], |  | ||||||
|         'skip': 'Video is gone', |  | ||||||
|     }, { |  | ||||||
|         # non standard key |  | ||||||
|         'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '20170411_BE689A0E_190960999_1_2_h', |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'title': '혼자사는여자집', |  | ||||||
|             'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', |  | ||||||
|             'uploader': '♥이슬이', |  | ||||||
|             'uploader_id': 'dasl8121', |  | ||||||
|             'upload_date': '20170411', |  | ||||||
|             'duration': 213, |  | ||||||
|         }, |  | ||||||
|         'params': { |  | ||||||
|             'skip_download': True, |  | ||||||
|         }, |  | ||||||
|     }, { |  | ||||||
|         # adult content |  | ||||||
|         'url': 'https://vod.afreecatv.com/player/97267690', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '20180327_27901457_202289533_1', |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'title': '[생]빨개요♥ (part 1)', |  | ||||||
|             'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', |  | ||||||
|             'uploader': '[SA]서아', |  | ||||||
|             'uploader_id': 'bjdyrksu', |  | ||||||
|             'upload_date': '20180327', |  | ||||||
|             'duration': 3601, |  | ||||||
|         }, |  | ||||||
|         'params': { |  | ||||||
|             'skip_download': True, |  | ||||||
|         }, |  | ||||||
|         'skip': 'The VOD does not exist', |  | ||||||
|     }, { |  | ||||||
|         'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://vod.afreecatv.com/player/96753363', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '20230108_9FF5BEE1_244432674_1', |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'uploader_id': 'rlantnghks', |  | ||||||
|             'uploader': '페이즈으', |  | ||||||
|             'duration': 10840, |  | ||||||
|             'thumbnail': 'http://videoimg.afreecatv.com/php/SnapshotLoad.php?rowKey=20230108_9FF5BEE1_244432674_1_r', |  | ||||||
|             'upload_date': '20230108', |  | ||||||
|             'title': '젠지 페이즈', |  | ||||||
|         }, |  | ||||||
|         'params': { |  | ||||||
|             'skip_download': True, |  | ||||||
|         }, |  | ||||||
|     }] |  | ||||||
|  |  | ||||||
|     @staticmethod |  | ||||||
|     def parse_video_key(key): |  | ||||||
|         video_key = {} |  | ||||||
|         m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key) |  | ||||||
|         if m: |  | ||||||
|             video_key['upload_date'] = m.group('upload_date') |  | ||||||
|             video_key['part'] = int(m.group('part')) |  | ||||||
|         return video_key |  | ||||||
|  |  | ||||||
|     def _perform_login(self, username, password): |     def _perform_login(self, username, password): | ||||||
|         login_form = { |         login_form = { | ||||||
| @@ -150,21 +34,21 @@ class AfreecaTVIE(InfoExtractor): | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         response = self._download_json( |         response = self._download_json( | ||||||
|             'https://login.afreecatv.com/app/LoginAction.php', None, |             'https://login.sooplive.co.kr/app/LoginAction.php', None, | ||||||
|             'Logging in', data=urlencode_postdata(login_form)) |             'Logging in', data=urlencode_postdata(login_form)) | ||||||
|  |  | ||||||
|         _ERRORS = { |         _ERRORS = { | ||||||
|             -4: 'Your account has been suspended due to a violation of our terms and policies.', |             -4: 'Your account has been suspended due to a violation of our terms and policies.', | ||||||
|             -5: 'https://member.afreecatv.com/app/user_delete_progress.php', |             -5: 'https://member.sooplive.co.kr/app/user_delete_progress.php', | ||||||
|             -6: 'https://login.afreecatv.com/membership/changeMember.php', |             -6: 'https://login.sooplive.co.kr/membership/changeMember.php', | ||||||
|             -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.", |             -8: "Hello! Soop here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.", | ||||||
|             -9: 'https://member.afreecatv.com/app/pop_login_block.php', |             -9: 'https://member.sooplive.co.kr/app/pop_login_block.php', | ||||||
|             -11: 'https://login.afreecatv.com/afreeca/second_login.php', |             -11: 'https://login.sooplive.co.kr/afreeca/second_login.php', | ||||||
|             -12: 'https://member.afreecatv.com/app/user_security.php', |             -12: 'https://member.sooplive.co.kr/app/user_security.php', | ||||||
|             0: 'The username does not exist or you have entered the wrong password.', |             0: 'The username does not exist or you have entered the wrong password.', | ||||||
|             -1: 'The username does not exist or you have entered the wrong password.', |             -1: 'The username does not exist or you have entered the wrong password.', | ||||||
|             -3: 'You have entered your username/password incorrectly.', |             -3: 'You have entered your username/password incorrectly.', | ||||||
|             -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.', |             -7: 'You cannot use your Global Soop account to access Korean Soop.', | ||||||
|             -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.', |             -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.', | ||||||
|             -32008: 'You have failed to log in. Please contact our Help Center.', |             -32008: 'You have failed to log in. Please contact our Help Center.', | ||||||
|         } |         } | ||||||
| @@ -173,169 +57,206 @@ class AfreecaTVIE(InfoExtractor): | |||||||
|         if result != 1: |         if result != 1: | ||||||
|             error = _ERRORS.get(result, 'You have failed to log in.') |             error = _ERRORS.get(result, 'You have failed to log in.') | ||||||
|             raise ExtractorError( |             raise ExtractorError( | ||||||
|                 'Unable to login: %s said: %s' % (self.IE_NAME, error), |                 f'Unable to login: {self.IE_NAME} said: {error}', | ||||||
|                 expected=True) |                 expected=True) | ||||||
|  |  | ||||||
|  |     def _call_api(self, endpoint, display_id, data=None, headers=None, query=None): | ||||||
|  |         return self._download_json(Request( | ||||||
|  |             f'https://api.m.sooplive.co.kr/{endpoint}', | ||||||
|  |             data=data, headers=headers, query=query, | ||||||
|  |             extensions={'legacy_ssl': True}), display_id, | ||||||
|  |             'Downloading API JSON', 'Unable to download API JSON') | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def _fixup_thumb(thumb_url): | ||||||
|  |         if not url_or_none(thumb_url): | ||||||
|  |             return None | ||||||
|  |         # Core would determine_ext as 'php' from the url, so we need to provide the real ext | ||||||
|  |         # See: https://github.com/yt-dlp/yt-dlp/issues/11537 | ||||||
|  |         return [{'url': thumb_url, 'ext': 'jpg'}] | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class AfreecaTVIE(AfreecaTVBaseIE): | ||||||
|  |     IE_NAME = 'soop' | ||||||
|  |     IE_DESC = 'sooplive.co.kr' | ||||||
|  |     _VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/(?:PLAYER/STATION|player)/(?P<id>\d+)/?(?:$|[?#&])' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://vod.sooplive.co.kr/player/96753363', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '20230108_9FF5BEE1_244432674_1', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'uploader_id': 'rlantnghks', | ||||||
|  |             'uploader': '페이즈으', | ||||||
|  |             'duration': 10840, | ||||||
|  |             'thumbnail': r're:https?://videoimg\.(?:sooplive\.co\.kr|afreecatv\.com)/.+', | ||||||
|  |             'upload_date': '20230108', | ||||||
|  |             'timestamp': 1673186405, | ||||||
|  |             'title': '젠지 페이즈', | ||||||
|  |         }, | ||||||
|  |         'params': { | ||||||
|  |             'skip_download': True, | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         # non standard key | ||||||
|  |         'url': 'http://vod.sooplive.co.kr/PLAYER/STATION/20515605', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '20170411_BE689A0E_190960999_1_2_h', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': '혼자사는여자집', | ||||||
|  |             'thumbnail': r're:https?://(?:video|st)img\.(?:sooplive\.co\.kr|afreecatv\.com)/.+', | ||||||
|  |             'uploader': '♥이슬이', | ||||||
|  |             'uploader_id': 'dasl8121', | ||||||
|  |             'upload_date': '20170411', | ||||||
|  |             'timestamp': 1491929865, | ||||||
|  |             'duration': 213, | ||||||
|  |         }, | ||||||
|  |         'params': { | ||||||
|  |             'skip_download': True, | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         # adult content | ||||||
|  |         'url': 'https://vod.sooplive.co.kr/player/97267690', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '20180327_27901457_202289533_1', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': '[생]빨개요♥ (part 1)', | ||||||
|  |             'thumbnail': r're:https?://(?:video|st)img\.(?:sooplive\.co\.kr|afreecatv\.com)/.+', | ||||||
|  |             'uploader': '[SA]서아', | ||||||
|  |             'uploader_id': 'bjdyrksu', | ||||||
|  |             'upload_date': '20180327', | ||||||
|  |             'duration': 3601, | ||||||
|  |         }, | ||||||
|  |         'params': { | ||||||
|  |             'skip_download': True, | ||||||
|  |         }, | ||||||
|  |         'skip': 'The VOD does not exist', | ||||||
|  |     }, { | ||||||
|  |         # adult content | ||||||
|  |         'url': 'https://vod.sooplive.co.kr/player/70395877', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         # subscribers only | ||||||
|  |         'url': 'https://vod.sooplive.co.kr/player/104647403', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         # private | ||||||
|  |         'url': 'https://vod.sooplive.co.kr/player/81669846', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|  |         data = self._call_api( | ||||||
|         partial_view = False |             'station/video/a/view', video_id, headers={'Referer': url}, | ||||||
|         adult_view = False |             data=urlencode_postdata({ | ||||||
|         for _ in range(2): |  | ||||||
|             data = self._download_json( |  | ||||||
|                 'https://api.m.afreecatv.com/station/video/a/view', |  | ||||||
|                 video_id, headers={'Referer': url}, data=urlencode_postdata({ |  | ||||||
|                     'nTitleNo': video_id, |  | ||||||
|                     'nApiLevel': 10, |  | ||||||
|                 }))['data'] |  | ||||||
|             if traverse_obj(data, ('code', {int})) == -6221: |  | ||||||
|                 raise ExtractorError('The VOD does not exist', expected=True) |  | ||||||
|             query = { |  | ||||||
|                 'nTitleNo': video_id, |                 'nTitleNo': video_id, | ||||||
|                 'nStationNo': data['station_no'], |                 'nApiLevel': 10, | ||||||
|                 'nBbsNo': data['bbs_no'], |             }))['data'] | ||||||
|             } |  | ||||||
|             if partial_view: |  | ||||||
|                 query['partialView'] = 'SKIP_ADULT' |  | ||||||
|             if adult_view: |  | ||||||
|                 query['adultView'] = 'ADULT_VIEW' |  | ||||||
|             video_xml = self._download_xml( |  | ||||||
|                 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php', |  | ||||||
|                 video_id, 'Downloading video info XML%s' |  | ||||||
|                 % (' (skipping adult)' if partial_view else ''), |  | ||||||
|                 video_id, headers={ |  | ||||||
|                     'Referer': url, |  | ||||||
|                 }, query=query) |  | ||||||
|  |  | ||||||
|             flag = xpath_text(video_xml, './track/flag', 'flag', default=None) |         error_code = traverse_obj(data, ('code', {int})) | ||||||
|             if flag and flag == 'SUCCEED': |         if error_code == -6221: | ||||||
|                 break |             raise ExtractorError('The VOD does not exist', expected=True) | ||||||
|             if flag == 'PARTIAL_ADULT': |         elif error_code == -6205: | ||||||
|                 self.report_warning( |             raise ExtractorError('This VOD is private', expected=True) | ||||||
|                     'In accordance with local laws and regulations, underage users are restricted from watching adult content. ' |  | ||||||
|                     'Only content suitable for all ages will be downloaded. ' |  | ||||||
|                     'Provide account credentials if you wish to download restricted content.') |  | ||||||
|                 partial_view = True |  | ||||||
|                 continue |  | ||||||
|             elif flag == 'ADULT': |  | ||||||
|                 if not adult_view: |  | ||||||
|                     adult_view = True |  | ||||||
|                     continue |  | ||||||
|                 error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.' |  | ||||||
|             else: |  | ||||||
|                 error = flag |  | ||||||
|             raise ExtractorError( |  | ||||||
|                 '%s said: %s' % (self.IE_NAME, error), expected=True) |  | ||||||
|         else: |  | ||||||
|             raise ExtractorError('Unable to download video info') |  | ||||||
|  |  | ||||||
|         video_element = video_xml.findall('./track/video')[-1] |         common_info = traverse_obj(data, { | ||||||
|         if video_element is None or video_element.text is None: |             'title': ('title', {str}), | ||||||
|             raise ExtractorError( |             'uploader': ('writer_nick', {str}), | ||||||
|                 'Video %s does not exist' % video_id, expected=True) |             'uploader_id': ('bj_id', {str}), | ||||||
|  |             'duration': ('total_file_duration', {int_or_none(scale=1000)}), | ||||||
|         video_url = video_element.text.strip() |             'thumbnails': ('thumb', {self._fixup_thumb}), | ||||||
|  |  | ||||||
|         title = xpath_text(video_xml, './track/title', 'title', fatal=True) |  | ||||||
|  |  | ||||||
|         uploader = xpath_text(video_xml, './track/nickname', 'uploader') |  | ||||||
|         uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id') |  | ||||||
|         duration = int_or_none(xpath_text( |  | ||||||
|             video_xml, './track/duration', 'duration')) |  | ||||||
|         thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail') |  | ||||||
|  |  | ||||||
|         common_entry = { |  | ||||||
|             'uploader': uploader, |  | ||||||
|             'uploader_id': uploader_id, |  | ||||||
|             'thumbnail': thumbnail, |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         info = common_entry.copy() |  | ||||||
|         info.update({ |  | ||||||
|             'id': video_id, |  | ||||||
|             'title': title, |  | ||||||
|             'duration': duration, |  | ||||||
|         }) |         }) | ||||||
|  |  | ||||||
|         if not video_url: |         entries = [] | ||||||
|             entries = [] |         for file_num, file_element in enumerate( | ||||||
|             file_elements = video_element.findall('./file') |                 traverse_obj(data, ('files', lambda _, v: url_or_none(v['file']))), start=1): | ||||||
|             one = len(file_elements) == 1 |             file_url = file_element['file'] | ||||||
|             for file_num, file_element in enumerate(file_elements, start=1): |             if determine_ext(file_url) == 'm3u8': | ||||||
|                 file_url = url_or_none(file_element.text) |                 formats = self._extract_m3u8_formats( | ||||||
|                 if not file_url: |                     file_url, video_id, 'mp4', m3u8_id='hls', | ||||||
|                     continue |                     note=f'Downloading part {file_num} m3u8 information') | ||||||
|                 key = file_element.get('key', '') |             else: | ||||||
|                 upload_date = unified_strdate(self._search_regex( |                 formats = [{ | ||||||
|                     r'^(\d{8})_', key, 'upload date', default=None)) |                     'url': file_url, | ||||||
|                 if upload_date is not None: |                     'format_id': 'http', | ||||||
|                     # sometimes the upload date isn't included in the file name |                 }] | ||||||
|                     # instead, another random ID is, which may parse as a valid |  | ||||||
|                     # date but be wildly out of a reasonable range |  | ||||||
|                     parsed_date = date_from_str(upload_date) |  | ||||||
|                     if parsed_date.year < 2000 or parsed_date.year >= 2100: |  | ||||||
|                         upload_date = None |  | ||||||
|                 file_duration = int_or_none(file_element.get('duration')) |  | ||||||
|                 format_id = key if key else '%s_%s' % (video_id, file_num) |  | ||||||
|                 if determine_ext(file_url) == 'm3u8': |  | ||||||
|                     formats = self._extract_m3u8_formats( |  | ||||||
|                         file_url, video_id, 'mp4', entry_protocol='m3u8_native', |  | ||||||
|                         m3u8_id='hls', |  | ||||||
|                         note='Downloading part %d m3u8 information' % file_num) |  | ||||||
|                 else: |  | ||||||
|                     formats = [{ |  | ||||||
|                         'url': file_url, |  | ||||||
|                         'format_id': 'http', |  | ||||||
|                     }] |  | ||||||
|                 if not formats and not self.get_param('ignore_no_formats'): |  | ||||||
|                     continue |  | ||||||
|                 file_info = common_entry.copy() |  | ||||||
|                 file_info.update({ |  | ||||||
|                     'id': format_id, |  | ||||||
|                     'title': title if one else '%s (part %d)' % (title, file_num), |  | ||||||
|                     'upload_date': upload_date, |  | ||||||
|                     'duration': file_duration, |  | ||||||
|                     'formats': formats, |  | ||||||
|                 }) |  | ||||||
|                 entries.append(file_info) |  | ||||||
|             entries_info = info.copy() |  | ||||||
|             entries_info.update({ |  | ||||||
|                 '_type': 'multi_video', |  | ||||||
|                 'entries': entries, |  | ||||||
|             }) |  | ||||||
|             return entries_info |  | ||||||
|  |  | ||||||
|         info = { |             entries.append({ | ||||||
|             'id': video_id, |                 **common_info, | ||||||
|             'title': title, |                 'id': file_element.get('file_info_key') or f'{video_id}_{file_num}', | ||||||
|             'uploader': uploader, |                 'title': f'{common_info.get("title") or "Untitled"} (part {file_num})', | ||||||
|             'uploader_id': uploader_id, |                 'formats': formats, | ||||||
|             'duration': duration, |                 **traverse_obj(file_element, { | ||||||
|             'thumbnail': thumbnail, |                     'duration': ('duration', {int_or_none(scale=1000)}), | ||||||
|         } |                     'timestamp': ('file_start', {parse_iso8601(delimiter=' ', timezone=dt.timedelta(hours=9))}), | ||||||
|  |                 }), | ||||||
|         if determine_ext(video_url) == 'm3u8': |  | ||||||
|             info['formats'] = self._extract_m3u8_formats( |  | ||||||
|                 video_url, video_id, 'mp4', entry_protocol='m3u8_native', |  | ||||||
|                 m3u8_id='hls') |  | ||||||
|         else: |  | ||||||
|             app, playpath = video_url.split('mp4:') |  | ||||||
|             info.update({ |  | ||||||
|                 'url': app, |  | ||||||
|                 'ext': 'flv', |  | ||||||
|                 'play_path': 'mp4:' + playpath, |  | ||||||
|                 'rtmp_live': True,  # downloading won't end without this |  | ||||||
|             }) |             }) | ||||||
|  |  | ||||||
|         return info |         if traverse_obj(data, ('adult_status', {str})) == 'notLogin': | ||||||
|  |             if not entries: | ||||||
|  |                 self.raise_login_required( | ||||||
|  |                     'Only users older than 19 are able to watch this video', method='password') | ||||||
|  |             self.report_warning( | ||||||
|  |                 'In accordance with local laws and regulations, underage users are ' | ||||||
|  |                 'restricted from watching adult content. Only content suitable for all ' | ||||||
|  |                 f'ages will be downloaded. {self._login_hint("password")}') | ||||||
|  |  | ||||||
|  |         if not entries and traverse_obj(data, ('sub_upload_type', {str})): | ||||||
|  |             self.raise_login_required('This VOD is for subscribers only', method='password') | ||||||
|  |  | ||||||
|  |         if len(entries) == 1: | ||||||
|  |             return { | ||||||
|  |                 **entries[0], | ||||||
|  |                 'title': common_info.get('title'), | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |         common_info['timestamp'] = traverse_obj(entries, (..., 'timestamp'), get_all=False) | ||||||
|  |  | ||||||
|  |         return self.playlist_result(entries, video_id, multi_video=True, **common_info) | ||||||
|  |  | ||||||
|  |  | ||||||
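Note: The rewritten VOD extractor converts millisecond fields with `int_or_none(scale=1000)` used directly as a transform inside `traverse_obj` templates. A rough standalone sketch of that partial-application behavior, assuming a simplified converter (yt-dlp's real helper supports defaults and more options):

    import functools

    _UNSET = object()

    def int_or_none(v=_UNSET, *, scale=1):
        # With no value, return a ready-made converter (usable as a
        # traverse_obj transform); with a value, convert it or yield None.
        if v is _UNSET:
            return functools.partial(int_or_none, scale=scale)
        try:
            return int(v) // scale
        except (TypeError, ValueError):
            return None

    to_seconds = int_or_none(scale=1000)
    print(to_seconds(213000))  # 213 -- e.g. 'total_file_duration' is in ms
    print(to_seconds(None))    # None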
| class AfreecaTVLiveIE(AfreecaTVIE):  # XXX: Do not subclass from concrete IE | class AfreecaTVCatchStoryIE(AfreecaTVBaseIE): | ||||||
|  |     IE_NAME = 'soop:catchstory' | ||||||
|     IE_NAME = 'afreecatv:live' |     IE_DESC = 'sooplive.co.kr catch story' | ||||||
|     _VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?' |     _VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/player/(?P<id>\d+)/catchstory' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://play.afreecatv.com/pyh3646/237852185', |         'url': 'https://vod.sooplive.co.kr/player/103247/catchstory', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '103247', | ||||||
|  |         }, | ||||||
|  |         'playlist_count': 2, | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         video_id = self._match_id(url) | ||||||
|  |         data = self._call_api( | ||||||
|  |             'catchstory/a/view', video_id, headers={'Referer': url}, | ||||||
|  |             query={'aStoryListIdx': '', 'nStoryIdx': video_id}) | ||||||
|  |  | ||||||
|  |         return self.playlist_result(self._entries(data), video_id) | ||||||
|  |  | ||||||
|  |     def _entries(self, data): | ||||||
|  |         # 'files' is always a list with 1 element | ||||||
|  |         yield from traverse_obj(data, ( | ||||||
|  |             'data', lambda _, v: v['story_type'] == 'catch', | ||||||
|  |             'catch_list', lambda _, v: v['files'][0]['file'], { | ||||||
|  |                 'id': ('files', 0, 'file_info_key', {str}), | ||||||
|  |                 'url': ('files', 0, 'file', {url_or_none}), | ||||||
|  |                 'duration': ('files', 0, 'duration', {int_or_none(scale=1000)}), | ||||||
|  |                 'title': ('title', {str}), | ||||||
|  |                 'uploader': ('writer_nick', {str}), | ||||||
|  |                 'uploader_id': ('writer_id', {str}), | ||||||
|  |                 'thumbnails': ('thumb', {self._fixup_thumb}), | ||||||
|  |                 'timestamp': ('write_timestamp', {int_or_none}), | ||||||
|  |             })) | ||||||
|  |  | ||||||
|  |  | ||||||
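Note: `_entries` leans on callable path elements in `traverse_obj` to keep only catch-type stories whose first file carries a URL. A loose hand-rolled equivalent over made-up sample data (simplified to one level; the real path also descends through 'catch_list'):

    data = {'data': [
        {'story_type': 'catch', 'title': 'kept',
         'files': [{'file': 'https://vod.example/a.mp4'}]},
        {'story_type': 'other', 'title': 'skipped',
         'files': [{'file': None}]},
    ]}

    entries = [
        {'title': item['title'], 'url': item['files'][0]['file']}
        for item in data.get('data') or []
        if item.get('story_type') == 'catch' and item['files'][0].get('file')
    ]
    print(entries)  # [{'title': 'kept', 'url': 'https://vod.example/a.mp4'}]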
|  | class AfreecaTVLiveIE(AfreecaTVBaseIE): | ||||||
|  |     IE_NAME = 'soop:live' | ||||||
|  |     IE_DESC = 'sooplive.co.kr livestreams' | ||||||
|  |     _VALID_URL = r'https?://play\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)(?:/(?P<bno>\d+))?' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://play.sooplive.co.kr/pyh3646/237852185', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '237852185', |             'id': '237852185', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
| @@ -347,94 +268,121 @@ class AfreecaTVLiveIE(AfreecaTVIE):  # XXX: Do not subclass from concrete IE | |||||||
|         }, |         }, | ||||||
|         'skip': 'Livestream has ended', |         'skip': 'Livestream has ended', | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://play.afreeca.com/pyh3646/237852185', |         'url': 'https://play.sooplive.co.kr/pyh3646/237852185', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://play.afreeca.com/pyh3646', |         'url': 'https://play.sooplive.co.kr/pyh3646', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     _LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php' |     _LIVE_API_URL = 'https://live.sooplive.co.kr/afreeca/player_live_api.php' | ||||||
|  |     _WORKING_CDNS = [ | ||||||
|  |         'gcp_cdn',  # live-global-cdn-v02.sooplive.co.kr | ||||||
|  |         'gs_cdn_pc_app',  # pc-app.stream.sooplive.co.kr | ||||||
|  |         'gs_cdn_mobile_web',  # mobile-web.stream.sooplive.co.kr | ||||||
|  |         'gs_cdn_pc_web',  # pc-web.stream.sooplive.co.kr | ||||||
|  |     ] | ||||||
|  |     _BAD_CDNS = [ | ||||||
|  |         'gs_cdn',  # chromecast.afreeca.gscdn.com (cannot resolve) | ||||||
|  |         'gs_cdn_chromecast',  # chromecast.stream.sooplive.co.kr (HTTP Error 400) | ||||||
|  |         'azure_cdn',  # live-global-cdn-v01.sooplive.co.kr (cannot resolve) | ||||||
|  |         'aws_cf',  # live-global-cdn-v03.sooplive.co.kr (cannot resolve) | ||||||
|  |         'kt_cdn',  # kt.stream.sooplive.co.kr (HTTP Error 400) | ||||||
|  |     ] | ||||||
|  |  | ||||||
|     _QUALITIES = ('sd', 'hd', 'hd2k', 'original') |     def _extract_formats(self, channel_info, broadcast_no, aid): | ||||||
|  |         stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.co.kr' | ||||||
|  |  | ||||||
|  |         # If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs | ||||||
|  |         default_cdn_ids = orderedSet([ | ||||||
|  |             *traverse_obj(channel_info, ('CDN', {str}, all, lambda _, v: v not in self._BAD_CDNS)), | ||||||
|  |             *self._WORKING_CDNS, | ||||||
|  |         ]) | ||||||
|  |         cdn_ids = self._configuration_arg('cdn', default_cdn_ids) | ||||||
|  |  | ||||||
|  |         for attempt, cdn_id in enumerate(cdn_ids, start=1): | ||||||
|  |             m3u8_url = traverse_obj(self._download_json( | ||||||
|  |                 urljoin(stream_base_url, 'broad_stream_assign.html'), broadcast_no, | ||||||
|  |                 f'Downloading {cdn_id} stream info', f'Unable to download {cdn_id} stream info', | ||||||
|  |                 fatal=False, query={ | ||||||
|  |                     'return_type': cdn_id, | ||||||
|  |                     'broad_key': f'{broadcast_no}-common-master-hls', | ||||||
|  |                 }), ('view_url', {url_or_none})) | ||||||
|  |             try: | ||||||
|  |                 return self._extract_m3u8_formats( | ||||||
|  |                     m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid}, | ||||||
|  |                     headers={'Referer': 'https://play.sooplive.co.kr/'}) | ||||||
|  |             except ExtractorError as e: | ||||||
|  |                 if attempt == len(cdn_ids): | ||||||
|  |                     raise | ||||||
|  |                 self.report_warning( | ||||||
|  |                     f'{e.cause or e.msg}. Retrying... (attempt {attempt} of {len(cdn_ids)})') | ||||||
|  |  | ||||||
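Note: `_extract_formats` above walks an ordered CDN list, warning and retrying on failure and re-raising only once the last candidate fails. The bare pattern, with a toy `flaky_fetch()` standing in for the m3u8 download (the `.example` host is hypothetical):

    def first_working(candidates, fetch):
        for attempt, candidate in enumerate(candidates, start=1):
            try:
                return fetch(candidate)
            except Exception as e:  # the extractor narrows this to ExtractorError
                if attempt == len(candidates):
                    raise
                print(f'{e}. Retrying... (attempt {attempt} of {len(candidates)})')

    def flaky_fetch(cdn):  # only the last CDN responds in this toy setup
        if cdn != 'gs_cdn_pc_web':
            raise ValueError(f'{cdn} unreachable')
        return f'https://{cdn}.example/master.m3u8'

    print(first_working(['gcp_cdn', 'gs_cdn_pc_app', 'gs_cdn_pc_web'], flaky_fetch))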
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno') |         broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno') | ||||||
|         password = self.get_param('videopassword') |         channel_info = traverse_obj(self._download_json( | ||||||
|  |             self._LIVE_API_URL, broadcaster_id, data=urlencode_postdata({'bid': broadcaster_id})), | ||||||
|  |             ('CHANNEL', {dict})) or {} | ||||||
|  |  | ||||||
|         info = self._download_json(self._LIVE_API_URL, broadcaster_id, fatal=False, |  | ||||||
|                                    data=urlencode_postdata({'bid': broadcaster_id})) or {} |  | ||||||
|         channel_info = info.get('CHANNEL') or {} |  | ||||||
|         broadcaster_id = channel_info.get('BJID') or broadcaster_id |         broadcaster_id = channel_info.get('BJID') or broadcaster_id | ||||||
|         broadcast_no = channel_info.get('BNO') or broadcast_no |         broadcast_no = channel_info.get('BNO') or broadcast_no | ||||||
|         password_protected = channel_info.get('BPWD') |  | ||||||
|         if not broadcast_no: |         if not broadcast_no: | ||||||
|             raise ExtractorError(f'Unable to extract broadcast number ({broadcaster_id} may not be live)', expected=True) |             result = channel_info.get('RESULT') | ||||||
|         if password_protected == 'Y' and password is None: |             if result == 0: | ||||||
|  |                 raise UserNotLive(video_id=broadcaster_id) | ||||||
|  |             elif result == -6: | ||||||
|  |                 self.raise_login_required( | ||||||
|  |                     'This channel is streaming for subscribers only', method='password') | ||||||
|  |             raise ExtractorError('Unable to extract broadcast number') | ||||||
|  |  | ||||||
|  |         password = self.get_param('videopassword') | ||||||
|  |         if channel_info.get('BPWD') == 'Y' and password is None: | ||||||
|             raise ExtractorError( |             raise ExtractorError( | ||||||
|                 'This livestream is protected by a password, use the --video-password option', |                 'This livestream is protected by a password, use the --video-password option', | ||||||
|                 expected=True) |                 expected=True) | ||||||
|  |  | ||||||
|         formats = [] |         token_info = traverse_obj(self._download_json( | ||||||
|         quality_key = qualities(self._QUALITIES) |             self._LIVE_API_URL, broadcast_no, 'Downloading access token for stream', | ||||||
|         for quality_str in self._QUALITIES: |             'Unable to download access token for stream', data=urlencode_postdata(filter_dict({ | ||||||
|             params = { |  | ||||||
|                 'bno': broadcast_no, |                 'bno': broadcast_no, | ||||||
|                 'stream_type': 'common', |                 'stream_type': 'common', | ||||||
|                 'type': 'aid', |                 'type': 'aid', | ||||||
|                 'quality': quality_str, |                 'quality': 'master', | ||||||
|             } |                 'pwd': password, | ||||||
|             if password is not None: |             }))), ('CHANNEL', {dict})) or {} | ||||||
|                 params['pwd'] = password |         aid = token_info.get('AID') | ||||||
|             aid_response = self._download_json( |         if not aid: | ||||||
|                 self._LIVE_API_URL, broadcast_no, fatal=False, |             result = token_info.get('RESULT') | ||||||
|                 data=urlencode_postdata(params), |             if result == 0: | ||||||
|                 note=f'Downloading access token for {quality_str} stream', |                 raise ExtractorError('This livestream has ended', expected=True) | ||||||
|                 errnote=f'Unable to download access token for {quality_str} stream') |             elif result == -6: | ||||||
|             aid = traverse_obj(aid_response, ('CHANNEL', 'AID')) |                 self.raise_login_required('This livestream is for subscribers only', method='password') | ||||||
|             if not aid: |             raise ExtractorError('Unable to extract access token') | ||||||
|                 continue |  | ||||||
|  |  | ||||||
|             stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com' |         formats = self._extract_formats(channel_info, broadcast_no, aid) | ||||||
|             stream_info = self._download_json( |  | ||||||
|                 f'{stream_base_url}/broad_stream_assign.html', broadcast_no, fatal=False, |  | ||||||
|                 query={ |  | ||||||
|                     'return_type': channel_info.get('CDN', 'gcp_cdn'), |  | ||||||
|                     'broad_key': f'{broadcast_no}-common-{quality_str}-hls', |  | ||||||
|                 }, |  | ||||||
|                 note=f'Downloading metadata for {quality_str} stream', |  | ||||||
|                 errnote=f'Unable to download metadata for {quality_str} stream') or {} |  | ||||||
|  |  | ||||||
|             if stream_info.get('view_url'): |         station_info = traverse_obj(self._download_json( | ||||||
|                 formats.append({ |             'https://st.sooplive.co.kr/api/get_station_status.php', broadcast_no, | ||||||
|                     'format_id': quality_str, |             'Downloading channel metadata', 'Unable to download channel metadata', | ||||||
|                     'url': update_url_query(stream_info['view_url'], {'aid': aid}), |             query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {} | ||||||
|                     'ext': 'mp4', |  | ||||||
|                     'protocol': 'm3u8', |  | ||||||
|                     'quality': quality_key(quality_str), |  | ||||||
|                 }) |  | ||||||
|  |  | ||||||
|         station_info = self._download_json( |  | ||||||
|             'https://st.afreecatv.com/api/get_station_status.php', broadcast_no, |  | ||||||
|             query={'szBjId': broadcaster_id}, fatal=False, |  | ||||||
|             note='Downloading channel metadata', errnote='Unable to download channel metadata') or {} |  | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': broadcast_no, |             'id': broadcast_no, | ||||||
|             'title': channel_info.get('TITLE') or station_info.get('station_title'), |             'title': channel_info.get('TITLE') or station_info.get('station_title'), | ||||||
|             'uploader': channel_info.get('BJNICK') or station_info.get('station_name'), |             'uploader': channel_info.get('BJNICK') or station_info.get('station_name'), | ||||||
|             'uploader_id': broadcaster_id, |             'uploader_id': broadcaster_id, | ||||||
|             'timestamp': unified_timestamp(station_info.get('broad_start')), |             'timestamp': parse_iso8601(station_info.get('broad_start'), delimiter=' ', timezone=dt.timedelta(hours=9)), | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|             'is_live': True, |             'is_live': True, | ||||||
|  |             'http_headers': {'Referer': url}, | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |  | ||||||
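Note: `broad_start` here (and `file_start` in the VOD extractor) are space-delimited wall-clock strings in Korean time, which is why `parse_iso8601` is passed `delimiter=' '` and a fixed UTC+9 offset. A plain-stdlib sketch of the same conversion:

    import datetime as dt

    def kst_to_epoch(value):
        # '2017-04-12 02:37:45' (KST) -> Unix timestamp in seconds
        kst = dt.timezone(dt.timedelta(hours=9))
        parsed = dt.datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
        return int(parsed.replace(tzinfo=kst).timestamp())

    print(kst_to_epoch('2017-04-12 02:37:45'))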
| class AfreecaTVUserIE(InfoExtractor): | class AfreecaTVUserIE(AfreecaTVBaseIE): | ||||||
|     IE_NAME = 'afreecatv:user' |     IE_NAME = 'soop:user' | ||||||
|     _VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?' |     _VALID_URL = r'https?://ch\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)/vods/?(?P<slug_type>[^/?#]+)?' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://bj.afreecatv.com/ryuryu24/vods/review', |         'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/review', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             '_type': 'playlist', |             '_type': 'playlist', | ||||||
|             'id': 'ryuryu24', |             'id': 'ryuryu24', | ||||||
| @@ -442,7 +390,7 @@ class AfreecaTVUserIE(InfoExtractor): | |||||||
|         }, |         }, | ||||||
|         'playlist_count': 218, |         'playlist_count': 218, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://bj.afreecatv.com/parang1995/vods/highlight', |         'url': 'https://ch.sooplive.co.kr/parang1995/vods/highlight', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             '_type': 'playlist', |             '_type': 'playlist', | ||||||
|             'id': 'parang1995', |             'id': 'parang1995', | ||||||
| @@ -450,7 +398,7 @@ class AfreecaTVUserIE(InfoExtractor): | |||||||
|         }, |         }, | ||||||
|         'playlist_count': 997, |         'playlist_count': 997, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://bj.afreecatv.com/ryuryu24/vods', |         'url': 'https://ch.sooplive.co.kr/ryuryu24/vods', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             '_type': 'playlist', |             '_type': 'playlist', | ||||||
|             'id': 'ryuryu24', |             'id': 'ryuryu24', | ||||||
| @@ -458,7 +406,7 @@ class AfreecaTVUserIE(InfoExtractor): | |||||||
|         }, |         }, | ||||||
|         'playlist_count': 221, |         'playlist_count': 221, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip', |         'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/balloonclip', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             '_type': 'playlist', |             '_type': 'playlist', | ||||||
|             'id': 'ryuryu24', |             'id': 'ryuryu24', | ||||||
| @@ -470,12 +418,12 @@ class AfreecaTVUserIE(InfoExtractor): | |||||||
|  |  | ||||||
|     def _fetch_page(self, user_id, user_type, page): |     def _fetch_page(self, user_id, user_type, page): | ||||||
|         page += 1 |         page += 1 | ||||||
|         info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id, |         info = self._download_json(f'https://chapi.sooplive.co.kr/api/{user_id}/vods/{user_type}', user_id, | ||||||
|                                    query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'}, |                                    query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'}, | ||||||
|                                    note=f'Downloading {user_type} video page {page}') |                                    note=f'Downloading {user_type} video page {page}') | ||||||
|         for item in info['data']: |         for item in info['data']: | ||||||
|             yield self.url_result( |             yield self.url_result( | ||||||
|                 f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no']) |                 f'https://vod.sooplive.co.kr/player/{item["title_no"]}/', AfreecaTVIE, item['title_no']) | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         user_id, user_type = self._match_valid_url(url).group('id', 'slug_type') |         user_id, user_type = self._match_valid_url(url).group('id', 'slug_type') | ||||||
|   | |||||||
| @@ -146,7 +146,7 @@ class TokFMPodcastIE(InfoExtractor): | |||||||
|         'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych', |         'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '91275', |             'id': '91275', | ||||||
|             'ext': 'aac', |             'ext': 'mp3', | ||||||
|             'title': 'md5:a9b15488009065556900169fb8061cce', |             'title': 'md5:a9b15488009065556900169fb8061cce', | ||||||
|             'episode': 'md5:a9b15488009065556900169fb8061cce', |             'episode': 'md5:a9b15488009065556900169fb8061cce', | ||||||
|             'series': 'Analizy', |             'series': 'Analizy', | ||||||
| @@ -164,23 +164,20 @@ class TokFMPodcastIE(InfoExtractor): | |||||||
|             raise ExtractorError('No such podcast', expected=True) |             raise ExtractorError('No such podcast', expected=True) | ||||||
|         metadata = metadata[0] |         metadata = metadata[0] | ||||||
|  |  | ||||||
|         formats = [] |         mp3_url = self._download_json( | ||||||
|         for ext in ('aac', 'mp3'): |             'https://api.podcast.radioagora.pl/api4/getSongUrl', | ||||||
|             url_data = self._download_json( |             media_id, 'Downloading podcast mp3 URL', query={ | ||||||
|                 f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}', |                 'podcast_id': media_id, | ||||||
|                 media_id, 'Downloading podcast %s URL' % ext) |                 'device_id': str(uuid.uuid4()), | ||||||
|             # prevents inserting the mp3 (default) multiple times |                 'ppre': 'false', | ||||||
|             if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']: |                 'audio': 'mp3', | ||||||
|                 formats.append({ |             })['link_ssl'] | ||||||
|                     'url': url_data['link_ssl'], |  | ||||||
|                     'ext': ext, |  | ||||||
|                     'vcodec': 'none', |  | ||||||
|                     'acodec': ext, |  | ||||||
|                 }) |  | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': media_id, |             'id': media_id, | ||||||
|             'formats': formats, |             'url': mp3_url, | ||||||
|  |             'vcodec': 'none', | ||||||
|  |             'ext': 'mp3', | ||||||
|             'title': metadata.get('podcast_name'), |             'title': metadata.get('podcast_name'), | ||||||
|             'series': metadata.get('series_name'), |             'series': metadata.get('series_name'), | ||||||
|             'episode': metadata.get('podcast_name'), |             'episode': metadata.get('podcast_name'), | ||||||
| @@ -206,8 +203,8 @@ class TokFMAuditionIE(InfoExtractor): | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def _create_url(id): |     def _create_url(video_id): | ||||||
|         return f'https://audycje.tokfm.pl/audycja/{id}' |         return f'https://audycje.tokfm.pl/audycja/{video_id}' | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         audition_id = self._match_id(url) |         audition_id = self._match_id(url) | ||||||
|   | |||||||
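Note: The TokFM rewrite above drops the hand-assembled query string in favor of `query=` parameters and requests only the mp3 variant, presumably because the endpoint tended to return the mp3 regardless (the old code already guarded against duplicate mp3 entries). A hedged reconstruction of the resulting request URL (endpoint and parameter names taken from the diff; the sample podcast_id is from the test case):

    import uuid
    from urllib.parse import urlencode

    params = {
        'podcast_id': '91275',
        'device_id': str(uuid.uuid4()),
        'ppre': 'false',
        'audio': 'mp3',
    }
    url = 'https://api.podcast.radioagora.pl/api4/getSongUrl?' + urlencode(params)
    print(url)  # the JSON response's 'link_ssl' field is the direct mp3 URL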
| @@ -1,63 +0,0 @@ | |||||||
| import re |  | ||||||
|  |  | ||||||
| from .common import InfoExtractor |  | ||||||
| from ..utils import ( |  | ||||||
|     int_or_none, |  | ||||||
|     parse_duration, |  | ||||||
|     parse_iso8601, |  | ||||||
| ) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class AirMozillaIE(InfoExtractor): |  | ||||||
|     _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?' |  | ||||||
|     _TEST = { |  | ||||||
|         'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/', |  | ||||||
|         'md5': '8d02f53ee39cf006009180e21df1f3ba', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '6x4q2w', |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco', |  | ||||||
|             'thumbnail': r're:https?://.*/poster\.jpg', |  | ||||||
|             'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...', |  | ||||||
|             'timestamp': 1422487800, |  | ||||||
|             'upload_date': '20150128', |  | ||||||
|             'location': 'SFO Commons', |  | ||||||
|             'duration': 3780, |  | ||||||
|             'view_count': int, |  | ||||||
|             'categories': ['Main', 'Privacy'], |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |  | ||||||
|         display_id = self._match_id(url) |  | ||||||
|         webpage = self._download_webpage(url, display_id) |  | ||||||
|         video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id') |  | ||||||
|  |  | ||||||
|         embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id) |  | ||||||
|         jwconfig = self._parse_json(self._search_regex( |  | ||||||
|             r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config'] |  | ||||||
|  |  | ||||||
|         info_dict = self._parse_jwplayer_data(jwconfig, video_id) |  | ||||||
|         view_count = int_or_none(self._html_search_regex( |  | ||||||
|             r'Views since archived: ([0-9]+)', |  | ||||||
|             webpage, 'view count', fatal=False)) |  | ||||||
|         timestamp = parse_iso8601(self._html_search_regex( |  | ||||||
|             r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False)) |  | ||||||
|         duration = parse_duration(self._search_regex( |  | ||||||
|             r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)', |  | ||||||
|             webpage, 'duration', fatal=False)) |  | ||||||
|  |  | ||||||
|         info_dict.update({ |  | ||||||
|             'id': video_id, |  | ||||||
|             'title': self._og_search_title(webpage), |  | ||||||
|             'url': self._og_search_url(webpage), |  | ||||||
|             'display_id': display_id, |  | ||||||
|             'description': self._og_search_description(webpage), |  | ||||||
|             'timestamp': timestamp, |  | ||||||
|             'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None), |  | ||||||
|             'duration': duration, |  | ||||||
|             'view_count': view_count, |  | ||||||
|             'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage), |  | ||||||
|         }) |  | ||||||
|  |  | ||||||
|         return info_dict |  | ||||||
| @@ -5,7 +5,7 @@ from ..utils import ( | |||||||
|     int_or_none, |     int_or_none, | ||||||
|     mimetype2ext, |     mimetype2ext, | ||||||
|     parse_iso8601, |     parse_iso8601, | ||||||
|     traverse_obj |     traverse_obj, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -26,7 +26,7 @@ class AirTVIE(InfoExtractor): | |||||||
|             'view_count': int, |             'view_count': int, | ||||||
|             'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg', |             'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg', | ||||||
|             'timestamp': 1664792603, |             'timestamp': 1664792603, | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         # with youtube_id |         # with youtube_id | ||||||
|         'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q', |         'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q', | ||||||
| @@ -54,7 +54,7 @@ class AirTVIE(InfoExtractor): | |||||||
|             'channel': 'Newsflare', |             'channel': 'Newsflare', | ||||||
|             'duration': 37, |             'duration': 37, | ||||||
|             'upload_date': '20180511', |             'upload_date': '20180511', | ||||||
|         } |         }, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _get_formats_and_subtitle(self, json_data, video_id): |     def _get_formats_and_subtitle(self, json_data, video_id): | ||||||
|   | |||||||
| @@ -22,7 +22,7 @@ class AitubeKZVideoIE(InfoExtractor): | |||||||
|             'timestamp': 1667370519, |             'timestamp': 1667370519, | ||||||
|             'title': 'Ангел хранитель 1 серия', |             'title': 'Ангел хранитель 1 серия', | ||||||
|             'channel_follower_count': int, |             'channel_follower_count': int, | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         # embed url |         # embed url | ||||||
|         'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c', |         'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c', | ||||||
|   | |||||||
| @@ -1,5 +1,4 @@ | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import compat_str |  | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     float_or_none, |     float_or_none, | ||||||
|     try_get, |     try_get, | ||||||
| @@ -44,7 +43,7 @@ class AliExpressLiveIE(InfoExtractor): | |||||||
|             'title': title, |             'title': title, | ||||||
|             'thumbnail': data.get('coverUrl'), |             'thumbnail': data.get('coverUrl'), | ||||||
|             'uploader': try_get( |             'uploader': try_get( | ||||||
|                 data, lambda x: x['followBar']['name'], compat_str), |                 data, lambda x: x['followBar']['name'], str), | ||||||
|             'timestamp': float_or_none(data.get('startTimeLong'), scale=1000), |             'timestamp': float_or_none(data.get('startTimeLong'), scale=1000), | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -18,7 +18,7 @@ class AlJazeeraIE(InfoExtractor): | |||||||
|             'timestamp': 1636219149, |             'timestamp': 1636219149, | ||||||
|             'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.', |             'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.', | ||||||
|             'upload_date': '20211106', |             'upload_date': '20211106', | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu', |         'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
| @@ -33,7 +33,7 @@ class AlJazeeraIE(InfoExtractor): | |||||||
|     BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P<account>\d+)/(?P<player_id>[a-zA-Z0-9]+)_(?P<embed>[^/]+)/index.html\?videoId=(?P<id>\d+)' |     BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P<account>\d+)/(?P<player_id>[a-zA-Z0-9]+)_(?P<embed>[^/]+)/index.html\?videoId=(?P<id>\d+)' | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         base, post_type, id = self._match_valid_url(url).groups() |         base, post_type, display_id = self._match_valid_url(url).groups() | ||||||
|         wp = { |         wp = { | ||||||
|             'balkans.aljazeera.net': 'ajb', |             'balkans.aljazeera.net': 'ajb', | ||||||
|             'chinese.aljazeera.net': 'chinese', |             'chinese.aljazeera.net': 'chinese', | ||||||
| @@ -47,11 +47,11 @@ class AlJazeeraIE(InfoExtractor): | |||||||
|             'news': 'news', |             'news': 'news', | ||||||
|         }[post_type.split('/')[0]] |         }[post_type.split('/')[0]] | ||||||
|         video = self._download_json( |         video = self._download_json( | ||||||
|             f'https://{base}/graphql', id, query={ |             f'https://{base}/graphql', display_id, query={ | ||||||
|                 'wp-site': wp, |                 'wp-site': wp, | ||||||
|                 'operationName': 'ArchipelagoSingleArticleQuery', |                 'operationName': 'ArchipelagoSingleArticleQuery', | ||||||
|                 'variables': json.dumps({ |                 'variables': json.dumps({ | ||||||
|                     'name': id, |                     'name': display_id, | ||||||
|                     'postType': post_type, |                     'postType': post_type, | ||||||
|                 }), |                 }), | ||||||
|             }, headers={ |             }, headers={ | ||||||
| @@ -64,7 +64,7 @@ class AlJazeeraIE(InfoExtractor): | |||||||
|         embed = 'default' |         embed = 'default' | ||||||
|  |  | ||||||
|         if video_id is None: |         if video_id is None: | ||||||
|             webpage = self._download_webpage(url, id) |             webpage = self._download_webpage(url, display_id) | ||||||
|  |  | ||||||
|             account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id', |             account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id', | ||||||
|                                                                      group=(1, 2, 3, 4), default=(None, None, None, None)) |                                                                      group=(1, 2, 3, 4), default=(None, None, None, None)) | ||||||
| @@ -73,11 +73,11 @@ class AlJazeeraIE(InfoExtractor): | |||||||
|                 return { |                 return { | ||||||
|                     '_type': 'url_transparent', |                     '_type': 'url_transparent', | ||||||
|                     'url': url, |                     'url': url, | ||||||
|                     'ie_key': 'Generic' |                     'ie_key': 'Generic', | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             '_type': 'url_transparent', |             '_type': 'url_transparent', | ||||||
|             'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}', |             'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}', | ||||||
|             'ie_key': 'BrightcoveNew' |             'ie_key': 'BrightcoveNew', | ||||||
|         } |         } | ||||||
|   | |||||||
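Note: The Al Jazeera change renaming `id` to `display_id` is more than style: binding `id` locally shadows Python's builtin `id()` for the rest of the function. A contrived illustration (hypothetical helpers, not extractor code):

    def shadowed(url):
        id = url.rsplit('/', 1)[-1]  # builtin id() is now unreachable here
        return id(url)               # TypeError if this line ever runs

    def clean(url):
        display_id = url.rsplit('/', 1)[-1]
        return display_id, id(url)   # builtin still available

    print(clean('https://balkans.aljazeera.net/videos/2021/11/6/example'))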
| @@ -1,5 +1,4 @@ | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import compat_str |  | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     qualities, |     qualities, | ||||||
| @@ -95,11 +94,11 @@ class AllocineIE(InfoExtractor): | |||||||
|             duration = int_or_none(video.get('duration')) |             duration = int_or_none(video.get('duration')) | ||||||
|             view_count = int_or_none(video.get('view_count')) |             view_count = int_or_none(video.get('view_count')) | ||||||
|             timestamp = unified_timestamp(try_get( |             timestamp = unified_timestamp(try_get( | ||||||
|                 video, lambda x: x['added_at']['date'], compat_str)) |                 video, lambda x: x['added_at']['date'], str)) | ||||||
|         else: |         else: | ||||||
|             video_id = display_id |             video_id = display_id | ||||||
|             media_data = self._download_json( |             media_data = self._download_json( | ||||||
|                 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id) |                 f'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media={video_id}', display_id) | ||||||
|             title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné')) |             title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné')) | ||||||
|             for key, value in media_data['video'].items(): |             for key, value in media_data['video'].items(): | ||||||
|                 if not key.endswith('Path'): |                 if not key.endswith('Path'): | ||||||
|   | |||||||
plugins/youtube_download/yt_dlp/extractor/allstar.py | 252 lines (new file)
							| @@ -0,0 +1,252 @@ | |||||||
|  | import functools | ||||||
|  | import json | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     ExtractorError, | ||||||
|  |     OnDemandPagedList, | ||||||
|  |     int_or_none, | ||||||
|  |     join_nonempty, | ||||||
|  |     parse_qs, | ||||||
|  |     urljoin, | ||||||
|  | ) | ||||||
|  | from ..utils.traversal import traverse_obj | ||||||
|  |  | ||||||
|  | _FIELDS = ''' | ||||||
|  |     _id | ||||||
|  |     clipImageSource | ||||||
|  |     clipImageThumb | ||||||
|  |     clipLink | ||||||
|  |     clipTitle | ||||||
|  |     createdDate | ||||||
|  |     shareId | ||||||
|  |     user { _id } | ||||||
|  |     username | ||||||
|  |     views''' | ||||||
|  |  | ||||||
|  | _EXTRA_FIELDS = ''' | ||||||
|  |     clipLength | ||||||
|  |     clipSizeBytes''' | ||||||
|  |  | ||||||
|  | _QUERIES = { | ||||||
|  |     'clip': '''query ($id: String!) { | ||||||
|  |         video: getClip(clipIdentifier: $id) { | ||||||
|  |             %s %s | ||||||
|  |         } | ||||||
|  |     }''' % (_FIELDS, _EXTRA_FIELDS),  # noqa: UP031 | ||||||
|  |     'montage': '''query ($id: String!) { | ||||||
|  |         video: getMontage(clipIdentifier: $id) { | ||||||
|  |             %s | ||||||
|  |         } | ||||||
|  |     }''' % _FIELDS,  # noqa: UP031 | ||||||
|  |     'Clips': '''query ($page: Int!, $user: String!, $game: Int) { | ||||||
|  |         videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) { | ||||||
|  |             data { %s %s } | ||||||
|  |         } | ||||||
|  |     }''' % (_FIELDS, _EXTRA_FIELDS),  # noqa: UP031 | ||||||
|  |     'Montages': '''query ($page: Int!, $user: String!) { | ||||||
|  |         videos: montages(search: createdDate, page: $page, user: $user) { | ||||||
|  |             data { %s } | ||||||
|  |         } | ||||||
|  |     }''' % _FIELDS,  # noqa: UP031 | ||||||
|  |     'Mobile Clips': '''query ($page: Int!, $user: String!) { | ||||||
|  |         videos: clips(search: createdDate, page: $page, user: $user, mobile: true) { | ||||||
|  |             data { %s %s } | ||||||
|  |         } | ||||||
|  |     }''' % (_FIELDS, _EXTRA_FIELDS),  # noqa: UP031 | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class AllstarBaseIE(InfoExtractor): | ||||||
|  |     @staticmethod | ||||||
|  |     def _parse_video_data(video_data): | ||||||
|  |         def media_url_or_none(path): | ||||||
|  |             return urljoin('https://media.allstar.gg/', path) | ||||||
|  |  | ||||||
|  |         info = traverse_obj(video_data, { | ||||||
|  |             'id': ('_id', {str}), | ||||||
|  |             'display_id': ('shareId', {str}), | ||||||
|  |             'title': ('clipTitle', {str}), | ||||||
|  |             'url': ('clipLink', {media_url_or_none}), | ||||||
|  |             'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}), | ||||||
|  |             'duration': ('clipLength', {int_or_none}), | ||||||
|  |             'filesize': ('clipSizeBytes', {int_or_none}), | ||||||
|  |             'timestamp': ('createdDate', {int_or_none(scale=1000)}), | ||||||
|  |             'uploader': ('username', {str}), | ||||||
|  |             'uploader_id': ('user', '_id', {str}), | ||||||
|  |             'view_count': ('views', {int_or_none}), | ||||||
|  |         }) | ||||||
|  |  | ||||||
|  |         if info.get('id') and info.get('url'): | ||||||
|  |             basename = 'clip' if '/clips/' in info['url'] else 'montage' | ||||||
|  |             info['webpage_url'] = f'https://allstar.gg/{basename}?{basename}={info["id"]}' | ||||||
|  |  | ||||||
|  |         info.update({ | ||||||
|  |             'extractor_key': AllstarIE.ie_key(), | ||||||
|  |             'extractor': AllstarIE.IE_NAME, | ||||||
|  |             'uploader_url': urljoin('https://allstar.gg/u/', info.get('uploader_id')), | ||||||
|  |         }) | ||||||
|  |  | ||||||
|  |         return info | ||||||
|  |  | ||||||
|  |     def _call_api(self, query, variables, path, video_id=None, note=None): | ||||||
|  |         response = self._download_json( | ||||||
|  |             'https://a1.allstar.gg/graphql', video_id, note=note, | ||||||
|  |             headers={'content-type': 'application/json'}, | ||||||
|  |             data=json.dumps({'variables': variables, 'query': query}).encode()) | ||||||
|  |  | ||||||
|  |         errors = traverse_obj(response, ('errors', ..., 'message', {str})) | ||||||
|  |         if errors: | ||||||
|  |             raise ExtractorError('; '.join(errors)) | ||||||
|  |  | ||||||
|  |         return traverse_obj(response, path) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class AllstarIE(AllstarBaseIE): | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?P<type>(?:clip|montage))\?(?P=type)=(?P<id>[^/?#&]+)' | ||||||
|  |  | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://allstar.gg/clip?clip=64482c2da9eec30008a67d1b', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '64482c2da9eec30008a67d1b', | ||||||
|  |             'title': '4K on Inferno', | ||||||
|  |             'url': 'md5:66befb5381eef0c9456026386c25fa55', | ||||||
|  |             'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$', | ||||||
|  |             'uploader': 'chrk.', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'duration': 20, | ||||||
|  |             'filesize': 21199257, | ||||||
|  |             'timestamp': 1682451501, | ||||||
|  |             'uploader_id': '62b8bdfc9021052f7905882d', | ||||||
|  |             'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', | ||||||
|  |             'upload_date': '20230425', | ||||||
|  |             'view_count': int, | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://allstar.gg/clip?clip=8LJLY4JKB', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '64a1ec6b887f4c0008dc50b8', | ||||||
|  |             'display_id': '8LJLY4JKB', | ||||||
|  |             'title': 'AK-47 3K on Mirage', | ||||||
|  |             'url': 'md5:dde224fd12f035c0e2529a4ae34c4283', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$', | ||||||
|  |             'duration': 16, | ||||||
|  |             'filesize': 30175859, | ||||||
|  |             'timestamp': 1688333419, | ||||||
|  |             'uploader': 'cherokee', | ||||||
|  |             'uploader_id': '62b8bdfc9021052f7905882d', | ||||||
|  |             'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', | ||||||
|  |             'upload_date': '20230702', | ||||||
|  |             'view_count': int, | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '643e64089da7e9363e1fa66c', | ||||||
|  |             'display_id': 'APQLGM2IMXW', | ||||||
|  |             'title': 'cherokee Rapid Fire Snipers Montage', | ||||||
|  |             'url': 'md5:a3ee356022115db2b27c81321d195945', | ||||||
|  |             'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'timestamp': 1681810448, | ||||||
|  |             'uploader': 'cherokee', | ||||||
|  |             'uploader_id': '62b8bdfc9021052f7905882d', | ||||||
|  |             'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', | ||||||
|  |             'upload_date': '20230418', | ||||||
|  |             'view_count': int, | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://allstar.gg/montage?montage=RILJMH6QOS', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '64a2697372ce3703de29e868', | ||||||
|  |             'display_id': 'RILJMH6QOS', | ||||||
|  |             'title': 'cherokee Rapid Fire Snipers Montage', | ||||||
|  |             'url': 'md5:d5672e6f88579730c2310a80fdbc4030', | ||||||
|  |             'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'timestamp': 1688365434, | ||||||
|  |             'uploader': 'cherokee', | ||||||
|  |             'uploader_id': '62b8bdfc9021052f7905882d', | ||||||
|  |             'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', | ||||||
|  |             'upload_date': '20230703', | ||||||
|  |             'view_count': int, | ||||||
|  |         }, | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         query_id, video_id = self._match_valid_url(url).group('type', 'id') | ||||||
|  |  | ||||||
|  |         return self._parse_video_data( | ||||||
|  |             self._call_api( | ||||||
|  |                 _QUERIES.get(query_id), {'id': video_id}, ('data', 'video'), video_id)) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class AllstarProfileIE(AllstarBaseIE): | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?:profile\?user=|u/)(?P<id>[^/?#&]+)' | ||||||
|  |  | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://allstar.gg/profile?user=62b8bdfc9021052f7905882d', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '62b8bdfc9021052f7905882d-clips', | ||||||
|  |             'title': 'cherokee - Clips', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 15, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '62b8bdfc9021052f7905882d-clips-730', | ||||||
|  |             'title': 'cherokee - Clips - 730', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 15, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '62b8bdfc9021052f7905882d-montages', | ||||||
|  |             'title': 'cherokee - Montages', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 4, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '62b8bdfc9021052f7905882d-mobile', | ||||||
|  |             'title': 'cherokee - Mobile Clips', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 1, | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     _PAGE_SIZE = 10 | ||||||
|  |  | ||||||
|  |     def _get_page(self, user_id, display_id, game, query, page_num): | ||||||
|  |         page_num += 1 | ||||||
|  |  | ||||||
|  |         for video_data in self._call_api( | ||||||
|  |                 query, { | ||||||
|  |                     'user': user_id, | ||||||
|  |                     'page': page_num, | ||||||
|  |                     'game': game, | ||||||
|  |                 }, ('data', 'videos', 'data'), display_id, f'Downloading page {page_num}'): | ||||||
|  |             yield self._parse_video_data(video_data) | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         display_id = self._match_id(url) | ||||||
|  |         profile_data = self._download_json( | ||||||
|  |             urljoin('https://api.allstar.gg/v1/users/profile/', display_id), display_id) | ||||||
|  |         user_id = traverse_obj(profile_data, ('data', ('_id'), {str})) | ||||||
|  |         if not user_id: | ||||||
|  |             raise ExtractorError('Unable to extract the user id') | ||||||
|  |  | ||||||
|  |         username = traverse_obj(profile_data, ('data', 'profile', ('username'), {str})) | ||||||
|  |         url_query = parse_qs(url) | ||||||
|  |         game = traverse_obj(url_query, ('game', 0, {int_or_none})) | ||||||
|  |         query_id = traverse_obj(url_query, ('view', 0), default='Clips') | ||||||
|  |  | ||||||
|  |         if query_id not in ('Clips', 'Montages', 'Mobile Clips'): | ||||||
|  |             raise ExtractorError(f'Unsupported playlist URL type {query_id!r}') | ||||||
|  |  | ||||||
|  |         return self.playlist_result( | ||||||
|  |             OnDemandPagedList( | ||||||
|  |                 functools.partial( | ||||||
|  |                     self._get_page, user_id, display_id, game, _QUERIES.get(query_id)), self._PAGE_SIZE), | ||||||
|  |             playlist_id=join_nonempty(user_id, query_id.lower().split()[0], game), | ||||||
|  |             playlist_title=join_nonempty((username or display_id), query_id, game, delim=' - ')) | ||||||
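Note: the profile extractor above hands a page-fetching callable to OnDemandPagedList, so pages are only requested when something (e.g. --playlist-items) actually reads them. A minimal sketch of the same pattern, with fetch_page as a made-up stand-in for the GraphQL call:

    import functools

    from yt_dlp.utils import OnDemandPagedList

    def fetch_page(user_id, page_num):
        # Stand-in for _call_api(); page_num arrives zero-based, which is why
        # _get_page() above bumps it before hitting the one-based API.
        print(f'fetching page {page_num} for {user_id}')
        return [f'{user_id}-clip-{page_num * 10 + i}' for i in range(10)]

    pages = OnDemandPagedList(functools.partial(fetch_page, 'demo-user'), 10)
    first_five = pages.getslice(0, 5)  # only page 0 is ever fetched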
| @@ -1,9 +1,9 @@ | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     parse_iso8601, |     int_or_none, | ||||||
|     parse_duration, |     parse_duration, | ||||||
|     parse_filesize, |     parse_filesize, | ||||||
|     int_or_none, |     parse_iso8601, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -25,7 +25,7 @@ class AlphaPornoIE(InfoExtractor): | |||||||
|             'tbr': 1145, |             'tbr': 1145, | ||||||
|             'categories': list, |             'categories': list, | ||||||
|             'age_limit': 18, |             'age_limit': 18, | ||||||
|         } |         }, | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|   | |||||||
| @@ -12,7 +12,7 @@ from ..utils import ( | |||||||
| class Alsace20TVBaseIE(InfoExtractor): | class Alsace20TVBaseIE(InfoExtractor): | ||||||
|     def _extract_video(self, video_id, url=None): |     def _extract_video(self, video_id, url=None): | ||||||
|         info = self._download_json( |         info = self._download_json( | ||||||
|             'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ), |             f'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key={video_id}&habillage=0&mode=html', | ||||||
|             video_id) or {} |             video_id) or {} | ||||||
|         title = info.get('titre') |         title = info.get('titre') | ||||||
|  |  | ||||||
| @@ -24,9 +24,9 @@ class Alsace20TVBaseIE(InfoExtractor): | |||||||
|                 else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False)) |                 else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False)) | ||||||
|  |  | ||||||
|         webpage = (url and self._download_webpage(url, video_id, fatal=False)) or '' |         webpage = (url and self._download_webpage(url, video_id, fatal=False)) or '' | ||||||
|         thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage)) |         thumbnail = url_or_none(dict_get(info, ('image', 'preview')) or self._og_search_thumbnail(webpage)) | ||||||
|         upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None) |         upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None) | ||||||
|         upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None |         upload_date = unified_strdate(f'20{upload_date[:2]}-{upload_date[2:4]}-{upload_date[4:]}') if upload_date else None | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': title, |             'title': title, | ||||||
|   | |||||||
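A small worked example of the upload-date reconstruction kept above (the thumbnail URL is hypothetical):

    import re

    thumbnail = 'https://www.alsace20.tv/images/211025_journal.jpg'  # hypothetical
    stamp = re.search(r'/(\d{6})_', thumbnail).group(1)              # '211025'
    iso_date = f'20{stamp[:2]}-{stamp[2:4]}-{stamp[4:]}'             # '2021-10-25'
    # unified_strdate() then normalizes this to the '20211025' form yt-dlp stores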
plugins/youtube_download/yt_dlp/extractor/altcensored.py (new file, 104 lines)
							| @@ -0,0 +1,104 @@ | |||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .archiveorg import ArchiveOrgIE | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     InAdvancePagedList, | ||||||
|  |     clean_html, | ||||||
|  |     int_or_none, | ||||||
|  |     orderedSet, | ||||||
|  |     str_to_int, | ||||||
|  |     urljoin, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class AltCensoredIE(InfoExtractor): | ||||||
|  |     IE_NAME = 'altcensored' | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?altcensored\.com/(?:watch\?v=|embed/)(?P<id>[^/?#]+)' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://www.altcensored.com/watch?v=k0srjLSkga8', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'youtube-k0srjLSkga8', | ||||||
|  |             'ext': 'webm', | ||||||
|  |             'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?", | ||||||
|  |             'display_id': 'k0srjLSkga8.webm', | ||||||
|  |             'release_date': '20180403', | ||||||
|  |             'creators': ['Virginie Vota'], | ||||||
|  |             'release_year': 2018, | ||||||
|  |             'upload_date': '20230318', | ||||||
|  |             'uploader': 'admin@altcensored.com', | ||||||
|  |             'description': 'md5:0b38a8fc04103579d5c1db10a247dc30', | ||||||
|  |             'timestamp': 1679161343, | ||||||
|  |             'track': 'k0srjLSkga8', | ||||||
|  |             'duration': 926.09, | ||||||
|  |             'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg', | ||||||
|  |             'view_count': int, | ||||||
|  |             'categories': ['News & Politics'], | ||||||
|  |         }, | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         video_id = self._match_id(url) | ||||||
|  |         webpage = self._download_webpage(url, video_id) | ||||||
|  |         category = clean_html(self._html_search_regex( | ||||||
|  |             r'<a href="/category/\d+">([^<]+)</a>', webpage, 'category', default=None)) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             '_type': 'url_transparent', | ||||||
|  |             'url': f'https://archive.org/details/youtube-{video_id}', | ||||||
|  |             'ie_key': ArchiveOrgIE.ie_key(), | ||||||
|  |             'view_count': str_to_int(self._html_search_regex( | ||||||
|  |                 r'YouTube Views:(?:\s| )*([\d,]+)', webpage, 'view count', default=None)), | ||||||
|  |             'categories': [category] if category else None, | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class AltCensoredChannelIE(InfoExtractor): | ||||||
|  |     IE_NAME = 'altcensored:channel' | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?altcensored\.com/channel/(?!page|table)(?P<id>[^/?#]+)' | ||||||
|  |     _PAGE_SIZE = 24 | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://www.altcensored.com/channel/UCFPTO55xxHqFqkzRZHu4kcw', | ||||||
|  |         'info_dict': { | ||||||
|  |             'title': 'Virginie Vota', | ||||||
|  |             'id': 'UCFPTO55xxHqFqkzRZHu4kcw', | ||||||
|  |         }, | ||||||
|  |         'playlist_count': 85, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw', | ||||||
|  |         'info_dict': { | ||||||
|  |             'title': 'yukikaze775', | ||||||
|  |             'id': 'UC9CcJ96HKMWn0LZlcxlpFTw', | ||||||
|  |         }, | ||||||
|  |         'playlist_count': 4, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw', | ||||||
|  |         'info_dict': { | ||||||
|  |             'title': 'Mister Metokur', | ||||||
|  |             'id': 'UCfYbb7nga6-icsFWWgS-kWw', | ||||||
|  |         }, | ||||||
|  |         'playlist_count': 121, | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         channel_id = self._match_id(url) | ||||||
|  |         webpage = self._download_webpage( | ||||||
|  |             url, channel_id, 'Download channel webpage', 'Unable to get channel webpage') | ||||||
|  |         title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False) | ||||||
|  |         page_count = int_or_none(self._html_search_regex( | ||||||
|  |             r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>', | ||||||
|  |             webpage, 'page count', default='1')) | ||||||
|  |  | ||||||
|  |         def page_func(page_num): | ||||||
|  |             page_num += 1 | ||||||
|  |             webpage = self._download_webpage( | ||||||
|  |                 f'https://altcensored.com/channel/{channel_id}/page/{page_num}', | ||||||
|  |                 channel_id, note=f'Downloading page {page_num}') | ||||||
|  |  | ||||||
|  |             items = re.findall(r'<a[^>]+href="(/watch\?v=[^"]+)', webpage) | ||||||
|  |             return [self.url_result(urljoin('https://www.altcensored.com', path), AltCensoredIE) | ||||||
|  |                     for path in orderedSet(items)] | ||||||
|  |  | ||||||
|  |         return self.playlist_result( | ||||||
|  |             InAdvancePagedList(page_func, page_count, self._PAGE_SIZE), | ||||||
|  |             playlist_id=channel_id, playlist_title=title) | ||||||
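Unlike the Allstar profile extractor, the channel extractor above can read the total page count off the first webpage, so it uses InAdvancePagedList, which knows up front how many pages exist. A rough sketch with a dummy page function:

    from yt_dlp.utils import InAdvancePagedList

    def page_func(page_num):
        # Dummy page; the real page_func fetches /channel/<id>/page/<page_num + 1>
        return [f'entry-{page_num}-{i}' for i in range(24)]

    playlist = InAdvancePagedList(page_func, 3, 24)  # 3 known pages of up to 24 entries
    entries = playlist.getslice()                    # walks every known page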
| @@ -1,17 +1,13 @@ | |||||||
| import re | import re | ||||||
|  | import urllib.parse | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  |  | ||||||
| from ..compat import ( |  | ||||||
|     compat_urlparse, |  | ||||||
| ) |  | ||||||
|  |  | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|  |     ExtractorError, | ||||||
|  |     clean_html, | ||||||
|  |     int_or_none, | ||||||
|     urlencode_postdata, |     urlencode_postdata, | ||||||
|     urljoin, |     urljoin, | ||||||
|     int_or_none, |  | ||||||
|     clean_html, |  | ||||||
|     ExtractorError |  | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -25,7 +21,7 @@ class AluraIE(InfoExtractor): | |||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '60095', |             'id': '60095', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Referências, ref-set e alter' |             'title': 'Referências, ref-set e alter', | ||||||
|         }, |         }, | ||||||
|         'skip': 'Requires alura account credentials'}, |         'skip': 'Requires alura account credentials'}, | ||||||
|         { |         { | ||||||
| @@ -34,12 +30,12 @@ class AluraIE(InfoExtractor): | |||||||
|             'only_matching': True}, |             'only_matching': True}, | ||||||
|         { |         { | ||||||
|             'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219', |             'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219', | ||||||
|             'only_matching': True} |             'only_matching': True}, | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|  |  | ||||||
|         course, video_id = self._match_valid_url(url) |         course, video_id = self._match_valid_url(url).group('course_name', 'id') | ||||||
|         video_url = self._VIDEO_URL % (course, video_id) |         video_url = self._VIDEO_URL % (course, video_id) | ||||||
|  |  | ||||||
|         video_dict = self._download_json(video_url, video_id, 'Searching for videos') |         video_dict = self._download_json(video_url, video_id, 'Searching for videos') | ||||||
| @@ -52,7 +48,7 @@ class AluraIE(InfoExtractor): | |||||||
|  |  | ||||||
|             formats = [] |             formats = [] | ||||||
|             for video_obj in video_dict: |             for video_obj in video_dict: | ||||||
|                 video_url_m3u8 = video_obj.get('link') |                 video_url_m3u8 = video_obj.get('mp4') | ||||||
|                 video_format = self._extract_m3u8_formats( |                 video_format = self._extract_m3u8_formats( | ||||||
|                     video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native', |                     video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native', | ||||||
|                     m3u8_id='hls', fatal=False) |                     m3u8_id='hls', fatal=False) | ||||||
| @@ -66,7 +62,7 @@ class AluraIE(InfoExtractor): | |||||||
|             return { |             return { | ||||||
|                 'id': video_id, |                 'id': video_id, | ||||||
|                 'title': video_title, |                 'title': video_title, | ||||||
|                 "formats": formats |                 'formats': formats, | ||||||
|             } |             } | ||||||
|  |  | ||||||
|     def _perform_login(self, username, password): |     def _perform_login(self, username, password): | ||||||
| @@ -95,7 +91,7 @@ class AluraIE(InfoExtractor): | |||||||
|             'post url', default=self._LOGIN_URL, group='url') |             'post url', default=self._LOGIN_URL, group='url') | ||||||
|  |  | ||||||
|         if not post_url.startswith('http'): |         if not post_url.startswith('http'): | ||||||
|             post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) |             post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url) | ||||||
|  |  | ||||||
|         response = self._download_webpage( |         response = self._download_webpage( | ||||||
|             post_url, None, 'Logging in', |             post_url, None, 'Logging in', | ||||||
| @@ -107,7 +103,7 @@ class AluraIE(InfoExtractor): | |||||||
|                 r'(?s)<p[^>]+class="alert-message[^"]*">(.+?)</p>', |                 r'(?s)<p[^>]+class="alert-message[^"]*">(.+?)</p>', | ||||||
|                 response, 'error message', default=None) |                 response, 'error message', default=None) | ||||||
|             if error: |             if error: | ||||||
|                 raise ExtractorError('Unable to login: %s' % error, expected=True) |                 raise ExtractorError(f'Unable to login: {error}', expected=True) | ||||||
|             raise ExtractorError('Unable to log in') |             raise ExtractorError('Unable to log in') | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -123,7 +119,7 @@ class AluraCourseIE(AluraIE):  # XXX: Do not subclass from concrete IE | |||||||
|  |  | ||||||
|     @classmethod |     @classmethod | ||||||
|     def suitable(cls, url): |     def suitable(cls, url): | ||||||
|         return False if AluraIE.suitable(url) else super(AluraCourseIE, cls).suitable(url) |         return False if AluraIE.suitable(url) else super().suitable(url) | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|  |  | ||||||
| @@ -161,7 +157,7 @@ class AluraCourseIE(AluraIE):  # XXX: Do not subclass from concrete IE | |||||||
|                         'url': video_url, |                         'url': video_url, | ||||||
|                         'id_key': self.ie_key(), |                         'id_key': self.ie_key(), | ||||||
|                         'chapter': chapter, |                         'chapter': chapter, | ||||||
|                         'chapter_number': chapter_number |                         'chapter_number': chapter_number, | ||||||
|                     } |                     } | ||||||
|                     entries.append(entry) |                     entries.append(entry) | ||||||
|         return self.playlist_result(entries, course_path, course_title) |         return self.playlist_result(entries, course_path, course_title) | ||||||
|   | |||||||
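The Alura fix above swaps tuple-unpacking of the match object (which grabbed the whole match plus the first group rather than the two named groups) for re.Match.group(), which accepts several group names at once. Illustrated with plain re and an approximation of the extractor's pattern:

    import re

    # Approximation of AluraIE._VALID_URL; the real pattern lives in the extractor
    _VALID_URL = r'https?://(?:cursos\.)?alura\.com\.br/course/(?P<course_name>[^/]+)/task/(?P<id>\d+)'

    m = re.match(_VALID_URL, 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219')
    course, video_id = m.group('course_name', 'id')
    print(course, video_id)  # fundamentos-market-digital 55219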
plugins/youtube_download/yt_dlp/extractor/amadeustv.py (new file, 77 lines)
							| @@ -0,0 +1,77 @@ | |||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     ExtractorError, | ||||||
|  |     float_or_none, | ||||||
|  |     int_or_none, | ||||||
|  |     parse_iso8601, | ||||||
|  |     url_or_none, | ||||||
|  | ) | ||||||
|  | from ..utils.traversal import traverse_obj | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class AmadeusTVIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?amadeus\.tv/library/(?P<id>[\da-f]+)' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'http://www.amadeus.tv/library/65091a87ff85af59d9fc54c3', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '5576678021301411311', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'Jieon Park - 第五届珠海莫扎特国际青少年音乐周小提琴C组第三轮', | ||||||
|  |             'thumbnail': 'http://1253584441.vod2.myqcloud.com/a0046a27vodtransbj1253584441/7db4af535576678021301411311/coverBySnapshot_10_0.jpg', | ||||||
|  |             'duration': 1264.8, | ||||||
|  |             'upload_date': '20230918', | ||||||
|  |             'timestamp': 1695034800, | ||||||
|  |             'display_id': '65091a87ff85af59d9fc54c3', | ||||||
|  |             'view_count': int, | ||||||
|  |             'description': 'md5:a0357b9c215489e2067cbae0b777bb95', | ||||||
|  |         }, | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         display_id = self._match_id(url) | ||||||
|  |         webpage = self._download_webpage(url, display_id) | ||||||
|  |  | ||||||
|  |         nuxt_data = self._search_nuxt_data(webpage, display_id, traverse=('fetch', '0')) | ||||||
|  |         video_id = traverse_obj(nuxt_data, ('item', 'video', {str})) | ||||||
|  |  | ||||||
|  |         if not video_id: | ||||||
|  |             raise ExtractorError('Unable to extract actual video ID') | ||||||
|  |  | ||||||
|  |         video_data = self._download_json( | ||||||
|  |             f'http://playvideo.qcloud.com/getplayinfo/v2/1253584441/{video_id}', | ||||||
|  |             video_id, headers={'Referer': 'http://www.amadeus.tv/'}) | ||||||
|  |  | ||||||
|  |         formats = [] | ||||||
|  |         for video in traverse_obj(video_data, ('videoInfo', ('sourceVideo', ('transcodeList', ...)), {dict})): | ||||||
|  |             if not url_or_none(video.get('url')): | ||||||
|  |                 continue | ||||||
|  |             formats.append({ | ||||||
|  |                 **traverse_obj(video, { | ||||||
|  |                     'url': 'url', | ||||||
|  |                     'format_id': ('definition', {lambda x: f'http-{x or "0"}'}), | ||||||
|  |                     'width': ('width', {int_or_none}), | ||||||
|  |                     'height': ('height', {int_or_none}), | ||||||
|  |                     'filesize': (('totalSize', 'size'), {int_or_none}), | ||||||
|  |                     'vcodec': ('videoStreamList', 0, 'codec'), | ||||||
|  |                     'acodec': ('audioStreamList', 0, 'codec'), | ||||||
|  |                     'fps': ('videoStreamList', 0, 'fps', {float_or_none}), | ||||||
|  |                 }, get_all=False), | ||||||
|  |                 'http_headers': {'Referer': 'http://www.amadeus.tv/'}, | ||||||
|  |             }) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': video_id, | ||||||
|  |             'display_id': display_id, | ||||||
|  |             'formats': formats, | ||||||
|  |             **traverse_obj(video_data, { | ||||||
|  |                 'title': ('videoInfo', 'basicInfo', 'name', {str}), | ||||||
|  |                 'thumbnail': ('coverInfo', 'coverUrl', {url_or_none}), | ||||||
|  |                 'duration': ('videoInfo', 'sourceVideo', ('floatDuration', 'duration'), {float_or_none}), | ||||||
|  |             }, get_all=False), | ||||||
|  |             **traverse_obj(nuxt_data, ('item', { | ||||||
|  |                 'title': (('title', 'title_en', 'title_cn'), {str}), | ||||||
|  |                 'description': (('description', 'description_en', 'description_cn'), {str}), | ||||||
|  |                 'timestamp': ('date', {parse_iso8601}), | ||||||
|  |                 'view_count': ('view', {int_or_none}), | ||||||
|  |             }), get_all=False), | ||||||
|  |         } | ||||||
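The new Amadeus TV extractor leans on traverse_obj throughout: a tuple inside the path branches across alternative keys, a {callable} step post-processes the value, and get_all=False keeps only the first hit. A self-contained illustration against a toy payload:

    from yt_dlp.utils import float_or_none
    from yt_dlp.utils.traversal import traverse_obj

    video_data = {  # toy stand-in for the qcloud playinfo response
        'videoInfo': {'sourceVideo': {'floatDuration': None, 'duration': '1264.8'}},
    }

    duration = traverse_obj(
        video_data,
        ('videoInfo', 'sourceVideo', ('floatDuration', 'duration'), {float_or_none}),
        get_all=False)
    print(duration)  # 1264.8; 'floatDuration' is None, so that branch is dropped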
| @@ -1,6 +1,6 @@ | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from .youtube import YoutubeIE |  | ||||||
| from .vimeo import VimeoIE | from .vimeo import VimeoIE | ||||||
|  | from .youtube import YoutubeIE | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     parse_iso8601, |     parse_iso8601, | ||||||
| @@ -25,7 +25,7 @@ class AmaraIE(InfoExtractor): | |||||||
|             'uploader': 'PBS NewsHour', |             'uploader': 'PBS NewsHour', | ||||||
|             'uploader_id': 'PBSNewsHour', |             'uploader_id': 'PBSNewsHour', | ||||||
|             'timestamp': 1549639570, |             'timestamp': 1549639570, | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         # Vimeo |         # Vimeo | ||||||
|         'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011', |         'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011', | ||||||
| @@ -40,8 +40,8 @@ class AmaraIE(InfoExtractor): | |||||||
|             'timestamp': 1294763658, |             'timestamp': 1294763658, | ||||||
|             'upload_date': '20110111', |             'upload_date': '20110111', | ||||||
|             'uploader': 'Sam Morrill', |             'uploader': 'Sam Morrill', | ||||||
|             'uploader_id': 'sammorrill' |             'uploader_id': 'sammorrill', | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         # Direct Link |         # Direct Link | ||||||
|         'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/', |         'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/', | ||||||
| @@ -55,13 +55,13 @@ class AmaraIE(InfoExtractor): | |||||||
|             'subtitles': dict, |             'subtitles': dict, | ||||||
|             'upload_date': '20091007', |             'upload_date': '20091007', | ||||||
|             'timestamp': 1254942511, |             'timestamp': 1254942511, | ||||||
|         } |         }, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|         meta = self._download_json( |         meta = self._download_json( | ||||||
|             'https://amara.org/api/videos/%s/' % video_id, |             f'https://amara.org/api/videos/{video_id}/', | ||||||
|             video_id, query={'format': 'json'}) |             video_id, query={'format': 'json'}) | ||||||
|         title = meta['title'] |         title = meta['title'] | ||||||
|         video_url = meta['all_urls'][0] |         video_url = meta['all_urls'][0] | ||||||
|   | |||||||
| @@ -61,13 +61,13 @@ class AmazonStoreIE(InfoExtractor): | |||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         id = self._match_id(url) |         playlist_id = self._match_id(url) | ||||||
|  |  | ||||||
|         for retry in self.RetryManager(): |         for retry in self.RetryManager(): | ||||||
|             webpage = self._download_webpage(url, id) |             webpage = self._download_webpage(url, playlist_id) | ||||||
|             try: |             try: | ||||||
|                 data_json = self._search_json( |                 data_json = self._search_json( | ||||||
|                     r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id, |                     r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', playlist_id, | ||||||
|                     transform_source=js_to_json) |                     transform_source=js_to_json) | ||||||
|             except ExtractorError as e: |             except ExtractorError as e: | ||||||
|                 retry.error = e |                 retry.error = e | ||||||
| @@ -81,7 +81,7 @@ class AmazonStoreIE(InfoExtractor): | |||||||
|             'height': int_or_none(video.get('videoHeight')), |             'height': int_or_none(video.get('videoHeight')), | ||||||
|             'width': int_or_none(video.get('videoWidth')), |             'width': int_or_none(video.get('videoWidth')), | ||||||
|         } for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')] |         } for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')] | ||||||
|         return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title')) |         return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=data_json.get('title')) | ||||||
|  |  | ||||||
|  |  | ||||||
| class AmazonReviewsIE(InfoExtractor): | class AmazonReviewsIE(InfoExtractor): | ||||||
|   | |||||||
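The Amazon store extractor wraps its page fetch in self.RetryManager(): assigning to retry.error marks the attempt as failed, and the manager decides whether to yield another. A plain-Python sketch of that control flow (the real class also honours --extractor-retries and sleeps between attempts):

    class RetrySketch:
        # Simplified stand-in for yt_dlp.utils.RetryManager
        def __init__(self, attempts=3):
            self.attempts, self.error = attempts, None

        def __iter__(self):
            for _ in range(self.attempts):
                self.error = None
                yield self
                if self.error is None:
                    return        # the loop body succeeded; stop retrying
            raise self.error      # attempts exhausted; re-raise the last failure

    calls = {'n': 0}

    def flaky_fetch():
        calls['n'] += 1
        if calls['n'] < 3:
            raise ValueError('transient failure')
        return 'page contents'

    for retry in RetrySketch():
        try:
            page = flaky_fetch()
        except ValueError as e:
            retry.error = e       # mark the failure; another attempt is yielded
            continue
    print(page)  # 'page contents' on the third try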
| @@ -22,8 +22,11 @@ class AmazonMiniTVBaseIE(InfoExtractor): | |||||||
|  |  | ||||||
|         resp = self._download_json( |         resp = self._download_json( | ||||||
|             f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}', |             f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}', | ||||||
|             asin, note=note, headers={'Content-Type': 'application/json'}, |             asin, note=note, headers={ | ||||||
|             data=json.dumps(data).encode() if data else None, |                 'Content-Type': 'application/json', | ||||||
|  |                 'currentpageurl': '/', | ||||||
|  |                 'currentplatform': 'dWeb', | ||||||
|  |             }, data=json.dumps(data).encode() if data else None, | ||||||
|             query=None if data else { |             query=None if data else { | ||||||
|                 'deviceType': 'A1WMMUXPCUJL4N', |                 'deviceType': 'A1WMMUXPCUJL4N', | ||||||
|                 'contentId': asin, |                 'contentId': asin, | ||||||
| @@ -46,7 +49,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE): | |||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'May I Kiss You?', |             'title': 'May I Kiss You?', | ||||||
|             'language': 'Hindi', |             'language': 'Hindi', | ||||||
|             'thumbnail': r're:^https?://.*\.jpg$', |             'thumbnail': r're:^https?://.*\.(?:jpg|png)$', | ||||||
|             'description': 'md5:a549bfc747973e04feb707833474e59d', |             'description': 'md5:a549bfc747973e04feb707833474e59d', | ||||||
|             'release_timestamp': 1644710400, |             'release_timestamp': 1644710400, | ||||||
|             'release_date': '20220213', |             'release_date': '20220213', | ||||||
| @@ -68,7 +71,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE): | |||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Jahaan', |             'title': 'Jahaan', | ||||||
|             'language': 'Hindi', |             'language': 'Hindi', | ||||||
|             'thumbnail': r're:^https?://.*\.jpg', |             'thumbnail': r're:^https?://.*\.(?:jpg|png)', | ||||||
|             'description': 'md5:05eb765a77bf703f322f120ec6867339', |             'description': 'md5:05eb765a77bf703f322f120ec6867339', | ||||||
|             'release_timestamp': 1647475200, |             'release_timestamp': 1647475200, | ||||||
|             'release_date': '20220317', |             'release_date': '20220317', | ||||||
|   | |||||||
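The MiniTV helper above multiplexes two endpoints: a JSON payload means a GraphQL POST (now carrying the extra currentpageurl/currentplatform headers), no payload means a GET against the prs endpoint with query parameters. A rough standalone equivalent using urllib, for reference only; the extractor itself goes through _download_json:

    import json
    import urllib.parse
    import urllib.request

    def call_minitv_api(asin, data=None):
        url = 'https://www.amazon.in/minitv/api/web/{}'.format('graphql' if data else 'prs')
        if data:  # GraphQL: POST the JSON document
            req = urllib.request.Request(url, data=json.dumps(data).encode(), headers={
                'Content-Type': 'application/json',
                'currentpageurl': '/',
                'currentplatform': 'dWeb',
            })
        else:     # prs: plain GET with query parameters
            req = urllib.request.Request(url + '?' + urllib.parse.urlencode({
                'deviceType': 'A1WMMUXPCUJL4N',
                'contentId': asin,
            }))
        with urllib.request.urlopen(req) as resp:
            return json.load(resp)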
| @@ -26,6 +26,7 @@ class AMCNetworksIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE | |||||||
|             # m3u8 download |             # m3u8 download | ||||||
|             'skip_download': True, |             'skip_download': True, | ||||||
|         }, |         }, | ||||||
|  |         'skip': '404 Not Found', | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge', |         'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
| @@ -63,8 +64,8 @@ class AMCNetworksIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE | |||||||
|         site, display_id = self._match_valid_url(url).groups() |         site, display_id = self._match_valid_url(url).groups() | ||||||
|         requestor_id = self._REQUESTOR_ID_MAP[site] |         requestor_id = self._REQUESTOR_ID_MAP[site] | ||||||
|         page_data = self._download_json( |         page_data = self._download_json( | ||||||
|             'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' |             f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}', | ||||||
|             % (requestor_id.lower(), display_id), display_id)['data'] |             display_id)['data'] | ||||||
|         properties = page_data.get('properties') or {} |         properties = page_data.get('properties') or {} | ||||||
|         query = { |         query = { | ||||||
|             'mbr': 'true', |             'mbr': 'true', | ||||||
| @@ -75,15 +76,15 @@ class AMCNetworksIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE | |||||||
|         try: |         try: | ||||||
|             for v in page_data['children']: |             for v in page_data['children']: | ||||||
|                 if v.get('type') == 'video-player': |                 if v.get('type') == 'video-player': | ||||||
|                     releasePid = v['properties']['currentVideo']['meta']['releasePid'] |                     release_pid = v['properties']['currentVideo']['meta']['releasePid'] | ||||||
|                     tp_path = 'M_UwQC/' + releasePid |                     tp_path = 'M_UwQC/' + release_pid | ||||||
|                     media_url = 'https://link.theplatform.com/s/' + tp_path |                     media_url = 'https://link.theplatform.com/s/' + tp_path | ||||||
|                     video_player_count += 1 |                     video_player_count += 1 | ||||||
|         except KeyError: |         except KeyError: | ||||||
|             pass |             pass | ||||||
|         if video_player_count > 1: |         if video_player_count > 1: | ||||||
|             self.report_warning( |             self.report_warning( | ||||||
|                 'The JSON data has %d video players. Only one will be extracted' % video_player_count) |                 f'The JSON data has {video_player_count} video players. Only one will be extracted') | ||||||
|  |  | ||||||
|         # Fall back to videoPid if releasePid not found. |         # Fall back to videoPid if releasePid not found. | ||||||
|         # TODO: Fall back to videoPid if releasePid manifest uses DRM. |         # TODO: Fall back to videoPid if releasePid manifest uses DRM. | ||||||
| @@ -130,7 +131,7 @@ class AMCNetworksIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE | |||||||
|         }) |         }) | ||||||
|         ns_keys = theplatform_metadata.get('$xmlns', {}).keys() |         ns_keys = theplatform_metadata.get('$xmlns', {}).keys() | ||||||
|         if ns_keys: |         if ns_keys: | ||||||
|             ns = list(ns_keys)[0] |             ns = next(iter(ns_keys)) | ||||||
|             episode = theplatform_metadata.get(ns + '$episodeTitle') or None |             episode = theplatform_metadata.get(ns + '$episodeTitle') or None | ||||||
|             episode_number = int_or_none( |             episode_number = int_or_none( | ||||||
|                 theplatform_metadata.get(ns + '$episode')) |                 theplatform_metadata.get(ns + '$episode')) | ||||||
|   | |||||||
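The small next(iter(ns_keys)) change above is the idiomatic way to take one element from a dict keys view: it avoids materializing a throwaway list just to index it. For example (the namespace prefix is illustrative):

    theplatform_metadata = {'$xmlns': {'pl1': 'http://example.com/ns'}}  # illustrative
    ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
    if ns_keys:
        ns = next(iter(ns_keys))              # first key, no intermediate list
        episode_field = ns + '$episodeTitle'  # -> 'pl1$episodeTitle'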
| @@ -87,13 +87,13 @@ class AmericasTestKitchenIE(InfoExtractor): | |||||||
|             resource_type = 'episodes' |             resource_type = 'episodes' | ||||||
|  |  | ||||||
|         resource = self._download_json( |         resource = self._download_json( | ||||||
|             'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id) |             f'https://www.americastestkitchen.com/api/v6/{resource_type}/{video_id}', video_id) | ||||||
|         video = resource['video'] if is_episode else resource |         video = resource['video'] if is_episode else resource | ||||||
|         episode = resource if is_episode else resource.get('episode') or {} |         episode = resource if is_episode else resource.get('episode') or {} | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             '_type': 'url_transparent', |             '_type': 'url_transparent', | ||||||
|             'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'], |             'url': 'https://player.zype.com/embed/{}.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ'.format(video['zypeId']), | ||||||
|             'ie_key': 'Zype', |             'ie_key': 'Zype', | ||||||
|             'description': clean_html(video.get('description')), |             'description': clean_html(video.get('description')), | ||||||
|             'timestamp': unified_timestamp(video.get('publishDate')), |             'timestamp': unified_timestamp(video.get('publishDate')), | ||||||
| @@ -174,22 +174,22 @@ class AmericasTestKitchenSeasonIE(InfoExtractor): | |||||||
|         ] |         ] | ||||||
|  |  | ||||||
|         if season_number: |         if season_number: | ||||||
|             playlist_id = 'season_%d' % season_number |             playlist_id = f'season_{season_number}' | ||||||
|             playlist_title = 'Season %d' % season_number |             playlist_title = f'Season {season_number}' | ||||||
|             facet_filters.append('search_season_list:' + playlist_title) |             facet_filters.append('search_season_list:' + playlist_title) | ||||||
|         else: |         else: | ||||||
|             playlist_id = show |             playlist_id = show | ||||||
|             playlist_title = title |             playlist_title = title | ||||||
|  |  | ||||||
|         season_search = self._download_json( |         season_search = self._download_json( | ||||||
|             'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug, |             f'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_{slug}_season_desc_production', | ||||||
|             playlist_id, headers={ |             playlist_id, headers={ | ||||||
|                 'Origin': 'https://www.americastestkitchen.com', |                 'Origin': 'https://www.americastestkitchen.com', | ||||||
|                 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805', |                 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805', | ||||||
|                 'X-Algolia-Application-Id': 'Y1FNZXUI30', |                 'X-Algolia-Application-Id': 'Y1FNZXUI30', | ||||||
|             }, query={ |             }, query={ | ||||||
|                 'facetFilters': json.dumps(facet_filters), |                 'facetFilters': json.dumps(facet_filters), | ||||||
|                 'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug, |                 'attributesToRetrieve': f'description,search_{slug}_episode_number,search_document_date,search_url,title,search_atk_episode_season', | ||||||
|                 'attributesToHighlight': '', |                 'attributesToHighlight': '', | ||||||
|                 'hitsPerPage': 1000, |                 'hitsPerPage': 1000, | ||||||
|             }) |             }) | ||||||
| @@ -207,7 +207,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor): | |||||||
|                     'description': episode.get('description'), |                     'description': episode.get('description'), | ||||||
|                     'timestamp': unified_timestamp(episode.get('search_document_date')), |                     'timestamp': unified_timestamp(episode.get('search_document_date')), | ||||||
|                     'season_number': season_number, |                     'season_number': season_number, | ||||||
|                     'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)), |                     'episode_number': int_or_none(episode.get(f'search_{slug}_episode_number')), | ||||||
|                     'ie_key': AmericasTestKitchenIE.ie_key(), |                     'ie_key': AmericasTestKitchenIE.ie_key(), | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|   | |||||||
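For reference, the season request above is a plain Algolia query: the season filter is appended to facetFilters (JSON-encoded into the query string) and attributesToRetrieve is interpolated with the show slug. A sketch of the assembled request, assuming slug 'atk' and season 5; the other facet filters are built earlier in the extractor and omitted here:

    import json

    slug, season_number = 'atk', 5
    facet_filters = [f'search_season_list:Season {season_number}']  # plus show filters built earlier

    index_url = f'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_{slug}_season_desc_production'
    query = {
        'facetFilters': json.dumps(facet_filters),
        'attributesToRetrieve': f'description,search_{slug}_episode_number,search_document_date,search_url,title,search_atk_episode_season',
        'attributesToHighlight': '',
        'hitsPerPage': 1000,
    }
    headers = {
        'Origin': 'https://www.americastestkitchen.com',
        'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
        'X-Algolia-Application-Id': 'Y1FNZXUI30',
    }
    # then: self._download_json(index_url, playlist_id, headers=headers, query=query)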
| @@ -1,7 +1,7 @@ | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     determine_ext, |  | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|  |     determine_ext, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     mimetype2ext, |     mimetype2ext, | ||||||
|     parse_iso8601, |     parse_iso8601, | ||||||
| @@ -19,12 +19,12 @@ class AMPIE(InfoExtractor):  # XXX: Conventionally, base classes should end with | |||||||
|             'Unable to download Akamai AMP feed', transform_source=strip_jsonp) |             'Unable to download Akamai AMP feed', transform_source=strip_jsonp) | ||||||
|         item = feed.get('channel', {}).get('item') |         item = feed.get('channel', {}).get('item') | ||||||
|         if not item: |         if not item: | ||||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error'])) |             raise ExtractorError('{} said: {}'.format(self.IE_NAME, feed['error'])) | ||||||
|  |  | ||||||
|         video_id = item['guid'] |         video_id = item['guid'] | ||||||
|  |  | ||||||
|         def get_media_node(name, default=None): |         def get_media_node(name, default=None): | ||||||
|             media_name = 'media-%s' % name |             media_name = f'media-{name}' | ||||||
|             media_group = item.get('media-group') or item |             media_group = item.get('media-group') or item | ||||||
|             return media_group.get(media_name) or item.get(media_name) or item.get(name, default) |             return media_group.get(media_name) or item.get(media_name) or item.get(name, default) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -5,7 +5,7 @@ from ..utils import ( | |||||||
|     int_or_none, |     int_or_none, | ||||||
|     str_or_none, |     str_or_none, | ||||||
|     traverse_obj, |     traverse_obj, | ||||||
|     unified_timestamp |     unified_timestamp, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -29,7 +29,7 @@ class AnchorFMEpisodeIE(InfoExtractor): | |||||||
|             'release_date': '20230121', |             'release_date': '20230121', | ||||||
|             'release_timestamp': 1674285179, |             'release_timestamp': 1674285179, | ||||||
|             'episode_id': 'e1tpt3d', |             'episode_id': 'e1tpt3d', | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         # embed url |         # embed url | ||||||
|         'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd', |         'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd', | ||||||
| @@ -50,7 +50,7 @@ class AnchorFMEpisodeIE(InfoExtractor): | |||||||
|             'season': 'Season 2', |             'season': 'Season 2', | ||||||
|             'season_number': 2, |             'season_number': 2, | ||||||
|             'episode_id': 'e1shjqd', |             'episode_id': 'e1shjqd', | ||||||
|         } |         }, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     _WEBPAGE_TESTS = [{ |     _WEBPAGE_TESTS = [{ | ||||||
| @@ -72,7 +72,7 @@ class AnchorFMEpisodeIE(InfoExtractor): | |||||||
|             'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg', |             'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg', | ||||||
|             'uploader': 'Podcast Tempo', |             'uploader': 'Podcast Tempo', | ||||||
|             'channel': 'apakatatempo', |             'channel': 'apakatatempo', | ||||||
|         } |         }, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|   | |||||||
| @@ -1,7 +1,7 @@ | |||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import url_or_none, merge_dicts | from ..utils import merge_dicts, url_or_none | ||||||
|  |  | ||||||
|  |  | ||||||
| class AngelIE(InfoExtractor): | class AngelIE(InfoExtractor): | ||||||
| @@ -15,8 +15,8 @@ class AngelIE(InfoExtractor): | |||||||
|             'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons', |             'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons', | ||||||
|             'description': 'md5:73b704897c20ab59c433a9c0a8202d5e', |             'description': 'md5:73b704897c20ab59c433a9c0a8202d5e', | ||||||
|             'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$', |             'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$', | ||||||
|             'duration': 1359.0 |             'duration': 1359.0, | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name', |         'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name', | ||||||
|         'md5': 'e4774bad0a5f0ad2e90d175cafdb797d', |         'md5': 'e4774bad0a5f0ad2e90d175cafdb797d', | ||||||
| @@ -26,8 +26,8 @@ class AngelIE(InfoExtractor): | |||||||
|             'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name', |             'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name', | ||||||
|             'description': 'md5:aadfb4827a94415de5ff6426e6dee3be', |             'description': 'md5:aadfb4827a94415de5ff6426e6dee3be', | ||||||
|             'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$', |             'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$', | ||||||
|             'duration': 3276.0 |             'duration': 3276.0, | ||||||
|         } |         }, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
| @@ -44,7 +44,7 @@ class AngelIE(InfoExtractor): | |||||||
|             'title': self._og_search_title(webpage), |             'title': self._og_search_title(webpage), | ||||||
|             'description': self._og_search_description(webpage), |             'description': self._og_search_description(webpage), | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|             'subtitles': subtitles |             'subtitles': subtitles, | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         # Angel uses cloudinary in the background and supports image transformations. |         # Angel uses cloudinary in the background and supports image transformations. | ||||||
|   | |||||||
| @@ -5,22 +5,26 @@ from ..networking import HEADRequest | |||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     determine_ext, |     determine_ext, | ||||||
|  |     make_archive_id, | ||||||
|     scale_thumbnails_to_max_format_width, |     scale_thumbnails_to_max_format_width, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class Ant1NewsGrBaseIE(InfoExtractor): | class AntennaBaseIE(InfoExtractor): | ||||||
|     def _download_and_extract_api_data(self, video_id, netloc, cid=None): |     def _download_and_extract_api_data(self, video_id, netloc, cid=None): | ||||||
|         url = f'{self.http_scheme()}//{netloc}{self._API_PATH}' |         info = self._download_json(f'{self.http_scheme()}//{netloc}{self._API_PATH}', | ||||||
|         info = self._download_json(url, video_id, query={'cid': cid or video_id}) |                                    video_id, query={'cid': cid or video_id}) | ||||||
|         try: |         if not info.get('url'): | ||||||
|             source = info['url'] |             raise ExtractorError(f'No source found for {video_id}') | ||||||
|         except KeyError: |  | ||||||
|             raise ExtractorError('no source found for %s' % video_id) |         ext = determine_ext(info['url']) | ||||||
|         formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4') |         if ext == 'm3u8': | ||||||
|                          if determine_ext(source) == 'm3u8' else ([{'url': source}], {})) |             formats, subs = self._extract_m3u8_formats_and_subtitles(info['url'], video_id, 'mp4') | ||||||
|  |         else: | ||||||
|  |             formats, subs = [{'url': info['url'], 'format_id': ext}], {} | ||||||
|  |  | ||||||
|         thumbnails = scale_thumbnails_to_max_format_width( |         thumbnails = scale_thumbnails_to_max_format_width( | ||||||
|             formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+') |             formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+') if info.get('thumb') else [] | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': info.get('title'), |             'title': info.get('title'), | ||||||
| @@ -30,21 +34,31 @@ class Ant1NewsGrBaseIE(InfoExtractor): | |||||||
|         } |         } | ||||||
|  |  | ||||||
|  |  | ||||||
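The rewritten AntennaBaseIE branches on determine_ext(): m3u8 sources go through the HLS demuxer, everything else becomes a single direct format tagged with its extension. determine_ext() only inspects the URL path suffix (query strings are ignored), e.g.:

    from yt_dlp.utils import determine_ext

    for url in ('https://example.com/live/master.m3u8?token=abc',  # hypothetical URLs
                'https://example.com/vod/clip.mp4',
                'https://example.com/stream'):
        print(determine_ext(url))
    # -> m3u8, mp4, unknown_video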
| class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE): | class AntennaGrWatchIE(AntennaBaseIE): | ||||||
|     IE_NAME = 'ant1newsgr:watch' |     IE_NAME = 'antenna:watch' | ||||||
|     IE_DESC = 'ant1news.gr videos' |     IE_DESC = 'antenna.gr and ant1news.gr videos' | ||||||
|     _VALID_URL = r'https?://(?P<netloc>(?:www\.)?ant1news\.gr)/watch/(?P<id>\d+)/' |     _VALID_URL = r'https?://(?P<netloc>(?:www\.)?(?:antenna|ant1news)\.gr)/watch/(?P<id>\d+)/' | ||||||
|     _API_PATH = '/templates/data/player' |     _API_PATH = '/templates/data/player' | ||||||
|  |  | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://www.ant1news.gr/watch/1506168/ant1-news-09112021-stis-18-45', |         'url': 'https://www.ant1news.gr/watch/1506168/ant1-news-09112021-stis-18-45', | ||||||
|         'md5': '95925e6b32106754235f2417e0d2dfab', |         'md5': 'c472d9dd7cd233c63aff2ea42201cda6', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '1506168', |             'id': '1506168', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'md5:0ad00fa66ecf8aa233d26ab0dba7514a', |             'title': 'md5:0ad00fa66ecf8aa233d26ab0dba7514a', | ||||||
|             'description': 'md5:18665af715a6dcfeac1d6153a44f16b0', |             'description': 'md5:18665af715a6dcfeac1d6153a44f16b0', | ||||||
|             'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/26d46bf6-8158-4f02-b197-7096c714b2de.jpg', |             'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/26d46bf6-8158-4f02-b197-7096c714b2de\.jpg', | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.antenna.gr/watch/1643812/oi-prodotes-epeisodio-01', | ||||||
|  |         'md5': '8f6f7dd3b1dba4d835ba990e25f31243', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '1643812', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'format_id': 'mp4', | ||||||
|  |             'title': 'ΟΙ ΠΡΟΔΟΤΕΣ – ΕΠΕΙΣΟΔΙΟ 01', | ||||||
|  |             'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/b3d63096-e72d-43c4-87a0-00d4363d242f\.jpg', | ||||||
|         }, |         }, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
| @@ -52,25 +66,26 @@ class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE): | |||||||
|         video_id, netloc = self._match_valid_url(url).group('id', 'netloc') |         video_id, netloc = self._match_valid_url(url).group('id', 'netloc') | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|         info = self._download_and_extract_api_data(video_id, netloc) |         info = self._download_and_extract_api_data(video_id, netloc) | ||||||
|         info['description'] = self._og_search_description(webpage) |         info['description'] = self._og_search_description(webpage, default=None) | ||||||
|  |         info['_old_archive_ids'] = [make_archive_id('Ant1NewsGrWatch', video_id)] | ||||||
|         return info |         return info | ||||||
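Because the watch extractor was renamed (Ant1NewsGrWatchIE to AntennaGrWatchIE), _old_archive_ids is populated so entries already recorded in a --download-archive under the old key still count as downloaded. make_archive_id() simply joins the lower-cased extractor key with the video id:

    from yt_dlp.utils import make_archive_id

    print(make_archive_id('Ant1NewsGrWatch', '1506168'))
    # -> 'ant1newsgrwatch 1506168'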
|  |  | ||||||
|  |  | ||||||
| class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE): | class Ant1NewsGrArticleIE(AntennaBaseIE): | ||||||
|     IE_NAME = 'ant1newsgr:article' |     IE_NAME = 'ant1newsgr:article' | ||||||
|     IE_DESC = 'ant1news.gr articles' |     IE_DESC = 'ant1news.gr articles' | ||||||
|     _VALID_URL = r'https?://(?:www\.)?ant1news\.gr/[^/]+/article/(?P<id>\d+)/' |     _VALID_URL = r'https?://(?:www\.)?ant1news\.gr/[^/]+/article/(?P<id>\d+)/' | ||||||
|  |  | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron', |         'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron', | ||||||
|         'md5': '294f18331bb516539d72d85a82887dcc', |         'md5': '57eb8d12181f0fa2b14b0b138e1de9b6', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '_xvg/m_cmbatw=', |             'id': '_xvg/m_cmbatw=', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411', |             'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411', | ||||||
|             'timestamp': 1603092840, |             'timestamp': 1666166520, | ||||||
|             'upload_date': '20201019', |             'upload_date': '20221019', | ||||||
|             'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/756206d2-d640-40e2-b201-3555abdfc0db.jpg', |             'thumbnail': 'https://ant1media.azureedge.net/imgHandler/1920/756206d2-d640-40e2-b201-3555abdfc0db.jpg', | ||||||
|         }, |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn', |         'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn', | ||||||
| @@ -90,19 +105,19 @@ class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE): | |||||||
|         info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle') |         info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle') | ||||||
|         embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage)) |         embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage)) | ||||||
|         if not embed_urls: |         if not embed_urls: | ||||||
|             raise ExtractorError('no videos found for %s' % video_id, expected=True) |             raise ExtractorError(f'no videos found for {video_id}', expected=True) | ||||||
|         return self.playlist_from_matches( |         return self.playlist_from_matches( | ||||||
|             embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(), |             embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(), | ||||||
|             video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')}) |             video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')}) | ||||||
|  |  | ||||||
|  |  | ||||||
| class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE): | class Ant1NewsGrEmbedIE(AntennaBaseIE): | ||||||
|     IE_NAME = 'ant1newsgr:embed' |     IE_NAME = 'ant1newsgr:embed' | ||||||
|     IE_DESC = 'ant1news.gr embedded videos' |     IE_DESC = 'ant1news.gr embedded videos' | ||||||
|     _BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player' |     _BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player' | ||||||
|     _VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)' |     _VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)' | ||||||
|     _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)'] |     _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)'] | ||||||
|     _API_PATH = '/news/templates/data/jsonPlayer' |     _API_PATH = '/templates/data/jsonPlayer' | ||||||
|  |  | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377', |         'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377', | ||||||
| @@ -8,10 +8,8 @@ import time | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..aes import aes_encrypt | from ..aes import aes_encrypt | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     bytes_to_intlist, |  | ||||||
|     determine_ext, |     determine_ext, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     intlist_to_bytes, |  | ||||||
|     join_nonempty, |     join_nonempty, | ||||||
|     smuggle_url, |     smuggle_url, | ||||||
|     strip_jsonp, |     strip_jsonp, | ||||||
| @@ -33,24 +31,6 @@ class AnvatoIE(InfoExtractor): | |||||||
|     _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'  # from anvplayer.min.js |     _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'  # from anvplayer.min.js | ||||||
|  |  | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         # from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14 |  | ||||||
|         'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441', |  | ||||||
|         'md5': '921919dab3cd0b849ff3d624831ae3e2', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '899441', |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14', |  | ||||||
|             'description': 'md5:85e05a3cc163f8c344340f220521136d', |  | ||||||
|             'upload_date': '20201215', |  | ||||||
|             'timestamp': 1608009755, |  | ||||||
|             'thumbnail': r're:^https?://.*\.jpg', |  | ||||||
|             'uploader': 'NFL', |  | ||||||
|             'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights', |  | ||||||
|                      'Player Highlights', 'Cleveland Browns', 'league'], |  | ||||||
|             'duration': 157, |  | ||||||
|             'categories': ['Entertainment', 'Game', 'Highlights'], |  | ||||||
|         }, |  | ||||||
|     }, { |  | ||||||
|         # from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/ |         # from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/ | ||||||
|         'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455', |         'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455', | ||||||
|         'md5': '837718bcfb3a7778d022f857f7a9b19e', |         'md5': '837718bcfb3a7778d022f857f7a9b19e', | ||||||
| @@ -238,32 +218,7 @@ class AnvatoIE(InfoExtractor): | |||||||
|         'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900', |         'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900', | ||||||
|         'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99', |         'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99', | ||||||
|         'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe', |         'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe', | ||||||
|         'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582' |         'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582', | ||||||
|     } |  | ||||||
|  |  | ||||||
|     def _generate_nfl_token(self, anvack, mcp_id): |  | ||||||
|         reroute = self._download_json( |  | ||||||
|             'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials', |  | ||||||
|             headers={'X-Domain-Id': 100}, note='Fetching token info') |  | ||||||
|         token_type = reroute.get('token_type') or 'Bearer' |  | ||||||
|         auth_token = f'{token_type} {reroute["access_token"]}' |  | ||||||
|         response = self._download_json( |  | ||||||
|             'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({ |  | ||||||
|                 'query': '''{ |  | ||||||
|   viewer { |  | ||||||
|     mediaToken(anvack: "%s", id: %s) { |  | ||||||
|       token |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| }''' % (anvack, mcp_id), |  | ||||||
|             }).encode(), headers={ |  | ||||||
|                 'Authorization': auth_token, |  | ||||||
|                 'Content-Type': 'application/json', |  | ||||||
|             }, note='Fetching NFL API token') |  | ||||||
|         return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token')) |  | ||||||
|  |  | ||||||
|     _TOKEN_GENERATORS = { |  | ||||||
|         'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token, |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     def _server_time(self, access_key, video_id): |     def _server_time(self, access_key, video_id): | ||||||
| @@ -277,8 +232,8 @@ class AnvatoIE(InfoExtractor): | |||||||
|         server_time = self._server_time(access_key, video_id) |         server_time = self._server_time(access_key, video_id) | ||||||
|         input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}' |         input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}' | ||||||
|  |  | ||||||
|         auth_secret = intlist_to_bytes(aes_encrypt( |         auth_secret = bytes(aes_encrypt( | ||||||
|             bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY))) |             list(input_data[:64].encode()), list(self._AUTH_KEY))) | ||||||
|         query = { |         query = { | ||||||
|             'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'), |             'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'), | ||||||
|             'rtyp': 'fp', |             'rtyp': 'fp', | ||||||
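The hunk above drops the bytes_to_intlist/intlist_to_bytes helpers: aes_encrypt operates on lists of ints, and plain list()/bytes() conversions do the same job, since a bytes object already iterates as ints. A minimal sketch of the round-trip, using stand-in data rather than Anvato's real input:

    # Hypothetical payload and the real _AUTH_KEY shape (8 raw bytes).
    data = b'sometimestamp~somemd5digest'
    int_list = list(data)          # was: bytes_to_intlist(data)
    round_trip = bytes(int_list)   # was: intlist_to_bytes(int_list)
    assert round_trip == data and all(isinstance(i, int) for i in int_list)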
| @@ -290,8 +245,6 @@ class AnvatoIE(InfoExtractor): | |||||||
|         } |         } | ||||||
|         if extracted_token is not None: |         if extracted_token is not None: | ||||||
|             api['anvstk2'] = extracted_token |             api['anvstk2'] = extracted_token | ||||||
|         elif self._TOKEN_GENERATORS.get(access_key) is not None: |  | ||||||
|             api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id) |  | ||||||
|         elif self._ANVACK_TABLE.get(access_key) is not None: |         elif self._ANVACK_TABLE.get(access_key) is not None: | ||||||
|             api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}') |             api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}') | ||||||
|         else: |         else: | ||||||
| @@ -299,7 +252,7 @@ class AnvatoIE(InfoExtractor): | |||||||
|  |  | ||||||
|         return self._download_json( |         return self._download_json( | ||||||
|             video_data_url, video_id, transform_source=strip_jsonp, query=query, |             video_data_url, video_id, transform_source=strip_jsonp, query=query, | ||||||
|             data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8')) |             data=json.dumps({'api': api}, separators=(',', ':')).encode()) | ||||||
|  |  | ||||||
|     def _get_anvato_videos(self, access_key, video_id, token): |     def _get_anvato_videos(self, access_key, video_id, token): | ||||||
|         video_data = self._get_video_json(access_key, video_id, token) |         video_data = self._get_video_json(access_key, video_id, token) | ||||||
| @@ -358,7 +311,7 @@ class AnvatoIE(InfoExtractor): | |||||||
|         for caption in video_data.get('captions', []): |         for caption in video_data.get('captions', []): | ||||||
|             a_caption = { |             a_caption = { | ||||||
|                 'url': caption['url'], |                 'url': caption['url'], | ||||||
|                 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None |                 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None, | ||||||
|             } |             } | ||||||
|             subtitles.setdefault(caption['language'], []).append(a_caption) |             subtitles.setdefault(caption['language'], []).append(a_caption) | ||||||
|         subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs) |         subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs) | ||||||
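For reference, the caption loop above groups tracks per language with dict.setdefault before merging in the HLS/VTT subtitles. A standalone sketch with made-up caption entries:

    # Invented caption dicts; only the grouping pattern is the point here.
    captions = [
        {'language': 'en', 'url': 'https://example.invalid/a.ttml', 'format': 'SMPTE-TT'},
        {'language': 'en', 'url': 'https://example.invalid/b.vtt'},
    ]
    subtitles = {}
    for caption in captions:
        # setdefault keeps one list per language and appends each track to it
        subtitles.setdefault(caption['language'], []).append({
            'url': caption['url'],
            'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None,
        })
    assert [c['ext'] for c in subtitles['en']] == ['tt', None]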
|   | |||||||
| @@ -10,6 +10,7 @@ from ..utils import ( | |||||||
|  |  | ||||||
|  |  | ||||||
| class AolIE(YahooIE):  # XXX: Do not subclass from concrete IE | class AolIE(YahooIE):  # XXX: Do not subclass from concrete IE | ||||||
|  |     _WORKING = False | ||||||
|     IE_NAME = 'aol.com' |     IE_NAME = 'aol.com' | ||||||
|     _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})' |     _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})' | ||||||
|  |  | ||||||
| @@ -29,7 +30,7 @@ class AolIE(YahooIE):  # XXX: Do not subclass from concrete IE | |||||||
|         'params': { |         'params': { | ||||||
|             # m3u8 download |             # m3u8 download | ||||||
|             'skip_download': True, |             'skip_download': True, | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         # video with vidible ID |         # video with vidible ID | ||||||
|         'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', |         'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', | ||||||
| @@ -45,7 +46,7 @@ class AolIE(YahooIE):  # XXX: Do not subclass from concrete IE | |||||||
|         'params': { |         'params': { | ||||||
|             # m3u8 download |             # m3u8 download | ||||||
|             'skip_download': True, |             'skip_download': True, | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/', |         'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
| @@ -82,10 +83,10 @@ class AolIE(YahooIE):  # XXX: Do not subclass from concrete IE | |||||||
|             return self._extract_yahoo_video(video_id, 'us') |             return self._extract_yahoo_video(video_id, 'us') | ||||||
|  |  | ||||||
|         response = self._download_json( |         response = self._download_json( | ||||||
|             'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id, |             f'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/{video_id}/details', | ||||||
|             video_id)['response'] |             video_id)['response'] | ||||||
|         if response['statusText'] != 'Ok': |         if response['statusText'] != 'Ok': | ||||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True) |             raise ExtractorError('{} said: {}'.format(self.IE_NAME, response['statusText']), expected=True) | ||||||
|  |  | ||||||
|         video_data = response['data'] |         video_data = response['data'] | ||||||
|         formats = [] |         formats = [] | ||||||
|   | |||||||
| @@ -34,7 +34,7 @@ class APAIE(InfoExtractor): | |||||||
|         video_id, base_url = mobj.group('id', 'base_url') |         video_id, base_url = mobj.group('id', 'base_url') | ||||||
|  |  | ||||||
|         webpage = self._download_webpage( |         webpage = self._download_webpage( | ||||||
|             '%s/player/%s' % (base_url, video_id), video_id) |             f'{base_url}/player/{video_id}', video_id) | ||||||
|  |  | ||||||
|         jwplatform_id = self._search_regex( |         jwplatform_id = self._search_regex( | ||||||
|             r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage, |             r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage, | ||||||
| @@ -47,7 +47,7 @@ class APAIE(InfoExtractor): | |||||||
|  |  | ||||||
|         def extract(field, name=None): |         def extract(field, name=None): | ||||||
|             return self._search_regex( |             return self._search_regex( | ||||||
|                 r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field, |                 rf'\b{field}["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', | ||||||
|                 webpage, name or field, default=None, group='value') |                 webpage, name or field, default=None, group='value') | ||||||
|  |  | ||||||
|         title = extract('title') or video_id |         title = extract('title') or video_id | ||||||
|   | |||||||
| @@ -1,8 +1,5 @@ | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ExtractorError, str_to_int | ||||||
|     str_to_int, |  | ||||||
|     ExtractorError |  | ||||||
| ) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class AppleConnectIE(InfoExtractor): | class AppleConnectIE(InfoExtractor): | ||||||
|   | |||||||
| @@ -1,30 +1,45 @@ | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     clean_html, |  | ||||||
|     clean_podcast_url, |     clean_podcast_url, | ||||||
|     get_element_by_class, |  | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     parse_iso8601, |     parse_iso8601, | ||||||
|     try_get, |  | ||||||
| ) | ) | ||||||
|  | from ..utils.traversal import traverse_obj | ||||||
|  |  | ||||||
|  |  | ||||||
| class ApplePodcastsIE(InfoExtractor): | class ApplePodcastsIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)' |     _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|  |         'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654', | ||||||
|  |         'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '1000665010654', | ||||||
|  |             'ext': 'mp3', | ||||||
|  |             'title': 'Ferreck Dawn - To The Break of Dawn 117', | ||||||
|  |             'episode': 'Ferreck Dawn - To The Break of Dawn 117', | ||||||
|  |             'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc', | ||||||
|  |             'upload_date': '20240812', | ||||||
|  |             'timestamp': 1723449600, | ||||||
|  |             'duration': 3596, | ||||||
|  |             'series': 'Ferreck Dawn - To The Break of Dawn', | ||||||
|  |             'thumbnail': 're:.+[.](png|jpe?g|webp)', | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|         'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777', |         'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777', | ||||||
|         'md5': '41dc31cd650143e530d9423b6b5a344f', |         'md5': 'baf8a6b8b8aa6062dbb4639ed73d0052', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '1000482637777', |             'id': '1000482637777', | ||||||
|             'ext': 'mp3', |             'ext': 'mp3', | ||||||
|             'title': '207 - Whitney Webb Returns', |             'title': '207 - Whitney Webb Returns', | ||||||
|  |             'episode': '207 - Whitney Webb Returns', | ||||||
|  |             'episode_number': 207, | ||||||
|             'description': 'md5:75ef4316031df7b41ced4e7b987f79c6', |             'description': 'md5:75ef4316031df7b41ced4e7b987f79c6', | ||||||
|             'upload_date': '20200705', |             'upload_date': '20200705', | ||||||
|             'timestamp': 1593932400, |             'timestamp': 1593932400, | ||||||
|             'duration': 6454, |             'duration': 5369, | ||||||
|             'series': 'The Tim Dillon Show', |             'series': 'The Tim Dillon Show', | ||||||
|             'thumbnail': 're:.+[.](png|jpe?g|webp)', |             'thumbnail': 're:.+[.](png|jpe?g|webp)', | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777', |         'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
| @@ -39,47 +54,24 @@ class ApplePodcastsIE(InfoExtractor): | |||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         episode_id = self._match_id(url) |         episode_id = self._match_id(url) | ||||||
|         webpage = self._download_webpage(url, episode_id) |         webpage = self._download_webpage(url, episode_id) | ||||||
|         episode_data = {} |         server_data = self._search_json( | ||||||
|         ember_data = {} |             r'<script [^>]*\bid=["\']serialized-server-data["\'][^>]*>', webpage, | ||||||
|         # new page type 2021-11 |             'server data', episode_id, contains_pattern=r'\[{(?s:.+)}\]')[0]['data'] | ||||||
|         amp_data = self._parse_json(self._search_regex( |         model_data = traverse_obj(server_data, ( | ||||||
|             r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<', |             'headerButtonItems', lambda _, v: v['$kind'] == 'bookmark' and v['modelType'] == 'EpisodeOffer', | ||||||
|             webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {} |             'model', {dict}, any)) | ||||||
|         amp_data = try_get(amp_data, |  | ||||||
|                            lambda a: self._parse_json( |  | ||||||
|                                next(a[x] for x in iter(a) if episode_id in x), |  | ||||||
|                                episode_id), |  | ||||||
|                            dict) or {} |  | ||||||
|         amp_data = amp_data.get('d') or [] |  | ||||||
|         episode_data = try_get( |  | ||||||
|             amp_data, |  | ||||||
|             lambda a: next(x for x in a |  | ||||||
|                            if x['type'] == 'podcast-episodes' and x['id'] == episode_id), |  | ||||||
|             dict) |  | ||||||
|         if not episode_data: |  | ||||||
|             # try pre 2021-11 page type: TODO: consider deleting if no longer used |  | ||||||
|             ember_data = self._parse_json(self._search_regex( |  | ||||||
|                 r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<', |  | ||||||
|                 webpage, 'ember data'), episode_id) or {} |  | ||||||
|             ember_data = ember_data.get(episode_id) or ember_data |  | ||||||
|             episode_data = try_get(ember_data, lambda x: x['data'], dict) |  | ||||||
|         episode = episode_data['attributes'] |  | ||||||
|         description = episode.get('description') or {} |  | ||||||
|  |  | ||||||
|         series = None |  | ||||||
|         for inc in (amp_data or ember_data.get('included') or []): |  | ||||||
|             if inc.get('type') == 'media/podcast': |  | ||||||
|                 series = try_get(inc, lambda x: x['attributes']['name']) |  | ||||||
|         series = series or clean_html(get_element_by_class('podcast-header__identity', webpage)) |  | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': episode_id, |             'id': episode_id, | ||||||
|             'title': episode.get('name'), |             **self._json_ld( | ||||||
|             'url': clean_podcast_url(episode['assetUrl']), |                 traverse_obj(server_data, ('seoData', 'schemaContent', {dict})) | ||||||
|             'description': description.get('standard') or description.get('short'), |                 or self._yield_json_ld(webpage, episode_id, fatal=False), episode_id, fatal=False), | ||||||
|             'timestamp': parse_iso8601(episode.get('releaseDateTime')), |             **traverse_obj(model_data, { | ||||||
|             'duration': int_or_none(episode.get('durationInMilliseconds'), 1000), |                 'title': ('title', {str}), | ||||||
|             'series': series, |                 'url': ('streamUrl', {clean_podcast_url}), | ||||||
|  |                 'timestamp': ('releaseDate', {parse_iso8601}), | ||||||
|  |                 'duration': ('duration', {int_or_none}), | ||||||
|  |             }), | ||||||
|             'thumbnail': self._og_search_thumbnail(webpage), |             'thumbnail': self._og_search_thumbnail(webpage), | ||||||
|             'vcodec': 'none', |             'vcodec': 'none', | ||||||
|         } |         } | ||||||
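The rewritten _real_extract leans on traverse_obj to pick the EpisodeOffer model out of the serialized-server-data blob. A sketch of that path against a made-up payload (assumes yt-dlp is installed; a lambda filter that raises on a missing key is treated as a non-match, so items without those fields are simply skipped):

    from yt_dlp.utils.traversal import traverse_obj

    # Hypothetical reduction of the serialized-server-data structure.
    server_data = {
        'headerButtonItems': [
            {'$kind': 'share'},  # lacks modelType, filtered out
            {'$kind': 'bookmark', 'modelType': 'EpisodeOffer',
             'model': {'title': 'Episode 1', 'streamUrl': 'https://example.invalid/e1.mp3'}},
        ],
    }
    # Branch over the list, keep bookmark/EpisodeOffer items, take the first dict model.
    model = traverse_obj(server_data, (
        'headerButtonItems', lambda _, v: v['$kind'] == 'bookmark' and v['modelType'] == 'EpisodeOffer',
        'model', {dict}, any))
    assert model['streamUrl'].endswith('e1.mp3')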
|   | |||||||
| @@ -1,8 +1,8 @@ | |||||||
| import re |  | ||||||
| import json | import json | ||||||
|  | import re | ||||||
|  | import urllib.parse | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import compat_urlparse |  | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     parse_duration, |     parse_duration, | ||||||
| @@ -64,7 +64,7 @@ class AppleTrailersIE(InfoExtractor): | |||||||
|                     'uploader_id': 'wb', |                     'uploader_id': 'wb', | ||||||
|                 }, |                 }, | ||||||
|             }, |             }, | ||||||
|         ] |         ], | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/', |         'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
| @@ -99,7 +99,7 @@ class AppleTrailersIE(InfoExtractor): | |||||||
|         webpage = self._download_webpage(url, movie) |         webpage = self._download_webpage(url, movie) | ||||||
|         film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id') |         film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id') | ||||||
|         film_data = self._download_json( |         film_data = self._download_json( | ||||||
|             'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id, |             f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json', | ||||||
|             film_id, fatal=False) |             film_id, fatal=False) | ||||||
|  |  | ||||||
|         if film_data: |         if film_data: | ||||||
| @@ -114,7 +114,7 @@ class AppleTrailersIE(InfoExtractor): | |||||||
|                         if not src: |                         if not src: | ||||||
|                             continue |                             continue | ||||||
|                         formats.append({ |                         formats.append({ | ||||||
|                             'format_id': '%s-%s' % (version, size), |                             'format_id': f'{version}-{size}', | ||||||
|                             'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src), |                             'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src), | ||||||
|                             'width': int_or_none(size_data.get('width')), |                             'width': int_or_none(size_data.get('width')), | ||||||
|                             'height': int_or_none(size_data.get('height')), |                             'height': int_or_none(size_data.get('height')), | ||||||
| @@ -134,7 +134,7 @@ class AppleTrailersIE(InfoExtractor): | |||||||
|             page_data = film_data.get('page', {}) |             page_data = film_data.get('page', {}) | ||||||
|             return self.playlist_result(entries, film_id, page_data.get('movie_title')) |             return self.playlist_result(entries, film_id, page_data.get('movie_title')) | ||||||
|  |  | ||||||
|         playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') |         playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc') | ||||||
|  |  | ||||||
|         def fix_html(s): |         def fix_html(s): | ||||||
|             s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s) |             s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s) | ||||||
| @@ -143,10 +143,9 @@ class AppleTrailersIE(InfoExtractor): | |||||||
|             # like: http://trailers.apple.com/trailers/wb/gravity/ |             # like: http://trailers.apple.com/trailers/wb/gravity/ | ||||||
|  |  | ||||||
|             def _clean_json(m): |             def _clean_json(m): | ||||||
|                 return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;') |                 return 'iTunes.playURL({});'.format(m.group(1).replace('\'', '&#39;')) | ||||||
|             s = re.sub(self._JSON_RE, _clean_json, s) |             s = re.sub(self._JSON_RE, _clean_json, s) | ||||||
|             s = '<html>%s</html>' % s |             return f'<html>{s}</html>' | ||||||
|             return s |  | ||||||
|         doc = self._download_xml(playlist_url, movie, transform_source=fix_html) |         doc = self._download_xml(playlist_url, movie, transform_source=fix_html) | ||||||
|  |  | ||||||
|         playlist = [] |         playlist = [] | ||||||
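fix_html above is a transform_source hook: re.sub accepts a function as the replacement, so each iTunes.playURL(...) match can be rewritten before the page is parsed as XML. A standalone sketch of that mechanism, with a simplified regex and an invented input string:

    import re

    # Simplified stand-in for the extractor's _JSON_RE.
    _JSON_RE = r'iTunes\.playURL\((.*?)\);'

    def _clean_json(m):
        # Escape the quotes so the fragment survives XML parsing.
        return 'iTunes.playURL({});'.format(m.group(1).replace('\'', '&#39;'))

    s = re.sub(_JSON_RE, _clean_json, "onclick=iTunes.playURL('trailer');")
    assert s == 'onclick=iTunes.playURL(&#39;trailer&#39;);'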
| @@ -170,18 +169,18 @@ class AppleTrailersIE(InfoExtractor): | |||||||
|                 duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) |                 duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) | ||||||
|  |  | ||||||
|             trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower() |             trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower() | ||||||
|             settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id) |             settings_json_url = urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json') | ||||||
|             settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json') |             settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json') | ||||||
|  |  | ||||||
|             formats = [] |             formats = [] | ||||||
|             for format in settings['metadata']['sizes']: |             for fmt in settings['metadata']['sizes']: | ||||||
|                 # The src is a file pointing to the real video file |                 # The src is a file pointing to the real video file | ||||||
|                 format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src']) |                 format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src']) | ||||||
|                 formats.append({ |                 formats.append({ | ||||||
|                     'url': format_url, |                     'url': format_url, | ||||||
|                     'format': format['type'], |                     'format': fmt['type'], | ||||||
|                     'width': int_or_none(format['width']), |                     'width': int_or_none(fmt['width']), | ||||||
|                     'height': int_or_none(format['height']), |                     'height': int_or_none(fmt['height']), | ||||||
|                 }) |                 }) | ||||||
|  |  | ||||||
|             playlist.append({ |             playlist.append({ | ||||||
| @@ -229,7 +228,7 @@ class AppleTrailersSectionIE(InfoExtractor): | |||||||
|             'title': 'Movie Studios', |             'title': 'Movie Studios', | ||||||
|         }, |         }, | ||||||
|     } |     } | ||||||
|     _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS) |     _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>{})'.format('|'.join(_SECTIONS)) | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'http://trailers.apple.com/#section=justadded', |         'url': 'http://trailers.apple.com/#section=justadded', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
| @@ -270,7 +269,7 @@ class AppleTrailersSectionIE(InfoExtractor): | |||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         section = self._match_id(url) |         section = self._match_id(url) | ||||||
|         section_data = self._download_json( |         section_data = self._download_json( | ||||||
|             'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'], |             'http://trailers.apple.com/trailers/home/feeds/{}.json'.format(self._SECTIONS[section]['feed_path']), | ||||||
|             section) |             section) | ||||||
|         entries = [ |         entries = [ | ||||||
|             self.url_result('http://trailers.apple.com' + e['location']) |             self.url_result('http://trailers.apple.com' + e['location']) | ||||||
|   | |||||||
| @@ -1,11 +1,11 @@ | |||||||
|  | from __future__ import annotations | ||||||
|  |  | ||||||
| import json | import json | ||||||
| import re | import re | ||||||
| import urllib.parse | import urllib.parse | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from .naver import NaverBaseIE |  | ||||||
| from .youtube import YoutubeBaseInfoExtractor, YoutubeIE | from .youtube import YoutubeBaseInfoExtractor, YoutubeIE | ||||||
| from ..compat import compat_urllib_parse_unquote |  | ||||||
| from ..networking import HEADRequest | from ..networking import HEADRequest | ||||||
| from ..networking.exceptions import HTTPError | from ..networking.exceptions import HTTPError | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
| @@ -32,6 +32,7 @@ from ..utils import ( | |||||||
|     unified_timestamp, |     unified_timestamp, | ||||||
|     url_or_none, |     url_or_none, | ||||||
|     urlhandle_detect_ext, |     urlhandle_detect_ext, | ||||||
|  |     variadic, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -50,10 +51,9 @@ class ArchiveOrgIE(InfoExtractor): | |||||||
|             'release_date': '19681210', |             'release_date': '19681210', | ||||||
|             'timestamp': 1268695290, |             'timestamp': 1268695290, | ||||||
|             'upload_date': '20100315', |             'upload_date': '20100315', | ||||||
|             'creator': 'SRI International', |             'creators': ['SRI International'], | ||||||
|             'uploader': 'laura@archive.org', |             'uploader': 'laura@archive.org', | ||||||
|             'thumbnail': r're:https://archive\.org/download/.*\.jpg', |             'thumbnail': r're:https://archive\.org/download/.*\.jpg', | ||||||
|             'release_year': 1968, |  | ||||||
|             'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr', |             'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr', | ||||||
|             'track': 'XD300-23 68HighlightsAResearchCntAugHumanIntellect', |             'track': 'XD300-23 68HighlightsAResearchCntAugHumanIntellect', | ||||||
|  |  | ||||||
| @@ -111,7 +111,7 @@ class ArchiveOrgIE(InfoExtractor): | |||||||
|             'title': 'Turning', |             'title': 'Turning', | ||||||
|             'ext': 'flac', |             'ext': 'flac', | ||||||
|             'track': 'Turning', |             'track': 'Turning', | ||||||
|             'creator': 'Grateful Dead', |             'creators': ['Grateful Dead'], | ||||||
|             'display_id': 'gd1977-05-08d01t01.flac', |             'display_id': 'gd1977-05-08d01t01.flac', | ||||||
|             'track_number': 1, |             'track_number': 1, | ||||||
|             'album': '1977-05-08 - Barton Hall - Cornell University', |             'album': '1977-05-08 - Barton Hall - Cornell University', | ||||||
| @@ -131,11 +131,10 @@ class ArchiveOrgIE(InfoExtractor): | |||||||
|             'location': 'Barton Hall - Cornell University', |             'location': 'Barton Hall - Cornell University', | ||||||
|             'duration': 438.68, |             'duration': 438.68, | ||||||
|             'track': 'Deal', |             'track': 'Deal', | ||||||
|             'creator': 'Grateful Dead', |             'creators': ['Grateful Dead'], | ||||||
|             'album': '1977-05-08 - Barton Hall - Cornell University', |             'album': '1977-05-08 - Barton Hall - Cornell University', | ||||||
|             'release_date': '19770508', |             'release_date': '19770508', | ||||||
|             'display_id': 'gd1977-05-08d01t07.flac', |             'display_id': 'gd1977-05-08d01t07.flac', | ||||||
|             'release_year': 1977, |  | ||||||
|             'track_number': 7, |             'track_number': 7, | ||||||
|         }, |         }, | ||||||
|     }, { |     }, { | ||||||
| @@ -147,7 +146,7 @@ class ArchiveOrgIE(InfoExtractor): | |||||||
|             'title': 'Bells Of Rostov', |             'title': 'Bells Of Rostov', | ||||||
|             'ext': 'mp3', |             'ext': 'mp3', | ||||||
|         }, |         }, | ||||||
|         'skip': 'restricted' |         'skip': 'restricted', | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3', |         'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3', | ||||||
|         'md5': '1d0aabe03edca83ca58d9ed3b493a3c3', |         'md5': '1d0aabe03edca83ca58d9ed3b493a3c3', | ||||||
| @@ -160,7 +159,7 @@ class ArchiveOrgIE(InfoExtractor): | |||||||
|             'description': 'md5:012b2d668ae753be36896f343d12a236', |             'description': 'md5:012b2d668ae753be36896f343d12a236', | ||||||
|             'upload_date': '20190928', |             'upload_date': '20190928', | ||||||
|         }, |         }, | ||||||
|         'skip': 'restricted' |         'skip': 'restricted', | ||||||
|     }, { |     }, { | ||||||
|         # Original formats are private |         # Original formats are private | ||||||
|         'url': 'https://archive.org/details/irelandthemakingofarepublic', |         'url': 'https://archive.org/details/irelandthemakingofarepublic', | ||||||
| @@ -170,7 +169,7 @@ class ArchiveOrgIE(InfoExtractor): | |||||||
|             'upload_date': '20160610', |             'upload_date': '20160610', | ||||||
|             'description': 'md5:f70956a156645a658a0dc9513d9e78b7', |             'description': 'md5:f70956a156645a658a0dc9513d9e78b7', | ||||||
|             'uploader': 'dimitrios@archive.org', |             'uploader': 'dimitrios@archive.org', | ||||||
|             'creator': ['British Broadcasting Corporation', 'Time-Life Films'], |             'creators': ['British Broadcasting Corporation', 'Time-Life Films'], | ||||||
|             'timestamp': 1465594947, |             'timestamp': 1465594947, | ||||||
|         }, |         }, | ||||||
|         'playlist': [ |         'playlist': [ | ||||||
| @@ -204,8 +203,28 @@ class ArchiveOrgIE(InfoExtractor): | |||||||
|                     'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg', |                     'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg', | ||||||
|                     'display_id': 'irelandthemakingofarepublicreel2.mov', |                     'display_id': 'irelandthemakingofarepublicreel2.mov', | ||||||
|                 }, |                 }, | ||||||
|             } |             }, | ||||||
|         ] |         ], | ||||||
|  |     }, { | ||||||
|  |         # The reviewbody is None for one of the reviews; just need to extract data without crashing | ||||||
|  |         'url': 'https://archive.org/details/gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn', | ||||||
|  |             'ext': 'mp3', | ||||||
|  |             'title': 'Stuck Inside of Mobile with the Memphis Blues Again', | ||||||
|  |             'creators': ['Grateful Dead'], | ||||||
|  |             'duration': 338.31, | ||||||
|  |             'track': 'Stuck Inside of Mobile with the Memphis Blues Again', | ||||||
|  |             'description': 'md5:764348a470b986f1217ffd38d6ac7b72', | ||||||
|  |             'display_id': 'gd95-04-02d1t04.shn', | ||||||
|  |             'location': 'Pyramid Arena', | ||||||
|  |             'uploader': 'jon@archive.org', | ||||||
|  |             'album': '1995-04-02 - Pyramid Arena', | ||||||
|  |             'upload_date': '20040519', | ||||||
|  |             'track_number': 4, | ||||||
|  |             'release_date': '19950402', | ||||||
|  |             'timestamp': 1084927901, | ||||||
|  |         }, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     @staticmethod |     @staticmethod | ||||||
| @@ -222,7 +241,7 @@ class ArchiveOrgIE(InfoExtractor): | |||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = urllib.parse.unquote_plus(self._match_id(url)) |         video_id = urllib.parse.unquote_plus(self._match_id(url)) | ||||||
|         identifier, entry_id = (video_id.split('/', 1) + [None])[:2] |         identifier, _, entry_id = video_id.partition('/') | ||||||
|  |  | ||||||
|         # Archive.org metadata API doesn't clearly demarcate playlist entries |         # Archive.org metadata API doesn't clearly demarcate playlist entries | ||||||
|         # or subtitle tracks, so we get them from the embeddable player. |         # or subtitle tracks, so we get them from the embeddable player. | ||||||
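partition always returns a 3-tuple, so the old split-and-pad idiom goes away; the one behavioural nuance is that a missing entry yields '' rather than None, and both are falsy wherever entry_id is checked. Sketch with hypothetical IDs:

    video_id = 'some-identifier/some-entry.mp4'
    identifier, _, entry_id = video_id.partition('/')
    assert (identifier, entry_id) == ('some-identifier', 'some-entry.mp4')
    # With no '/', entry_id becomes '' (falsy) where the old split gave None.
    assert 'bare-identifier'.partition('/')[2] == ''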
| @@ -248,7 +267,7 @@ class ArchiveOrgIE(InfoExtractor): | |||||||
|                 if track['kind'] != 'subtitles': |                 if track['kind'] != 'subtitles': | ||||||
|                     continue |                     continue | ||||||
|                 entries[p['orig']][track['label']] = { |                 entries[p['orig']][track['label']] = { | ||||||
|                     'url': 'https://archive.org/' + track['file'].lstrip('/') |                     'url': 'https://archive.org/' + track['file'].lstrip('/'), | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|         metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier) |         metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier) | ||||||
| @@ -260,7 +279,7 @@ class ArchiveOrgIE(InfoExtractor): | |||||||
|             'title': m['title'], |             'title': m['title'], | ||||||
|             'description': clean_html(m.get('description')), |             'description': clean_html(m.get('description')), | ||||||
|             'uploader': dict_get(m, ['uploader', 'adder']), |             'uploader': dict_get(m, ['uploader', 'adder']), | ||||||
|             'creator': m.get('creator'), |             'creators': traverse_obj(m, ('creator', {variadic}, {lambda x: x[0] and list(x)})), | ||||||
|             'license': m.get('licenseurl'), |             'license': m.get('licenseurl'), | ||||||
|             'release_date': unified_strdate(m.get('date')), |             'release_date': unified_strdate(m.get('date')), | ||||||
|             'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])), |             'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])), | ||||||
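The creators mapping above normalizes archive.org's creator field, which may arrive as a bare string or as a list: variadic wraps a string into a tuple, and the lambda returns a list only when its first element is truthy. A sketch with sample values from the tests (assumes yt-dlp is installed):

    from yt_dlp.utils import variadic
    from yt_dlp.utils.traversal import traverse_obj

    for creator, expected in [
        ('SRI International', ['SRI International']),
        (['British Broadcasting Corporation', 'Time-Life Films'],
         ['British Broadcasting Corporation', 'Time-Life Films']),
    ]:
        got = traverse_obj({'creator': creator},
                           ('creator', {variadic}, {lambda x: x[0] and list(x)}))
        assert got == expected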
| @@ -275,7 +294,7 @@ class ArchiveOrgIE(InfoExtractor): | |||||||
|                     'title': f.get('title') or f['name'], |                     'title': f.get('title') or f['name'], | ||||||
|                     'display_id': f['name'], |                     'display_id': f['name'], | ||||||
|                     'description': clean_html(f.get('description')), |                     'description': clean_html(f.get('description')), | ||||||
|                     'creator': f.get('creator'), |                     'creators': traverse_obj(f, ('creator', {variadic}, {lambda x: x[0] and list(x)})), | ||||||
|                     'duration': parse_duration(f.get('length')), |                     'duration': parse_duration(f.get('length')), | ||||||
|                     'track_number': int_or_none(f.get('track')), |                     'track_number': int_or_none(f.get('track')), | ||||||
|                     'album': f.get('album'), |                     'album': f.get('album'), | ||||||
| @@ -295,7 +314,9 @@ class ArchiveOrgIE(InfoExtractor): | |||||||
|                     'height': int_or_none(f.get('width')), |                     'height': int_or_none(f.get('width')), | ||||||
|                     'filesize': int_or_none(f.get('size'))}) |                     'filesize': int_or_none(f.get('size'))}) | ||||||
|  |  | ||||||
|             extension = (f['name'].rsplit('.', 1) + [None])[1] |             _, has_ext, extension = f['name'].rpartition('.') | ||||||
|  |             if not has_ext: | ||||||
|  |                 extension = None | ||||||
|  |  | ||||||
|             # We don't want to skip private formats if the user has access to them, |             # We don't want to skip private formats if the user has access to them, | ||||||
|             # however without access to an account with such privileges we can't implement/test this. |             # however without access to an account with such privileges we can't implement/test this. | ||||||
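rpartition never raises and always returns a 3-tuple, so the extension split above needs no list padding; the middle element ('.' or '') says whether a dot was found at all. Sketch with hypothetical file names:

    for name, expected in [('movie.ia.mp4', 'mp4'), ('README', None)]:
        _, has_ext, extension = name.rpartition('.')
        if not has_ext:
            # No dot in the name: normalize '' back to None,
            # matching what the old rsplit-and-pad idiom produced.
            extension = None
        assert extension == expected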
| @@ -303,14 +324,14 @@ class ArchiveOrgIE(InfoExtractor): | |||||||
|             is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig')) |             is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig')) | ||||||
|             if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in): |             if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in): | ||||||
|                 entry['formats'].append({ |                 entry['formats'].append({ | ||||||
|                     'url': 'https://archive.org/download/' + identifier + '/' + f['name'], |                     'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']), | ||||||
|                     'format': f.get('format'), |                     'format': f.get('format'), | ||||||
|                     'width': int_or_none(f.get('width')), |                     'width': int_or_none(f.get('width')), | ||||||
|                     'height': int_or_none(f.get('height')), |                     'height': int_or_none(f.get('height')), | ||||||
|                     'filesize': int_or_none(f.get('size')), |                     'filesize': int_or_none(f.get('size')), | ||||||
|                     'protocol': 'https', |                     'protocol': 'https', | ||||||
|                     'source_preference': 0 if f.get('source') == 'original' else -1, |                     'source_preference': 0 if f.get('source') == 'original' else -1, | ||||||
|                     'format_note': f.get('source') |                     'format_note': f.get('source'), | ||||||
|                 }) |                 }) | ||||||
|  |  | ||||||
|         for entry in entries.values(): |         for entry in entries.values(): | ||||||
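Percent-encoding the file name guards against archive.org entries whose names contain spaces, '#', or '?'; quote keeps '/' intact by default, so nested paths still resolve. Sketch with a made-up name:

    import urllib.parse

    name = 'disc1/02. Song And Chorus #1.mp3'
    url = 'https://archive.org/download/some-identifier/' + urllib.parse.quote(name)
    assert url.endswith('/disc1/02.%20Song%20And%20Chorus%20%231.mp3')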
| @@ -334,7 +355,7 @@ class ArchiveOrgIE(InfoExtractor): | |||||||
|                 info['comments'].append({ |                 info['comments'].append({ | ||||||
|                     'id': review.get('review_id'), |                     'id': review.get('review_id'), | ||||||
|                     'author': review.get('reviewer'), |                     'author': review.get('reviewer'), | ||||||
|                     'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + review.get('reviewbody'), |                     'text': join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n'), | ||||||
|                     'timestamp': unified_timestamp(review.get('createdate')), |                     'timestamp': unified_timestamp(review.get('createdate')), | ||||||
|                     'parent': 'root'}) |                     'parent': 'root'}) | ||||||
|  |  | ||||||
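join_nonempty with from_dict looks both keys up in the review dict and drops empty or None values, which is what the old string concatenation could not do when reviewbody was None (the case the new gd95-04-02 test covers). Sketch (assumes yt-dlp is installed):

    from yt_dlp.utils import join_nonempty

    # A review with a missing body, as seen in the wild.
    review = {'reviewtitle': 'Great show', 'reviewbody': None}
    assert join_nonempty('reviewtitle', 'reviewbody',
                         from_dict=review, delim='\n\n') == 'Great show'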
| @@ -373,7 +394,7 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'uploader_url': 'https://www.youtube.com/user/Zeurel', |                 'uploader_url': 'https://www.youtube.com/user/Zeurel', | ||||||
|                 'thumbnail': r're:https?://.*\.(jpg|webp)', |                 'thumbnail': r're:https?://.*\.(jpg|webp)', | ||||||
|                 'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg', |                 'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             # Internal link |             # Internal link | ||||||
|             'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0', |             'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0', | ||||||
| @@ -390,7 +411,7 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'uploader_url': 'https://www.youtube.com/user/1veritasium', |                 'uploader_url': 'https://www.youtube.com/user/1veritasium', | ||||||
|                 'thumbnail': r're:https?://.*\.(jpg|webp)', |                 'thumbnail': r're:https?://.*\.(jpg|webp)', | ||||||
|                 'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA', |                 'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             # Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description. |             # Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description. | ||||||
|             # Should use the date in the link. Title ends with '- Youtube'. Capture has description in eow-description |             # Should use the date in the link. Title ends with '- Youtube'. Capture has description in eow-description | ||||||
| @@ -405,8 +426,8 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'uploader_id': 'machinima', |                 'uploader_id': 'machinima', | ||||||
|                 'uploader_url': 'https://www.youtube.com/user/machinima', |                 'uploader_url': 'https://www.youtube.com/user/machinima', | ||||||
|                 'thumbnail': r're:https?://.*\.(jpg|webp)', |                 'thumbnail': r're:https?://.*\.(jpg|webp)', | ||||||
|                 'uploader': 'machinima' |                 'uploader': 'machinima', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             # FLV video. Video file URL does not provide itag information |             # FLV video. Video file URL does not provide itag information | ||||||
|             'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw', |             'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw', | ||||||
| @@ -423,7 +444,7 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A', |                 'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A', | ||||||
|                 'thumbnail': r're:https?://.*\.(jpg|webp)', |                 'thumbnail': r're:https?://.*\.(jpg|webp)', | ||||||
|                 'uploader': 'jawed', |                 'uploader': 'jawed', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA', |             'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA', | ||||||
|             'info_dict': { |             'info_dict': { | ||||||
| @@ -439,7 +460,7 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'uploader_url': 'https://www.youtube.com/user/itsmadeon', |                 'uploader_url': 'https://www.youtube.com/user/itsmadeon', | ||||||
|                 'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w', |                 'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w', | ||||||
|                 'thumbnail': r're:https?://.*\.(jpg|webp)', |                 'thumbnail': r're:https?://.*\.(jpg|webp)', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             # First capture is of dead video, second is the oldest from CDX response. |             # First capture is of dead video, second is the oldest from CDX response. | ||||||
|             'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E', |             'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E', | ||||||
| @@ -456,7 +477,7 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA', |                 'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA', | ||||||
|                 'thumbnail': r're:https?://.*\.(jpg|webp)', |                 'thumbnail': r're:https?://.*\.(jpg|webp)', | ||||||
|                 'uploader': 'ETC News', |                 'uploader': 'ETC News', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             # First capture of dead video, capture date in link links to dead capture. |             # First capture of dead video, capture date in link links to dead capture. | ||||||
|             'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E', |             'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E', | ||||||
| @@ -475,15 +496,15 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'uploader': 'ETC News', |                 'uploader': 'ETC News', | ||||||
|             }, |             }, | ||||||
|             'expected_warnings': [ |             'expected_warnings': [ | ||||||
|                 r'unable to download capture webpage \(it may not be archived\)' |                 r'unable to download capture webpage \(it may not be archived\)', | ||||||
|             ] |             ], | ||||||
|         }, {   # Very old YouTube page, has - YouTube in title. |         }, {   # Very old YouTube page, has - YouTube in title. | ||||||
|             'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg', |             'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg', | ||||||
|             'info_dict': { |             'info_dict': { | ||||||
|                 'id': '-06-KB9XTzg', |                 'id': '-06-KB9XTzg', | ||||||
|                 'ext': 'flv', |                 'ext': 'flv', | ||||||
|                 'title': 'New Coin Hack!! 100% Safe!!' |                 'title': 'New Coin Hack!! 100% Safe!!', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8', |             'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8', | ||||||
|             'info_dict': { |             'info_dict': { | ||||||
| @@ -497,7 +518,7 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc', |                 'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc', | ||||||
|                 'thumbnail': r're:https?://.*\.(jpg|webp)', |                 'thumbnail': r're:https?://.*\.(jpg|webp)', | ||||||
|                 'uploader': 'DankPods', |                 'uploader': 'DankPods', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             # player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093 |             # player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093 | ||||||
|             'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4', |             'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4', | ||||||
| @@ -514,7 +535,7 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'uploader_id': 'PewDiePie', |                 'uploader_id': 'PewDiePie', | ||||||
|                 'uploader_url': 'https://www.youtube.com/user/PewDiePie', |                 'uploader_url': 'https://www.youtube.com/user/PewDiePie', | ||||||
|                 'thumbnail': r're:https?://.*\.(jpg|webp)', |                 'thumbnail': r're:https?://.*\.(jpg|webp)', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             # ~June 2010 Capture. swfconfig |             # ~June 2010 Capture. swfconfig | ||||||
|             'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y', |             'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y', | ||||||
| @@ -529,7 +550,7 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'thumbnail': r're:https?://.*\.(jpg|webp)', |                 'thumbnail': r're:https?://.*\.(jpg|webp)', | ||||||
|                 'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks', |                 'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks', | ||||||
|                 'upload_date': '20090520', |                 'upload_date': '20090520', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             # Jan 2011: watch-video-date/eow-date surrounded by whitespace |             # Jan 2011: watch-video-date/eow-date surrounded by whitespace | ||||||
|             'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc', |             'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc', | ||||||
| @@ -544,7 +565,7 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'thumbnail': r're:https?://.*\.(jpg|webp)', |                 'thumbnail': r're:https?://.*\.(jpg|webp)', | ||||||
|                 'duration': 132, |                 'duration': 132, | ||||||
|                 'uploader_url': 'https://www.youtube.com/user/claybutlermusic', |                 'uploader_url': 'https://www.youtube.com/user/claybutlermusic', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             # ~May 2009 swfArgs. ytcfg is spread out over various vars |             # ~May 2009 swfArgs. ytcfg is spread out over various vars | ||||||
|             'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY', |             'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY', | ||||||
| @@ -559,7 +580,7 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'description': 'md5:4ca77d79538064e41e4cc464e93f44f0', |                 'description': 'md5:4ca77d79538064e41e4cc464e93f44f0', | ||||||
|                 'thumbnail': r're:https?://.*\.(jpg|webp)', |                 'thumbnail': r're:https?://.*\.(jpg|webp)', | ||||||
|                 'duration': 754, |                 'duration': 754, | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             # ~June 2012. Upload date is in another lang so cannot extract. |             # ~June 2012. Upload date is in another lang so cannot extract. | ||||||
|             'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA', |             'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA', | ||||||
| @@ -573,7 +594,7 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'uploader': 'BlackNerdComedy', |                 'uploader': 'BlackNerdComedy', | ||||||
|                 'duration': 182, |                 'duration': 182, | ||||||
|                 'thumbnail': r're:https?://.*\.(jpg|webp)', |                 'thumbnail': r're:https?://.*\.(jpg|webp)', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             # ~July 2013 |             # ~July 2013 | ||||||
|             'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM', |             'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM', | ||||||
| @@ -589,7 +610,7 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ', |                 'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ', | ||||||
|                 'upload_date': '20060428', |                 'upload_date': '20060428', | ||||||
|                 'uploader': 'punkybird', |                 'uploader': 'punkybird', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             # April 2020: Player response in player config |             # April 2020: Player response in player config | ||||||
|             'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en', |             'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en', | ||||||
| @@ -606,7 +627,7 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'thumbnail': r're:https?://.*\.(jpg|webp)', |                 'thumbnail': r're:https?://.*\.(jpg|webp)', | ||||||
|                 'description': 'md5:c625bb3c02c4f5fb4205971e468fa341', |                 'description': 'md5:c625bb3c02c4f5fb4205971e468fa341', | ||||||
|                 'uploader_url': 'https://www.youtube.com/user/GameGrumps', |                 'uploader_url': 'https://www.youtube.com/user/GameGrumps', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             # watch7-user-header with yt-user-info |             # watch7-user-header with yt-user-info | ||||||
|             'url': 'ytarchive:kbh4T_b4Ixw:20160307085057', |             'url': 'ytarchive:kbh4T_b4Ixw:20160307085057', | ||||||
| @@ -621,7 +642,7 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'thumbnail': r're:https?://.*\.(jpg|webp)', |                 'thumbnail': r're:https?://.*\.(jpg|webp)', | ||||||
|                 'upload_date': '20150503', |                 'upload_date': '20150503', | ||||||
|                 'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA', |                 'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             # April 2012 |             # April 2012 | ||||||
|             'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU', |             'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU', | ||||||
| @@ -636,35 +657,35 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 'duration': 200, |                 'duration': 200, | ||||||
|                 'upload_date': '20120407', |                 'upload_date': '20120407', | ||||||
|                 'uploader_id': 'thecomputernerd01', |                 'uploader_id': 'thecomputernerd01', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw', |             'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw', | ||||||
|             'only_matching': True |             'only_matching': True, | ||||||
|         }, { |         }, { | ||||||
|             'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M', |             'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M', | ||||||
|             'only_matching': True |             'only_matching': True, | ||||||
|         }, { |         }, { | ||||||
|             # Video not archived, only capture is unavailable video page |             # Video not archived, only capture is unavailable video page | ||||||
|             'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10', |             'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10', | ||||||
|             'only_matching': True |             'only_matching': True, | ||||||
|         }, {   # Encoded url |         }, {   # Encoded url | ||||||
|             'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den', |             'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den', | ||||||
|             'only_matching': True |             'only_matching': True, | ||||||
|         }, { |         }, { | ||||||
|             'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den', |             'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den', | ||||||
|             'only_matching': True |             'only_matching': True, | ||||||
|         }, { |         }, { | ||||||
|             'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&search=soccer', |             'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&search=soccer', | ||||||
|             'only_matching': True |             'only_matching': True, | ||||||
|         }, { |         }, { | ||||||
|             'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg', |             'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg', | ||||||
|             'only_matching': True |             'only_matching': True, | ||||||
|         }, { |         }, { | ||||||
|             'url': 'ytarchive:BaW_jenozKc:20050214000000', |             'url': 'ytarchive:BaW_jenozKc:20050214000000', | ||||||
|             'only_matching': True |             'only_matching': True, | ||||||
|         }, { |         }, { | ||||||
|             'url': 'ytarchive:BaW_jenozKc', |             'url': 'ytarchive:BaW_jenozKc', | ||||||
|             'only_matching': True |             'only_matching': True, | ||||||
|         }, |         }, | ||||||
|     ] |     ] | ||||||
|     _YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE |     _YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE | ||||||
| @@ -675,13 +696,13 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|  |  | ||||||
|     _YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com']  # thumbnails most likely archived on these servers |     _YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com']  # thumbnails most likely archived on these servers | ||||||
|     _YT_ALL_THUMB_SERVERS = orderedSet( |     _YT_ALL_THUMB_SERVERS = orderedSet( | ||||||
|         _YT_DEFAULT_THUMB_SERVERS + ['img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(0, 5), 9)]]) |         [*_YT_DEFAULT_THUMB_SERVERS, 'img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(5), 9)]]) | ||||||
|  |  | ||||||
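A quick standalone expansion of the host comprehension above (only the comprehension itself is taken from the extractor; the printed list is what it evaluates to):

    # n == 0 yields the bare host because `n or ""` drops the zero
    hosts = [f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(5), 9)]
    # -> ['i.ytimg.com', 'i1.ytimg.com', 'i2.ytimg.com', 'i3.ytimg.com',
    #     'i4.ytimg.com', 'i9.ytimg.com', 's.ytimg.com', 's1.ytimg.com',
    #     's2.ytimg.com', 's3.ytimg.com', 's4.ytimg.com', 's9.ytimg.com']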
|     _WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/' |     _WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/' | ||||||
|     _OLDEST_CAPTURE_DATE = 20050214000000 |     _OLDEST_CAPTURE_DATE = 20050214000000 | ||||||
|     _NEWEST_CAPTURE_DATE = 20500101000000 |     _NEWEST_CAPTURE_DATE = 20500101000000 | ||||||
|  |  | ||||||
|     def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False): |     def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False): | ||||||
|         # CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md |         # CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md | ||||||
|         query = { |         query = { | ||||||
|             'url': url, |             'url': url, | ||||||
| @@ -690,14 +711,14 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|             'limit': 500, |             'limit': 500, | ||||||
|             'filter': ['statuscode:200'] + (filters or []), |             'filter': ['statuscode:200'] + (filters or []), | ||||||
|             'collapse': collapse or [], |             'collapse': collapse or [], | ||||||
|             **(query or {}) |             **(query or {}), | ||||||
|         } |         } | ||||||
|         res = self._download_json( |         res = self._download_json( | ||||||
|             'https://web.archive.org/cdx/search/cdx', item_id, |             'https://web.archive.org/cdx/search/cdx', item_id, | ||||||
|             note or 'Downloading CDX API JSON', query=query, fatal=fatal) |             note or 'Downloading CDX API JSON', query=query, fatal=fatal) | ||||||
|         if isinstance(res, list) and len(res) >= 2: |         if isinstance(res, list) and len(res) >= 2: | ||||||
|             # format response to make it easier to use |             # format response to make it easier to use | ||||||
|             return list(dict(zip(res[0], v)) for v in res[1:]) |             return [dict(zip(res[0], v)) for v in res[1:]] | ||||||
|         elif not isinstance(res, list) or len(res) != 0: |         elif not isinstance(res, list) or len(res) != 0: | ||||||
|             self.report_warning('Error while parsing CDX API response' + bug_reports_message()) |             self.report_warning('Error while parsing CDX API response' + bug_reports_message()) | ||||||
|  |  | ||||||
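For reference, the request that `_call_cdx_api` wraps can be reproduced with the standard library alone. A hedged sketch: the endpoint and the header-row JSON shape come from the CDX README linked in the code; the helper name and defaults here are illustrative.

    import json
    import urllib.parse
    import urllib.request

    def cdx_captures(url, limit=10):
        query = urllib.parse.urlencode({
            'url': url,
            'output': 'json',          # first row of the response is the column header
            'limit': limit,
            'filter': 'statuscode:200',
        })
        with urllib.request.urlopen(f'https://web.archive.org/cdx/search/cdx?{query}') as resp:
            rows = json.load(resp)
        # mirror the dict conversion done in the method above
        return [dict(zip(rows[0], row)) for row in rows[1:]] if len(rows) >= 2 else []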
| @@ -854,7 +875,7 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|                 { |                 { | ||||||
|                     'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'), |                     'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'), | ||||||
|                     'filesize': int_or_none(thumbnail_dict.get('length')), |                     'filesize': int_or_none(thumbnail_dict.get('length')), | ||||||
|                     'preference': int_or_none(thumbnail_dict.get('length')) |                     'preference': int_or_none(thumbnail_dict.get('length')), | ||||||
|                 } for thumbnail_dict in response) |                 } for thumbnail_dict in response) | ||||||
|             if not try_all: |             if not try_all: | ||||||
|                 break |                 break | ||||||
| @@ -895,7 +916,7 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|         for retry in retry_manager: |         for retry in retry_manager: | ||||||
|             try: |             try: | ||||||
|                 urlh = self._request_webpage( |                 urlh = self._request_webpage( | ||||||
|                     HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id), |                     HEADRequest(f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}'), | ||||||
|                     video_id, note='Fetching archived video file url', expected_status=True) |                     video_id, note='Fetching archived video file url', expected_status=True) | ||||||
|             except ExtractorError as e: |             except ExtractorError as e: | ||||||
|                 # HTTP Error 404 is expected if the video is not saved. |                 # HTTP Error 404 is expected if the video is not saved. | ||||||
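The `wayback-fakeurl` trick above can be tried outside yt-dlp: a HEAD request redirects to the archived media file when a capture exists and returns 404 otherwise. Sketch only; the endpoint is the one hardcoded above, the error handling is illustrative.

    import urllib.error
    import urllib.request

    video_id = 'BaW_jenozKc'
    req = urllib.request.Request(
        f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}',
        method='HEAD')
    try:
        with urllib.request.urlopen(req) as resp:
            print(resp.url)            # final URL after the archive redirect
    except urllib.error.HTTPError as e:
        print('not archived' if e.code == 404 else f'HTTP error {e.code}')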
| @@ -926,258 +947,24 @@ class YoutubeWebArchiveIE(InfoExtractor): | |||||||
|         info['thumbnails'] = self._extract_thumbnails(video_id) |         info['thumbnails'] = self._extract_thumbnails(video_id) | ||||||
|  |  | ||||||
|         if urlh: |         if urlh: | ||||||
|             url = compat_urllib_parse_unquote(urlh.url) |             url = urllib.parse.unquote(urlh.url) | ||||||
|             video_file_url_qs = parse_qs(url) |             video_file_url_qs = parse_qs(url) | ||||||
|             # Attempt to recover any ext & format info from playback url & response headers |             # Attempt to recover any ext & format info from playback url & response headers | ||||||
|             format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))} |             fmt = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))} | ||||||
|             itag = try_get(video_file_url_qs, lambda x: x['itag'][0]) |             itag = try_get(video_file_url_qs, lambda x: x['itag'][0]) | ||||||
|             if itag and itag in YoutubeIE._formats: |             if itag and itag in YoutubeIE._formats: | ||||||
|                 format.update(YoutubeIE._formats[itag]) |                 fmt.update(YoutubeIE._formats[itag]) | ||||||
|                 format.update({'format_id': itag}) |                 fmt.update({'format_id': itag}) | ||||||
|             else: |             else: | ||||||
|                 mime = try_get(video_file_url_qs, lambda x: x['mime'][0]) |                 mime = try_get(video_file_url_qs, lambda x: x['mime'][0]) | ||||||
|                 ext = (mimetype2ext(mime) |                 ext = (mimetype2ext(mime) | ||||||
|                        or urlhandle_detect_ext(urlh) |                        or urlhandle_detect_ext(urlh) | ||||||
|                        or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type'))) |                        or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type'))) | ||||||
|                 format.update({'ext': ext}) |                 fmt.update({'ext': ext}) | ||||||
|             info['formats'] = [format] |             info['formats'] = [fmt] | ||||||
|             if not info.get('duration'): |             if not info.get('duration'): | ||||||
|                 info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0])) |                 info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0])) | ||||||
|  |  | ||||||
|         if not info.get('title'): |         if not info.get('title'): | ||||||
|             info['title'] = video_id |             info['title'] = video_id | ||||||
|         return info |         return info | ||||||
|  |  | ||||||
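The format-recovery logic above boils down to reading `itag`, `mime` and `dur` from the playback URL's query string. A minimal stdlib sketch; the URL is hypothetical, and the real itag lookup table is `YoutubeIE._formats`:

    from urllib.parse import parse_qs, urlparse

    playback_url = ('https://web.archive.org/web/2oe_/'
                    'https://example.com/videoplayback?itag=18&mime=video%2Fmp4&dur=132.5')
    qs = parse_qs(urlparse(playback_url).query)
    itag = (qs.get('itag') or [None])[0]      # '18'        -> looked up in YoutubeIE._formats
    mime = (qs.get('mime') or [None])[0]      # 'video/mp4' -> fallback ext via mimetype2ext
    duration = (qs.get('dur') or [None])[0]   # '132.5'     -> used when duration is missing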
|  |  | ||||||
| class VLiveWebArchiveIE(InfoExtractor): |  | ||||||
|     IE_NAME = 'web.archive:vlive' |  | ||||||
|     IE_DESC = 'web.archive.org saved vlive videos' |  | ||||||
|     _VALID_URL = r'''(?x) |  | ||||||
|             (?:https?://)?web\.archive\.org/ |  | ||||||
|             (?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)?  # /web and the version index is optional |  | ||||||
|             (?:https?(?::|%3[Aa])//)?(?: |  | ||||||
|                 (?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P<id>[0-9]+)  # VLive URL |  | ||||||
|             ) |  | ||||||
|         ''' |  | ||||||
|     _TESTS = [{ |  | ||||||
|         'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326', |  | ||||||
|         'md5': 'cc7314812855ce56de70a06a27314983', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '1326', |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'title': "Girl's Day's Broadcast", |  | ||||||
|             'creator': "Girl's Day", |  | ||||||
|             'view_count': int, |  | ||||||
|             'uploader_id': 'muploader_a', |  | ||||||
|             'uploader_url': None, |  | ||||||
|             'uploader': None, |  | ||||||
|             'upload_date': '20150817', |  | ||||||
|             'thumbnail': r're:^https?://.*\.(?:jpg|png)$', |  | ||||||
|             'timestamp': 1439816449, |  | ||||||
|             'like_count': int, |  | ||||||
|             'channel': 'Girl\'s Day', |  | ||||||
|             'channel_id': 'FDF27', |  | ||||||
|             'comment_count': int, |  | ||||||
|             'release_timestamp': 1439818140, |  | ||||||
|             'release_date': '20150817', |  | ||||||
|             'duration': 1014, |  | ||||||
|         }, |  | ||||||
|         'params': { |  | ||||||
|             'skip_download': True, |  | ||||||
|         }, |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '16937', |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'title': '첸백시 걍방', |  | ||||||
|             'creator': 'EXO', |  | ||||||
|             'view_count': int, |  | ||||||
|             'subtitles': 'mincount:12', |  | ||||||
|             'uploader_id': 'muploader_j', |  | ||||||
|             'uploader_url': 'http://vlive.tv', |  | ||||||
|             'uploader': None, |  | ||||||
|             'upload_date': '20161112', |  | ||||||
|             'thumbnail': r're:^https?://.*\.(?:jpg|png)$', |  | ||||||
|             'timestamp': 1478923074, |  | ||||||
|             'like_count': int, |  | ||||||
|             'channel': 'EXO', |  | ||||||
|             'channel_id': 'F94BD', |  | ||||||
|             'comment_count': int, |  | ||||||
|             'release_timestamp': 1478924280, |  | ||||||
|             'release_date': '20161112', |  | ||||||
|             'duration': 906, |  | ||||||
|         }, |  | ||||||
|         'params': { |  | ||||||
|             'skip_download': True, |  | ||||||
|         }, |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '101870', |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'title': '[ⓓ xV] “레벨이들 매력에 반해? 안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)', |  | ||||||
|             'creator': 'Dispatch', |  | ||||||
|             'view_count': int, |  | ||||||
|             'subtitles': 'mincount:6', |  | ||||||
|             'uploader_id': 'V__FRA08071', |  | ||||||
|             'uploader_url': 'http://vlive.tv', |  | ||||||
|             'uploader': None, |  | ||||||
|             'upload_date': '20181130', |  | ||||||
|             'thumbnail': r're:^https?://.*\.(?:jpg|png)$', |  | ||||||
|             'timestamp': 1543601327, |  | ||||||
|             'like_count': int, |  | ||||||
|             'channel': 'Dispatch', |  | ||||||
|             'channel_id': 'C796F3', |  | ||||||
|             'comment_count': int, |  | ||||||
|             'release_timestamp': 1543601040, |  | ||||||
|             'release_date': '20181130', |  | ||||||
|             'duration': 279, |  | ||||||
|         }, |  | ||||||
|         'params': { |  | ||||||
|             'skip_download': True, |  | ||||||
|         }, |  | ||||||
|     }] |  | ||||||
|  |  | ||||||
|     # The wayback machine has special timestamp and "mode" values: |  | ||||||
|     # timestamp: |  | ||||||
|     #   1 = the first capture |  | ||||||
|     #   2 = the last capture |  | ||||||
|     # mode: |  | ||||||
|     #   id_ = Identity - perform no alterations of the original resource, return it as it was archived. |  | ||||||
|     _WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/' |  | ||||||
|  |  | ||||||
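Concretely, the special values documented above combine into URLs like these (illustrative capture target):

    capture_target = 'http://www.vlive.tv/video/1326'
    oldest_raw = f'https://web.archive.org/web/1id_/{capture_target}'  # first capture, unaltered payload
    newest_raw = f'https://web.archive.org/web/2id_/{capture_target}'  # last capture, unaltered payload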
|     def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs): |  | ||||||
|         for retry in self.RetryManager(): |  | ||||||
|             try: |  | ||||||
|                 return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs) |  | ||||||
|             except ExtractorError as e: |  | ||||||
|                 if isinstance(e.cause, HTTPError) and e.cause.status == 404: |  | ||||||
|                     raise ExtractorError('Page was not archived', expected=True) |  | ||||||
|                 retry.error = e |  | ||||||
|                 continue |  | ||||||
|  |  | ||||||
|     def _download_archived_json(self, url, video_id, **kwargs): |  | ||||||
|         page = self._download_archived_page(url, video_id, **kwargs) |  | ||||||
|         if not page: |  | ||||||
|             raise ExtractorError('Page was not archived', expected=True) |  | ||||||
|         else: |  | ||||||
|             return self._parse_json(page, video_id) |  | ||||||
|  |  | ||||||
|     def _extract_formats_from_m3u8(self, m3u8_url, params, video_id): |  | ||||||
|         m3u8_doc = self._download_archived_page(m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False) |  | ||||||
|         if not m3u8_doc: |  | ||||||
|             return |  | ||||||
|  |  | ||||||
|         # M3U8 document should be changed to archive domain |  | ||||||
|         m3u8_doc = m3u8_doc.splitlines() |  | ||||||
|         url_base = m3u8_url.rsplit('/', 1)[0] |  | ||||||
|         first_segment = None |  | ||||||
|         for i, line in enumerate(m3u8_doc): |  | ||||||
|             if not line.startswith('#'): |  | ||||||
|                 m3u8_doc[i] = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{urllib.parse.urlencode(params)}' |  | ||||||
|                 first_segment = first_segment or m3u8_doc[i] |  | ||||||
|  |  | ||||||
|         # Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870 |  | ||||||
|         urlh = self._request_webpage(HEADRequest(first_segment), video_id, errnote=False, |  | ||||||
|                                      fatal=False, note='Check first segment availability') |  | ||||||
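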
|         if urlh: |  | ||||||
|             formats, subtitles = self._parse_m3u8_formats_and_subtitles('\n'.join(m3u8_doc), ext='mp4', video_id=video_id) |  | ||||||
|             if subtitles: |  | ||||||
|                 self._report_ignoring_subs('m3u8') |  | ||||||
|             return formats |  | ||||||
|  |  | ||||||
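The playlist rewrite in `_extract_formats_from_m3u8` is easy to isolate: every non-comment line (a segment or variant URI) gets prefixed with the archive base URL. A self-contained sketch under that assumption:

    from urllib.parse import urlencode

    WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'

    def rewrite_m3u8(m3u8_doc, url_base, params):
        lines = []
        for line in m3u8_doc.splitlines():
            if line.startswith('#'):
                lines.append(line)     # tags and comments pass through untouched
            else:
                lines.append(f'{WAYBACK_BASE_URL}{url_base}/{line}?{urlencode(params)}')
        return '\n'.join(lines)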
|     # Closely follows the logic of the ArchiveTeam grab script |  | ||||||
|     # See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua |  | ||||||
|     def _real_extract(self, url): |  | ||||||
|         video_id, url_date = self._match_valid_url(url).group('id', 'date') |  | ||||||
|  |  | ||||||
|         webpage = self._download_archived_page(f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date) |  | ||||||
|  |  | ||||||
|         player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id) |  | ||||||
|         user_country = traverse_obj(player_info, ('common', 'userCountry')) |  | ||||||
|  |  | ||||||
|         main_script_url = self._search_regex(r'<script\s+src="([^"]+/js/main\.[^"]+\.js)"', webpage, 'main script url') |  | ||||||
|         main_script = self._download_archived_page(main_script_url, video_id, note='Downloading main script') |  | ||||||
|         app_id = self._search_regex(r'appId\s*=\s*"([^"]+)"', main_script, 'app id') |  | ||||||
|  |  | ||||||
|         inkey = self._download_archived_json( |  | ||||||
|             f'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/{video_id}/inkey', video_id, note='Fetching inkey', query={ |  | ||||||
|                 'appId': app_id, |  | ||||||
|                 'platformType': 'PC', |  | ||||||
|                 'gcc': user_country, |  | ||||||
|                 'locale': 'en_US', |  | ||||||
|             }, fatal=False) |  | ||||||
|  |  | ||||||
|         vod_id = traverse_obj(player_info, ('postDetail', 'post', 'officialVideo', 'vodId')) |  | ||||||
|  |  | ||||||
|         vod_data = self._download_archived_json( |  | ||||||
|             f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{vod_id}', video_id, note='Fetching vod data', query={ |  | ||||||
|                 'key': inkey.get('inkey'), |  | ||||||
|                 'pid': 'rmcPlayer_16692457559726800',  # partially unix time and partially random. Fixed value used by archiveteam project |  | ||||||
|                 'sid': '2024', |  | ||||||
|                 'ver': '2.0', |  | ||||||
|                 'devt': 'html5_pc', |  | ||||||
|                 'doct': 'json', |  | ||||||
|                 'ptc': 'https', |  | ||||||
|                 'sptc': 'https', |  | ||||||
|                 'cpt': 'vtt', |  | ||||||
|                 'ctls': '%7B%22visible%22%3A%7B%22fullscreen%22%3Atrue%2C%22logo%22%3Afalse%2C%22playbackRate%22%3Afalse%2C%22scrap%22%3Afalse%2C%22playCount%22%3Atrue%2C%22commentCount%22%3Atrue%2C%22title%22%3Atrue%2C%22writer%22%3Atrue%2C%22expand%22%3Afalse%2C%22subtitles%22%3Atrue%2C%22thumbnails%22%3Atrue%2C%22quality%22%3Atrue%2C%22setting%22%3Atrue%2C%22script%22%3Afalse%2C%22logoDimmed%22%3Atrue%2C%22badge%22%3Atrue%2C%22seekingTime%22%3Atrue%2C%22muted%22%3Atrue%2C%22muteButton%22%3Afalse%2C%22viewerNotice%22%3Afalse%2C%22linkCount%22%3Afalse%2C%22createTime%22%3Afalse%2C%22thumbnail%22%3Atrue%7D%2C%22clicked%22%3A%7B%22expand%22%3Afalse%2C%22subtitles%22%3Afalse%7D%7D', |  | ||||||
|                 'pv': '4.26.9', |  | ||||||
|                 'dr': '1920x1080', |  | ||||||
|                 'cpl': 'en_US', |  | ||||||
|                 'lc': 'en_US', |  | ||||||
|                 'adi': '%5B%7B%22type%22%3A%22pre%22%2C%22exposure%22%3Afalse%2C%22replayExposure%22%3Afalse%7D%5D', |  | ||||||
|                 'adu': '%2F', |  | ||||||
|                 'videoId': vod_id, |  | ||||||
|                 'cc': user_country, |  | ||||||
|             }) |  | ||||||
|  |  | ||||||
|         formats = [] |  | ||||||
|  |  | ||||||
|         streams = traverse_obj(vod_data, ('streams', ...)) |  | ||||||
|         if len(streams) > 1: |  | ||||||
|             self.report_warning('Multiple streams found. Only the first stream will be downloaded.') |  | ||||||
|         stream = streams[0] |  | ||||||
|  |  | ||||||
|         max_stream = max( |  | ||||||
|             stream.get('videos') or [], |  | ||||||
|             key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None) |  | ||||||
|         if max_stream is not None: |  | ||||||
|             params = {arg.get('name'): arg.get('value') for arg in stream.get('keys', []) if arg.get('type') == 'param'} |  | ||||||
|             formats = self._extract_formats_from_m3u8(max_stream.get('source'), params, video_id) or [] |  | ||||||
|  |  | ||||||
|         # For parts of the project MP4 files were archived |  | ||||||
|         max_video = max( |  | ||||||
|             traverse_obj(vod_data, ('videos', 'list', ...)), |  | ||||||
|             key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None) |  | ||||||
|         if max_video is not None: |  | ||||||
|             video_url = self._WAYBACK_BASE_URL + max_video.get('source') |  | ||||||
|             urlh = self._request_webpage(HEADRequest(video_url), video_id, errnote=False, |  | ||||||
|                                          fatal=False, note='Check video availability') |  | ||||||
|             if urlh: |  | ||||||
|                 formats.append({'url': video_url}) |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             'id': video_id, |  | ||||||
|             'formats': formats, |  | ||||||
|             **traverse_obj(player_info, ('postDetail', 'post', { |  | ||||||
|                 'title': ('officialVideo', 'title', {str}), |  | ||||||
|                 'creator': ('author', 'nickname', {str}), |  | ||||||
|                 'channel': ('channel', 'channelName', {str}), |  | ||||||
|                 'channel_id': ('channel', 'channelCode', {str}), |  | ||||||
|                 'duration': ('officialVideo', 'playTime', {int_or_none}), |  | ||||||
|                 'view_count': ('officialVideo', 'playCount', {int_or_none}), |  | ||||||
|                 'like_count': ('officialVideo', 'likeCount', {int_or_none}), |  | ||||||
|                 'comment_count': ('officialVideo', 'commentCount', {int_or_none}), |  | ||||||
|                 'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}), |  | ||||||
|                 'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}), |  | ||||||
|             })), |  | ||||||
|             **traverse_obj(vod_data, ('meta', { |  | ||||||
|                 'uploader_id': ('user', 'id', {str}), |  | ||||||
|                 'uploader': ('user', 'name', {str}), |  | ||||||
|                 'uploader_url': ('user', 'url', {url_or_none}), |  | ||||||
|                 'thumbnail': ('cover', 'source', {url_or_none}), |  | ||||||
|             }), expected_type=lambda x: x or None), |  | ||||||
|             **NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]), |  | ||||||
|         } |  | ||||||
|   | |||||||
| @@ -4,6 +4,7 @@ from .common import InfoExtractor | |||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     extract_attributes, |     extract_attributes, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|  |     join_nonempty, | ||||||
|     parse_iso8601, |     parse_iso8601, | ||||||
|     try_get, |     try_get, | ||||||
| ) | ) | ||||||
| @@ -11,7 +12,7 @@ from ..utils import ( | |||||||
|  |  | ||||||
| class ArcPublishingIE(InfoExtractor): | class ArcPublishingIE(InfoExtractor): | ||||||
|     _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}' |     _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}' | ||||||
|     _VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX |     _VALID_URL = rf'arcpublishing:(?P<org>[a-z]+):(?P<id>{_UUID_REGEX})' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/ |         # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/ | ||||||
|         'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab', |         'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab', | ||||||
| @@ -74,12 +75,12 @@ class ArcPublishingIE(InfoExtractor): | |||||||
|     def _extract_embed_urls(cls, url, webpage): |     def _extract_embed_urls(cls, url, webpage): | ||||||
|         entries = [] |         entries = [] | ||||||
|         # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview |         # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview | ||||||
|         for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage): |         for powa_el in re.findall(rf'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="{ArcPublishingIE._UUID_REGEX}"[^>]*>)', webpage): | ||||||
|             powa = extract_attributes(powa_el) or {} |             powa = extract_attributes(powa_el) or {} | ||||||
|             org = powa.get('data-org') |             org = powa.get('data-org') | ||||||
|             uuid = powa.get('data-uuid') |             uuid = powa.get('data-uuid') | ||||||
|             if org and uuid: |             if org and uuid: | ||||||
|                 entries.append('arcpublishing:%s:%s' % (org, uuid)) |                 entries.append(f'arcpublishing:{org}:{uuid}') | ||||||
|         return entries |         return entries | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
| @@ -122,7 +123,7 @@ class ArcPublishingIE(InfoExtractor): | |||||||
|             elif stream_type in ('ts', 'hls'): |             elif stream_type in ('ts', 'hls'): | ||||||
|                 m3u8_formats = self._extract_m3u8_formats( |                 m3u8_formats = self._extract_m3u8_formats( | ||||||
|                     s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False) |                     s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False) | ||||||
|                 if all([f.get('acodec') == 'none' for f in m3u8_formats]): |                 if all(f.get('acodec') == 'none' for f in m3u8_formats): | ||||||
|                     continue |                     continue | ||||||
|                 for f in m3u8_formats: |                 for f in m3u8_formats: | ||||||
|                     height = f.get('height') |                     height = f.get('height') | ||||||
| @@ -136,7 +137,7 @@ class ArcPublishingIE(InfoExtractor): | |||||||
|             else: |             else: | ||||||
|                 vbr = int_or_none(s.get('bitrate')) |                 vbr = int_or_none(s.get('bitrate')) | ||||||
|                 formats.append({ |                 formats.append({ | ||||||
|                     'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type, |                     'format_id': join_nonempty(stream_type, vbr), | ||||||
|                     'vbr': vbr, |                     'vbr': vbr, | ||||||
|                     'width': int_or_none(s.get('width')), |                     'width': int_or_none(s.get('width')), | ||||||
|                     'height': int_or_none(s.get('height')), |                     'height': int_or_none(s.get('height')), | ||||||
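The `join_nonempty(stream_type, vbr)` swap in the hunk above keeps the old behaviour: falsy parts are skipped and the rest joined with '-'. A minimal stdlib stand-in (yt-dlp's real helper also accepts a custom delimiter):

    def join_nonempty(*values, delim='-'):
        # skip falsy values, join the rest
        return delim.join(str(v) for v in values if v)

    assert join_nonempty('hls', 1200) == 'hls-1200'
    assert join_nonempty('hls', None) == 'hls'   # matches the old ternary fallback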
|   | |||||||
| @@ -1,24 +1,25 @@ | |||||||
| import json | import functools | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from .generic import GenericIE |  | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|  |     OnDemandPagedList, | ||||||
|  |     bug_reports_message, | ||||||
|     determine_ext, |     determine_ext, | ||||||
|     ExtractorError, |  | ||||||
|     int_or_none, |     int_or_none, | ||||||
|  |     join_nonempty, | ||||||
|  |     jwt_decode_hs256, | ||||||
|  |     make_archive_id, | ||||||
|     parse_duration, |     parse_duration, | ||||||
|     qualities, |     parse_iso8601, | ||||||
|  |     remove_start, | ||||||
|     str_or_none, |     str_or_none, | ||||||
|     try_get, |  | ||||||
|     unified_strdate, |     unified_strdate, | ||||||
|     unified_timestamp, |  | ||||||
|     update_url, |  | ||||||
|     update_url_query, |     update_url_query, | ||||||
|     url_or_none, |     url_or_none, | ||||||
|     xpath_text, |     xpath_text, | ||||||
| ) | ) | ||||||
| from ..compat import compat_etree_fromstring | from ..utils.traversal import traverse_obj | ||||||
|  |  | ||||||
|  |  | ||||||
| class ARDMediathekBaseIE(InfoExtractor): | class ARDMediathekBaseIE(InfoExtractor): | ||||||
| @@ -61,45 +62,6 @@ class ARDMediathekBaseIE(InfoExtractor): | |||||||
|             'subtitles': subtitles, |             'subtitles': subtitles, | ||||||
|         } |         } | ||||||
|  |  | ||||||
|     def _ARD_extract_episode_info(self, title): |  | ||||||
|         """Try to extract season/episode data from the title.""" |  | ||||||
|         res = {} |  | ||||||
|         if not title: |  | ||||||
|             return res |  | ||||||
|  |  | ||||||
|         for pattern in [ |  | ||||||
|             # Pattern for title like "Homo sapiens (S06/E07) - Originalversion" |  | ||||||
|             # from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw |  | ||||||
|             r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*', |  | ||||||
|             # E.g.: title="Fritjof aus Norwegen (2) (AD)" |  | ||||||
|             # from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/ |  | ||||||
|             r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*', |  | ||||||
|             r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*', |  | ||||||
|             # E.g.: title="Folge 25/42: Symmetrie" |  | ||||||
|             # from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/ |  | ||||||
|             # E.g.: title="Folge 1063 - Vertrauen" |  | ||||||
|             # from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/ |  | ||||||
|             r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:| -|) ).*', |  | ||||||
|         ]: |  | ||||||
|             m = re.match(pattern, title) |  | ||||||
|             if m: |  | ||||||
|                 groupdict = m.groupdict() |  | ||||||
|                 res['season_number'] = int_or_none(groupdict.get('season_number')) |  | ||||||
|                 res['episode_number'] = int_or_none(groupdict.get('episode_number')) |  | ||||||
|                 res['episode'] = str_or_none(groupdict.get('episode')) |  | ||||||
|                 # Build the episode title by removing numeric episode information: |  | ||||||
|                 if groupdict.get('ep_info') and not res['episode']: |  | ||||||
|                     res['episode'] = str_or_none( |  | ||||||
|                         title.replace(groupdict.get('ep_info'), '')) |  | ||||||
|                 if res['episode']: |  | ||||||
|                     res['episode'] = res['episode'].strip() |  | ||||||
|                 break |  | ||||||
|  |  | ||||||
|         # As a fallback use the whole title as the episode name: |  | ||||||
|         if not res.get('episode'): |  | ||||||
|             res['episode'] = title.strip() |  | ||||||
|         return res |  | ||||||
|  |  | ||||||
|     def _extract_formats(self, media_info, video_id): |     def _extract_formats(self, media_info, video_id): | ||||||
|         type_ = media_info.get('_type') |         type_ = media_info.get('_type') | ||||||
|         media_array = media_info.get('_mediaArray', []) |         media_array = media_info.get('_mediaArray', []) | ||||||
| @@ -123,7 +85,7 @@ class ARDMediathekBaseIE(InfoExtractor): | |||||||
|                         formats.extend(self._extract_f4m_formats( |                         formats.extend(self._extract_f4m_formats( | ||||||
|                             update_url_query(stream_url, { |                             update_url_query(stream_url, { | ||||||
|                                 'hdcore': '3.1.1', |                                 'hdcore': '3.1.1', | ||||||
|                                 'plugin': 'aasp-3.1.1.69.124' |                                 'plugin': 'aasp-3.1.1.69.124', | ||||||
|                             }), video_id, f4m_id='hds', fatal=False)) |                             }), video_id, f4m_id='hds', fatal=False)) | ||||||
|                     elif ext == 'm3u8': |                     elif ext == 'm3u8': | ||||||
|                         formats.extend(self._extract_m3u8_formats( |                         formats.extend(self._extract_m3u8_formats( | ||||||
| @@ -134,12 +96,12 @@ class ARDMediathekBaseIE(InfoExtractor): | |||||||
|                             f = { |                             f = { | ||||||
|                                 'url': server, |                                 'url': server, | ||||||
|                                 'play_path': stream_url, |                                 'play_path': stream_url, | ||||||
|                                 'format_id': 'a%s-rtmp-%s' % (num, quality), |                                 'format_id': f'a{num}-rtmp-{quality}', | ||||||
|                             } |                             } | ||||||
|                         else: |                         else: | ||||||
|                             f = { |                             f = { | ||||||
|                                 'url': stream_url, |                                 'url': stream_url, | ||||||
|                                 'format_id': 'a%s-%s-%s' % (num, ext, quality) |                                 'format_id': f'a{num}-{ext}-{quality}', | ||||||
|                             } |                             } | ||||||
|                         m = re.search( |                         m = re.search( | ||||||
|                             r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', |                             r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', | ||||||
| @@ -155,144 +117,12 @@ class ARDMediathekBaseIE(InfoExtractor): | |||||||
|         return formats |         return formats | ||||||
|  |  | ||||||
|  |  | ||||||
| class ARDMediathekIE(ARDMediathekBaseIE): |  | ||||||
|     IE_NAME = 'ARD:mediathek' |  | ||||||
|     _VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' |  | ||||||
|  |  | ||||||
|     _TESTS = [{ |  | ||||||
|         # available till 26.07.2022 |  | ||||||
|         'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '44726822', |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?', |  | ||||||
|             'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5', |  | ||||||
|             'duration': 1740, |  | ||||||
|         }, |  | ||||||
|         'params': { |  | ||||||
|             # m3u8 download |  | ||||||
|             'skip_download': True, |  | ||||||
|         } |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }, { |  | ||||||
|         # audio |  | ||||||
|         'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }, { |  | ||||||
|         'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }, { |  | ||||||
|         # audio |  | ||||||
|         'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }] |  | ||||||
|  |  | ||||||
|     @classmethod |  | ||||||
|     def suitable(cls, url): |  | ||||||
|         return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url) |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |  | ||||||
|         # determine video id from url |  | ||||||
|         m = self._match_valid_url(url) |  | ||||||
|  |  | ||||||
|         document_id = None |  | ||||||
|  |  | ||||||
|         numid = re.search(r'documentId=([0-9]+)', url) |  | ||||||
|         if numid: |  | ||||||
|             document_id = video_id = numid.group(1) |  | ||||||
|         else: |  | ||||||
|             video_id = m.group('video_id') |  | ||||||
|  |  | ||||||
|         webpage = self._download_webpage(url, video_id) |  | ||||||
|  |  | ||||||
|         ERRORS = ( |  | ||||||
|             ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'), |  | ||||||
|             ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<', |  | ||||||
|              'Video %s is no longer available'), |  | ||||||
|         ) |  | ||||||
|  |  | ||||||
|         for pattern, message in ERRORS: |  | ||||||
|             if pattern in webpage: |  | ||||||
|                 raise ExtractorError(message % video_id, expected=True) |  | ||||||
|  |  | ||||||
|         if re.search(r'[\?&]rss($|[=&])', url): |  | ||||||
|             doc = compat_etree_fromstring(webpage.encode('utf-8')) |  | ||||||
|             if doc.tag == 'rss': |  | ||||||
|                 return GenericIE()._extract_rss(url, video_id, doc) |  | ||||||
|  |  | ||||||
|         title = self._og_search_title(webpage, default=None) or self._html_search_regex( |  | ||||||
|             [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', |  | ||||||
|              r'<meta name="dcterms\.title" content="(.*?)"/>', |  | ||||||
|              r'<h4 class="headline">(.*?)</h4>', |  | ||||||
|              r'<title[^>]*>(.*?)</title>'], |  | ||||||
|             webpage, 'title') |  | ||||||
|         description = self._og_search_description(webpage, default=None) or self._html_search_meta( |  | ||||||
|             'dcterms.abstract', webpage, 'description', default=None) |  | ||||||
|         if description is None: |  | ||||||
|             description = self._html_search_meta( |  | ||||||
|                 'description', webpage, 'meta description', default=None) |  | ||||||
|         if description is None: |  | ||||||
|             description = self._html_search_regex( |  | ||||||
|                 r'<p\s+class="teasertext">(.+?)</p>', |  | ||||||
|                 webpage, 'teaser text', default=None) |  | ||||||
|  |  | ||||||
|         # Thumbnail is sometimes not present. |  | ||||||
|         # It is in the mobile version, but that seems to use a different URL |  | ||||||
|         # structure altogether. |  | ||||||
|         thumbnail = self._og_search_thumbnail(webpage, default=None) |  | ||||||
|  |  | ||||||
|         media_streams = re.findall(r'''(?x) |  | ||||||
|             mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s* |  | ||||||
|             "([^"]+)"''', webpage) |  | ||||||
|  |  | ||||||
|         if media_streams: |  | ||||||
|             QUALITIES = qualities(['lo', 'hi', 'hq']) |  | ||||||
|             formats = [] |  | ||||||
|             for furl in set(media_streams): |  | ||||||
|                 if furl.endswith('.f4m'): |  | ||||||
|                     fid = 'f4m' |  | ||||||
|                 else: |  | ||||||
|                     fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl) |  | ||||||
|                     fid = fid_m.group(1) if fid_m else None |  | ||||||
|                 formats.append({ |  | ||||||
|                     'quality': QUALITIES(fid), |  | ||||||
|                     'format_id': fid, |  | ||||||
|                     'url': furl, |  | ||||||
|                 }) |  | ||||||
|             info = { |  | ||||||
|                 'formats': formats, |  | ||||||
|             } |  | ||||||
|         else:  # request JSON file |  | ||||||
|             if not document_id: |  | ||||||
|                 video_id = self._search_regex( |  | ||||||
|                     (r'/play/(?:config|media|sola)/(\d+)', r'contentId["\']\s*:\s*(\d+)'), |  | ||||||
|                     webpage, 'media id', default=None) |  | ||||||
|             info = self._extract_media_info( |  | ||||||
|                 'http://www.ardmediathek.de/play/media/%s' % video_id, |  | ||||||
|                 webpage, video_id) |  | ||||||
|  |  | ||||||
|         info.update({ |  | ||||||
|             'id': video_id, |  | ||||||
|             'title': title, |  | ||||||
|             'description': description, |  | ||||||
|             'thumbnail': thumbnail, |  | ||||||
|         }) |  | ||||||
|         info.update(self._ARD_extract_episode_info(info['title'])) |  | ||||||
|  |  | ||||||
|         return info |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class ARDIE(InfoExtractor): | class ARDIE(InfoExtractor): | ||||||
|     _VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html' |     _VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         # available till 7.12.2023 |         # available till 7.12.2023 | ||||||
|         'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html', |         'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html', | ||||||
|         'md5': 'a438f671e87a7eba04000336a119ccc4', |         'md5': '94812e6438488fb923c361a44469614b', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': 'maischberger-video-424', |             'id': 'maischberger-video-424', | ||||||
|             'display_id': 'maischberger-video-424', |             'display_id': 'maischberger-video-424', | ||||||
| @@ -399,31 +229,36 @@ class ARDIE(InfoExtractor): | |||||||
|         } |         } | ||||||
|  |  | ||||||
|  |  | ||||||
| class ARDBetaMediathekIE(ARDMediathekBaseIE): | class ARDBetaMediathekIE(InfoExtractor): | ||||||
|     _VALID_URL = r'''(?x)https:// |     IE_NAME = 'ARDMediathek' | ||||||
|  |     _VALID_URL = r'''(?x)https?:// | ||||||
|         (?:(?:beta|www)\.)?ardmediathek\.de/ |         (?:(?:beta|www)\.)?ardmediathek\.de/ | ||||||
|         (?:(?P<client>[^/]+)/)? |         (?:[^/]+/)? | ||||||
|         (?:player|live|video|(?P<playlist>sendung|sammlung))/ |         (?:player|live|video)/ | ||||||
|         (?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)? |         (?:[^?#]+/)? | ||||||
|         (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+) |         (?P<id>[a-zA-Z0-9]+) | ||||||
|         (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))''' |         /?(?:[?#]|$)''' | ||||||
|  |     _GEO_COUNTRIES = ['DE'] | ||||||
|  |     _TOKEN_URL = 'https://sso.ardmediathek.de/sso/token' | ||||||
|  |  | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI', |         'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0', | ||||||
|         'md5': '3fd5fead7a370a819341129c8d713136', |         'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen', |             'display_id': 'Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0', | ||||||
|             'id': '12172961', |             'id': '12939099', | ||||||
|             'title': 'Wolfsland - Die traurigen Schwestern', |             'title': 'Liebe auf vier Pfoten', | ||||||
|             'description': r're:^Als der Polizeiobermeister Raaben', |             'description': r're:^Claudia Schmitt, Anwältin in Salzburg', | ||||||
|             'duration': 5241, |             'duration': 5222, | ||||||
|             'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957', |             'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b', | ||||||
|             'timestamp': 1670710500, |             'timestamp': 1701343800, | ||||||
|             'upload_date': '20221210', |             'upload_date': '20231130', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'age_limit': 12, |             'episode': 'Liebe auf vier Pfoten', | ||||||
|             'episode': 'Wolfsland - Die traurigen Schwestern', |             'series': 'Filme im MDR', | ||||||
|             'series': 'Filme im MDR' |             'age_limit': 0, | ||||||
|  |             'channel': 'MDR', | ||||||
|  |             '_old_archive_ids': ['ardbetamediathek Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0'], | ||||||
|         }, |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/', |         'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/', | ||||||
| @@ -450,11 +285,49 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): | |||||||
|             'timestamp': 1636398000, |             'timestamp': 1636398000, | ||||||
|             'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b', |             'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b', | ||||||
|             'upload_date': '20211108', |             'upload_date': '20211108', | ||||||
|             'display_id': 'tagesschau-oder-tagesschau-20-00-uhr/das-erste', |             'display_id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll', | ||||||
|             'duration': 915, |             'duration': 915, | ||||||
|             'episode': 'tagesschau, 20:00 Uhr', |             'episode': 'tagesschau, 20:00 Uhr', | ||||||
|             'series': 'tagesschau', |             'series': 'tagesschau', | ||||||
|             'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49', |             'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678', | ||||||
|  |             'channel': 'ARD-Aktuell', | ||||||
|  |             '_old_archive_ids': ['ardbetamediathek Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll'], | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.ardmediathek.de/video/7-tage/7-tage-unter-harten-jungs/hr-fernsehen/N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3', | ||||||
|  |         'md5': 'c428b9effff18ff624d4f903bda26315', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '94834686', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'duration': 2670, | ||||||
|  |             'episode': '7 Tage ... unter harten Jungs', | ||||||
|  |             'description': 'md5:0f215470dcd2b02f59f4bd10c963f072', | ||||||
|  |             'upload_date': '20231005', | ||||||
|  |             'timestamp': 1696491171, | ||||||
|  |             'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3', | ||||||
|  |             'series': '7 Tage ...', | ||||||
|  |             'channel': 'HR', | ||||||
|  |             'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:430c86d233afa42d?w=960&ch=fa32ba69bc87989a', | ||||||
|  |             'title': '7 Tage ... unter harten Jungs', | ||||||
|  |             '_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'], | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.ardmediathek.de/video/lokalzeit-aus-duesseldorf/lokalzeit-aus-duesseldorf-oder-31-10-2024/wdr-duesseldorf/Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '13847165', | ||||||
|  |             'chapters': 'count:8', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'channel': 'WDR', | ||||||
|  |             'display_id': 'Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz', | ||||||
|  |             'episode': 'Lokalzeit aus Düsseldorf | 31.10.2024', | ||||||
|  |             'series': 'Lokalzeit aus Düsseldorf', | ||||||
|  |             'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f02ec9bd9b7bd5f6?w=960&ch=612491dcd5e09b0c', | ||||||
|  |             'title': 'Lokalzeit aus Düsseldorf | 31.10.2024', | ||||||
|  |             'upload_date': '20241031', | ||||||
|  |             'timestamp': 1730399400, | ||||||
|  |             'description': 'md5:12db30b3b706314efe3778b8df1a7058', | ||||||
|  |             'duration': 1759, | ||||||
|  |             '_old_archive_ids': ['ardbetamediathek Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz'], | ||||||
|         }, |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', |         'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', | ||||||
| @@ -471,203 +344,260 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): | |||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg', |         'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     def _extract_episode_info(self, title): | ||||||
|  |         patterns = [ | ||||||
|  |             # Pattern for title like "Homo sapiens (S06/E07) - Originalversion" | ||||||
|  |             # from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw | ||||||
|  |             r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*', | ||||||
|  |             # E.g.: title="Fritjof aus Norwegen (2) (AD)" | ||||||
|  |             # from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/ | ||||||
|  |             r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*', | ||||||
|  |             r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*', | ||||||
|  |             # E.g.: title="Folge 25/42: Symmetrie" | ||||||
|  |             # from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/ | ||||||
|  |             # E.g.: title="Folge 1063 - Vertrauen" | ||||||
|  |             # from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/ | ||||||
|  |             r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:| -|) ).*', | ||||||
|  |             # As a fallback use the full title | ||||||
|  |             r'(?P<title>.*)', | ||||||
|  |         ] | ||||||
|  |  | ||||||
|  |         return traverse_obj(patterns, (..., {functools.partial(re.match, string=title)}, { | ||||||
|  |             'season_number': ('season_number', {int_or_none}), | ||||||
|  |             'episode_number': ('episode_number', {int_or_none}), | ||||||
|  |             'episode': (( | ||||||
|  |                 ('episode', {str_or_none}), | ||||||
|  |                 ('ep_info', {lambda x: title.replace(x, '')}), | ||||||
|  |                 ('title', {str}), | ||||||
|  |             ), {str.strip}), | ||||||
|  |         }), get_all=False) | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         display_id = self._match_id(url) | ||||||
|  |         query = {'embedded': 'false', 'mcV6': 'true'} | ||||||
|  |         headers = {} | ||||||
|  |  | ||||||
|  |         if self._get_cookies(self._TOKEN_URL).get('ams'): | ||||||
|  |             token = self._download_json( | ||||||
|  |                 self._TOKEN_URL, display_id, 'Fetching token for age verification', | ||||||
|  |                 'Unable to fetch age verification token', fatal=False) | ||||||
|  |             id_token = traverse_obj(token, ('idToken', {str})) | ||||||
|  |             decoded_token = traverse_obj(id_token, ({jwt_decode_hs256}, {dict})) | ||||||
|  |             user_id = traverse_obj(decoded_token, (('user_id', 'sub'), {str}), get_all=False) | ||||||
|  |             if not user_id: | ||||||
|  |                 self.report_warning('Unable to extract token, continuing without authentication') | ||||||
|  |             else: | ||||||
|  |                 headers['x-authorization'] = f'Bearer {id_token}' | ||||||
|  |                 query['userId'] = user_id | ||||||
|  |                 if decoded_token.get('age_rating') != 18: | ||||||
|  |                     self.report_warning('Account is not verified as 18+; video may be unavailable') | ||||||
|  |  | ||||||
|  |         page_data = self._download_json( | ||||||
|  |             f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}', | ||||||
|  |             display_id, query=query, headers=headers) | ||||||
|  |  | ||||||
|  |         # For user convenience we use the old contentId instead of the longer crid | ||||||
|  |         # Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283 | ||||||
|  |         old_id = traverse_obj(page_data, ('tracking', 'atiCustomVars', 'contentId', {int})) | ||||||
|  |         if old_id is not None: | ||||||
|  |             video_id = str(old_id) | ||||||
|  |             archive_ids = [make_archive_id(ARDBetaMediathekIE, display_id)] | ||||||
|  |         else: | ||||||
|  |             self.report_warning(f'Could not extract contentId{bug_reports_message()}') | ||||||
|  |             video_id = display_id | ||||||
|  |             archive_ids = None | ||||||
|  |  | ||||||
|  |         player_data = traverse_obj( | ||||||
|  |             page_data, ('widgets', lambda _, v: v['type'] in ('player_ondemand', 'player_live'), {dict}), get_all=False) | ||||||
|  |         is_live = player_data.get('type') == 'player_live' | ||||||
|  |         media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict})) | ||||||
|  |  | ||||||
|  |         if player_data.get('blockedByFsk'): | ||||||
|  |             self.raise_login_required('This video is only available for age verified users or after 22:00') | ||||||
|  |  | ||||||
|  |         formats = [] | ||||||
|  |         subtitles = {} | ||||||
|  |         for stream in traverse_obj(media_data, ('streams', ..., {dict})): | ||||||
|  |             kind = stream.get('kind') | ||||||
|  |             # Prioritize main stream over sign language and others | ||||||
|  |             preference = 1 if kind == 'main' else None | ||||||
|  |             for media in traverse_obj(stream, ('media', lambda _, v: url_or_none(v['url']))): | ||||||
|  |                 media_url = media['url'] | ||||||
|  |  | ||||||
|  |                 audio_kind = traverse_obj(media, ( | ||||||
|  |                     'audios', 0, 'kind', {str}), default='').replace('standard', '') | ||||||
|  |                 lang_code = traverse_obj(media, ('audios', 0, 'languageCode', {str})) or 'deu' | ||||||
|  |                 lang = join_nonempty(lang_code, audio_kind) | ||||||
|  |                 language_preference = 10 if lang == 'deu' else -10 | ||||||
|  |  | ||||||
|  |                 if determine_ext(media_url) == 'm3u8': | ||||||
|  |                     fmts, subs = self._extract_m3u8_formats_and_subtitles( | ||||||
|  |                         media_url, video_id, m3u8_id=f'hls-{kind}', preference=preference, fatal=False, live=is_live) | ||||||
|  |                     for f in fmts: | ||||||
|  |                         f['language'] = lang | ||||||
|  |                         f['language_preference'] = language_preference | ||||||
|  |                     formats.extend(fmts) | ||||||
|  |                     self._merge_subtitles(subs, target=subtitles) | ||||||
|  |                 else: | ||||||
|  |                     formats.append({ | ||||||
|  |                         'url': media_url, | ||||||
|  |                         'format_id': f'http-{kind}', | ||||||
|  |                         'preference': preference, | ||||||
|  |                         'language': lang, | ||||||
|  |                         'language_preference': language_preference, | ||||||
|  |                         **traverse_obj(media, { | ||||||
|  |                             'format_note': ('forcedLabel', {str}), | ||||||
|  |                             'width': ('maxHResolutionPx', {int_or_none}), | ||||||
|  |                             'height': ('maxVResolutionPx', {int_or_none}), | ||||||
|  |                             'vcodec': ('videoCodec', {str}), | ||||||
|  |                         }), | ||||||
|  |                     }) | ||||||
|  |  | ||||||
|  |         for sub in traverse_obj(media_data, ('subtitles', ..., {dict})): | ||||||
|  |             for sources in traverse_obj(sub, ('sources', lambda _, v: url_or_none(v['url']))): | ||||||
|  |                 subtitles.setdefault(sub.get('languageCode') or 'deu', []).append({ | ||||||
|  |                     'url': sources['url'], | ||||||
|  |                     'ext': {'webvtt': 'vtt', 'ebutt': 'ttml'}.get(sources.get('kind')), | ||||||
|  |                 }) | ||||||
|  |  | ||||||
|  |         age_limit = traverse_obj(page_data, ('fskRating', {lambda x: remove_start(x, 'FSK')}, {int_or_none})) | ||||||
|  |         return { | ||||||
|  |             'id': video_id, | ||||||
|  |             'display_id': display_id, | ||||||
|  |             'formats': formats, | ||||||
|  |             'subtitles': subtitles, | ||||||
|  |             'is_live': is_live, | ||||||
|  |             'age_limit': age_limit, | ||||||
|  |             **traverse_obj(media_data, { | ||||||
|  |                 'chapters': ('pluginData', 'jumpmarks@all', 'chapterArray', lambda _, v: int_or_none(v['chapterTime']), { | ||||||
|  |                     'start_time': ('chapterTime', {int_or_none}), | ||||||
|  |                     'title': ('chapterTitle', {str}), | ||||||
|  |                 }), | ||||||
|  |             }), | ||||||
|  |             **traverse_obj(media_data, ('meta', { | ||||||
|  |                 'title': 'title', | ||||||
|  |                 'description': 'synopsis', | ||||||
|  |                 'timestamp': ('broadcastedOnDateTime', {parse_iso8601}), | ||||||
|  |                 'series': 'seriesTitle', | ||||||
|  |                 'thumbnail': ('images', 0, 'url', {url_or_none}), | ||||||
|  |                 'duration': ('durationSeconds', {int_or_none}), | ||||||
|  |                 'channel': 'clipSourceName', | ||||||
|  |             })), | ||||||
|  |             **self._extract_episode_info(page_data.get('title')), | ||||||
|  |             '_old_archive_ids': archive_ids, | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class ARDMediathekCollectionIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'''(?x)https?:// | ||||||
|  |         (?:(?:beta|www)\.)?ardmediathek\.de/ | ||||||
|  |         (?:[^/?#]+/)? | ||||||
|  |         (?P<playlist>sendung|serie|sammlung)/ | ||||||
|  |         (?:(?P<display_id>[^?#]+?)/)? | ||||||
|  |         (?P<id>[a-zA-Z0-9]+) | ||||||
|  |         (?:/(?P<season>\d+)(?:/(?P<version>OV|AD))?)?/?(?:[?#]|$)''' | ||||||
|  |     _GEO_COUNTRIES = ['DE'] | ||||||
|  |  | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://www.ardmediathek.de/serie/quiz/staffel-1-originalversion/Y3JpZDovL3dkci5kZS9vbmUvcXVpeg/1/OV', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'Y3JpZDovL3dkci5kZS9vbmUvcXVpeg_1_OV', | ||||||
|  |             'display_id': 'quiz/staffel-1-originalversion', | ||||||
|  |             'title': 'Staffel 1 Originalversion', | ||||||
|  |         }, | ||||||
|  |         'playlist_count': 3, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-4-mit-audiodeskription/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/4/AD', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_4_AD', | ||||||
|  |             'display_id': 'babylon-berlin/staffel-4-mit-audiodeskription', | ||||||
|  |             'title': 'Staffel 4 mit Audiodeskription', | ||||||
|  |         }, | ||||||
|  |         'playlist_count': 12, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/1/', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_1', | ||||||
|  |             'display_id': 'babylon-berlin/staffel-1', | ||||||
|  |             'title': 'Staffel 1', | ||||||
|  |         }, | ||||||
|  |         'playlist_count': 8, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.ardmediathek.de/sendung/tatort/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA', | ||||||
|  |             'display_id': 'tatort', | ||||||
|  |             'title': 'Tatort', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 500, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.ardmediathek.de/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '5eOHzt8XB2sqeFXbIoJlg2', | ||||||
|  |             'display_id': 'die-kirche-bleibt-im-dorf', | ||||||
|  |             'title': 'Die Kirche bleibt im Dorf', | ||||||
|  |             'description': 'Die Kirche bleibt im Dorf', | ||||||
|  |         }, | ||||||
|  |         'playlist_count': 4, | ||||||
|     }, { |     }, { | ||||||
|         # playlist of type 'sendung' |         # playlist of type 'sendung' | ||||||
|         'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/', |         'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         # playlist of type 'serie' | ||||||
|  |         'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1', | ||||||
|  |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         # playlist of type 'sammlung' |         # playlist of type 'sammlung' | ||||||
|         'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/', |         'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }, { |  | ||||||
|         'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber): |     _PAGE_SIZE = 100 | ||||||
|         """ Query the ARD server for playlist information |  | ||||||
|         and return the data in "raw" format """ |  | ||||||
|         if mode == 'sendung': |  | ||||||
|             graphQL = json.dumps({ |  | ||||||
|                 'query': '''{ |  | ||||||
|                     showPage( |  | ||||||
|                         client: "%s" |  | ||||||
|                         showId: "%s" |  | ||||||
|                         pageNumber: %d |  | ||||||
|                     ) { |  | ||||||
|                         pagination { |  | ||||||
|                             pageSize |  | ||||||
|                             totalElements |  | ||||||
|                         } |  | ||||||
|                         teasers {        # Array |  | ||||||
|                             mediumTitle |  | ||||||
|                             links { target { id href title } } |  | ||||||
|                             type |  | ||||||
|                         } |  | ||||||
|                     }}''' % (client, playlist_id, pageNumber), |  | ||||||
|             }).encode() |  | ||||||
|         else:  # mode == 'sammlung' |  | ||||||
|             graphQL = json.dumps({ |  | ||||||
|                 'query': '''{ |  | ||||||
|                     morePage( |  | ||||||
|                         client: "%s" |  | ||||||
|                         compilationId: "%s" |  | ||||||
|                         pageNumber: %d |  | ||||||
|                     ) { |  | ||||||
|                         widget { |  | ||||||
|                             pagination { |  | ||||||
|                                 pageSize |  | ||||||
|                                 totalElements |  | ||||||
|                             } |  | ||||||
|                             teasers {        # Array |  | ||||||
|                                 mediumTitle |  | ||||||
|                                 links { target { id href title } } |  | ||||||
|                                 type |  | ||||||
|                             } |  | ||||||
|                         } |  | ||||||
|                     }}''' % (client, playlist_id, pageNumber), |  | ||||||
|             }).encode() |  | ||||||
|         # Resources for ARD graphQL debugging: |  | ||||||
|         # https://api-test.ardmediathek.de/public-gateway |  | ||||||
|         show_page = self._download_json( |  | ||||||
|             'https://api.ardmediathek.de/public-gateway', |  | ||||||
|             '[Playlist] %s' % display_id, |  | ||||||
|             data=graphQL, |  | ||||||
|             headers={'Content-Type': 'application/json'})['data'] |  | ||||||
|         # align the structure of the returned data: |  | ||||||
|         if mode == 'sendung': |  | ||||||
|             show_page = show_page['showPage'] |  | ||||||
|         else:  # mode == 'sammlung' |  | ||||||
|             show_page = show_page['morePage']['widget'] |  | ||||||
|         return show_page |  | ||||||
|  |  | ||||||
|     def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode): |  | ||||||
|         """ Collects all playlist entries and returns them as info dict. |  | ||||||
|         Supports playlists of mode 'sendung' and 'sammlung', and also nested |  | ||||||
|         playlists. """ |  | ||||||
|         entries = [] |  | ||||||
|         pageNumber = 0 |  | ||||||
|         while True:  # iterate by pageNumber |  | ||||||
|             show_page = self._ARD_load_playlist_snipped( |  | ||||||
|                 playlist_id, display_id, client, mode, pageNumber) |  | ||||||
|             for teaser in show_page['teasers']:  # process playlist items |  | ||||||
|                 if '/compilation/' in teaser['links']['target']['href']: |  | ||||||
|                     # alternative cond.: teaser['type'] == "compilation" |  | ||||||
|                     # => This is a nested compilation, e.g.: |  | ||||||
|                     # https://www.ardmediathek.de/ard/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2/ |  | ||||||
|                     link_mode = 'sammlung' |  | ||||||
|                 else: |  | ||||||
|                     link_mode = 'video' |  | ||||||
|  |  | ||||||
|                 item_url = 'https://www.ardmediathek.de/%s/%s/%s/%s/%s' % ( |  | ||||||
|                     client, link_mode, display_id, |  | ||||||
|                     # perform HTML quoting of the episode title similar to ARD: |  | ||||||
|                     re.sub('^-|-$', '',  # remove '-' from begin/end |  | ||||||
|                            re.sub('[^a-zA-Z0-9]+', '-',  # replace special chars by - |  | ||||||
|                                   teaser['links']['target']['title'].lower() |  | ||||||
|                                   .replace('ä', 'ae').replace('ö', 'oe') |  | ||||||
|                                   .replace('ü', 'ue').replace('ß', 'ss'))), |  | ||||||
|                     teaser['links']['target']['id']) |  | ||||||
|                 entries.append(self.url_result( |  | ||||||
|                     item_url, |  | ||||||
|                     ie=ARDBetaMediathekIE.ie_key())) |  | ||||||
|  |  | ||||||
|             if (show_page['pagination']['pageSize'] * (pageNumber + 1) |  | ||||||
|                >= show_page['pagination']['totalElements']): |  | ||||||
|                 # we've processed enough pages to get all playlist entries |  | ||||||
|                 break |  | ||||||
|             pageNumber = pageNumber + 1 |  | ||||||
|  |  | ||||||
|         return self.playlist_result(entries, playlist_id, playlist_title=display_id) |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id, display_id, playlist_type, client, season_number = self._match_valid_url(url).group( |         playlist_id, display_id, playlist_type, season_number, version = self._match_valid_url(url).group( | ||||||
|             'id', 'display_id', 'playlist', 'client', 'season') |             'id', 'display_id', 'playlist', 'season', 'version') | ||||||
|         display_id, client = display_id or video_id, client or 'ard' |  | ||||||
|  |  | ||||||
|         if playlist_type: |         def call_api(page_num): | ||||||
|             # TODO: Extract only specified season |             api_path = 'compilations/ard' if playlist_type == 'sammlung' else 'widgets/ard/asset' | ||||||
|             return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type) |             return self._download_json( | ||||||
|  |                 f'https://api.ardmediathek.de/page-gateway/{api_path}/{playlist_id}', playlist_id, | ||||||
|  |                 f'Downloading playlist page {page_num}', query={ | ||||||
|  |                     'pageNumber': page_num, | ||||||
|  |                     'pageSize': self._PAGE_SIZE, | ||||||
|  |                     **({ | ||||||
|  |                         'seasoned': 'true', | ||||||
|  |                         'seasonNumber': season_number, | ||||||
|  |                         'withOriginalversion': 'true' if version == 'OV' else 'false', | ||||||
|  |                         'withAudiodescription': 'true' if version == 'AD' else 'false', | ||||||
|  |                     } if season_number else {}), | ||||||
|  |                 }) | ||||||
|  |  | ||||||
|         player_page = self._download_json( |         def fetch_page(page_num): | ||||||
|             'https://api.ardmediathek.de/public-gateway', |             for item in traverse_obj(call_api(page_num), ('teasers', ..., {dict})): | ||||||
|             display_id, data=json.dumps({ |                 item_id = traverse_obj(item, ('links', 'target', ('urlId', 'id')), 'id', get_all=False) | ||||||
|                 'query': '''{ |                 if not item_id or item_id == playlist_id: | ||||||
|   playerPage(client:"%s", clipId: "%s") { |                     continue | ||||||
|     blockedByFsk |                 item_mode = 'sammlung' if item.get('type') == 'compilation' else 'video' | ||||||
|     broadcastedOn |                 yield self.url_result( | ||||||
|     maturityContentRating |                     f'https://www.ardmediathek.de/{item_mode}/{item_id}', | ||||||
|     mediaCollection { |                     ie=(ARDMediathekCollectionIE if item_mode == 'sammlung' else ARDBetaMediathekIE), | ||||||
|       _duration |                     **traverse_obj(item, { | ||||||
|       _geoblocked |                         'id': ('id', {str}), | ||||||
|       _isLive |                         'title': ('longTitle', {str}), | ||||||
|       _mediaArray { |                         'duration': ('duration', {int_or_none}), | ||||||
|         _mediaStreamArray { |                         'timestamp': ('broadcastedOn', {parse_iso8601}), | ||||||
|           _quality |                     })) | ||||||
|           _server |  | ||||||
|           _stream |  | ||||||
|         } |  | ||||||
|       } |  | ||||||
|       _previewImage |  | ||||||
|       _subtitleUrl |  | ||||||
|       _type |  | ||||||
|     } |  | ||||||
|     show { |  | ||||||
|       title |  | ||||||
|     } |  | ||||||
|     image { |  | ||||||
|       src |  | ||||||
|     } |  | ||||||
|     synopsis |  | ||||||
|     title |  | ||||||
|     tracking { |  | ||||||
|       atiCustomVars { |  | ||||||
|         contentId |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| }''' % (client, video_id), |  | ||||||
|             }).encode(), headers={ |  | ||||||
|                 'Content-Type': 'application/json' |  | ||||||
|             })['data']['playerPage'] |  | ||||||
|         title = player_page['title'] |  | ||||||
|         content_id = str_or_none(try_get( |  | ||||||
|             player_page, lambda x: x['tracking']['atiCustomVars']['contentId'])) |  | ||||||
|         media_collection = player_page.get('mediaCollection') or {} |  | ||||||
|         if not media_collection and content_id: |  | ||||||
|             media_collection = self._download_json( |  | ||||||
|                 'https://www.ardmediathek.de/play/media/' + content_id, |  | ||||||
|                 content_id, fatal=False) or {} |  | ||||||
|         info = self._parse_media_info( |  | ||||||
|             media_collection, content_id or video_id, |  | ||||||
|             player_page.get('blockedByFsk')) |  | ||||||
|         age_limit = None |  | ||||||
|         description = player_page.get('synopsis') |  | ||||||
|         maturity_content_rating = player_page.get('maturityContentRating') |  | ||||||
|         if maturity_content_rating: |  | ||||||
|             age_limit = int_or_none(maturity_content_rating.lstrip('FSK')) |  | ||||||
|         if not age_limit and description: |  | ||||||
|             age_limit = int_or_none(self._search_regex( |  | ||||||
|                 r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None)) |  | ||||||
|         info.update({ |  | ||||||
|             'age_limit': age_limit, |  | ||||||
|             'display_id': display_id, |  | ||||||
|             'title': title, |  | ||||||
|             'description': description, |  | ||||||
|             'timestamp': unified_timestamp(player_page.get('broadcastedOn')), |  | ||||||
|             'series': try_get(player_page, lambda x: x['show']['title']), |  | ||||||
|             'thumbnail': (media_collection.get('_previewImage') |  | ||||||
|                           or try_get(player_page, lambda x: update_url(x['image']['src'], query=None, fragment=None)) |  | ||||||
|                           or self.get_thumbnail_from_html(display_id, url)), |  | ||||||
|         }) |  | ||||||
|         info.update(self._ARD_extract_episode_info(info['title'])) |  | ||||||
|         return info |  | ||||||
|  |  | ||||||
|     def get_thumbnail_from_html(self, display_id, url): |         page_data = call_api(0) | ||||||
|         webpage = self._download_webpage(url, display_id, fatal=False) or '' |         full_id = join_nonempty(playlist_id, season_number, version, delim='_') | ||||||
|         return ( |  | ||||||
|             self._og_search_thumbnail(webpage, default=None) |         return self.playlist_result( | ||||||
|             or self._html_search_meta('thumbnailUrl', webpage, default=None)) |             OnDemandPagedList(fetch_page, self._PAGE_SIZE), full_id, display_id=display_id, | ||||||
|  |             title=page_data.get('title'), description=page_data.get('synopsis')) | ||||||
|   | |||||||
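The rewritten _extract_episode_info above tries each title pattern in order and keeps the named groups of the first match (get_all=False). A minimal standalone sketch of that fallback chain, using plain re instead of yt-dlp's traverse_obj helper; the patterns are simplified stand-ins, not the extractor's exact ones:

    import re

    def extract_episode_info(title):
        # Try each pattern in order; the first match wins, mirroring the
        # get_all=False traverse_obj call in the diff above.
        patterns = [
            r'\(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)',  # "(S06/E07)"
            r'Folge (?P<episode_number>\d+)',                         # "Folge 1063"
        ]
        for pattern in patterns:
            m = re.search(pattern, title)
            if m:
                return {k: int(v) for k, v in m.groupdict().items() if v}
        return {}

    print(extract_episode_info('Homo sapiens (S06/E07) - Originalversion'))
    # {'season_number': 6, 'episode_number': 7}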
| @@ -64,7 +64,7 @@ class ArkenaIE(InfoExtractor): | |||||||
|                 raise ExtractorError('Invalid URL', expected=True) |                 raise ExtractorError('Invalid URL', expected=True) | ||||||
|  |  | ||||||
|         media = self._download_json( |         media = self._download_json( | ||||||
|             'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id), |             f'https://video.qbrick.com/api/v1/public/accounts/{account_id}/medias/{video_id}', | ||||||
|             video_id, query={ |             video_id, query={ | ||||||
|                 # https://video.qbrick.com/docs/api/examples/library-api.html |                 # https://video.qbrick.com/docs/api/examples/library-api.html | ||||||
|                 'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags', |                 'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags', | ||||||
| @@ -131,8 +131,8 @@ class ArkenaIE(InfoExtractor): | |||||||
|                             formats.extend(self._extract_f4m_formats( |                             formats.extend(self._extract_f4m_formats( | ||||||
|                                 href, video_id, f4m_id='hds', fatal=False)) |                                 href, video_id, f4m_id='hds', fatal=False)) | ||||||
|                         elif mime_type == 'application/dash+xml': |                         elif mime_type == 'application/dash+xml': | ||||||
|                             formats.extend(self._extract_f4m_formats( |                             formats.extend(self._extract_mpd_formats( | ||||||
|                                 href, video_id, f4m_id='hds', fatal=False)) |                                 href, video_id, mpd_id='dash', fatal=False)) | ||||||
|                         elif mime_type == 'application/vnd.ms-sstr+xml': |                         elif mime_type == 'application/vnd.ms-sstr+xml': | ||||||
|                             formats.extend(self._extract_ism_formats( |                             formats.extend(self._extract_ism_formats( | ||||||
|                                 href, video_id, ism_id='mss', fatal=False)) |                                 href, video_id, ism_id='mss', fatal=False)) | ||||||
|   | |||||||
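The Arkena hunk above fixes a copy-paste bug: DASH manifests were being routed to the f4m (HDS) helper. A hedged sketch of the corrected mime-type dispatch, with ie standing in for the extractor instance and using the InfoExtractor helper names from the diff:

    def extract_by_mime(ie, href, video_id, mime_type):
        # Route each manifest type to its matching yt-dlp extractor helper.
        if mime_type == 'application/f4m+xml':
            return ie._extract_f4m_formats(href, video_id, f4m_id='hds', fatal=False)
        if mime_type == 'application/dash+xml':
            # This is the branch that wrongly called the f4m helper before.
            return ie._extract_mpd_formats(href, video_id, mpd_id='dash', fatal=False)
        if mime_type == 'application/vnd.ms-sstr+xml':
            return ie._extract_ism_formats(href, video_id, ism_id='mss', fatal=False)
        return []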
| @@ -1,11 +1,9 @@ | |||||||
|  | import urllib.parse | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import ( |  | ||||||
|     compat_parse_qs, |  | ||||||
|     compat_urllib_parse_urlparse, |  | ||||||
| ) |  | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     format_field, |  | ||||||
|     float_or_none, |     float_or_none, | ||||||
|  |     format_field, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     parse_iso8601, |     parse_iso8601, | ||||||
|     remove_start, |     remove_start, | ||||||
| @@ -35,7 +33,7 @@ class ArnesIE(InfoExtractor): | |||||||
|             'view_count': int, |             'view_count': int, | ||||||
|             'tags': ['linearna_algebra'], |             'tags': ['linearna_algebra'], | ||||||
|             'start_time': 10, |             'start_time': 10, | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4', |         'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
| @@ -93,6 +91,6 @@ class ArnesIE(InfoExtractor): | |||||||
|             'duration': float_or_none(video.get('duration'), 1000), |             'duration': float_or_none(video.get('duration'), 1000), | ||||||
|             'view_count': int_or_none(video.get('views')), |             'view_count': int_or_none(video.get('views')), | ||||||
|             'tags': video.get('hashtags'), |             'tags': video.get('hashtags'), | ||||||
|             'start_time': int_or_none(compat_parse_qs( |             'start_time': int_or_none(urllib.parse.parse_qs( | ||||||
|                 compat_urllib_parse_urlparse(url).query).get('t', [None])[0]), |                 urllib.parse.urlparse(url).query).get('t', [None])[0]), | ||||||
|         } |         } | ||||||
|   | |||||||
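The Arnes hunk above swaps the removed compat_* aliases for their stdlib equivalents one-to-one. How the t query parameter becomes start_time after that change (the URL is a fabricated example):

    import urllib.parse

    url = 'https://video.arnes.si/watch/s1YjnV7hadlC?t=10'
    # parse_qs returns every value as a list; take the first entry or None.
    start_time = urllib.parse.parse_qs(
        urllib.parse.urlparse(url).query).get('t', [None])[0]
    print(start_time)  # '10'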
plugins/youtube_download/yt_dlp/extractor/art19.py (new file, 303 lines)
							| @@ -0,0 +1,303 @@ | |||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import float_or_none, int_or_none, parse_iso8601, url_or_none | ||||||
|  | from ..utils.traversal import traverse_obj | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class Art19IE(InfoExtractor): | ||||||
|  |     _UUID_REGEX = r'[\da-f]{8}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{12}' | ||||||
|  |     _VALID_URL = [ | ||||||
|  |         rf'https?://(?:www\.)?art19\.com/shows/[^/#?]+/episodes/(?P<id>{_UUID_REGEX})', | ||||||
|  |         rf'https?://rss\.art19\.com/episodes/(?P<id>{_UUID_REGEX})\.mp3', | ||||||
|  |     ] | ||||||
|  |     _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL[0]})'] | ||||||
|  |  | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://rss.art19.com/episodes/5ba1413c-48b8-472b-9cc3-cfd952340bdb.mp3', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb', | ||||||
|  |             'ext': 'mp3', | ||||||
|  |             'title': 'Why Did DeSantis Drop Out?', | ||||||
|  |             'series': 'The Daily Briefing', | ||||||
|  |             'release_timestamp': 1705941275, | ||||||
|  |             'description': 'md5:da38961da4a3f7e419471365e3c6b49f', | ||||||
|  |             'episode': 'Episode 582', | ||||||
|  |             'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$', | ||||||
|  |             'series_id': 'ed52a0ab-08b1-4def-8afc-549e4d93296d', | ||||||
|  |             'upload_date': '20240122', | ||||||
|  |             'timestamp': 1705940815, | ||||||
|  |             'episode_number': 582, | ||||||
|  |             'modified_date': '20240122', | ||||||
|  |             'episode_id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb', | ||||||
|  |             'modified_timestamp': 1705941275, | ||||||
|  |             'release_date': '20240122', | ||||||
|  |             'duration': 527.4, | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://art19.com/shows/scamfluencers/episodes/8319b776-4153-4d22-8630-631f204a03dd', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '8319b776-4153-4d22-8630-631f204a03dd', | ||||||
|  |             'ext': 'mp3', | ||||||
|  |             'title': 'Martha Stewart: The Homemaker Hustler Part 2', | ||||||
|  |             'modified_date': '20240116', | ||||||
|  |             'upload_date': '20240105', | ||||||
|  |             'modified_timestamp': 1705435802, | ||||||
|  |             'episode_id': '8319b776-4153-4d22-8630-631f204a03dd', | ||||||
|  |             'series_id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75', | ||||||
|  |             'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$', | ||||||
|  |             'description': 'md5:4aa7cfd1358dc57e729835bc208d7893', | ||||||
|  |             'release_timestamp': 1705305660, | ||||||
|  |             'release_date': '20240115', | ||||||
|  |             'timestamp': 1704481536, | ||||||
|  |             'episode_number': 88, | ||||||
|  |             'series': 'Scamfluencers', | ||||||
|  |             'duration': 2588.37501, | ||||||
|  |             'episode': 'Episode 88', | ||||||
|  |         }, | ||||||
|  |     }] | ||||||
|  |     _WEBPAGE_TESTS = [{ | ||||||
|  |         'url': 'https://www.nu.nl/formule-1/6291456/verstappen-wordt-een-synoniem-voor-formule-1.html', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '7d42626a-7301-47db-bb8a-3b6f054d77d7', | ||||||
|  |             'ext': 'mp3', | ||||||
|  |             'title': "'Verstappen wordt een synoniem voor Formule 1'", | ||||||
|  |             'season': 'Seizoen 6', | ||||||
|  |             'description': 'md5:39a7159a31c4cda312b2e893bdd5c071', | ||||||
|  |             'episode_id': '7d42626a-7301-47db-bb8a-3b6f054d77d7', | ||||||
|  |             'duration': 3061.82111, | ||||||
|  |             'series_id': '93f4e113-2a60-4609-a564-755058fa40d8', | ||||||
|  |             'release_date': '20231126', | ||||||
|  |             'modified_timestamp': 1701156004, | ||||||
|  |             'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$', | ||||||
|  |             'season_number': 6, | ||||||
|  |             'episode_number': 52, | ||||||
|  |             'modified_date': '20231128', | ||||||
|  |             'upload_date': '20231126', | ||||||
|  |             'timestamp': 1701025981, | ||||||
|  |             'season_id': '36097c1e-7455-490d-a2fe-e2f10b4d5f26', | ||||||
|  |             'series': 'De Boordradio', | ||||||
|  |             'release_timestamp': 1701026308, | ||||||
|  |             'episode': 'Episode 52', | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.wishtv.com/podcast-episode/larry-bucshon-announces-retirement-from-congress/', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0', | ||||||
|  |             'ext': 'mp3', | ||||||
|  |             'title': 'Larry Bucshon announces retirement from congress', | ||||||
|  |             'upload_date': '20240115', | ||||||
|  |             'episode_number': 148, | ||||||
|  |             'episode': 'Episode 148', | ||||||
|  |             'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$', | ||||||
|  |             'release_date': '20240115', | ||||||
|  |             'timestamp': 1705328205, | ||||||
|  |             'release_timestamp': 1705329275, | ||||||
|  |             'series': 'All INdiana Politics', | ||||||
|  |             'modified_date': '20240117', | ||||||
|  |             'modified_timestamp': 1705458901, | ||||||
|  |             'series_id': 'c4af6c27-b10f-4ff2-9f84-0f407df86ff1', | ||||||
|  |             'episode_id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0', | ||||||
|  |             'description': 'md5:53b5239e4d14973a87125c217c255b2a', | ||||||
|  |             'duration': 1256.18848, | ||||||
|  |         }, | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     @classmethod | ||||||
|  |     def _extract_embed_urls(cls, url, webpage): | ||||||
|  |         yield from super()._extract_embed_urls(url, webpage) | ||||||
|  |         for episode_id in re.findall( | ||||||
|  |                 rf'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-episode-id=[\'"]({cls._UUID_REGEX})[\'"]', webpage): | ||||||
|  |             yield f'https://rss.art19.com/episodes/{episode_id}.mp3' | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         episode_id = self._match_id(url) | ||||||
|  |  | ||||||
|  |         player_metadata = self._download_json( | ||||||
|  |             f'https://art19.com/episodes/{episode_id}', episode_id, | ||||||
|  |             note='Downloading player metadata', fatal=False, | ||||||
|  |             headers={'Accept': 'application/vnd.art19.v0+json'}) | ||||||
|  |         rss_metadata = self._download_json( | ||||||
|  |             f'https://rss.art19.com/episodes/{episode_id}.json', episode_id, fatal=False, | ||||||
|  |             note='Downloading RSS metadata') | ||||||
|  |  | ||||||
|  |         formats = [{ | ||||||
|  |             'format_id': 'direct', | ||||||
|  |             'url': f'https://rss.art19.com/episodes/{episode_id}.mp3', | ||||||
|  |             'vcodec': 'none', | ||||||
|  |             'acodec': 'mp3', | ||||||
|  |         }] | ||||||
|  |         for fmt_id, fmt_data in traverse_obj(rss_metadata, ('content', 'media', {dict.items}, ...)): | ||||||
|  |             if fmt_id == 'waveform_bin': | ||||||
|  |                 continue | ||||||
|  |             fmt_url = traverse_obj(fmt_data, ('url', {url_or_none})) | ||||||
|  |             if not fmt_url: | ||||||
|  |                 continue | ||||||
|  |             formats.append({ | ||||||
|  |                 'format_id': fmt_id, | ||||||
|  |                 'url': fmt_url, | ||||||
|  |                 'vcodec': 'none', | ||||||
|  |                 'acodec': fmt_id, | ||||||
|  |                 'quality': -2 if fmt_id == 'ogg' else -1, | ||||||
|  |             }) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': episode_id, | ||||||
|  |             'formats': formats, | ||||||
|  |             **traverse_obj(player_metadata, ('episode', { | ||||||
|  |                 'title': ('title', {str}), | ||||||
|  |                 'description': ('description_plain', {str}), | ||||||
|  |                 'episode_id': ('id', {str}), | ||||||
|  |                 'episode_number': ('episode_number', {int_or_none}), | ||||||
|  |                 'season_id': ('season_id', {str}), | ||||||
|  |                 'series_id': ('series_id', {str}), | ||||||
|  |                 'timestamp': ('created_at', {parse_iso8601}), | ||||||
|  |                 'release_timestamp': ('released_at', {parse_iso8601}), | ||||||
|  |                 'modified_timestamp': ('updated_at', {parse_iso8601}), | ||||||
|  |             })), | ||||||
|  |             **traverse_obj(rss_metadata, ('content', { | ||||||
|  |                 'title': ('episode_title', {str}), | ||||||
|  |                 'description': ('episode_description_plain', {str}), | ||||||
|  |                 'episode_id': ('episode_id', {str}), | ||||||
|  |                 'episode_number': ('episode_number', {int_or_none}), | ||||||
|  |                 'season': ('season_title', {str}), | ||||||
|  |                 'season_id': ('season_id', {str}), | ||||||
|  |                 'season_number': ('season_number', {int_or_none}), | ||||||
|  |                 'series': ('series_title', {str}), | ||||||
|  |                 'series_id': ('series_id', {str}), | ||||||
|  |                 'thumbnail': ('cover_image', {url_or_none}), | ||||||
|  |                 'duration': ('duration', {float_or_none}), | ||||||
|  |             })), | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class Art19ShowIE(InfoExtractor): | ||||||
|  |     _VALID_URL_BASE = r'https?://(?:www\.)?art19\.com/shows/(?P<id>[\w-]+)(?:/embed)?/?' | ||||||
|  |     _VALID_URL = [ | ||||||
|  |         rf'{_VALID_URL_BASE}(?:$|[#?])', | ||||||
|  |         r'https?://rss\.art19\.com/(?P<id>[\w-]+)/?(?:$|[#?])', | ||||||
|  |     ] | ||||||
|  |     _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL_BASE}[^\'"])'] | ||||||
|  |  | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://www.art19.com/shows/5898c087-a14f-48dc-b6fc-a2280a1ff6e0/', | ||||||
|  |         'info_dict': { | ||||||
|  |             '_type': 'playlist', | ||||||
|  |             'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0', | ||||||
|  |             'display_id': 'echt-gebeurd', | ||||||
|  |             'title': 'Echt Gebeurd', | ||||||
|  |             'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560', | ||||||
|  |             'timestamp': 1492642167, | ||||||
|  |             'upload_date': '20170419', | ||||||
|  |             'modified_timestamp': int, | ||||||
|  |             'modified_date': str, | ||||||
|  |             'tags': 'count:7', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 425, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.art19.com/shows/echt-gebeurd', | ||||||
|  |         'info_dict': { | ||||||
|  |             '_type': 'playlist', | ||||||
|  |             'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0', | ||||||
|  |             'display_id': 'echt-gebeurd', | ||||||
|  |             'title': 'Echt Gebeurd', | ||||||
|  |             'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560', | ||||||
|  |             'timestamp': 1492642167, | ||||||
|  |             'upload_date': '20170419', | ||||||
|  |             'modified_timestamp': int, | ||||||
|  |             'modified_date': str, | ||||||
|  |             'tags': 'count:7', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 425, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://rss.art19.com/scamfluencers', | ||||||
|  |         'info_dict': { | ||||||
|  |             '_type': 'playlist', | ||||||
|  |             'id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75', | ||||||
|  |             'display_id': 'scamfluencers', | ||||||
|  |             'title': 'Scamfluencers', | ||||||
|  |             'description': 'md5:7d239d670c0ced6dadbf71c4caf764b7', | ||||||
|  |             'timestamp': 1647368573, | ||||||
|  |             'upload_date': '20220315', | ||||||
|  |             'modified_timestamp': int, | ||||||
|  |             'modified_date': str, | ||||||
|  |             'tags': [], | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 90, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://art19.com/shows/enthuellt/embed', | ||||||
|  |         'info_dict': { | ||||||
|  |             '_type': 'playlist', | ||||||
|  |             'id': 'e2cacf57-bb8a-4263-aa81-719bcdd4f80c', | ||||||
|  |             'display_id': 'enthuellt', | ||||||
|  |             'title': 'Enthüllt', | ||||||
|  |             'description': 'md5:17752246643414a2fd51744fc9a1c08e', | ||||||
|  |             'timestamp': 1601645860, | ||||||
|  |             'upload_date': '20201002', | ||||||
|  |             'modified_timestamp': int, | ||||||
|  |             'modified_date': str, | ||||||
|  |             'tags': 'count:10', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 10, | ||||||
|  |     }] | ||||||
|  |     _WEBPAGE_TESTS = [{ | ||||||
|  |         'url': 'https://deconstructingyourself.com/deconstructing-yourself-podcast', | ||||||
|  |         'info_dict': { | ||||||
|  |             '_type': 'playlist', | ||||||
|  |             'id': 'cfbb9b01-c295-4adb-8726-adde7c03cf21', | ||||||
|  |             'display_id': 'deconstructing-yourself', | ||||||
|  |             'title': 'Deconstructing Yourself', | ||||||
|  |             'description': 'md5:dab5082b28b248a35476abf64768854d', | ||||||
|  |             'timestamp': 1570581181, | ||||||
|  |             'upload_date': '20191009', | ||||||
|  |             'modified_timestamp': int, | ||||||
|  |             'modified_date': str, | ||||||
|  |             'tags': 'count:5', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 80, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://chicagoreader.com/columns-opinion/podcasts/ben-joravsky-show-podcast-episodes/', | ||||||
|  |         'info_dict': { | ||||||
|  |             '_type': 'playlist', | ||||||
|  |             'id': '9dfa2c37-ab87-4c13-8388-4897914313ec', | ||||||
|  |             'display_id': 'the-ben-joravsky-show', | ||||||
|  |             'title': 'The Ben Joravsky Show', | ||||||
|  |             'description': 'md5:c0f3ec0ee0dbea764390e521adc8780a', | ||||||
|  |             'timestamp': 1550875095, | ||||||
|  |             'upload_date': '20190222', | ||||||
|  |             'modified_timestamp': int, | ||||||
|  |             'modified_date': str, | ||||||
|  |             'tags': ['Chicago Politics', 'chicago', 'Ben Joravsky'], | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 1900, | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     @classmethod | ||||||
|  |     def _extract_embed_urls(cls, url, webpage): | ||||||
|  |         yield from super()._extract_embed_urls(url, webpage) | ||||||
|  |         for series_id in re.findall( | ||||||
|  |                 r'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-series-id=[\'"]([\w-]+)[\'"]', webpage): | ||||||
|  |             yield f'https://art19.com/shows/{series_id}' | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         series_id = self._match_id(url) | ||||||
|  |         series_metadata = self._download_json( | ||||||
|  |             f'https://art19.com/series/{series_id}', series_id, note='Downloading series metadata', | ||||||
|  |             headers={'Accept': 'application/vnd.art19.v0+json'}) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             '_type': 'playlist', | ||||||
|  |             'entries': [ | ||||||
|  |                 self.url_result(f'https://rss.art19.com/episodes/{episode_id}.mp3', Art19IE) | ||||||
|  |                 for episode_id in traverse_obj(series_metadata, ('series', 'episode_ids', ..., {str})) | ||||||
|  |             ], | ||||||
|  |             **traverse_obj(series_metadata, ('series', { | ||||||
|  |                 'id': ('id', {str}), | ||||||
|  |                 'display_id': ('slug', {str}), | ||||||
|  |                 'title': ('title', {str}), | ||||||
|  |                 'description': ('description_plain', {str}), | ||||||
|  |                 'timestamp': ('created_at', {parse_iso8601}), | ||||||
|  |                 'modified_timestamp': ('updated_at', {parse_iso8601}), | ||||||
|  |             })), | ||||||
|  |             'tags': traverse_obj(series_metadata, ('tags', ..., 'name', {str})), | ||||||
|  |         } | ||||||
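Art19IE._extract_embed_urls in the new file above discovers players embedded on third-party pages by scanning for art19-web-player divs and rewriting each episode id into its canonical RSS mp3 URL. A self-contained sketch of that discovery step, reusing the UUID pattern from the file; the sample HTML is fabricated for illustration:

    import re

    UUID = r'[\da-f]{8}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{12}'

    def find_art19_episodes(webpage):
        # One capture group, so findall() yields the episode ids directly.
        pattern = (r'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"]'
                   rf'[^>]+\bdata-episode-id=[\'"]({UUID})[\'"]')
        return [f'https://rss.art19.com/episodes/{episode_id}.mp3'
                for episode_id in re.findall(pattern, webpage)]

    html = '<div class="art19-web-player" data-episode-id="5ba1413c-48b8-472b-9cc3-cfd952340bdb">'
    print(find_art19_episodes(html))
    # ['https://rss.art19.com/episodes/5ba1413c-48b8-472b-9cc3-cfd952340bdb.mp3']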
| @@ -5,6 +5,7 @@ from ..utils import ( | |||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     GeoRestrictedError, |     GeoRestrictedError, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|  |     join_nonempty, | ||||||
|     parse_iso8601, |     parse_iso8601, | ||||||
|     parse_qs, |     parse_qs, | ||||||
|     strip_or_none, |     strip_or_none, | ||||||
| @@ -19,46 +20,22 @@ class ArteTVBaseIE(InfoExtractor): | |||||||
|  |  | ||||||
|  |  | ||||||
| class ArteTVIE(ArteTVBaseIE): | class ArteTVIE(ArteTVBaseIE): | ||||||
|     _VALID_URL = r'''(?x) |     _VALID_URL = rf'''(?x) | ||||||
|                     (?:https?:// |                     (?:https?:// | ||||||
|                         (?: |                         (?: | ||||||
|                             (?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos| |                             (?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos| | ||||||
|                             api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s) |                             api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>{ArteTVBaseIE._ARTE_LANGUAGES}) | ||||||
|                         ) |                         ) | ||||||
|                     |arte://program) |                     |arte://program) | ||||||
|                         /(?P<id>\d{6}-\d{3}-[AF]|LIVE) |                         /(?P<id>\d{{6}}-\d{{3}}-[AF]|LIVE) | ||||||
|                     ''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES} |                     ''' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', |         'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }, { |  | ||||||
|         'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '100103-000-A', |  | ||||||
|             'title': 'USA: Dyskryminacja na porodówce', |  | ||||||
|             'description': 'md5:242017b7cce59ffae340a54baefcafb1', |  | ||||||
|             'alt_title': 'ARTE Reportage', |  | ||||||
|             'upload_date': '20201103', |  | ||||||
|             'duration': 554, |  | ||||||
|             'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530', |  | ||||||
|             'timestamp': 1604417980, |  | ||||||
|             'ext': 'mp4', |  | ||||||
|         }, |  | ||||||
|         'params': {'skip_download': 'm3u8'} |  | ||||||
|     }, { |     }, { | ||||||
|         'note': 'No alt_title', |         'note': 'No alt_title', | ||||||
|         'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/', |         'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/', | ||||||
|         'info_dict': { |         'only_matching': True, | ||||||
|             'id': '110371-000-A', |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'upload_date': '20220718', |  | ||||||
|             'duration': 154, |  | ||||||
|             'timestamp': 1658162460, |  | ||||||
|             'description': 'md5:5890f36fe7dccfadb8b7c0891de54786', |  | ||||||
|             'title': 'La chaleur, supplice des arbres de rue', |  | ||||||
|             'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/CPE2sQDtD8GLQgt8DuYHLf/940x530', |  | ||||||
|         }, |  | ||||||
|         'params': {'skip_download': 'm3u8'} |  | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A', |         'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
| @@ -67,19 +44,38 @@ class ArteTVIE(ArteTVBaseIE): | |||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/', |         'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.arte.tv/fr/videos/109067-000-A/la-loi-de-teheran/', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '110203-006-A', |             'id': '109067-000-A', | ||||||
|             'chapters': 'count:16', |             'ext': 'mp4', | ||||||
|             'description': 'md5:cf592f1df52fe52007e3f8eac813c084', |             'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739', | ||||||
|             'alt_title': 'Zaz', |             'timestamp': 1713927600, | ||||||
|             'title': 'Baloise Session 2022', |             'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/3rR6PLzfbigSkkeHtkCZNF/940x530', | ||||||
|             'timestamp': 1668445200, |             'duration': 7599, | ||||||
|             'duration': 4054, |             'title': 'La loi de Téhéran', | ||||||
|             'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/ubQjmVCGyRx3hmBuZEK9QZ/940x530', |             'upload_date': '20240424', | ||||||
|             'upload_date': '20221114', |             'subtitles': { | ||||||
|  |                 'fr': 'mincount:1', | ||||||
|  |                 'fr-acc': 'mincount:1', | ||||||
|  |                 'fr-forced': 'mincount:1', | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'note': 'age-restricted', | ||||||
|  |         'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '006785-000-A', | ||||||
|  |             'description': 'md5:c2f94fdfefc8a280e4dab68ab96ab0ba', | ||||||
|  |             'title': 'The Element of Crime', | ||||||
|  |             'timestamp': 1696111200, | ||||||
|  |             'duration': 5849, | ||||||
|  |             'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530', | ||||||
|  |             'upload_date': '20230930', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|         }, |         }, | ||||||
|         'expected_warnings': ['geo restricted'] |         'skip': '404 Not Found', | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     _GEO_BYPASS = True |     _GEO_BYPASS = True | ||||||
| @@ -130,13 +126,27 @@ class ArteTVIE(ArteTVBaseIE): | |||||||
|         ), |         ), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def _fix_accessible_subs_locale(subs): | ||||||
|  |         updated_subs = {} | ||||||
|  |         for lang, sub_formats in subs.items(): | ||||||
|  |             for fmt in sub_formats: | ||||||
|  |                 url = fmt.get('url') or '' | ||||||
|  |                 suffix = ('acc' if url.endswith('-MAL.m3u8') | ||||||
|  |                           else 'forced' if '_VO' not in url | ||||||
|  |                           else None) | ||||||
|  |                 updated_subs.setdefault(join_nonempty(lang, suffix), []).append(fmt) | ||||||
|  |         return updated_subs | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = self._match_valid_url(url) |         mobj = self._match_valid_url(url) | ||||||
|         video_id = mobj.group('id') |         video_id = mobj.group('id') | ||||||
|         lang = mobj.group('lang') or mobj.group('lang_2') |         lang = mobj.group('lang') or mobj.group('lang_2') | ||||||
|         langauge_code = self._LANG_MAP.get(lang) |         language_code = self._LANG_MAP.get(lang) | ||||||
|  |  | ||||||
|         config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id) |         config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={ | ||||||
|  |             'x-validated-age': '18', | ||||||
|  |         }) | ||||||
|  |  | ||||||
|         geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {} |         geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {} | ||||||
|         if geoblocking.get('restrictedArea'): |         if geoblocking.get('restrictedArea'): | ||||||
| @@ -160,16 +170,16 @@ class ArteTVIE(ArteTVBaseIE): | |||||||
|             m = self._VERSION_CODE_RE.match(stream_version_code) |             m = self._VERSION_CODE_RE.match(stream_version_code) | ||||||
|             if m: |             if m: | ||||||
|                 lang_pref = int(''.join('01'[x] for x in ( |                 lang_pref = int(''.join('01'[x] for x in ( | ||||||
|                     m.group('vlang') == langauge_code,      # we prefer voice in the requested language |                     m.group('vlang') == language_code,      # we prefer voice in the requested language | ||||||
|                     not m.group('audio_desc'),              # and not the audio description version |                     not m.group('audio_desc'),              # and not the audio description version | ||||||
|                     bool(m.group('original_voice')),        # but if voice is not in the requested language, at least choose the original voice |                     bool(m.group('original_voice')),        # but if voice is not in the requested language, at least choose the original voice | ||||||
|                     m.group('sub_lang') == langauge_code,   # if subtitles are present, we prefer them in the requested language |                     m.group('sub_lang') == language_code,   # if subtitles are present, we prefer them in the requested language | ||||||
|                     not m.group('has_sub'),                 # but we prefer no subtitles otherwise |                     not m.group('has_sub'),                 # but we prefer no subtitles otherwise | ||||||
|                     not m.group('sdh_sub'),                 # and we prefer not the hard-of-hearing subtitles if there are subtitles |                     not m.group('sdh_sub'),                 # and we prefer not the hard-of-hearing subtitles if there are subtitles | ||||||
|                 ))) |                 ))) | ||||||
|  |  | ||||||
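For reference, the `lang_pref` expression above turns the six ordered checks into the digits of a decimal number, so a version that wins an earlier check always outranks one that only wins later checks. A worked example with hypothetical values:

    # Hypothetical 'VF-STF' version on the French site: French voice, no audio
    # description, not the original voice, French subs present, not SDH.
    checks = (
        True,   # vlang == language_code
        True,   # not audio_desc
        False,  # original_voice
        True,   # sub_lang == language_code
        False,  # not has_sub (subs are present)
        True,   # not sdh_sub
    )
    lang_pref = int(''.join('01'[x] for x in checks))
    print(lang_pref)  # 110101 -- '01'[True] is '1', '01'[False] is '0'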
|             short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?') |             short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?') | ||||||
|             if stream['protocol'].startswith('HLS'): |             if 'HLS' in stream['protocol']: | ||||||
|                 fmts, subs = self._extract_m3u8_formats_and_subtitles( |                 fmts, subs = self._extract_m3u8_formats_and_subtitles( | ||||||
|                     stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False) |                     stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False) | ||||||
|                 for fmt in fmts: |                 for fmt in fmts: | ||||||
| @@ -181,6 +191,7 @@ class ArteTVIE(ArteTVBaseIE): | |||||||
|                     secondary_formats.extend(fmts) |                     secondary_formats.extend(fmts) | ||||||
|                 else: |                 else: | ||||||
|                     formats.extend(fmts) |                     formats.extend(fmts) | ||||||
|  |                 subs = self._fix_accessible_subs_locale(subs) | ||||||
|                 self._merge_subtitles(subs, target=subtitles) |                 self._merge_subtitles(subs, target=subtitles) | ||||||
|  |  | ||||||
|             elif stream['protocol'] in ('HTTPS', 'RTMP'): |             elif stream['protocol'] in ('HTTPS', 'RTMP'): | ||||||
| @@ -236,7 +247,7 @@ class ArteTVEmbedIE(InfoExtractor): | |||||||
|             'description': 'md5:be40b667f45189632b78c1425c7c2ce1', |             'description': 'md5:be40b667f45189632b78c1425c7c2ce1', | ||||||
|             'upload_date': '20201116', |             'upload_date': '20201116', | ||||||
|         }, |         }, | ||||||
|         'skip': 'No video available' |         'skip': 'No video available', | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A', |         'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
| @@ -251,7 +262,7 @@ class ArteTVEmbedIE(InfoExtractor): | |||||||
|  |  | ||||||
|  |  | ||||||
| class ArteTVPlaylistIE(ArteTVBaseIE): | class ArteTVPlaylistIE(ArteTVBaseIE): | ||||||
|     _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES |     _VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>RC-\d{{6}})' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/', |         'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
| @@ -287,7 +298,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE): | |||||||
|  |  | ||||||
|  |  | ||||||
| class ArteTVCategoryIE(ArteTVBaseIE): | class ArteTVCategoryIE(ArteTVBaseIE): | ||||||
|     _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES |     _VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://www.arte.tv/en/videos/politics-and-society/', |         'url': 'https://www.arte.tv/en/videos/politics-and-society/', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
| @@ -301,7 +312,7 @@ class ArteTVCategoryIE(ArteTVBaseIE): | |||||||
|     @classmethod |     @classmethod | ||||||
|     def suitable(cls, url): |     def suitable(cls, url): | ||||||
|         return ( |         return ( | ||||||
|             not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, )) |             not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE)) | ||||||
|             and super().suitable(url)) |             and super().suitable(url)) | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
| @@ -310,12 +321,12 @@ class ArteTVCategoryIE(ArteTVBaseIE): | |||||||
|  |  | ||||||
|         items = [] |         items = [] | ||||||
|         for video in re.finditer( |         for video in re.finditer( | ||||||
|                 r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang, |                 rf'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/{lang}/videos/[\w/-]+)(?P=q)', | ||||||
|                 webpage): |                 webpage): | ||||||
|             video = video.group('url') |             video = video.group('url') | ||||||
|             if video == url: |             if video == url: | ||||||
|                 continue |                 continue | ||||||
|             if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )): |             if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE)): | ||||||
|                 items.append(video) |                 items.append(video) | ||||||
|  |  | ||||||
|         title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None |         title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None | ||||||
|   | |||||||
| @@ -1,196 +0,0 @@ | |||||||
| import functools |  | ||||||
| import re |  | ||||||
|  |  | ||||||
| from .common import InfoExtractor |  | ||||||
| from .kaltura import KalturaIE |  | ||||||
| from ..utils import ( |  | ||||||
|     extract_attributes, |  | ||||||
|     int_or_none, |  | ||||||
|     OnDemandPagedList, |  | ||||||
|     parse_age_limit, |  | ||||||
|     strip_or_none, |  | ||||||
|     try_get, |  | ||||||
| ) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class AsianCrushBaseIE(InfoExtractor): |  | ||||||
|     _VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))' |  | ||||||
|     _KALTURA_KEYS = [ |  | ||||||
|         'video_url', 'progressive_url', 'download_url', 'thumbnail_url', |  | ||||||
|         'widescreen_thumbnail_url', 'screencap_widescreen', |  | ||||||
|     ] |  | ||||||
|     _API_SUFFIX = {'retrocrush.tv': '-ott'} |  | ||||||
|  |  | ||||||
|     def _call_api(self, host, endpoint, video_id, query, resource): |  | ||||||
|         return self._download_json( |  | ||||||
|             'https://api%s.%s/%s' % (self._API_SUFFIX.get(host, ''), host, endpoint), video_id, |  | ||||||
|             'Downloading %s JSON metadata' % resource, query=query, |  | ||||||
|             headers=self.geo_verification_headers())['objects'] |  | ||||||
|  |  | ||||||
|     def _download_object_data(self, host, object_id, resource): |  | ||||||
|         return self._call_api( |  | ||||||
|             host, 'search', object_id, {'id': object_id}, resource)[0] |  | ||||||
|  |  | ||||||
|     def _get_object_description(self, obj): |  | ||||||
|         return strip_or_none(obj.get('long_description') or obj.get('short_description')) |  | ||||||
|  |  | ||||||
|     def _parse_video_data(self, video): |  | ||||||
|         title = video['name'] |  | ||||||
|  |  | ||||||
|         entry_id, partner_id = [None] * 2 |  | ||||||
|         for k in self._KALTURA_KEYS: |  | ||||||
|             k_url = video.get(k) |  | ||||||
|             if k_url: |  | ||||||
|                 mobj = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', k_url) |  | ||||||
|                 if mobj: |  | ||||||
|                     partner_id, entry_id = mobj.groups() |  | ||||||
|                     break |  | ||||||
|  |  | ||||||
|         meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or [] |  | ||||||
|         categories = list(filter(None, [c.get('name') for c in meta_categories])) |  | ||||||
|  |  | ||||||
|         show_info = video.get('show_info') or {} |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             '_type': 'url_transparent', |  | ||||||
|             'url': 'kaltura:%s:%s' % (partner_id, entry_id), |  | ||||||
|             'ie_key': KalturaIE.ie_key(), |  | ||||||
|             'id': entry_id, |  | ||||||
|             'title': title, |  | ||||||
|             'description': self._get_object_description(video), |  | ||||||
|             'age_limit': parse_age_limit(video.get('mpaa_rating') or video.get('tv_rating')), |  | ||||||
|             'categories': categories, |  | ||||||
|             'series': show_info.get('show_name'), |  | ||||||
|             'season_number': int_or_none(show_info.get('season_num')), |  | ||||||
|             'season_id': show_info.get('season_id'), |  | ||||||
|             'episode_number': int_or_none(show_info.get('episode_num')), |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class AsianCrushIE(AsianCrushBaseIE): |  | ||||||
|     _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE |  | ||||||
|     _TESTS = [{ |  | ||||||
|         'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt', |  | ||||||
|         'md5': 'c3b740e48d0ba002a42c0b72857beae6', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '1_y4tmjm5r', |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'title': 'Women Who Flirt', |  | ||||||
|             'description': 'md5:b65c7e0ae03a85585476a62a186f924c', |  | ||||||
|             'timestamp': 1496936429, |  | ||||||
|             'upload_date': '20170608', |  | ||||||
|             'uploader_id': 'craig@crifkin.com', |  | ||||||
|             'age_limit': 13, |  | ||||||
|             'categories': 'count:5', |  | ||||||
|             'duration': 5812, |  | ||||||
|         }, |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://www.midnightpulp.com/video/010400v/drifters/', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }] |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |  | ||||||
|         host, video_id = self._match_valid_url(url).groups() |  | ||||||
|  |  | ||||||
|         if host == 'cocoro.tv': |  | ||||||
|             webpage = self._download_webpage(url, video_id) |  | ||||||
|             embed_vars = self._parse_json(self._search_regex( |  | ||||||
|                 r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars', |  | ||||||
|                 default='{}'), video_id, fatal=False) or {} |  | ||||||
|             video_id = embed_vars.get('entry_id') or video_id |  | ||||||
|  |  | ||||||
|         video = self._download_object_data(host, video_id, 'video') |  | ||||||
|         return self._parse_video_data(video) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class AsianCrushPlaylistIE(AsianCrushBaseIE): |  | ||||||
|     _VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE |  | ||||||
|     _TESTS = [{ |  | ||||||
|         'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '6447', |  | ||||||
|             'title': 'Fruity Samurai', |  | ||||||
|             'description': 'md5:7535174487e4a202d3872a7fc8f2f154', |  | ||||||
|         }, |  | ||||||
|         'playlist_count': 13, |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://www.yuyutv.com/series/013920s/peep-show/', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://www.midnightpulp.com/series/016375s/mononoke/', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }, { |  | ||||||
|         'url': 'https://www.retrocrush.tv/series/012355s/true-tears', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }] |  | ||||||
|     _PAGE_SIZE = 1000000000 |  | ||||||
|  |  | ||||||
|     def _fetch_page(self, domain, parent_id, page): |  | ||||||
|         videos = self._call_api( |  | ||||||
|             domain, 'getreferencedobjects', parent_id, { |  | ||||||
|                 'max': self._PAGE_SIZE, |  | ||||||
|                 'object_type': 'video', |  | ||||||
|                 'parent_id': parent_id, |  | ||||||
|                 'start': page * self._PAGE_SIZE, |  | ||||||
|             }, 'page %d' % (page + 1)) |  | ||||||
|         for video in videos: |  | ||||||
|             yield self._parse_video_data(video) |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |  | ||||||
|         host, playlist_id = self._match_valid_url(url).groups() |  | ||||||
|  |  | ||||||
|         if host == 'cocoro.tv': |  | ||||||
|             webpage = self._download_webpage(url, playlist_id) |  | ||||||
|  |  | ||||||
|             entries = [] |  | ||||||
|  |  | ||||||
|             for mobj in re.finditer( |  | ||||||
|                     r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL, |  | ||||||
|                     webpage): |  | ||||||
|                 attrs = extract_attributes(mobj.group(0)) |  | ||||||
|                 if attrs.get('class') == 'clearfix': |  | ||||||
|                     entries.append(self.url_result( |  | ||||||
|                         mobj.group('url'), ie=AsianCrushIE.ie_key())) |  | ||||||
|  |  | ||||||
|             title = self._html_search_regex( |  | ||||||
|                 r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage, |  | ||||||
|                 'title', default=None) or self._og_search_title( |  | ||||||
|                 webpage, default=None) or self._html_search_meta( |  | ||||||
|                 'twitter:title', webpage, 'title', |  | ||||||
|                 default=None) or self._html_extract_title(webpage) |  | ||||||
|             if title: |  | ||||||
|                 title = re.sub(r'\s*\|\s*.+?$', '', title) |  | ||||||
|  |  | ||||||
|             description = self._og_search_description( |  | ||||||
|                 webpage, default=None) or self._html_search_meta( |  | ||||||
|                 'twitter:description', webpage, 'description', fatal=False) |  | ||||||
|         else: |  | ||||||
|             show = self._download_object_data(host, playlist_id, 'show') |  | ||||||
|             title = show.get('name') |  | ||||||
|             description = self._get_object_description(show) |  | ||||||
|             entries = OnDemandPagedList( |  | ||||||
|                 functools.partial(self._fetch_page, host, playlist_id), |  | ||||||
|                 self._PAGE_SIZE) |  | ||||||
|  |  | ||||||
|         return self.playlist_result(entries, playlist_id, title, description) |  | ||||||
plugins/youtube_download/yt_dlp/extractor/asobichannel.py (new file, 168 lines)
| @@ -0,0 +1,168 @@ |||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     ExtractorError, | ||||||
|  |     clean_html, | ||||||
|  |     merge_dicts, | ||||||
|  |     parse_iso8601, | ||||||
|  |     url_or_none, | ||||||
|  | ) | ||||||
|  | from ..utils.traversal import traverse_obj | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class AsobiChannelBaseIE(InfoExtractor): | ||||||
|  |     _MICROCMS_HEADER = {'X-MICROCMS-API-KEY': 'qRaKehul9AHU8KtL0dnq1OCLKnFec6yrbcz3'} | ||||||
|  |  | ||||||
|  |     def _extract_info(self, metadata): | ||||||
|  |         return traverse_obj(metadata, { | ||||||
|  |             'id': ('id', {str}), | ||||||
|  |             'title': ('title', {str}), | ||||||
|  |             'description': ('body', {clean_html}), | ||||||
|  |             'thumbnail': ('contents', 'video_thumb', 'url', {url_or_none}), | ||||||
|  |             'timestamp': ('publishedAt', {parse_iso8601}), | ||||||
|  |             'modified_timestamp': ('updatedAt', {parse_iso8601}), | ||||||
|  |             'channel': ('channel', 'name', {str}), | ||||||
|  |             'channel_id': ('channel', 'id', {str}), | ||||||
|  |         }) | ||||||
|  |  | ||||||
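The `_extract_info` mapping above uses `traverse_obj`'s dict form: each output key gets a path into the JSON plus an optional `{callable}` filter, and keys whose path resolves to nothing are simply omitted. A minimal sketch with made-up microCMS-shaped metadata (assumes yt-dlp is importable):

    from yt_dlp.utils import clean_html, parse_iso8601
    from yt_dlp.utils.traversal import traverse_obj

    metadata = {  # invented sample; real responses come from channel.microcms.io
        'id': '1ypp48qd32p',
        'body': '<p>Episode <b>notes</b></p>',
        'publishedAt': '2023-10-12T07:30:47.000Z',
        'channel': {'id': 'idolmaster', 'name': 'アイドルマスター'},
    }
    print(traverse_obj(metadata, {
        'id': ('id', {str}),
        'description': ('body', {clean_html}),
        'timestamp': ('publishedAt', {parse_iso8601}),
        'channel_id': ('channel', 'id', {str}),
    }))
    # {'id': '1ypp48qd32p', 'description': 'Episode notes',
    #  'timestamp': 1697095847, 'channel_id': 'idolmaster'}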
|  |  | ||||||
|  | class AsobiChannelIE(AsobiChannelBaseIE): | ||||||
|  |     IE_NAME = 'asobichannel' | ||||||
|  |     IE_DESC = 'ASOBI CHANNEL' | ||||||
|  |  | ||||||
|  |     _VALID_URL = r'https?://asobichannel\.asobistore\.jp/watch/(?P<id>[\w-]+)' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://asobichannel.asobistore.jp/watch/1ypp48qd32p', | ||||||
|  |         'md5': '39df74e872afe032c4eb27b89144fc92', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '1ypp48qd32p', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1', | ||||||
|  |             'description': 'md5:b930bd2199c9b2fd75951ce4aaa7efd2', | ||||||
|  |             'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/a8e6f84119f54eb9ab4ce16729239905/%E3%82%B5%E3%83%A0%E3%83%8D%20(1).png', | ||||||
|  |             'timestamp': 1697098247, | ||||||
|  |             'upload_date': '20231012', | ||||||
|  |             'modified_timestamp': 1698381162, | ||||||
|  |             'modified_date': '20231027', | ||||||
|  |             'channel': 'アイドルマスター', | ||||||
|  |             'channel_id': 'idolmaster', | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://asobichannel.asobistore.jp/watch/redigiwnjzqj', | ||||||
|  |         'md5': '229fa8fb5c591c75ce8c37a497f113f6', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'redigiwnjzqj', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': '【おまけ放送】アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1', | ||||||
|  |             'description': 'md5:7d9cd35fb54425a6967822bd564ea2d9', | ||||||
|  |             'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/20e5c1d6184242eebc2512a5dec59bf0/P1_%E5%8E%9F%E3%81%A3%E3%81%B1%E3%82%B5%E3%83%A0%E3%83%8D.png', | ||||||
|  |             'modified_timestamp': 1697797125, | ||||||
|  |             'modified_date': '20231020', | ||||||
|  |             'timestamp': 1697261769, | ||||||
|  |             'upload_date': '20231014', | ||||||
|  |             'channel': 'アイドルマスター', | ||||||
|  |             'channel_id': 'idolmaster', | ||||||
|  |         }, | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     _survapi_header = None | ||||||
|  |  | ||||||
|  |     def _real_initialize(self): | ||||||
|  |         token = self._download_json( | ||||||
|  |             'https://asobichannel-api.asobistore.jp/api/v1/vspf/token', None, | ||||||
|  |             note='Retrieving API token') | ||||||
|  |         self._survapi_header = {'Authorization': f'Bearer {token}'} | ||||||
|  |  | ||||||
|  |     def _process_vod(self, video_id, metadata): | ||||||
|  |         content_id = metadata['contents']['video_id'] | ||||||
|  |  | ||||||
|  |         vod_data = self._download_json( | ||||||
|  |             f'https://survapi.channel.or.jp/proxy/v1/contents/{content_id}/get_by_cuid', video_id, | ||||||
|  |             headers=self._survapi_header, note='Downloading vod data') | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'formats': self._extract_m3u8_formats(vod_data['ex_content']['streaming_url'], video_id), | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |     def _process_live(self, video_id, metadata): | ||||||
|  |         content_id = metadata['contents']['video_id'] | ||||||
|  |         event_data = self._download_json( | ||||||
|  |             f'https://survapi.channel.or.jp/ex/events/{content_id}?embed=channel', video_id, | ||||||
|  |             headers=self._survapi_header, note='Downloading event data') | ||||||
|  |  | ||||||
|  |         player_type = traverse_obj(event_data, ('data', 'Player_type', {str})) | ||||||
|  |         if player_type == 'poster': | ||||||
|  |             self.raise_no_formats('Live event has not yet started', expected=True) | ||||||
|  |             live_status = 'is_upcoming' | ||||||
|  |             formats = [] | ||||||
|  |         elif player_type == 'player': | ||||||
|  |             live_status = 'is_live' | ||||||
|  |             formats = self._extract_m3u8_formats( | ||||||
|  |                 event_data['data']['Channel']['Custom_live_url'], video_id, live=True) | ||||||
|  |         else: | ||||||
|  |             raise ExtractorError(f'Unsupported player type {player_type!r}') | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'release_timestamp': traverse_obj(metadata, ('period', 'start', {parse_iso8601})), | ||||||
|  |             'live_status': live_status, | ||||||
|  |             'formats': formats, | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         video_id = self._match_id(url) | ||||||
|  |  | ||||||
|  |         metadata = self._download_json( | ||||||
|  |             f'https://channel.microcms.io/api/v1/media/{video_id}', video_id, | ||||||
|  |             headers=self._MICROCMS_HEADER) | ||||||
|  |  | ||||||
|  |         info = self._extract_info(metadata) | ||||||
|  |  | ||||||
|  |         video_type = traverse_obj(metadata, ('contents', 'video_type', 0, {str})) | ||||||
|  |         if video_type == 'VOD': | ||||||
|  |             return merge_dicts(info, self._process_vod(video_id, metadata)) | ||||||
|  |         if video_type == 'LIVE': | ||||||
|  |             return merge_dicts(info, self._process_live(video_id, metadata)) | ||||||
|  |  | ||||||
|  |         raise ExtractorError(f'Unexpected video type {video_type!r}') | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class AsobiChannelTagURLIE(AsobiChannelBaseIE): | ||||||
|  |     IE_NAME = 'asobichannel:tag' | ||||||
|  |     IE_DESC = 'ASOBI CHANNEL' | ||||||
|  |  | ||||||
|  |     _VALID_URL = r'https?://asobichannel\.asobistore\.jp/tag/(?P<id>[a-z0-9-_]+)' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://asobichannel.asobistore.jp/tag/bjhh-nbcja', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'bjhh-nbcja', | ||||||
|  |             'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 16, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://asobichannel.asobistore.jp/tag/hvm5qw3c6od', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'hvm5qw3c6od', | ||||||
|  |             'title': 'アイマスMOIW2023ラジオ', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 13, | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         tag_id = self._match_id(url) | ||||||
|  |         webpage = self._download_webpage(url, tag_id) | ||||||
|  |         title = traverse_obj(self._search_nextjs_data( | ||||||
|  |             webpage, tag_id, fatal=False), ('props', 'pageProps', 'data', 'name', {str})) | ||||||
|  |  | ||||||
|  |         media = self._download_json( | ||||||
|  |             f'https://channel.microcms.io/api/v1/media?limit=999&filters=(tag[contains]{tag_id})', | ||||||
|  |             tag_id, headers=self._MICROCMS_HEADER) | ||||||
|  |  | ||||||
|  |         def entries(): | ||||||
|  |             for metadata in traverse_obj(media, ('contents', lambda _, v: v['id'])): | ||||||
|  |                 yield { | ||||||
|  |                     '_type': 'url', | ||||||
|  |                     'url': f'https://asobichannel.asobistore.jp/watch/{metadata["id"]}', | ||||||
|  |                     'ie_key': AsobiChannelIE.ie_key(), | ||||||
|  |                     **self._extract_info(metadata), | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |         return self.playlist_result(entries(), tag_id, title) | ||||||
plugins/youtube_download/yt_dlp/extractor/asobistage.py (new file, 155 lines)
| @@ -0,0 +1,155 @@ |||||||
|  | import functools | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import str_or_none, url_or_none | ||||||
|  | from ..utils.traversal import traverse_obj | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class AsobiStageIE(InfoExtractor): | ||||||
|  |     IE_DESC = 'ASOBISTAGE (アソビステージ)' | ||||||
|  |     _VALID_URL = r'https?://asobistage\.asobistore\.jp/event/(?P<id>(?P<event>\w+)/(?P<type>archive|player)/(?P<slug>\w+))(?:[?#]|$)' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://asobistage.asobistore.jp/event/315passionhour_2022summer/archive/frame', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '315passionhour_2022summer/archive/frame', | ||||||
|  |             'title': '315プロダクションプレゼンツ 315パッションアワー!!!', | ||||||
|  |             'thumbnail': r're:^https?://[\w.-]+/\w+/\w+', | ||||||
|  |         }, | ||||||
|  |         'playlist_count': 1, | ||||||
|  |         'playlist': [{ | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': 'edff52f2', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': '315passion_FRAME_only', | ||||||
|  |                 'thumbnail': r're:^https?://[\w.-]+/\w+/\w+', | ||||||
|  |             }, | ||||||
|  |         }], | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://asobistage.asobistore.jp/event/idolmaster_idolworld2023_goods/archive/live', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'idolmaster_idolworld2023_goods/archive/live', | ||||||
|  |             'title': 'md5:378510b6e830129d505885908bd6c576', | ||||||
|  |             'thumbnail': r're:^https?://[\w.-]+/\w+/\w+', | ||||||
|  |         }, | ||||||
|  |         'playlist_count': 1, | ||||||
|  |         'playlist': [{ | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '3aef7110', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'asobistore_station_1020_serverREC', | ||||||
|  |                 'thumbnail': r're:^https?://[\w.-]+/\w+/\w+', | ||||||
|  |             }, | ||||||
|  |         }], | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://asobistage.asobistore.jp/event/sidem_fclive_bpct/archive/premium_hc', | ||||||
|  |         'playlist_count': 4, | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'sidem_fclive_bpct/archive/premium_hc', | ||||||
|  |             'title': '315 Production presents F@NTASTIC COMBINATION LIVE ~BRAINPOWER!!~/~CONNECTIME!!!!~', | ||||||
|  |             'thumbnail': r're:^https?://[\w.-]+/\w+/\w+', | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://asobistage.asobistore.jp/event/ijigenfes_utagassen/player/day1', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     _API_HOST = 'https://asobistage-api.asobistore.jp' | ||||||
|  |     _HEADERS = {} | ||||||
|  |     _is_logged_in = False | ||||||
|  |  | ||||||
|  |     @functools.cached_property | ||||||
|  |     def _owned_tickets(self): | ||||||
|  |         owned_tickets = set() | ||||||
|  |         if not self._is_logged_in: | ||||||
|  |             return owned_tickets | ||||||
|  |  | ||||||
|  |         for path, name in [ | ||||||
|  |             ('api/v1/purchase_history/list', 'ticket purchase history'), | ||||||
|  |             ('api/v1/serialcode/list', 'redemption history'), | ||||||
|  |         ]: | ||||||
|  |             response = self._download_json( | ||||||
|  |                 f'{self._API_HOST}/{path}', None, f'Downloading {name}', | ||||||
|  |                 f'Unable to download {name}', expected_status=400) | ||||||
|  |             if traverse_obj(response, ('payload', 'error_message'), 'error') == 'notlogin': | ||||||
|  |                 self._is_logged_in = False | ||||||
|  |                 break | ||||||
|  |             owned_tickets.update( | ||||||
|  |                 traverse_obj(response, ('payload', 'value', ..., 'digital_product_id', {str_or_none}))) | ||||||
|  |  | ||||||
|  |         return owned_tickets | ||||||
|  |  | ||||||
|  |     def _get_available_channel_id(self, channel): | ||||||
|  |         channel_id = traverse_obj(channel, ('chennel_vspf_id', {str})) | ||||||
|  |         if not channel_id: | ||||||
|  |             return None | ||||||
|  |         # if rights_type_id == 6, then 'No conditions (no login required - non-members are OK)' | ||||||
|  |         if traverse_obj(channel, ('viewrights', lambda _, v: v['rights_type_id'] == 6)): | ||||||
|  |             return channel_id | ||||||
|  |         available_tickets = traverse_obj(channel, ( | ||||||
|  |             'viewrights', ..., ('tickets', 'serialcodes'), ..., 'digital_product_id', {str_or_none})) | ||||||
|  |         if not self._owned_tickets.intersection(available_tickets): | ||||||
|  |             self.report_warning( | ||||||
|  |                 f'You are not a ticketholder for "{channel.get("channel_name") or channel_id}"') | ||||||
|  |             return None | ||||||
|  |         return channel_id | ||||||
|  |  | ||||||
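The two helpers above reduce to a set intersection between the tickets the user owns (purchase plus serial-code history) and the products attached to a channel's view rights, with `rights_type_id == 6` short-circuiting as "free to watch". A condensed sketch (field names mirror the code; values are invented):

    owned_tickets = {'1001', '2002'}  # ids collected from the two history endpoints

    def channel_is_watchable(channel):
        viewrights = channel.get('viewrights') or []
        if any(v.get('rights_type_id') == 6 for v in viewrights):  # no ticket needed
            return True
        available = {str(p['digital_product_id'])
                     for v in viewrights
                     for p in (v.get('tickets') or []) + (v.get('serialcodes') or [])}
        return bool(owned_tickets & available)

    print(channel_is_watchable({'viewrights': [
        {'rights_type_id': 1, 'tickets': [{'digital_product_id': 2002}]}]}))  # True
    print(channel_is_watchable({'viewrights': [
        {'rights_type_id': 1, 'tickets': [{'digital_product_id': 9999}]}]}))  # False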
|  |     def _real_initialize(self): | ||||||
|  |         if self._get_cookies(self._API_HOST): | ||||||
|  |             self._is_logged_in = True | ||||||
|  |         token = self._download_json( | ||||||
|  |             f'{self._API_HOST}/api/v1/vspf/token', None, 'Getting token', 'Unable to get token') | ||||||
|  |         self._HEADERS['Authorization'] = f'Bearer {token}' | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         webpage, urlh = self._download_webpage_handle(url, self._match_id(url)) | ||||||
|  |         video_id, event, type_, slug = self._match_valid_url(urlh.url).group('id', 'event', 'type', 'slug') | ||||||
|  |         video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_] | ||||||
|  |  | ||||||
|  |         event_data = traverse_obj( | ||||||
|  |             self._search_nextjs_data(webpage, video_id, default={}), | ||||||
|  |             ('props', 'pageProps', 'eventCMSData', { | ||||||
|  |                 'title': ('event_name', {str}), | ||||||
|  |                 'thumbnail': ('event_thumbnail_image', {url_or_none}), | ||||||
|  |             })) | ||||||
|  |  | ||||||
|  |         available_channels = traverse_obj(self._download_json( | ||||||
|  |             f'https://asobistage.asobistore.jp/cdn/v101/events/{event}/{video_type}.json', | ||||||
|  |             video_id, 'Getting channel list', 'Unable to get channel list'), ( | ||||||
|  |             video_type, lambda _, v: v['broadcast_slug'] == slug, | ||||||
|  |             'channels', lambda _, v: v['chennel_vspf_id'] != '00000')) | ||||||
|  |  | ||||||
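Unlike the dict form used elsewhere, the channel lookup above branches with `lambda _, v: ...` filters, which select every list element the predicate accepts before descending further (the misspelled `chennel_vspf_id` key is taken verbatim from the upstream API). A toy run (assumes yt-dlp is importable):

    from yt_dlp.utils.traversal import traverse_obj

    data = {'archives': [  # invented sample shaped like the CDN's archives.json
        {'broadcast_slug': 'live', 'channels': [
            {'chennel_vspf_id': '00000'},  # placeholder entry, filtered out
            {'chennel_vspf_id': '12345'},
        ]},
        {'broadcast_slug': 'day2', 'channels': [{'chennel_vspf_id': '67890'}]},
    ]}
    print(traverse_obj(data, (
        'archives', lambda _, v: v['broadcast_slug'] == 'live',
        'channels', lambda _, v: v['chennel_vspf_id'] != '00000')))
    # [{'chennel_vspf_id': '12345'}]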
|  |         entries = [] | ||||||
|  |         for channel_id in traverse_obj(available_channels, (..., {self._get_available_channel_id})): | ||||||
|  |             if video_type == 'archives': | ||||||
|  |                 channel_json = self._download_json( | ||||||
|  |                     f'https://survapi.channel.or.jp/proxy/v1/contents/{channel_id}/get_by_cuid', channel_id, | ||||||
|  |                     'Getting archive channel info', 'Unable to get archive channel info', fatal=False, | ||||||
|  |                     headers=self._HEADERS) | ||||||
|  |                 channel_data = traverse_obj(channel_json, ('ex_content', { | ||||||
|  |                     'm3u8_url': 'streaming_url', | ||||||
|  |                     'title': 'title', | ||||||
|  |                     'thumbnail': ('thumbnail', 'url'), | ||||||
|  |                 })) | ||||||
|  |             else:  # video_type == 'broadcasts' | ||||||
|  |                 channel_json = self._download_json( | ||||||
|  |                     f'https://survapi.channel.or.jp/ex/events/{channel_id}', channel_id, | ||||||
|  |                     'Getting live channel info', 'Unable to get live channel info', fatal=False, | ||||||
|  |                     headers=self._HEADERS, query={'embed': 'channel'}) | ||||||
|  |                 channel_data = traverse_obj(channel_json, ('data', { | ||||||
|  |                     'm3u8_url': ('Channel', 'Custom_live_url'), | ||||||
|  |                     'title': 'Name', | ||||||
|  |                     'thumbnail': 'Poster_url', | ||||||
|  |                 })) | ||||||
|  |  | ||||||
|  |             entries.append({ | ||||||
|  |                 'id': channel_id, | ||||||
|  |                 'title': channel_data.get('title'), | ||||||
|  |                 'formats': self._extract_m3u8_formats(channel_data.get('m3u8_url'), channel_id, fatal=False), | ||||||
|  |                 'is_live': video_type == 'broadcasts', | ||||||
|  |                 'thumbnail': url_or_none(channel_data.get('thumbnail')), | ||||||
|  |             }) | ||||||
|  |  | ||||||
|  |         if not self._is_logged_in and not entries: | ||||||
|  |             self.raise_login_required() | ||||||
|  |  | ||||||
|  |         return self.playlist_result(entries, video_id, **event_data) | ||||||
| @@ -1,104 +1,152 @@ | |||||||
|  | import urllib.parse | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..networking.exceptions import HTTPError | from ..networking.exceptions import HTTPError | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|  |     parse_age_limit, | ||||||
|  |     url_or_none, | ||||||
|     urlencode_postdata, |     urlencode_postdata, | ||||||
| ) | ) | ||||||
|  | from ..utils.traversal import traverse_obj | ||||||
|  |  | ||||||
|  |  | ||||||
| class AtresPlayerIE(InfoExtractor): | class AtresPlayerIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})' |     _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/(?:[^/?#]+/){4}(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})' | ||||||
|     _NETRC_MACHINE = 'atresplayer' |     _NETRC_MACHINE = 'atresplayer' | ||||||
|     _TESTS = [ |     _TESTS = [{ | ||||||
|         { |         'url': 'https://www.atresplayer.com/lasexta/programas/el-objetivo/clips/mbappe-describe-como-entrenador-a-carlo-ancelotti-sabe-cuando-tiene-que-ser-padre-jefe-amigo-entrenador_67f2dfb2fb6ab0e4c7203849/', | ||||||
|             'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/', |         'info_dict': { | ||||||
|             'info_dict': { |             'ext': 'mp4', | ||||||
|                 'id': '5d4aa2c57ed1a88fc715a615', |             'id': '67f2dfb2fb6ab0e4c7203849', | ||||||
|                 'ext': 'mp4', |             'display_id': 'md5:c203f8d4e425ed115ba56a1c6e4b3e6c', | ||||||
|                 'title': 'Capítulo 7: Asuntos pendientes', |             'title': 'Mbappé describe como entrenador a Carlo Ancelotti: "Sabe cuándo tiene que ser padre, jefe, amigo, entrenador..."', | ||||||
|                 'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc', |             'channel': 'laSexta', | ||||||
|                 'duration': 3413, |             'duration': 31, | ||||||
|             }, |             'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/06/B02DBE1E-D59B-4683-8404-1A9595D15269/1920x1080.jpg', | ||||||
|             'skip': 'This video is only available for registered users' |             'tags': ['Entrevista informativa', 'Actualidad', 'Debate informativo', 'Política', 'Economía', 'Sociedad', 'Cara a cara', 'Análisis', 'Más periodismo'], | ||||||
|  |             'series': 'El Objetivo', | ||||||
|  |             'season': 'Temporada 12', | ||||||
|  |             'timestamp': 1743970079, | ||||||
|  |             'upload_date': '20250406', | ||||||
|         }, |         }, | ||||||
|         { |     }, { | ||||||
|             'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/', |         'url': 'https://www.atresplayer.com/antena3/programas/el-hormiguero/clips/revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero_67f836baa4a5b0e4147ca59a/', | ||||||
|             'only_matching': True, |         'info_dict': { | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'id': '67f836baa4a5b0e4147ca59a', | ||||||
|  |             'display_id': 'revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero', | ||||||
|  |             'title': 'Revive la entrevista completa a Miguel Bosé en El Hormiguero', | ||||||
|  |             'description': 'md5:c6d2b591408d45a7bc2986dfb938eb72', | ||||||
|  |             'channel': 'Antena 3', | ||||||
|  |             'duration': 2556, | ||||||
|  |             'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/10/9076395F-F1FD-48BE-9F18-540DBA10EBAD/1920x1080.jpg', | ||||||
|  |             'tags': ['Entrevista', 'Variedades', 'Humor', 'Entretenimiento', 'Te sigo', 'Buen rollo', 'Cara a cara'], | ||||||
|  |             'series': 'El Hormiguero ', | ||||||
|  |             'season': 'Temporada 14', | ||||||
|  |             'timestamp': 1744320111, | ||||||
|  |             'upload_date': '20250410', | ||||||
|         }, |         }, | ||||||
|         { |     }, { | ||||||
|             'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/', |         'url': 'https://www.atresplayer.com/flooxer/series/biara-proyecto-lazarus/temporada-1/capitulo-3-supervivientes_67a6038b64ceca00070f4f69/', | ||||||
|             'only_matching': True, |         'info_dict': { | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'id': '67a6038b64ceca00070f4f69', | ||||||
|  |             'display_id': 'capitulo-3-supervivientes', | ||||||
|  |             'title': 'Capítulo 3: Supervivientes', | ||||||
|  |             'description': 'md5:65b231f20302f776c2b0dd24594599a1', | ||||||
|  |             'channel': 'Flooxer', | ||||||
|  |             'duration': 1196, | ||||||
|  |             'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages01/2025/02/14/17CF90D3-FE67-40C5-A941-7825B3E13992/1920x1080.jpg', | ||||||
|  |             'tags': ['Juvenil', 'Terror', 'Piel de gallina', 'Te sigo', 'Un break', 'Del tirón'], | ||||||
|  |             'series': 'BIARA: Proyecto Lázarus', | ||||||
|  |             'season': 'Temporada 1', | ||||||
|  |             'season_number': 1, | ||||||
|  |             'episode': 'Episode 3', | ||||||
|  |             'episode_number': 3, | ||||||
|  |             'timestamp': 1743095191, | ||||||
|  |             'upload_date': '20250327', | ||||||
|         }, |         }, | ||||||
|     ] |     }, { | ||||||
|  |         'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }] | ||||||
|     _API_BASE = 'https://api.atresplayer.com/' |     _API_BASE = 'https://api.atresplayer.com/' | ||||||
|  |  | ||||||
|     def _handle_error(self, e, code): |  | ||||||
|         if isinstance(e.cause, HTTPError) and e.cause.status == code: |  | ||||||
|             error = self._parse_json(e.cause.response.read(), None) |  | ||||||
|             if error.get('error') == 'required_registered': |  | ||||||
|                 self.raise_login_required() |  | ||||||
|             raise ExtractorError(error['error_description'], expected=True) |  | ||||||
|         raise |  | ||||||
|  |  | ||||||
|     def _perform_login(self, username, password): |     def _perform_login(self, username, password): | ||||||
|         self._request_webpage( |  | ||||||
|             self._API_BASE + 'login', None, 'Downloading login page') |  | ||||||
|  |  | ||||||
|         try: |         try: | ||||||
|             target_url = self._download_json( |             self._download_webpage( | ||||||
|                 'https://account.atresmedia.com/api/login', None, |                 'https://account.atresplayer.com/auth/v1/login', None, | ||||||
|                 'Logging in', headers={ |                 'Logging in', 'Failed to log in', data=urlencode_postdata({ | ||||||
|                     'Content-Type': 'application/x-www-form-urlencoded' |  | ||||||
|                 }, data=urlencode_postdata({ |  | ||||||
|                     'username': username, |                     'username': username, | ||||||
|                     'password': password, |                     'password': password, | ||||||
|                 }))['targetUrl'] |                 })) | ||||||
|         except ExtractorError as e: |         except ExtractorError as e: | ||||||
|             self._handle_error(e, 400) |             if isinstance(e.cause, HTTPError) and e.cause.status == 400: | ||||||
|  |                 raise ExtractorError('Invalid username and/or password', expected=True) | ||||||
|         self._request_webpage(target_url, None, 'Following Target URL') |             raise | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         display_id, video_id = self._match_valid_url(url).groups() |         display_id, video_id = self._match_valid_url(url).groups() | ||||||
|  |  | ||||||
|         try: |         metadata_url = self._download_json( | ||||||
|             episode = self._download_json( |             self._API_BASE + 'client/v1/url', video_id, 'Downloading API endpoint data', | ||||||
|                 self._API_BASE + 'client/v1/player/episode/' + video_id, video_id) |             query={'href': urllib.parse.urlparse(url).path})['href'] | ||||||
|         except ExtractorError as e: |         metadata = self._download_json(metadata_url, video_id) | ||||||
|             self._handle_error(e, 403) |  | ||||||
|  |  | ||||||
|         title = episode['titulo'] |         try: | ||||||
|  |             video_data = self._download_json(metadata['urlVideo'], video_id, 'Downloading video data') | ||||||
|  |         except ExtractorError as e: | ||||||
|  |             if isinstance(e.cause, HTTPError) and e.cause.status == 403: | ||||||
|  |                 error = self._parse_json(e.cause.response.read(), None) | ||||||
|  |                 if error.get('error') == 'required_registered': | ||||||
|  |                     self.raise_login_required() | ||||||
|  |                 raise ExtractorError(error['error_description'], expected=True) | ||||||
|  |             raise | ||||||
|  |  | ||||||
|         formats = [] |         formats = [] | ||||||
|         subtitles = {} |         subtitles = {} | ||||||
|         for source in episode.get('sources', []): |         for source in traverse_obj(video_data, ('sources', lambda _, v: url_or_none(v['src']))): | ||||||
|             src = source.get('src') |             src_url = source['src'] | ||||||
|             if not src: |  | ||||||
|                 continue |  | ||||||
|             src_type = source.get('type') |             src_type = source.get('type') | ||||||
|             if src_type == 'application/vnd.apple.mpegurl': |             if src_type in ('application/vnd.apple.mpegurl', 'application/hls+legacy', 'application/hls+hevc'): | ||||||
|                 formats, subtitles = self._extract_m3u8_formats( |                 fmts, subs = self._extract_m3u8_formats_and_subtitles( | ||||||
|                     src, video_id, 'mp4', 'm3u8_native', |                     src_url, video_id, 'mp4', m3u8_id='hls', fatal=False) | ||||||
|                     m3u8_id='hls', fatal=False) |             elif src_type in ('application/dash+xml', 'application/dash+hevc'): | ||||||
|             elif src_type == 'application/dash+xml': |                 fmts, subs = self._extract_mpd_formats_and_subtitles( | ||||||
|                 formats, subtitles = self._extract_mpd_formats( |                     src_url, video_id, mpd_id='dash', fatal=False) | ||||||
|                     src, video_id, mpd_id='dash', fatal=False) |             else: | ||||||
|  |                 continue | ||||||
|         heartbeat = episode.get('heartbeat') or {} |             formats.extend(fmts) | ||||||
|         omniture = episode.get('omniture') or {} |             self._merge_subtitles(subs, target=subtitles) | ||||||
|         get_meta = lambda x: heartbeat.get(x) or omniture.get(x) |  | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'display_id': display_id, |             'display_id': display_id, | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': title, |  | ||||||
|             'description': episode.get('descripcion'), |  | ||||||
|             'thumbnail': episode.get('imgPoster'), |  | ||||||
|             'duration': int_or_none(episode.get('duration')), |  | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|             'channel': get_meta('channel'), |  | ||||||
|             'season': get_meta('season'), |  | ||||||
|             'episode_number': int_or_none(get_meta('episodeNumber')), |  | ||||||
|             'subtitles': subtitles, |             'subtitles': subtitles, | ||||||
|  |             **traverse_obj(video_data, { | ||||||
|  |                 'title': ('titulo', {str}), | ||||||
|  |                 'description': ('descripcion', {str}), | ||||||
|  |                 'duration': ('duration', {int_or_none}), | ||||||
|  |                 'thumbnail': ('imgPoster', {url_or_none}, {lambda v: f'{v}1920x1080.jpg'}), | ||||||
|  |                 'age_limit': ('ageRating', {parse_age_limit}), | ||||||
|  |             }), | ||||||
|  |             **traverse_obj(metadata, { | ||||||
|  |                 'title': ('title', {str}), | ||||||
|  |                 'description': ('description', {str}), | ||||||
|  |                 'duration': ('duration', {int_or_none}), | ||||||
|  |                 'tags': ('tags', ..., 'title', {str}), | ||||||
|  |                 'age_limit': ('ageRating', {parse_age_limit}), | ||||||
|  |                 'series': ('format', 'title', {str}), | ||||||
|  |                 'season': ('currentSeason', 'title', {str}), | ||||||
|  |                 'season_number': ('currentSeason', 'seasonNumber', {int_or_none}), | ||||||
|  |                 'episode_number': ('numberOfEpisode', {int_or_none}), | ||||||
|  |                 'timestamp': ('publicationDate', {int_or_none(scale=1000)}), | ||||||
|  |                 'channel': ('channel', 'title', {str}), | ||||||
|  |             }), | ||||||
|         } |         } | ||||||
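One detail worth noting in the new metadata mapping: `publicationDate` arrives as epoch milliseconds, and `{int_or_none(scale=1000)}` works because recent yt-dlp versions let this helper be partially applied; called without a value, it returns a callable that divides by `scale`. Roughly:

    from yt_dlp.utils import int_or_none

    print(int_or_none(1743970079000, scale=1000))  # 1743970079 (2025-04-06, as in the first test)
    to_seconds = int_or_none(scale=1000)  # no value -> partial, usable inside traverse_obj
    print(to_seconds('1744320111000'))    # 1744320111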
|   | |||||||
| @@ -12,7 +12,7 @@ class AtScaleConfEventIE(InfoExtractor): | |||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': 'data-scale-spring-2022', |             'id': 'data-scale-spring-2022', | ||||||
|             'title': 'Data @Scale Spring 2022', |             'title': 'Data @Scale Spring 2022', | ||||||
|             'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55' |             'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55', | ||||||
|         }, |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://atscaleconference.com/events/video-scale-2021/', |         'url': 'https://atscaleconference.com/events/video-scale-2021/', | ||||||
| @@ -20,15 +20,15 @@ class AtScaleConfEventIE(InfoExtractor): | |||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': 'video-scale-2021', |             'id': 'video-scale-2021', | ||||||
|             'title': 'Video @Scale 2021', |             'title': 'Video @Scale 2021', | ||||||
|             'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55' |             'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55', | ||||||
|         }, |         }, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         id = self._match_id(url) |         playlist_id = self._match_id(url) | ||||||
|         webpage = self._download_webpage(url, id) |         webpage = self._download_webpage(url, playlist_id) | ||||||
|  |  | ||||||
|         return self.playlist_from_matches( |         return self.playlist_from_matches( | ||||||
|             re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage), |             re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage), | ||||||
|             ie='Generic', playlist_id=id, |             ie='Generic', playlist_id=playlist_id, | ||||||
|             title=self._og_search_title(webpage), description=self._og_search_description(webpage)) |             title=self._og_search_title(webpage), description=self._og_search_description(webpage)) | ||||||
|   | |||||||
| @@ -1,53 +0,0 @@ | |||||||
| from .common import InfoExtractor |  | ||||||
| from ..utils import unified_strdate |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class ATTTechChannelIE(InfoExtractor): |  | ||||||
|     _VALID_URL = r'https?://techchannel\.att\.com/play-video\.cfm/([^/]+/)*(?P<id>.+)' |  | ||||||
|     _TEST = { |  | ||||||
|         'url': 'http://techchannel.att.com/play-video.cfm/2014/1/27/ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '11316', |  | ||||||
|             'display_id': 'ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use', |  | ||||||
|             'ext': 'flv', |  | ||||||
|             'title': 'AT&T Archives : The UNIX System: Making Computers Easier to Use', |  | ||||||
|             'description': 'A 1982 film about UNIX is the foundation for software in use around Bell Labs and AT&T.', |  | ||||||
|             'thumbnail': r're:^https?://.*\.jpg$', |  | ||||||
|             'upload_date': '20140127', |  | ||||||
|         }, |  | ||||||
|         'params': { |  | ||||||
|             # rtmp download |  | ||||||
|             'skip_download': True, |  | ||||||
|         }, |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |  | ||||||
|         display_id = self._match_id(url) |  | ||||||
|  |  | ||||||
|         webpage = self._download_webpage(url, display_id) |  | ||||||
|  |  | ||||||
|         video_url = self._search_regex( |  | ||||||
|             r"url\s*:\s*'(rtmp://[^']+)'", |  | ||||||
|             webpage, 'video URL') |  | ||||||
|  |  | ||||||
|         video_id = self._search_regex( |  | ||||||
|             r'mediaid\s*=\s*(\d+)', |  | ||||||
|             webpage, 'video id', fatal=False) |  | ||||||
|  |  | ||||||
|         title = self._og_search_title(webpage) |  | ||||||
|         description = self._og_search_description(webpage) |  | ||||||
|         thumbnail = self._og_search_thumbnail(webpage) |  | ||||||
|         upload_date = unified_strdate(self._search_regex( |  | ||||||
|             r'[Rr]elease\s+date:\s*(\d{1,2}/\d{1,2}/\d{4})', |  | ||||||
|             webpage, 'upload date', fatal=False), False) |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             'id': video_id, |  | ||||||
|             'display_id': display_id, |  | ||||||
|             'url': video_url, |  | ||||||
|             'ext': 'flv', |  | ||||||
|             'title': title, |  | ||||||
|             'description': description, |  | ||||||
|             'thumbnail': thumbnail, |  | ||||||
|             'upload_date': upload_date, |  | ||||||
|         } |  | ||||||
| @@ -1,11 +1,11 @@ | |||||||
| import datetime | import datetime as dt | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|  |     ExtractorError, | ||||||
|     float_or_none, |     float_or_none, | ||||||
|     jwt_encode_hs256, |     jwt_encode_hs256, | ||||||
|     try_get, |     try_get, | ||||||
|     ExtractorError, |  | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -19,7 +19,7 @@ class ATVAtIE(InfoExtractor): | |||||||
|             'id': 'v-ce9cgn1e70n5-1', |             'id': 'v-ce9cgn1e70n5-1', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen', |             'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen', | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1', |         'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
| @@ -66,14 +66,14 @@ class ATVAtIE(InfoExtractor): | |||||||
|             video_id=video_id) |             video_id=video_id) | ||||||
|  |  | ||||||
|         video_title = json_data['views']['default']['page']['title'] |         video_title = json_data['views']['default']['page']['title'] | ||||||
|         contentResource = json_data['views']['default']['page']['contentResource'] |         content_resource = json_data['views']['default']['page']['contentResource'] | ||||||
|         content_id = contentResource[0]['id'] |         content_id = content_resource[0]['id'] | ||||||
|         content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']} |         content_ids = [{'id': id_, 'subclip_start': content['start'], 'subclip_end': content['end']} | ||||||
|                        for id, content in enumerate(contentResource)] |                        for id_, content in enumerate(content_resource)] | ||||||
|  |  | ||||||
|         time_of_request = datetime.datetime.now() |         time_of_request = dt.datetime.now() | ||||||
|         not_before = time_of_request - datetime.timedelta(minutes=5) |         not_before = time_of_request - dt.timedelta(minutes=5) | ||||||
|         expire = time_of_request + datetime.timedelta(minutes=5) |         expire = time_of_request + dt.timedelta(minutes=5) | ||||||
|         payload = { |         payload = { | ||||||
|             'content_ids': { |             'content_ids': { | ||||||
|                 content_id: content_ids, |                 content_id: content_ids, | ||||||
| @@ -87,17 +87,17 @@ class ATVAtIE(InfoExtractor): | |||||||
|         videos = self._download_json( |         videos = self._download_json( | ||||||
|             'https://vas-v4.p7s1video.net/4.0/getsources', |             'https://vas-v4.p7s1video.net/4.0/getsources', | ||||||
|             content_id, 'Downloading videos JSON', query={ |             content_id, 'Downloading videos JSON', query={ | ||||||
|                 'token': jwt_token.decode('utf-8') |                 'token': jwt_token.decode('utf-8'), | ||||||
|             }) |             }) | ||||||
|  |  | ||||||
|         video_id, videos_data = list(videos['data'].items())[0] |         video_id, videos_data = next(iter(videos['data'].items())) | ||||||
|         error_msg = try_get(videos_data, lambda x: x['error']['title']) |         error_msg = try_get(videos_data, lambda x: x['error']['title']) | ||||||
|         if error_msg == 'Geo check failed': |         if error_msg == 'Geo check failed': | ||||||
|             self.raise_geo_restricted(error_msg) |             self.raise_geo_restricted(error_msg) | ||||||
|         elif error_msg: |         elif error_msg: | ||||||
|             raise ExtractorError(error_msg) |             raise ExtractorError(error_msg) | ||||||
|         entries = [ |         entries = [ | ||||||
|             self._extract_video_info(url, contentResource[video['id']], video) |             self._extract_video_info(url, content_resource[video['id']], video) | ||||||
|             for video in videos_data] |             for video in videos_data] | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|   | |||||||
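Note on the ATVAt change above: the token handed to the getsources endpoint is a short-lived JWT, valid only in a ten-minute window around the request (nbf five minutes before, exp five minutes after). A minimal standalone sketch of that pattern, assuming yt-dlp's jwt_encode_hs256 helper and placeholder key, kid, and claim values (the real ones are not shown in this diff):

    import datetime as dt

    from yt_dlp.utils import jwt_encode_hs256

    now = dt.datetime.now()
    payload = {
        'content_ids': {'example-content-id': [{'id': 0, 'subclip_start': 0.0, 'subclip_end': 42.0}]},
        'nbf': int((now - dt.timedelta(minutes=5)).timestamp()),  # not valid before
        'exp': int((now + dt.timedelta(minutes=5)).timestamp()),  # expires
    }
    # 'placeholder-secret' and the kid header are illustrative, not the site's real values
    token = jwt_encode_hs256(payload, 'placeholder-secret', headers={'kid': 'placeholder'})
    print(token.decode('utf-8'))  # passed as the ?token= query parameter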
| @@ -19,7 +19,7 @@ class AudiMediaIE(InfoExtractor): | |||||||
|             'timestamp': 1448354940, |             'timestamp': 1448354940, | ||||||
|             'duration': 74022, |             'duration': 74022, | ||||||
|             'view_count': int, |             'view_count': int, | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991', |         'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
| @@ -73,7 +73,7 @@ class AudiMediaIE(InfoExtractor): | |||||||
|                 bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None) |                 bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None) | ||||||
|                 if bitrate: |                 if bitrate: | ||||||
|                     f.update({ |                     f.update({ | ||||||
|                         'format_id': 'http-%s' % bitrate, |                         'format_id': f'http-{bitrate}', | ||||||
|                     }) |                     }) | ||||||
|                 formats.append(f) |                 formats.append(f) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -15,7 +15,7 @@ class AudioBoomIE(InfoExtractor): | |||||||
|             'duration': 4000.99, |             'duration': 4000.99, | ||||||
|             'uploader': 'Sue Perkins: An hour or so with...', |             'uploader': 'Sue Perkins: An hour or so with...', | ||||||
|             'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins', |             'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins', | ||||||
|         } |         }, | ||||||
|     }, {  # Direct mp3-file link |     }, {  # Direct mp3-file link | ||||||
|         'url': 'https://audioboom.com/posts/8128496.mp3', |         'url': 'https://audioboom.com/posts/8128496.mp3', | ||||||
|         'md5': 'e329edf304d450def95c7f86a9165ee1', |         'md5': 'e329edf304d450def95c7f86a9165ee1', | ||||||
| @@ -27,7 +27,7 @@ class AudioBoomIE(InfoExtractor): | |||||||
|             'duration': 1689.7, |             'duration': 1689.7, | ||||||
|             'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race', |             'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race', | ||||||
|             'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904', |             'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904', | ||||||
|         } |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0', |         'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|   | |||||||
| @@ -9,7 +9,7 @@ class AudiodraftBaseIE(InfoExtractor): | |||||||
|             headers={ |             headers={ | ||||||
|                 'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8', |                 'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8', | ||||||
|                 'X-Requested-With': 'XMLHttpRequest', |                 'X-Requested-With': 'XMLHttpRequest', | ||||||
|             }, data=f'id={player_entry_id}'.encode('utf-8')) |             }, data=f'id={player_entry_id}'.encode()) | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': str(data_json['entry_id']), |             'id': str(data_json['entry_id']), | ||||||
| @@ -65,9 +65,10 @@ class AudiodraftCustomIE(AudiodraftBaseIE): | |||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|         webpage = self._download_webpage(url, id) |         webpage = self._download_webpage(url, video_id) | ||||||
|         player_entry_id = self._search_regex(r'playAudio\(\'(player_entry_\d+)\'\);', webpage, id, 'play entry id') |         player_entry_id = self._search_regex( | ||||||
|  |             r'playAudio\(\'(player_entry_\d+)\'\);', webpage, video_id, 'play entry id') | ||||||
|         return self._audiodraft_extract_from_id(player_entry_id) |         return self._audiodraft_extract_from_id(player_entry_id) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -89,5 +90,5 @@ class AudiodraftGenericIE(AudiodraftBaseIE): | |||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|         return self._audiodraft_extract_from_id(f'player_entry_{id}') |         return self._audiodraft_extract_from_id(f'player_entry_{video_id}') | ||||||
|   | |||||||
| @@ -3,7 +3,6 @@ import time | |||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from .soundcloud import SoundcloudIE | from .soundcloud import SoundcloudIE | ||||||
| from ..compat import compat_str |  | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     url_basename, |     url_basename, | ||||||
| @@ -22,8 +21,8 @@ class AudiomackIE(InfoExtractor): | |||||||
|                 'id': '310086', |                 'id': '310086', | ||||||
|                 'ext': 'mp3', |                 'ext': 'mp3', | ||||||
|                 'uploader': 'Roosh Williams', |                 'uploader': 'Roosh Williams', | ||||||
|                 'title': 'Extraordinary' |                 'title': 'Extraordinary', | ||||||
|             } |             }, | ||||||
|         }, |         }, | ||||||
|         # audiomack wrapper around soundcloud song |         # audiomack wrapper around soundcloud song | ||||||
|         # Needs new test URL. |         # Needs new test URL. | ||||||
| @@ -56,7 +55,7 @@ class AudiomackIE(InfoExtractor): | |||||||
|  |  | ||||||
|         # API is inconsistent with errors |         # API is inconsistent with errors | ||||||
|         if 'url' not in api_response or not api_response['url'] or 'error' in api_response: |         if 'url' not in api_response or not api_response['url'] or 'error' in api_response: | ||||||
|             raise ExtractorError('Invalid url %s' % url) |             raise ExtractorError(f'Invalid url {url}') | ||||||
|  |  | ||||||
|         # Audiomack wraps a lot of soundcloud tracks in their branded wrapper |         # Audiomack wraps a lot of soundcloud tracks in their branded wrapper | ||||||
|         # if so, pass the work off to the soundcloud extractor |         # if so, pass the work off to the soundcloud extractor | ||||||
| @@ -64,7 +63,7 @@ class AudiomackIE(InfoExtractor): | |||||||
|             return self.url_result(api_response['url'], SoundcloudIE.ie_key()) |             return self.url_result(api_response['url'], SoundcloudIE.ie_key()) | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': compat_str(api_response.get('id', album_url_tag)), |             'id': str(api_response.get('id', album_url_tag)), | ||||||
|             'uploader': api_response.get('artist'), |             'uploader': api_response.get('artist'), | ||||||
|             'title': api_response.get('title'), |             'title': api_response.get('title'), | ||||||
|             'url': api_response['url'], |             'url': api_response['url'], | ||||||
| @@ -82,8 +81,8 @@ class AudiomackAlbumIE(InfoExtractor): | |||||||
|             'info_dict': |             'info_dict': | ||||||
|             { |             { | ||||||
|                 'id': '812251', |                 'id': '812251', | ||||||
|                 'title': 'Tha Tour: Part 2 (Official Mixtape)' |                 'title': 'Tha Tour: Part 2 (Official Mixtape)', | ||||||
|             } |             }, | ||||||
|         }, |         }, | ||||||
|         # Album playlist ripped from fakeshoredrive with no metadata |         # Album playlist ripped from fakeshoredrive with no metadata | ||||||
|         { |         { | ||||||
| @@ -98,16 +97,16 @@ class AudiomackAlbumIE(InfoExtractor): | |||||||
|                     'id': '837576', |                     'id': '837576', | ||||||
|                     'ext': 'mp3', |                     'ext': 'mp3', | ||||||
|                     'uploader': 'Lil Herb a.k.a. G Herbo', |                     'uploader': 'Lil Herb a.k.a. G Herbo', | ||||||
|                 } |                 }, | ||||||
|             }, { |             }, { | ||||||
|                 'info_dict': { |                 'info_dict': { | ||||||
|                     'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)', |                     'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)', | ||||||
|                     'id': '837580', |                     'id': '837580', | ||||||
|                     'ext': 'mp3', |                     'ext': 'mp3', | ||||||
|                     'uploader': 'Lil Herb a.k.a. G Herbo', |                     'uploader': 'Lil Herb a.k.a. G Herbo', | ||||||
|                 } |                 }, | ||||||
|             }], |             }], | ||||||
|         } |         }, | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
| @@ -123,12 +122,12 @@ class AudiomackAlbumIE(InfoExtractor): | |||||||
|             api_response = self._download_json( |             api_response = self._download_json( | ||||||
|                 'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d' |                 'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d' | ||||||
|                 % (album_url_tag, track_no, time.time()), album_url_tag, |                 % (album_url_tag, track_no, time.time()), album_url_tag, | ||||||
|                 note='Querying song information (%d)' % (track_no + 1)) |                 note=f'Querying song information ({track_no + 1})') | ||||||
|  |  | ||||||
|             # Total failure, only occurs when url is totally wrong |             # Total failure, only occurs when url is totally wrong | ||||||
|             # Won't happen in middle of valid playlist (next case) |             # Won't happen in middle of valid playlist (next case) | ||||||
|             if 'url' not in api_response or 'error' in api_response: |             if 'url' not in api_response or 'error' in api_response: | ||||||
|                 raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url)) |                 raise ExtractorError(f'Invalid url for track {track_no} of album url {url}') | ||||||
|             # URL is good but song id doesn't exist - usually means end of playlist |             # URL is good but song id doesn't exist - usually means end of playlist | ||||||
|             elif not api_response['url']: |             elif not api_response['url']: | ||||||
|                 break |                 break | ||||||
| @@ -136,10 +135,10 @@ class AudiomackAlbumIE(InfoExtractor): | |||||||
|                 # Pull out the album metadata and add to result (if it exists) |                 # Pull out the album metadata and add to result (if it exists) | ||||||
|                 for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]: |                 for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]: | ||||||
|                     if apikey in api_response and resultkey not in result: |                     if apikey in api_response and resultkey not in result: | ||||||
|                         result[resultkey] = compat_str(api_response[apikey]) |                         result[resultkey] = str(api_response[apikey]) | ||||||
|                 song_id = url_basename(api_response['url']).rpartition('.')[0] |                 song_id = url_basename(api_response['url']).rpartition('.')[0] | ||||||
|                 result['entries'].append({ |                 result['entries'].append({ | ||||||
|                     'id': compat_str(api_response.get('id', song_id)), |                     'id': str(api_response.get('id', song_id)), | ||||||
|                     'uploader': api_response.get('artist'), |                     'uploader': api_response.get('artist'), | ||||||
|                     'title': api_response.get('title', song_id), |                     'title': api_response.get('title', song_id), | ||||||
|                     'url': api_response['url'], |                     'url': api_response['url'], | ||||||
|   | |||||||
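The AudiomackAlbumIE loop above probes numbered track endpoints until the API returns a well-formed response whose url is empty, which marks the end of the album; a malformed response anywhere means the album URL itself is wrong. A self-contained sketch of that probe-until-empty pattern, with a canned fetch() standing in for _download_json:

    import itertools

    def fetch(track_no):  # canned responses standing in for the real API call
        album = [{'url': 'https://cdn.example/1.mp3'}, {'url': 'https://cdn.example/2.mp3'}]
        return album[track_no] if track_no < len(album) else {'url': ''}

    entries = []
    for track_no in itertools.count():
        response = fetch(track_no)
        if 'url' not in response or 'error' in response:
            raise ValueError(f'invalid response for track {track_no}')
        if not response['url']:
            break  # valid response but empty url: past the last track
        entries.append(response['url'])

    print(entries)  # ['https://cdn.example/1.mp3', 'https://cdn.example/2.mp3']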
| @@ -1,7 +1,7 @@ | |||||||
| import random | import random | ||||||
|  | import urllib.parse | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import compat_str, compat_urllib_parse_unquote |  | ||||||
| from ..utils import ExtractorError, str_or_none, try_get | from ..utils import ExtractorError, str_or_none, try_get | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -15,13 +15,13 @@ class AudiusBaseIE(InfoExtractor): | |||||||
|             if response_data is not None: |             if response_data is not None: | ||||||
|                 return response_data |                 return response_data | ||||||
|             if len(response) == 1 and 'message' in response: |             if len(response) == 1 and 'message' in response: | ||||||
|                 raise ExtractorError('API error: %s' % response['message'], |                 raise ExtractorError('API error: {}'.format(response['message']), | ||||||
|                                      expected=True) |                                      expected=True) | ||||||
|         raise ExtractorError('Unexpected API response') |         raise ExtractorError('Unexpected API response') | ||||||
|  |  | ||||||
|     def _select_api_base(self): |     def _select_api_base(self): | ||||||
|         """Selecting one of the currently available API hosts""" |         """Selecting one of the currently available API hosts""" | ||||||
|         response = super(AudiusBaseIE, self)._download_json( |         response = super()._download_json( | ||||||
|             'https://api.audius.co/', None, |             'https://api.audius.co/', None, | ||||||
|             note='Requesting available API hosts', |             note='Requesting available API hosts', | ||||||
|             errnote='Unable to request available API hosts') |             errnote='Unable to request available API hosts') | ||||||
| @@ -41,8 +41,8 @@ class AudiusBaseIE(InfoExtractor): | |||||||
|         anything from this link, since the Audius API won't be able to resolve |         anything from this link, since the Audius API won't be able to resolve | ||||||
|         this url |         this url | ||||||
|         """ |         """ | ||||||
|         url = compat_urllib_parse_unquote(url) |         url = urllib.parse.unquote(url) | ||||||
|         title = compat_urllib_parse_unquote(title) |         title = urllib.parse.unquote(title) | ||||||
|         if '/' in title or '%2F' in title: |         if '/' in title or '%2F' in title: | ||||||
|             fixed_title = title.replace('/', '%5C').replace('%2F', '%5C') |             fixed_title = title.replace('/', '%5C').replace('%2F', '%5C') | ||||||
|             return url.replace(title, fixed_title) |             return url.replace(title, fixed_title) | ||||||
| @@ -54,19 +54,19 @@ class AudiusBaseIE(InfoExtractor): | |||||||
|         if self._API_BASE is None: |         if self._API_BASE is None: | ||||||
|             self._select_api_base() |             self._select_api_base() | ||||||
|         try: |         try: | ||||||
|             response = super(AudiusBaseIE, self)._download_json( |             response = super()._download_json( | ||||||
|                 '%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note, |                 f'{self._API_BASE}{self._API_V}{path}', item_id, note=note, | ||||||
|                 errnote=errnote, expected_status=expected_status) |                 errnote=errnote, expected_status=expected_status) | ||||||
|         except ExtractorError as exc: |         except ExtractorError as exc: | ||||||
|             # some of Audius API hosts may not work as expected and return HTML |             # some of Audius API hosts may not work as expected and return HTML | ||||||
|             if 'Failed to parse JSON' in compat_str(exc): |             if 'Failed to parse JSON' in str(exc): | ||||||
|                 raise ExtractorError('An error occurred while receiving data. Try again', |                 raise ExtractorError('An error occurred while receiving data. Try again', | ||||||
|                                      expected=True) |                                      expected=True) | ||||||
|             raise exc |             raise exc | ||||||
|         return self._get_response_data(response) |         return self._get_response_data(response) | ||||||
|  |  | ||||||
|     def _resolve_url(self, url, item_id): |     def _resolve_url(self, url, item_id): | ||||||
|         return self._api_request('/resolve?url=%s' % url, item_id, |         return self._api_request(f'/resolve?url={url}', item_id, | ||||||
|                                  expected_status=404) |                                  expected_status=404) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -91,7 +91,7 @@ class AudiusIE(AudiusBaseIE): | |||||||
|                 'view_count': int, |                 'view_count': int, | ||||||
|                 'like_count': int, |                 'like_count': int, | ||||||
|                 'repost_count': int, |                 'repost_count': int, | ||||||
|             } |             }, | ||||||
|         }, |         }, | ||||||
|         { |         { | ||||||
|             # Regular track |             # Regular track | ||||||
| @@ -109,14 +109,14 @@ class AudiusIE(AudiusBaseIE): | |||||||
|                 'view_count': int, |                 'view_count': int, | ||||||
|                 'like_count': int, |                 'like_count': int, | ||||||
|                 'repost_count': int, |                 'repost_count': int, | ||||||
|             } |             }, | ||||||
|         }, |         }, | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|     _ARTWORK_MAP = { |     _ARTWORK_MAP = { | ||||||
|         "150x150": 150, |         '150x150': 150, | ||||||
|         "480x480": 480, |         '480x480': 480, | ||||||
|         "1000x1000": 1000 |         '1000x1000': 1000, | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
| @@ -130,7 +130,7 @@ class AudiusIE(AudiusBaseIE): | |||||||
|         else:  # API link |         else:  # API link | ||||||
|             title = None |             title = None | ||||||
|             # uploader = None |             # uploader = None | ||||||
|             track_data = self._api_request('/tracks/%s' % track_id, track_id) |             track_data = self._api_request(f'/tracks/{track_id}', track_id) | ||||||
|  |  | ||||||
|         if not isinstance(track_data, dict): |         if not isinstance(track_data, dict): | ||||||
|             raise ExtractorError('Unexpected API response') |             raise ExtractorError('Unexpected API response') | ||||||
| @@ -144,7 +144,7 @@ class AudiusIE(AudiusBaseIE): | |||||||
|         if isinstance(artworks_data, dict): |         if isinstance(artworks_data, dict): | ||||||
|             for quality_key, thumbnail_url in artworks_data.items(): |             for quality_key, thumbnail_url in artworks_data.items(): | ||||||
|                 thumbnail = { |                 thumbnail = { | ||||||
|                     "url": thumbnail_url |                     'url': thumbnail_url, | ||||||
|                 } |                 } | ||||||
|                 quality_code = self._ARTWORK_MAP.get(quality_key) |                 quality_code = self._ARTWORK_MAP.get(quality_key) | ||||||
|                 if quality_code is not None: |                 if quality_code is not None: | ||||||
| @@ -154,12 +154,12 @@ class AudiusIE(AudiusBaseIE): | |||||||
|         return { |         return { | ||||||
|             'id': track_id, |             'id': track_id, | ||||||
|             'title': track_data.get('title', title), |             'title': track_data.get('title', title), | ||||||
|             'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id), |             'url': f'{self._API_BASE}/v1/tracks/{track_id}/stream', | ||||||
|             'ext': 'mp3', |             'ext': 'mp3', | ||||||
|             'description': track_data.get('description'), |             'description': track_data.get('description'), | ||||||
|             'duration': track_data.get('duration'), |             'duration': track_data.get('duration'), | ||||||
|             'track': track_data.get('title'), |             'track': track_data.get('title'), | ||||||
|             'artist': try_get(track_data, lambda x: x['user']['name'], compat_str), |             'artist': try_get(track_data, lambda x: x['user']['name'], str), | ||||||
|             'genre': track_data.get('genre'), |             'genre': track_data.get('genre'), | ||||||
|             'thumbnails': thumbnails, |             'thumbnails': thumbnails, | ||||||
|             'view_count': track_data.get('play_count'), |             'view_count': track_data.get('play_count'), | ||||||
| @@ -175,11 +175,11 @@ class AudiusTrackIE(AudiusIE):  # XXX: Do not subclass from concrete IE | |||||||
|     _TESTS = [ |     _TESTS = [ | ||||||
|         { |         { | ||||||
|             'url': 'audius:9RWlo', |             'url': 'audius:9RWlo', | ||||||
|             'only_matching': True |             'only_matching': True, | ||||||
|         }, |         }, | ||||||
|         { |         { | ||||||
|             'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo', |             'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo', | ||||||
|             'only_matching': True |             'only_matching': True, | ||||||
|         }, |         }, | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
| @@ -207,7 +207,7 @@ class AudiusPlaylistIE(AudiusBaseIE): | |||||||
|             if not track_id: |             if not track_id: | ||||||
|                 raise ExtractorError('Unable to get track ID from playlist') |                 raise ExtractorError('Unable to get track ID from playlist') | ||||||
|             entries.append(self.url_result( |             entries.append(self.url_result( | ||||||
|                 'audius:%s' % track_id, |                 f'audius:{track_id}', | ||||||
|                 ie=AudiusTrackIE.ie_key(), video_id=track_id)) |                 ie=AudiusTrackIE.ie_key(), video_id=track_id)) | ||||||
|         return entries |         return entries | ||||||
|  |  | ||||||
| @@ -231,7 +231,7 @@ class AudiusPlaylistIE(AudiusBaseIE): | |||||||
|             raise ExtractorError('Unable to get playlist ID') |             raise ExtractorError('Unable to get playlist ID') | ||||||
|  |  | ||||||
|         playlist_tracks = self._api_request( |         playlist_tracks = self._api_request( | ||||||
|             '/playlists/%s/tracks' % playlist_id, |             f'/playlists/{playlist_id}/tracks', | ||||||
|             title, note='Downloading playlist tracks metadata', |             title, note='Downloading playlist tracks metadata', | ||||||
|             errnote='Unable to download playlist tracks metadata') |             errnote='Unable to download playlist tracks metadata') | ||||||
|         if not isinstance(playlist_tracks, list): |         if not isinstance(playlist_tracks, list): | ||||||
| @@ -267,5 +267,5 @@ class AudiusProfileIE(AudiusPlaylistIE):  # XXX: Do not subclass from concrete I | |||||||
|         profile_audius_id = _profile_data[0]['id'] |         profile_audius_id = _profile_data[0]['id'] | ||||||
|         profile_bio = _profile_data[0].get('bio') |         profile_bio = _profile_data[0].get('bio') | ||||||
|  |  | ||||||
|         api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id) |         api_call = self._api_request(f'/full/users/handle/{profile_id}/tracks', profile_id) | ||||||
|         return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio) |         return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio) | ||||||
|   | |||||||
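The _prepare_url docstring above describes the quirk being worked around: a '/' inside a track title would be read by the Audius resolver as a path separator, so after unquoting, any slash in the title is re-escaped as %5C before resolving. Traced on an assumed example URL (not a real Audius track):

    import urllib.parse

    url = urllib.parse.unquote('https://audius.co/artist/some%2Ftitle-123')
    title = urllib.parse.unquote('some%2Ftitle-123')

    if '/' in title or '%2F' in title:
        fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
        url = url.replace(title, fixed_title)

    print(url)  # https://audius.co/artist/some%5Ctitle-123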
| @@ -1,10 +1,7 @@ | |||||||
| import base64 | import base64 | ||||||
|  | import urllib.parse | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import ( |  | ||||||
|     compat_urllib_parse_urlencode, |  | ||||||
|     compat_str, |  | ||||||
| ) |  | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     format_field, |     format_field, | ||||||
|     int_or_none, |     int_or_none, | ||||||
| @@ -22,14 +19,14 @@ class AWAANIE(InfoExtractor): | |||||||
|         show_id, video_id, season_id = self._match_valid_url(url).groups() |         show_id, video_id, season_id = self._match_valid_url(url).groups() | ||||||
|         if video_id and int(video_id) > 0: |         if video_id and int(video_id) > 0: | ||||||
|             return self.url_result( |             return self.url_result( | ||||||
|                 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo') |                 f'http://awaan.ae/media/{video_id}', 'AWAANVideo') | ||||||
|         elif season_id and int(season_id) > 0: |         elif season_id and int(season_id) > 0: | ||||||
|             return self.url_result(smuggle_url( |             return self.url_result(smuggle_url( | ||||||
|                 'http://awaan.ae/program/season/%s' % season_id, |                 f'http://awaan.ae/program/season/{season_id}', | ||||||
|                 {'show_id': show_id}), 'AWAANSeason') |                 {'show_id': show_id}), 'AWAANSeason') | ||||||
|         else: |         else: | ||||||
|             return self.url_result( |             return self.url_result( | ||||||
|                 'http://awaan.ae/program/%s' % show_id, 'AWAANSeason') |                 f'http://awaan.ae/program/{show_id}', 'AWAANSeason') | ||||||
|  |  | ||||||
|  |  | ||||||
| class AWAANBaseIE(InfoExtractor): | class AWAANBaseIE(InfoExtractor): | ||||||
| @@ -75,11 +72,11 @@ class AWAANVideoIE(AWAANBaseIE): | |||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|  |  | ||||||
|         video_data = self._download_json( |         video_data = self._download_json( | ||||||
|             'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, |             f'http://admin.mangomolo.com/analytics/index.php/plus/video?id={video_id}', | ||||||
|             video_id, headers={'Origin': 'http://awaan.ae'}) |             video_id, headers={'Origin': 'http://awaan.ae'}) | ||||||
|         info = self._parse_video_data(video_data, video_id, False) |         info = self._parse_video_data(video_data, video_id, False) | ||||||
|  |  | ||||||
|         embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({ |         embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + urllib.parse.urlencode({ | ||||||
|             'id': video_data['id'], |             'id': video_data['id'], | ||||||
|             'user_id': video_data['user_id'], |             'user_id': video_data['user_id'], | ||||||
|             'signature': video_data['signature'], |             'signature': video_data['signature'], | ||||||
| @@ -117,11 +114,11 @@ class AWAANLiveIE(AWAANBaseIE): | |||||||
|         channel_id = self._match_id(url) |         channel_id = self._match_id(url) | ||||||
|  |  | ||||||
|         channel_data = self._download_json( |         channel_data = self._download_json( | ||||||
|             'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id, |             f'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id={channel_id}', | ||||||
|             channel_id, headers={'Origin': 'http://awaan.ae'}) |             channel_id, headers={'Origin': 'http://awaan.ae'}) | ||||||
|         info = self._parse_video_data(channel_data, channel_id, True) |         info = self._parse_video_data(channel_data, channel_id, True) | ||||||
|  |  | ||||||
|         embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({ |         embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + urllib.parse.urlencode({ | ||||||
|             'id': base64.b64encode(channel_data['user_id'].encode()).decode(), |             'id': base64.b64encode(channel_data['user_id'].encode()).decode(), | ||||||
|             'channelid': base64.b64encode(channel_data['id'].encode()).decode(), |             'channelid': base64.b64encode(channel_data['id'].encode()).decode(), | ||||||
|             'signature': channel_data['signature'], |             'signature': channel_data['signature'], | ||||||
| @@ -159,7 +156,7 @@ class AWAANSeasonIE(InfoExtractor): | |||||||
|             show_id = smuggled_data.get('show_id') |             show_id = smuggled_data.get('show_id') | ||||||
|             if show_id is None: |             if show_id is None: | ||||||
|                 season = self._download_json( |                 season = self._download_json( | ||||||
|                     'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id, |                     f'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id={season_id}', | ||||||
|                     season_id, headers={'Origin': 'http://awaan.ae'}) |                     season_id, headers={'Origin': 'http://awaan.ae'}) | ||||||
|                 show_id = season['id'] |                 show_id = season['id'] | ||||||
|         data['show_id'] = show_id |         data['show_id'] = show_id | ||||||
| @@ -167,7 +164,7 @@ class AWAANSeasonIE(InfoExtractor): | |||||||
|             'http://admin.mangomolo.com/analytics/index.php/plus/show', |             'http://admin.mangomolo.com/analytics/index.php/plus/show', | ||||||
|             show_id, data=urlencode_postdata(data), headers={ |             show_id, data=urlencode_postdata(data), headers={ | ||||||
|                 'Origin': 'http://awaan.ae', |                 'Origin': 'http://awaan.ae', | ||||||
|                 'Content-Type': 'application/x-www-form-urlencoded' |                 'Content-Type': 'application/x-www-form-urlencoded', | ||||||
|             }) |             }) | ||||||
|         if not season_id: |         if not season_id: | ||||||
|             season_id = show['default_season'] |             season_id = show['default_season'] | ||||||
| @@ -177,8 +174,8 @@ class AWAANSeasonIE(InfoExtractor): | |||||||
|  |  | ||||||
|                 entries = [] |                 entries = [] | ||||||
|                 for video in show['videos']: |                 for video in show['videos']: | ||||||
|                     video_id = compat_str(video['id']) |                     video_id = str(video['id']) | ||||||
|                     entries.append(self.url_result( |                     entries.append(self.url_result( | ||||||
|                         'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id)) |                         f'http://awaan.ae/media/{video_id}', 'AWAANVideo', video_id)) | ||||||
|  |  | ||||||
|                 return self.playlist_result(entries, season_id, title) |                 return self.playlist_result(entries, season_id, title) | ||||||
|   | |||||||
| @@ -1,9 +1,9 @@ | |||||||
| import datetime | import datetime as dt | ||||||
| import hashlib | import hashlib | ||||||
| import hmac | import hmac | ||||||
|  | import urllib.parse | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import compat_urllib_parse_urlencode |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class AWSIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor | class AWSIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor | ||||||
| @@ -12,26 +12,26 @@ class AWSIE(InfoExtractor):  # XXX: Conventionally, base classes should end with | |||||||
|  |  | ||||||
|     def _aws_execute_api(self, aws_dict, video_id, query=None): |     def _aws_execute_api(self, aws_dict, video_id, query=None): | ||||||
|         query = query or {} |         query = query or {} | ||||||
|         amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') |         amz_date = dt.datetime.now(dt.timezone.utc).strftime('%Y%m%dT%H%M%SZ') | ||||||
|         date = amz_date[:8] |         date = amz_date[:8] | ||||||
|         headers = { |         headers = { | ||||||
|             'Accept': 'application/json', |             'Accept': 'application/json', | ||||||
|             'Host': self._AWS_PROXY_HOST, |             'Host': self._AWS_PROXY_HOST, | ||||||
|             'X-Amz-Date': amz_date, |             'X-Amz-Date': amz_date, | ||||||
|             'X-Api-Key': self._AWS_API_KEY |             'X-Api-Key': self._AWS_API_KEY, | ||||||
|         } |         } | ||||||
|         session_token = aws_dict.get('session_token') |         session_token = aws_dict.get('session_token') | ||||||
|         if session_token: |         if session_token: | ||||||
|             headers['X-Amz-Security-Token'] = session_token |             headers['X-Amz-Security-Token'] = session_token | ||||||
|  |  | ||||||
|         def aws_hash(s): |         def aws_hash(s): | ||||||
|             return hashlib.sha256(s.encode('utf-8')).hexdigest() |             return hashlib.sha256(s.encode()).hexdigest() | ||||||
|  |  | ||||||
|         # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html |         # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html | ||||||
|         canonical_querystring = compat_urllib_parse_urlencode(query) |         canonical_querystring = urllib.parse.urlencode(query) | ||||||
|         canonical_headers = '' |         canonical_headers = '' | ||||||
|         for header_name, header_value in sorted(headers.items()): |         for header_name, header_value in sorted(headers.items()): | ||||||
|             canonical_headers += '%s:%s\n' % (header_name.lower(), header_value) |             canonical_headers += f'{header_name.lower()}:{header_value}\n' | ||||||
|         signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())]) |         signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())]) | ||||||
|         canonical_request = '\n'.join([ |         canonical_request = '\n'.join([ | ||||||
|             'GET', |             'GET', | ||||||
| @@ -39,7 +39,7 @@ class AWSIE(InfoExtractor):  # XXX: Conventionally, base classes should end with | |||||||
|             canonical_querystring, |             canonical_querystring, | ||||||
|             canonical_headers, |             canonical_headers, | ||||||
|             signed_headers, |             signed_headers, | ||||||
|             aws_hash('') |             aws_hash(''), | ||||||
|         ]) |         ]) | ||||||
|  |  | ||||||
|         # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html |         # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html | ||||||
| @@ -49,7 +49,7 @@ class AWSIE(InfoExtractor):  # XXX: Conventionally, base classes should end with | |||||||
|  |  | ||||||
|         # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html |         # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html | ||||||
|         def aws_hmac(key, msg): |         def aws_hmac(key, msg): | ||||||
|             return hmac.new(key, msg.encode('utf-8'), hashlib.sha256) |             return hmac.new(key, msg.encode(), hashlib.sha256) | ||||||
|  |  | ||||||
|         def aws_hmac_digest(key, msg): |         def aws_hmac_digest(key, msg): | ||||||
|             return aws_hmac(key, msg).digest() |             return aws_hmac(key, msg).digest() | ||||||
| @@ -57,7 +57,7 @@ class AWSIE(InfoExtractor):  # XXX: Conventionally, base classes should end with | |||||||
|         def aws_hmac_hexdigest(key, msg): |         def aws_hmac_hexdigest(key, msg): | ||||||
|             return aws_hmac(key, msg).hexdigest() |             return aws_hmac(key, msg).hexdigest() | ||||||
|  |  | ||||||
|         k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8') |         k_signing = ('AWS4' + aws_dict['secret_key']).encode() | ||||||
|         for value in credential_scope_list: |         for value in credential_scope_list: | ||||||
|             k_signing = aws_hmac_digest(k_signing, value) |             k_signing = aws_hmac_digest(k_signing, value) | ||||||
|  |  | ||||||
| @@ -65,11 +65,11 @@ class AWSIE(InfoExtractor):  # XXX: Conventionally, base classes should end with | |||||||
|  |  | ||||||
|         # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html |         # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html | ||||||
|         headers['Authorization'] = ', '.join([ |         headers['Authorization'] = ', '.join([ | ||||||
|             '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope), |             '{} Credential={}/{}'.format(self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope), | ||||||
|             'SignedHeaders=%s' % signed_headers, |             f'SignedHeaders={signed_headers}', | ||||||
|             'Signature=%s' % signature, |             f'Signature={signature}', | ||||||
|         ]) |         ]) | ||||||
|  |  | ||||||
|         return self._download_json( |         return self._download_json( | ||||||
|             'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''), |             'https://{}{}{}'.format(self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''), | ||||||
|             video_id, headers=headers) |             video_id, headers=headers) | ||||||
|   | |||||||
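The AWSIE diff above is largely a style migration (f-strings, import datetime as dt, plain str.encode()); the one behavioral note is that datetime.datetime.utcnow() is deprecated since Python 3.12, hence the switch to the timezone-aware dt.datetime.now(dt.timezone.utc). The signing logic it touches is standard AWS Signature Version 4: the signing key is derived by chaining HMAC-SHA256 over the credential scope, exactly as in the k_signing loop above. A standalone sketch with placeholder credentials and string-to-sign:

    import hashlib
    import hmac

    def hmac_sha256(key, msg):
        return hmac.new(key, msg.encode(), hashlib.sha256).digest()

    secret_key = 'EXAMPLE-SECRET-KEY'  # placeholder credential
    credential_scope = ['20240101', 'us-east-1', 'execute-api', 'aws4_request']
    string_to_sign = 'AWS4-HMAC-SHA256\n20240101T000000Z\n...'  # built from the canonical request

    key = ('AWS4' + secret_key).encode()
    for part in credential_scope:  # kDate -> kRegion -> kService -> kSigning
        key = hmac_sha256(key, part)

    signature = hmac.new(key, string_to_sign.encode(), hashlib.sha256).hexdigest()
    print(signature)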