diff --git a/plugins/archiver/manifest.json b/plugins/archiver/manifest.json index 5d9fc1b..c7b2604 100644 --- a/plugins/archiver/manifest.json +++ b/plugins/archiver/manifest.json @@ -1,12 +1,10 @@ { - "manifest": { - "name": "Archiver", - "author": "ITDominator", - "version": "0.0.1", - "support": "", - "requests": { - "ui_target": "context_menu_plugins", - "pass_fm_events": "true" - } + "name": "Archiver", + "author": "ITDominator", + "version": "0.0.1", + "support": "", + "requests": { + "ui_target": "context_menu_plugins", + "pass_fm_events": true } } diff --git a/plugins/disk_usage/manifest.json b/plugins/disk_usage/manifest.json index 8bcb387..3032da1 100644 --- a/plugins/disk_usage/manifest.json +++ b/plugins/disk_usage/manifest.json @@ -1,12 +1,10 @@ { - "manifest": { - "name": "Disk Usage", - "author": "ITDominator", - "version": "0.0.1", - "support": "", - "requests": { - "ui_target": "context_menu_plugins", - "pass_fm_events": "true" - } + "name": "Disk Usage", + "author": "ITDominator", + "version": "0.0.1", + "support": "", + "requests": { + "ui_target": "context_menu_plugins", + "pass_fm_events": true } } diff --git a/plugins/favorites/manifest.json b/plugins/favorites/manifest.json index ce8a2f3..d40bc94 100644 --- a/plugins/favorites/manifest.json +++ b/plugins/favorites/manifest.json @@ -1,14 +1,12 @@ { - "manifest": { - "name": "Favorites", - "author": "ITDominator", - "version": "0.0.1", - "support": "", - "requests": { - "ui_target": "main_menu_bttn_box_bar", - "pass_fm_events": "true", - "pass_ui_objects": ["path_entry"], - "bind_keys": ["Favorites||show_favorites_menu:f"] - } + "name": "Favorites", + "author": "ITDominator", + "version": "0.0.1", + "support": "", + "requests": { + "ui_target": "main_menu_bttn_box_bar", + "pass_fm_events": true, + "pass_ui_objects": ["path_entry"], + "bind_keys": ["Favorites||show_favorites_menu:f"] } } diff --git a/plugins/file_properties/manifest.json b/plugins/file_properties/manifest.json index 7ba667d..f21348c 100644 --- a/plugins/file_properties/manifest.json +++ b/plugins/file_properties/manifest.json @@ -1,12 +1,10 @@ { - "manifest": { - "name": "Properties", - "author": "ITDominator", - "version": "0.0.1", - "support": "", - "requests": { - "ui_target": "context_menu", - "pass_fm_events": "true" - } + "name": "Properties", + "author": "ITDominator", + "version": "0.0.1", + "support": "", + "requests": { + "ui_target": "context_menu", + "pass_fm_events": true } } diff --git a/plugins/git_clone/manifest.json b/plugins/git_clone/manifest.json index ff04e03..ec7b23e 100644 --- a/plugins/git_clone/manifest.json +++ b/plugins/git_clone/manifest.json @@ -1,12 +1,10 @@ { - "manifest": { - "name": "Git Clone", - "author": "ITDominator", - "version": "0.0.1", - "support": "", - "requests": { - "ui_target": "plugin_control_list", - "pass_fm_events": "true" - } + "name": "Git Clone", + "author": "ITDominator", + "version": "0.0.1", + "support": "", + "requests": { + "ui_target": "plugin_control_list", + "pass_fm_events": true } } diff --git a/plugins/movie_tv_info/manifest.json b/plugins/movie_tv_info/manifest.json index 2bff10f..b9fa56e 100644 --- a/plugins/movie_tv_info/manifest.json +++ b/plugins/movie_tv_info/manifest.json @@ -1,12 +1,10 @@ { - "manifest": { - "name": "Movie/TV Info", - "author": "ITDominator", - "version": "0.0.1", - "support": "", - "requests": { - "ui_target": "context_menu_plugins", - "pass_fm_events": "true" - } + "name": "Movie/TV Info", + "author": "ITDominator", + "version": "0.0.1", + "support": "", + 
"requests": { + "ui_target": "context_menu_plugins", + "pass_fm_events": true } } diff --git a/plugins/py_run/manifest.json b/plugins/py_run/manifest.json index 8597569..30530f2 100644 --- a/plugins/py_run/manifest.json +++ b/plugins/py_run/manifest.json @@ -1,13 +1,11 @@ { - "manifest": { - "name": "PyRun", - "author": "ITDominator", - "version": "0.0.1", - "support": "", - "requests": { - "ui_target": "plugin_control_list", - "pass_fm_events": "true", - "bind_keys": ["PyRun||send_message:r"] - } + "name": "PyRun", + "author": "ITDominator", + "version": "0.0.1", + "support": "", + "requests": { + "ui_target": "plugin_control_list", + "pass_fm_events": true, + "bind_keys": ["PyRun||send_message:r"] } } diff --git a/plugins/searcher/manifest.json b/plugins/searcher/manifest.json index 6b09f7d..8735939 100644 --- a/plugins/searcher/manifest.json +++ b/plugins/searcher/manifest.json @@ -1,13 +1,11 @@ { - "manifest": { - "name": "Search", - "author": "ITDominator", - "version": "0.0.1", - "support": "", - "requests": { - "ui_target": "context_menu", - "pass_fm_events": "true", - "bind_keys": ["Search||show_search_page:s"] - } + "name": "Search", + "author": "ITDominator", + "version": "0.0.1", + "support": "", + "requests": { + "ui_target": "context_menu", + "pass_fm_events": true, + "bind_keys": ["Search||show_search_page:s"] } } diff --git a/plugins/searcher/mixins/file_search_mixin.py b/plugins/searcher/mixins/file_search_mixin.py index 21ed0bf..323ef99 100644 --- a/plugins/searcher/mixins/file_search_mixin.py +++ b/plugins/searcher/mixins/file_search_mixin.py @@ -18,22 +18,22 @@ from ..widgets.file_preview_widget import FilePreviewWidget # NOTE: Threads WILL NOT die with parent's destruction. -def threaded(fn): - def wrapper(*args, **kwargs): - threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=False).start() - - return wrapper - +# def threaded(fn): +# def wrapper(*args, **kwargs): +# threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=False).start() +# +# return wrapper +# # NOTE: Threads WILL die with parent's destruction. -def daemon_threaded(fn): - def wrapper(*args, **kwargs): - threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=True).start() - - return wrapper +# def daemon_threaded(fn): +# def wrapper(*args, **kwargs): +# threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=True).start() +# +# return wrapper class FileSearchMixin: - def _run_find_file_query(self, widget=None, eve=None): + def _run_find_file_query(self, widget = None, eve = None): self._queue_search = True if not self._search_watcher_running: @@ -52,22 +52,22 @@ class FileSearchMixin: # Need to implement this over the threaded stuffs.... 
- - def cancel_timer(self): - if self.timer: - self.timer.cancel() - GLib.idle_remove_by_data(None) - - def delay_search_Glib(self): - GLib.idle_add(self._do_highlight) - - def delay_search(self): - wait_time = self.search_time / len(self.search_text) - wait_time = max(wait_time, 0.05) - - self.timer = threading.Timer(wait_time, self.delay_search_Glib) - self.timer.daemon = True - self.timer.start() +# +# def cancel_timer(self): +# if self.timer: +# self.timer.cancel() +# GLib.idle_remove_by_data(None) +# +# def delay_search_glib(self, query): +# GLib.idle_add(self._exec_find_file_query, *(query,)) +# +# def delay_search(self): +# wait_time = self.search_time / len(self.search_text) +# wait_time = max(wait_time, 0.05) +# +# self.timer = threading.Timer(wait_time, self.delay_search_glib, *(query,)) +# self.timer.daemon = True +# self.timer.start() diff --git a/plugins/template/manifest.json b/plugins/template/manifest.json index 10483da..5094f8d 100644 --- a/plugins/template/manifest.json +++ b/plugins/template/manifest.json @@ -1,14 +1,12 @@ { - "manifest": { - "name": "Example Plugin", - "author": "John Doe", - "version": "0.0.1", - "support": "", - "requests": { - "ui_target": "plugin_control_list", - "pass_fm_events": "true", - "bind_keys": ["Example Plugin||send_message:f"] - }, - "pre_launch": "false" - } + "name": "Example Plugin", + "author": "John Doe", + "version": "0.0.1", + "support": "", + "requests": { + "ui_target": "plugin_control_list", + "pass_fm_events": true, + "bind_keys": ["Example Plugin||send_message:f"] + }, + "pre_launch": false } diff --git a/plugins/thumbnailer/manifest.json b/plugins/thumbnailer/manifest.json index 576316c..5907ad1 100644 --- a/plugins/thumbnailer/manifest.json +++ b/plugins/thumbnailer/manifest.json @@ -1,12 +1,10 @@ { - "manifest": { - "name": "Thumbnailer", - "author": "ITDominator", - "version": "0.0.1", - "support": "", - "requests": { - "pass_fm_events": "true" - }, - "pre_launch": "true" + "name": "Thumbnailer", + "author": "ITDominator", + "version": "0.0.1", + "support": "", + "pre_launch": true, + "requests": { + "pass_fm_events": true } } diff --git a/plugins/translate/manifest.json b/plugins/translate/manifest.json index 91caabe..ca6c00e 100644 --- a/plugins/translate/manifest.json +++ b/plugins/translate/manifest.json @@ -1,12 +1,10 @@ { - "manifest": { "name": "Translate", "author": "ITDominator", "version": "0.0.1", "support": "", "requests": { "ui_target": "plugin_control_list", - "pass_fm_events": "true" + "pass_fm_events": true } - } } diff --git a/plugins/trasher/manifest.json b/plugins/trasher/manifest.json index 0657d65..db09913 100644 --- a/plugins/trasher/manifest.json +++ b/plugins/trasher/manifest.json @@ -1,16 +1,14 @@ { - "manifest": { - "name": "Trasher", - "author": "ITDominator", - "version": "0.0.1", - "support": "", - "requests": { - "ui_target": "context_menu", - "pass_fm_events": "true", - "bind_keys": [ - "Trasher||delete_files:Delete", - "Trasher||trash_files:d" - ] - } + "name": "Trasher", + "author": "ITDominator", + "version": "0.0.1", + "support": "", + "requests": { + "ui_target": "context_menu", + "pass_fm_events": true, + "bind_keys": [ + "Trasher||delete_files:Delete", + "Trasher||trash_files:d" + ] } } diff --git a/plugins/vod_thumbnailer/manifest.json b/plugins/vod_thumbnailer/manifest.json index 21523f0..74790c1 100644 --- a/plugins/vod_thumbnailer/manifest.json +++ b/plugins/vod_thumbnailer/manifest.json @@ -1,12 +1,10 @@ { - "manifest": { - "name": "VOD Thumbnailer", - "author": "ITDominator", - 
"version": "0.0.1", - "support": "", - "requests": { - "ui_target": "context_menu_plugins", - "pass_fm_events": "true" - } + "name": "VOD Thumbnailer", + "author": "ITDominator", + "version": "0.0.1", + "support": "", + "requests": { + "ui_target": "context_menu_plugins", + "pass_fm_events": true } } diff --git a/plugins/youtube_download/download.sh b/plugins/youtube_download/download.sh index af03894..9ec3b68 100755 --- a/plugins/youtube_download/download.sh +++ b/plugins/youtube_download/download.sh @@ -28,7 +28,7 @@ function main() { # Note: Download video python "${_SPATH}/yt_dlp/__main__.py" \ - -f "bestvideo[height<=1080][ext=mp4][vcodec^=avc]+bestaudio[ext=m4a]/best[ext=mp4]/best" \ + -f "bestvideo[height<=1080][ext=mp4][vcodec^=av]+bestaudio[ext=m4a]/best[ext=mp4]/best" \ --cookies "${_SPATH}/../../cookies.txt" --write-sub --embed-sub --sub-langs en \ -o "${_STARGET}/%(title)s.%(ext)s" "${LINK}" diff --git a/plugins/youtube_download/manifest.json b/plugins/youtube_download/manifest.json index 554ce1e..884cdb9 100644 --- a/plugins/youtube_download/manifest.json +++ b/plugins/youtube_download/manifest.json @@ -1,12 +1,10 @@ { - "manifest": { - "name": "Youtube Download", - "author": "ITDominator", - "version": "0.0.1", - "support": "", - "requests": { - "ui_target": "plugin_control_list", - "pass_fm_events": "true" - } + "name": "Youtube Download", + "author": "ITDominator", + "version": "0.0.1", + "support": "", + "requests": { + "ui_target": "plugin_control_list", + "pass_fm_events": true } } diff --git a/plugins/youtube_download/yt_dlp/YoutubeDL.py b/plugins/youtube_download/yt_dlp/YoutubeDL.py index 63e6e11..ef42ba6 100644 --- a/plugins/youtube_download/yt_dlp/YoutubeDL.py +++ b/plugins/youtube_download/yt_dlp/YoutubeDL.py @@ -36,6 +36,7 @@ from .extractor.openload import PhantomJSwrapper from .globals import ( IN_CLI, LAZY_EXTRACTORS, + WINDOWS_VT_MODE, plugin_ies, plugin_ies_overrides, plugin_pps, @@ -52,7 +53,7 @@ from .networking.exceptions import ( SSLError, network_exceptions, ) -from .networking.impersonate import ImpersonateRequestHandler +from .networking.impersonate import ImpersonateRequestHandler, ImpersonateTarget from .plugins import directories as plugin_directories, load_all_plugins from .postprocessor import ( EmbedThumbnailPP, @@ -72,6 +73,7 @@ from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping from .update import ( REPOSITORY, _get_system_deprecation, + _get_outdated_warning, _make_label, current_git_head, detect_variant, @@ -302,7 +304,6 @@ class YoutubeDL: clean_infojson: Remove internal metadata from the infojson getcomments: Extract video comments. This will not be written to disk unless writeinfojson is also given - writeannotations: Write the video annotations to a .annotations.xml file writethumbnail: Write the thumbnail image to a file allow_playlist_files: Whether to write playlists' description, infojson etc also to disk when using the 'write*' options @@ -482,7 +483,8 @@ class YoutubeDL: The following options do not work when used through the API: filename, abort-on-error, multistreams, no-live-chat, format-sort, no-clean-infojson, no-playlist-metafiles, - no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort. + no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort, + mtime-by-default. Refer __init__.py for their implementation progress_template: Dictionary of templates for progress outputs. 
Allowed keys are 'download', 'postprocess', @@ -490,7 +492,7 @@ class YoutubeDL: The template is mapped on a dictionary with keys 'progress' and 'info' retry_sleep_functions: Dictionary of functions that takes the number of attempts as argument and returns the time to sleep in seconds. - Allowed keys are 'http', 'fragment', 'file_access' + Allowed keys are 'http', 'fragment', 'file_access', 'extractor' download_ranges: A callback function that gets called for every video with the signature (info_dict, ydl) -> Iterable[Section]. Only the returned sections will be downloaded. @@ -502,16 +504,17 @@ class YoutubeDL: force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts noprogress: Do not print the progress bar live_from_start: Whether to download livestreams videos from the start + warn_when_outdated: Emit a warning if the yt-dlp version is older than 90 days The following parameters are not used by YoutubeDL itself, they are used by the downloader (see yt_dlp/downloader/common.py): nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize, max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries, - continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size, - external_downloader_args, concurrent_fragment_downloads, progress_delta. + continuedl, hls_use_mpegts, http_chunk_size, external_downloader_args, + concurrent_fragment_downloads, progress_delta. The following options are used by the post processors: - ffmpeg_location: Location of the ffmpeg/avconv binary; either the path + ffmpeg_location: Location of the ffmpeg binary; either the path to the binary or its containing directory. postprocessor_args: A dictionary of postprocessor/executable keys (in lower case) and a list of additional command-line arguments for the @@ -528,6 +531,7 @@ class YoutubeDL: discontinuities such as ad breaks (default: False) extractor_args: A dictionary of arguments to be passed to the extractors. See "EXTRACTOR ARGUMENTS" for details. + Argument values must always be a list of string(s). E.g. {'youtube': {'skip': ['dash', 'hls']}} mark_watched: Mark videos watched (even with --simulate). Only for YouTube @@ -561,32 +565,14 @@ class YoutubeDL: allsubtitles: - Use subtitleslangs = ['all'] Downloads all the subtitles of the video (requires writesubtitles or writeautomaticsub) - include_ads: - Doesn't work - Download ads as well - call_home: - Not implemented - Boolean, true if we are allowed to contact the - yt-dlp servers for debugging. post_hooks: - Register a custom postprocessor A list of functions that get called as the final step for each video file, after all postprocessors have been called. The filename will be passed as the only argument. hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}. - Use the native HLS downloader instead of ffmpeg/avconv - if True, otherwise use ffmpeg/avconv if False, otherwise + Use the native HLS downloader instead of ffmpeg + if True, otherwise use ffmpeg if False, otherwise use downloader suggested by extractor if None. - prefer_ffmpeg: - avconv support is deprecated - If False, use avconv instead of ffmpeg if both are available, - otherwise prefer ffmpeg. - youtube_include_dash_manifest: - Use extractor_args - If True (default), DASH manifests and related - data will be downloaded and processed by extractor. - You can reduce network I/O by disabling it if you don't - care about DASH. 
(only for youtube) - youtube_include_hls_manifest: - Use extractor_args - If True (default), HLS manifests and related - data will be downloaded and processed by extractor. - You can reduce network I/O by disabling it if you don't - care about HLS. (only for youtube) no_color: Same as `color='no_color'` no_overwrites: Same as `overwrites=False` """ @@ -594,7 +580,7 @@ class YoutubeDL: _NUMERIC_FIELDS = { 'width', 'height', 'asr', 'audio_channels', 'fps', 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx', - 'timestamp', 'release_timestamp', + 'timestamp', 'release_timestamp', 'available_at', 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', 'average_rating', 'comment_count', 'age_limit', 'start_time', 'end_time', @@ -604,13 +590,13 @@ class YoutubeDL: _format_fields = { # NB: Keep in sync with the docstring of extractor/common.py - 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note', + 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note', 'available_at', 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels', 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns', 'hls_media_playlist_data', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data', 'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies', 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'extra_param_to_key_url', - 'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', + 'hls_aes', 'downloader_options', 'impersonate', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time', } _deprecated_multivalue_fields = { @@ -640,6 +626,7 @@ class YoutubeDL: self._printed_messages = set() self._first_webpage_request = True self._post_hooks = [] + self._close_hooks = [] self._progress_hooks = [] self._postprocessor_hooks = [] self._download_retcode = 0 @@ -699,6 +686,9 @@ class YoutubeDL: system_deprecation = _get_system_deprecation() if system_deprecation: self.deprecated_feature(system_deprecation.replace('\n', '\n ')) + elif self.params.get('warn_when_outdated'): + if outdated_warning := _get_outdated_warning(): + self.report_warning(outdated_warning) if self.params.get('allow_unplayable_formats'): self.report_warning( @@ -741,12 +731,6 @@ class YoutubeDL: return True return False - if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'): - if self.params.get('geo_verification_proxy') is None: - self.params['geo_verification_proxy'] = self.params['cn_verification_proxy'] - - check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"') - check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"') for msg in self.params.get('_warnings', []): @@ -908,6 +892,11 @@ class YoutubeDL: """Add the post hook""" self._post_hooks.append(ph) + def add_close_hook(self, ch): + """Add a close hook, called when YoutubeDL.close() is called""" + assert callable(ch), 'Close hook must be callable' + self._close_hooks.append(ch) + def add_progress_hook(self, ph): """Add the download progress hook""" self._progress_hooks.append(ph) @@ -1016,6 +1005,9 @@ class YoutubeDL: self._request_director.close() del 
self._request_director + for close_hook in self._close_hooks: + close_hook() + def trouble(self, message=None, tb=None, is_error=True): """Determine action to take when a download problem appears. @@ -2015,7 +2007,7 @@ class YoutubeDL: else: entries = resolved_entries = list(entries) n_entries = len(resolved_entries) - ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], []) + ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries, strict=True)) or ([], []) if not ie_result.get('playlist_count'): # Better to do this after potentially exhausting entries ie_result['playlist_count'] = all_entries.get_full_count() @@ -2185,7 +2177,7 @@ class YoutubeDL: return op(actual_value, comparison_value) return _filter - def _check_formats(self, formats): + def _check_formats(self, formats, warning=True): for f in formats: working = f.get('__working') if working is not None: @@ -2198,6 +2190,9 @@ class YoutubeDL: continue temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None) temp_file.close() + # If FragmentFD fails when testing a fragment, it will wrongly set a non-zero return code. + # Save the actual return code for later. See https://github.com/yt-dlp/yt-dlp/issues/13750 + original_retcode = self._download_retcode try: success, _ = self.dl(temp_file.name, f, test=True) except (DownloadError, OSError, ValueError, *network_exceptions): @@ -2208,11 +2203,18 @@ class YoutubeDL: os.remove(temp_file.name) except OSError: self.report_warning(f'Unable to delete temporary file "{temp_file.name}"') + # Restore the actual return code + self._download_retcode = original_retcode f['__working'] = success if success: + f.pop('__needs_testing', None) yield f else: - self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id'])) + msg = f'Unable to download format {f["format_id"]}. Skipping...' + if warning: + self.report_warning(msg) + else: + self.to_screen(f'[info] {msg}') def _select_formats(self, formats, selector): return list(selector({ @@ -2692,11 +2694,7 @@ class YoutubeDL: ('modified_timestamp', 'modified_date'), ): if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None: - # Working around out-of-range timestamp values (e.g. 
negative ones on Windows, - # see http://bugs.python.org/issue1646728) - with contextlib.suppress(ValueError, OverflowError, OSError): - upload_date = dt.datetime.fromtimestamp(info_dict[ts_key], dt.timezone.utc) - info_dict[date_key] = upload_date.strftime('%Y%m%d') + info_dict[date_key] = strftime_or_none(info_dict[ts_key]) if not info_dict.get('release_year'): info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])})) @@ -2787,7 +2785,7 @@ class YoutubeDL: dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')} for idx, (prev, current, next_) in enumerate(zip( - (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1): + (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter), strict=False), 1): if current.get('start_time') is None: current['start_time'] = prev.get('end_time') if not current.get('end_time'): @@ -2938,7 +2936,7 @@ class YoutubeDL: ) if self.params.get('check_formats') is True: - formats = LazyList(self._check_formats(formats[::-1]), reverse=True) + formats = LazyList(self._check_formats(formats[::-1], warning=False), reverse=True) if not formats or formats[0] is not info_dict: # only set the 'formats' fields if the original info_dict list them @@ -3211,6 +3209,7 @@ class YoutubeDL: } else: params = self.params + fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params) if not test: for ph in self._progress_hooks: @@ -3313,28 +3312,6 @@ class YoutubeDL: elif _infojson_written is None: return - # Note: Annotations are deprecated - annofn = None - if self.params.get('writeannotations', False): - annofn = self.prepare_filename(info_dict, 'annotation') - if annofn: - if not self._ensure_dir_exists(annofn): - return - if not self.params.get('overwrites', True) and os.path.exists(annofn): - self.to_screen('[info] Video annotations are already present') - elif not info_dict.get('annotations'): - self.report_warning('There are no annotations to write.') - else: - try: - self.to_screen('[info] Writing video annotations to: ' + annofn) - with open(annofn, 'w', encoding='utf-8') as annofile: - annofile.write(info_dict['annotations']) - except (KeyError, TypeError): - self.report_warning('There are no annotations to write.') - except OSError: - self.report_error('Cannot write annotations file: ' + annofn) - return - # Write internet shortcut files def _write_link_file(link_type): url = try_get(info_dict['webpage_url'], iri_to_uri) @@ -3393,7 +3370,7 @@ class YoutubeDL: def existing_video_file(*filepaths): ext = info_dict.get('ext') converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext) - file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)), + file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths, strict=True)), default_overwrite=False) if file: info_dict['ext'] = os.path.splitext(file)[1][1:] @@ -3686,6 +3663,8 @@ class YoutubeDL: return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)} elif isinstance(obj, (list, tuple, set, LazyList)): return list(map(filter_fn, obj)) + elif isinstance(obj, ImpersonateTarget): + return str(obj) elif obj is None or isinstance(obj, (str, int, float, bool)): return obj else: @@ -3954,6 +3933,7 @@ class YoutubeDL: self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None, (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe' else self._format_out('DRM', 
self.Styles.BAD_FORMAT) if f.get('has_drm') else None), + self._format_out('Untested', self.Styles.WARNING) if f.get('__needs_testing') else None, format_field(f, 'format_note'), format_field(f, 'container', ignore=(None, f.get('ext'))), delim=', '), delim=' '), @@ -3976,7 +3956,7 @@ class YoutubeDL: def render_subtitles_table(self, video_id, subtitles): def _row(lang, formats): - exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats))) + exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)), strict=True) if len(set(names)) == 1: names = [] if names[0] == 'unknown' else names[:1] return [lang, ', '.join(names), ', '.join(exts)] @@ -4015,8 +3995,7 @@ class YoutubeDL: if os.environ.get('TERM', '').lower() == 'dumb': additional_info.append('dumb') if not supports_terminal_sequences(stream): - from .utils import WINDOWS_VT_MODE # Must be imported locally - additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI') + additional_info.append('No VT' if WINDOWS_VT_MODE.value is False else 'No ANSI') if additional_info: ret = f'{ret} ({",".join(additional_info)})' return ret @@ -4133,8 +4112,7 @@ class YoutubeDL: self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self) except CookieLoadError as error: cause = error.__context__ - # compat: <=py3.9: `traceback.format_exception` has a different signature - self.report_error(str(cause), tb=''.join(traceback.format_exception(None, cause, cause.__traceback__))) + self.report_error(str(cause), tb=''.join(traceback.format_exception(cause))) raise @property @@ -4162,6 +4140,31 @@ class YoutubeDL: for rh in self._request_director.handlers.values() if isinstance(rh, ImpersonateRequestHandler)) + def _parse_impersonate_targets(self, impersonate): + if impersonate in (True, ''): + impersonate = ImpersonateTarget() + + requested_targets = [ + t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t) + for t in variadic(impersonate) + ] if impersonate else [] + + available_target = next(filter(self._impersonate_target_available, requested_targets), None) + + return available_target, requested_targets + + @staticmethod + def _unavailable_targets_message(requested_targets, note=None, is_error=False): + note = note or 'The extractor specified to use impersonation for this download' + specific_targets = ', '.join(filter(None, map(str, requested_targets))) + message = ( + 'no impersonate target is available' if not specific_targets + else f'none of these impersonate targets are available: {specific_targets}') + return ( + f'{note}, but {message}. {"See" if is_error else "If you encounter errors, then see"}' + f' https://github.com/yt-dlp/yt-dlp#impersonation ' + f'for information on installing the required dependencies') + def urlopen(self, req): """ Start an HTTP download """ if isinstance(req, str): diff --git a/plugins/youtube_download/yt_dlp/__init__.py b/plugins/youtube_download/yt_dlp/__init__.py index 714d9ad..8aee126 100644 --- a/plugins/youtube_download/yt_dlp/__init__.py +++ b/plugins/youtube_download/yt_dlp/__init__.py @@ -1,8 +1,8 @@ import sys -if sys.version_info < (3, 9): +if sys.version_info < (3, 10): raise ImportError( - f'You are using an unsupported version of Python. Only Python versions 3.9 and above are supported by yt-dlp') # noqa: F541 + f'You are using an unsupported version of Python. 
Only Python versions 3.10 and above are supported by yt-dlp') # noqa: F541 __license__ = 'The Unlicense' @@ -59,11 +59,9 @@ from .utils import ( render_table, setproctitle, shell_quote, - traverse_obj, variadic, write_string, ) -from .utils.networking import std_headers from .utils._utils import _UnsafeExtensionError from .YoutubeDL import YoutubeDL @@ -157,7 +155,13 @@ def set_compat_opts(opts): if 'format-sort' in opts.compat_opts: opts.format_sort.extend(FormatSorter.ytdl_default) elif 'prefer-vp9-sort' in opts.compat_opts: - opts.format_sort.extend(FormatSorter._prefer_vp9_sort) + FormatSorter.default = FormatSorter._prefer_vp9_sort + + if 'mtime-by-default' in opts.compat_opts: + if opts.updatetime is None: + opts.updatetime = True + else: + _unused_compat_opt('mtime-by-default') _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False) @@ -494,6 +498,14 @@ def validate_options(opts): 'To let yt-dlp download and merge the best available formats, simply do not pass any format selection', 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning'))) + # Common mistake: -f mp4 + if opts.format == 'mp4': + warnings.append('.\n '.join(( + '"-f mp4" selects the best pre-merged mp4 format which is often not what\'s intended', + 'Pre-merged mp4 formats are not available from all sites, or may only be available in lower quality', + 'To prioritize the best h264 video and aac audio in an mp4 container, use "-t mp4" instead', + 'If you know what you are doing and want a pre-merged mp4 format, use "-f b[ext=mp4]" instead to suppress this warning'))) + # --(postprocessor/downloader)-args without name def report_args_compat(name, value, key1, key2=None, where=None): if key1 in value and key2 not in value: @@ -509,7 +521,6 @@ def validate_options(opts): if report_args_compat('post-processor', opts.postprocessor_args, 'default-compat', 'default'): opts.postprocessor_args['default'] = opts.postprocessor_args.pop('default-compat') - opts.postprocessor_args.setdefault('sponskrub', []) def report_conflict(arg1, opt1, arg2='--allow-unplayable-formats', opt2='allow_unplayable_formats', val1=NO_DEFAULT, val2=NO_DEFAULT, default=False): @@ -534,11 +545,6 @@ def validate_options(opts): '"--exec before_dl:"', 'exec_cmd', val2=opts.exec_cmd.get('before_dl')) report_conflict('--id', 'useid', '--output', 'outtmpl', val2=opts.outtmpl.get('default')) report_conflict('--remux-video', 'remuxvideo', '--recode-video', 'recodevideo') - report_conflict('--sponskrub', 'sponskrub', '--remove-chapters', 'remove_chapters') - report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-mark', 'sponsorblock_mark') - report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-remove', 'sponsorblock_remove') - report_conflict('--sponskrub-cut', 'sponskrub_cut', '--split-chapter', 'split_chapters', - val1=opts.sponskrub and opts.sponskrub_cut) # Conflicts with --allow-unplayable-formats report_conflict('--embed-metadata', 'addmetadata') @@ -551,23 +557,15 @@ def validate_options(opts): report_conflict('--recode-video', 'recodevideo') report_conflict('--remove-chapters', 'remove_chapters', default=[]) report_conflict('--remux-video', 'remuxvideo') - report_conflict('--sponskrub', 'sponskrub') report_conflict('--sponsorblock-remove', 'sponsorblock_remove', default=set()) 
report_conflict('--xattrs', 'xattrs') - # Fully deprecated options - def report_deprecation(val, old, new=None): - if not val: - return + if hasattr(opts, '_deprecated_options'): deprecation_warnings.append( - f'{old} is deprecated and may be removed in a future version. Use {new} instead' if new - else f'{old} is deprecated and may not work as expected') - - report_deprecation(opts.sponskrub, '--sponskrub', '--sponsorblock-mark or --sponsorblock-remove') - report_deprecation(not opts.prefer_ffmpeg, '--prefer-avconv', 'ffmpeg') - # report_deprecation(opts.include_ads, '--include-ads') # We may re-implement this in future - # report_deprecation(opts.call_home, '--call-home') # We may re-implement this in future - # report_deprecation(opts.writeannotations, '--write-annotations') # It's just that no website has it + f'The following options have been deprecated: {", ".join(opts._deprecated_options)}\n' + 'Please remove them from your command/configuration to avoid future errors.\n' + 'See https://github.com/yt-dlp/yt-dlp/issues/14198 for more details') + del opts._deprecated_options # Dependent options opts.date = DateRange.day(opts.date) if opts.date else DateRange(opts.dateafter, opts.datebefore) @@ -698,21 +696,6 @@ def get_postprocessors(opts): 'add_metadata': opts.addmetadata, 'add_infojson': opts.embed_infojson, } - # Deprecated - # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment - # but must be below EmbedSubtitle and FFmpegMetadata - # See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29 - # If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found - if opts.sponskrub is not False: - yield { - 'key': 'SponSkrub', - 'path': opts.sponskrub_path, - 'args': opts.sponskrub_args, - 'cut': opts.sponskrub_cut, - 'force': opts.sponskrub_force, - 'ignoreerror': opts.sponskrub is None, - '_from_cli': True, - } if opts.embedthumbnail: yield { 'key': 'EmbedThumbnail', @@ -871,7 +854,6 @@ def parse_options(argv=None): 'nopart': opts.nopart, 'updatetime': opts.updatetime, 'writedescription': opts.writedescription, - 'writeannotations': opts.writeannotations, 'writeinfojson': opts.writeinfojson, 'allow_playlist_files': opts.allow_playlist_files, 'clean_infojson': opts.clean_infojson, @@ -905,7 +887,6 @@ def parse_options(argv=None): 'max_views': opts.max_views, 'daterange': opts.date, 'cachedir': opts.cachedir, - 'youtube_print_sig_code': opts.youtube_print_sig_code, 'age_limit': opts.age_limit, 'download_archive': opts.download_archive, 'break_on_existing': opts.break_on_existing, @@ -923,13 +904,9 @@ def parse_options(argv=None): 'socket_timeout': opts.socket_timeout, 'bidi_workaround': opts.bidi_workaround, 'debug_printtraffic': opts.debug_printtraffic, - 'prefer_ffmpeg': opts.prefer_ffmpeg, - 'include_ads': opts.include_ads, 'default_search': opts.default_search, 'dynamic_mpd': opts.dynamic_mpd, 'extractor_args': opts.extractor_args, - 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest, - 'youtube_include_hls_manifest': opts.youtube_include_hls_manifest, 'encoding': opts.encoding, 'extract_flat': opts.extract_flat, 'live_from_start': opts.live_from_start, @@ -941,7 +918,6 @@ def parse_options(argv=None): 'fixup': opts.fixup, 'source_address': opts.source_address, 'impersonate': opts.impersonate, - 'call_home': opts.call_home, 'sleep_interval_requests': opts.sleep_interval_requests, 'sleep_interval': opts.sleep_interval, 'max_sleep_interval': 
opts.max_sleep_interval, @@ -951,7 +927,6 @@ def parse_options(argv=None): 'force_keyframes_at_cuts': opts.force_keyframes_at_cuts, 'list_thumbnails': opts.list_thumbnails, 'playlist_items': opts.playlist_items, - 'xattr_set_filesize': opts.xattr_set_filesize, 'match_filter': opts.match_filter, 'color': opts.color, 'ffmpeg_location': opts.ffmpeg_location, @@ -960,11 +935,12 @@ def parse_options(argv=None): 'hls_split_discontinuity': opts.hls_split_discontinuity, 'external_downloader_args': opts.external_downloader_args, 'postprocessor_args': opts.postprocessor_args, - 'cn_verification_proxy': opts.cn_verification_proxy, 'geo_verification_proxy': opts.geo_verification_proxy, 'geo_bypass': opts.geo_bypass, 'geo_bypass_country': opts.geo_bypass_country, 'geo_bypass_ip_block': opts.geo_bypass_ip_block, + 'useid': opts.useid or None, + 'warn_when_outdated': opts.update_self is None, '_warnings': warnings, '_deprecation_warnings': deprecation_warnings, 'compat_opts': opts.compat_opts, @@ -976,12 +952,6 @@ def _real_main(argv=None): parser, opts, all_urls, ydl_opts = parse_options(argv) - # Dump user agent - if opts.dump_user_agent: - ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent']) - write_string(f'{ua}\n', out=sys.stdout) - return - if print_extractor_information(opts, all_urls): return @@ -1004,13 +974,8 @@ def _real_main(argv=None): try: updater = Updater(ydl, opts.update_self) - if opts.update_self and updater.update() and actual_use: - if updater.cmd: - return updater.restart() - # This code is reachable only for zip variant in py < 3.10 - # It makes sense to exit here, but the old behavior is to continue - ydl.report_warning('Restart yt-dlp to use the updated version') - # return 100, 'ERROR: The program must exit for the update to complete' + if opts.update_self and updater.update() and actual_use and updater.cmd: + return updater.restart() except Exception: traceback.print_exc() ydl._download_retcode = 100 @@ -1024,6 +989,7 @@ def _real_main(argv=None): (ImpersonateTarget('safari'), 'curl_cffi'), (ImpersonateTarget('firefox'), 'curl_cffi>=0.10'), (ImpersonateTarget('edge'), 'curl_cffi'), + (ImpersonateTarget('tor'), 'curl_cffi>=0.11'), ] available_targets = ydl._get_available_impersonate_targets() diff --git a/plugins/youtube_download/yt_dlp/aes.py b/plugins/youtube_download/yt_dlp/aes.py index 065901d..e5a2e67 100644 --- a/plugins/youtube_download/yt_dlp/aes.py +++ b/plugins/youtube_download/yt_dlp/aes.py @@ -435,7 +435,7 @@ def sub_bytes_inv(data): def rotate(data): - return data[1:] + [data[0]] + return [*data[1:], data[0]] def key_schedule_core(data, rcon_iteration): @@ -447,7 +447,7 @@ def key_schedule_core(data, rcon_iteration): def xor(data1, data2): - return [x ^ y for x, y in zip(data1, data2)] + return [x ^ y for x, y in zip(data1, data2, strict=False)] def iter_mix_columns(data, matrix): diff --git a/plugins/youtube_download/yt_dlp/compat/__init__.py b/plugins/youtube_download/yt_dlp/compat/__init__.py index d779620..ad12681 100644 --- a/plugins/youtube_download/yt_dlp/compat/__init__.py +++ b/plugins/youtube_download/yt_dlp/compat/__init__.py @@ -1,3 +1,4 @@ +import datetime as dt import os import xml.etree.ElementTree as etree @@ -27,6 +28,13 @@ def compat_ord(c): return c if isinstance(c, int) else ord(c) +def compat_datetime_from_timestamp(timestamp): + # Calling dt.datetime.fromtimestamp with negative timestamps throws error in Windows + # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, 
https://github.com/python/cpython/issues/81708, + # https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642 + return (dt.datetime.fromtimestamp(0, dt.timezone.utc) + dt.timedelta(seconds=timestamp)) + + # Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl # See https://github.com/yt-dlp/yt-dlp/issues/792 # https://docs.python.org/3/library/os.path.html#os.path.expanduser diff --git a/plugins/youtube_download/yt_dlp/compat/_legacy.py b/plugins/youtube_download/yt_dlp/compat/_legacy.py index dae2c14..2f3e35d 100644 --- a/plugins/youtube_download/yt_dlp/compat/_legacy.py +++ b/plugins/youtube_download/yt_dlp/compat/_legacy.py @@ -37,7 +37,7 @@ from ..dependencies import websockets as compat_websockets # noqa: F401 from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 from ..networking.exceptions import HTTPError as compat_HTTPError -passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) +passthrough_module(__name__, '...utils', ('windows_enable_vt_mode',)) # compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE diff --git a/plugins/youtube_download/yt_dlp/compat/types.py b/plugins/youtube_download/yt_dlp/compat/types.py deleted file mode 100644 index 4aa3b0e..0000000 --- a/plugins/youtube_download/yt_dlp/compat/types.py +++ /dev/null @@ -1,13 +0,0 @@ -# flake8: noqa: F405 -from types import * # noqa: F403 - -from .compat_utils import passthrough_module - -passthrough_module(__name__, 'types') -del passthrough_module - -try: - # NB: pypy has builtin NoneType, so checking NameError won't work - from types import NoneType # >= 3.10 -except ImportError: - NoneType = type(None) diff --git a/plugins/youtube_download/yt_dlp/compat/urllib/request.py b/plugins/youtube_download/yt_dlp/compat/urllib/request.py index dfc7f4a..ddb4e6f 100644 --- a/plugins/youtube_download/yt_dlp/compat/urllib/request.py +++ b/plugins/youtube_download/yt_dlp/compat/urllib/request.py @@ -22,15 +22,11 @@ if os.name == 'nt': def getproxies_registry_patched(): proxies = getproxies_registry() - if ( - sys.version_info >= (3, 10, 5) # https://docs.python.org/3.10/whatsnew/changelog.html#python-3-10-5-final - or (3, 9, 13) <= sys.version_info < (3, 10) # https://docs.python.org/3.9/whatsnew/changelog.html#python-3-9-13-final - ): - return proxies - for scheme in ('https', 'ftp'): - if scheme in proxies and proxies[scheme].startswith(f'{scheme}://'): - proxies[scheme] = 'http' + proxies[scheme][len(scheme):] + if sys.version_info < (3, 10, 5): # https://docs.python.org/3.10/whatsnew/changelog.html#python-3-10-5-final + for scheme in ('https', 'ftp'): + if scheme in proxies and proxies[scheme].startswith(f'{scheme}://'): + proxies[scheme] = 'http' + proxies[scheme][len(scheme):] return proxies diff --git a/plugins/youtube_download/yt_dlp/cookies.py b/plugins/youtube_download/yt_dlp/cookies.py index fad323c..4fdc0b8 100644 --- a/plugins/youtube_download/yt_dlp/cookies.py +++ b/plugins/youtube_download/yt_dlp/cookies.py @@ -125,6 +125,8 @@ def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), def _extract_firefox_cookies(profile, container, logger): + MAX_SUPPORTED_DB_SCHEMA_VERSION = 16 + logger.info('Extracting cookies from firefox') if not sqlite3: logger.warning('Cannot extract cookies from firefox without sqlite3 support. 
' @@ -159,9 +161,11 @@ def _extract_firefox_cookies(profile, container, logger): raise ValueError(f'could not find firefox container "{container}" in containers.json') with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: - cursor = None - try: - cursor = _open_database_copy(cookie_database_path, tmpdir) + cursor = _open_database_copy(cookie_database_path, tmpdir) + with contextlib.closing(cursor.connection): + db_schema_version = cursor.execute('PRAGMA user_version;').fetchone()[0] + if db_schema_version > MAX_SUPPORTED_DB_SCHEMA_VERSION: + logger.warning(f'Possibly unsupported firefox cookies database version: {db_schema_version}') if isinstance(container_id, int): logger.debug( f'Only loading cookies from firefox container "{container}", ID {container_id}') @@ -180,6 +184,10 @@ def _extract_firefox_cookies(profile, container, logger): total_cookie_count = len(table) for i, (host, name, value, path, expiry, is_secure) in enumerate(table): progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}') + # FF142 upgraded cookies DB to schema version 16 and started using milliseconds for cookie expiry + # Ref: https://github.com/mozilla-firefox/firefox/commit/5869af852cd20425165837f6c2d9971f3efba83d + if db_schema_version >= 16 and expiry is not None: + expiry /= 1000 cookie = http.cookiejar.Cookie( version=0, name=name, value=value, port=None, port_specified=False, domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'), @@ -188,9 +196,6 @@ def _extract_firefox_cookies(profile, container, logger): jar.set_cookie(cookie) logger.info(f'Extracted {len(jar)} cookies from firefox') return jar - finally: - if cursor is not None: - cursor.connection.close() def _firefox_browser_dirs(): @@ -764,11 +769,11 @@ def _get_linux_desktop_environment(env, logger): GetDesktopEnvironment """ xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None) - desktop_session = env.get('DESKTOP_SESSION', None) + desktop_session = env.get('DESKTOP_SESSION', '') if xdg_current_desktop is not None: for part in map(str.strip, xdg_current_desktop.split(':')): if part == 'Unity': - if desktop_session is not None and 'gnome-fallback' in desktop_session: + if 'gnome-fallback' in desktop_session: return _LinuxDesktopEnvironment.GNOME else: return _LinuxDesktopEnvironment.UNITY @@ -797,35 +802,34 @@ def _get_linux_desktop_environment(env, logger): return _LinuxDesktopEnvironment.UKUI elif part == 'LXQt': return _LinuxDesktopEnvironment.LXQT - logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"') + logger.debug(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"') - elif desktop_session is not None: - if desktop_session == 'deepin': - return _LinuxDesktopEnvironment.DEEPIN - elif desktop_session in ('mate', 'gnome'): - return _LinuxDesktopEnvironment.GNOME - elif desktop_session in ('kde4', 'kde-plasma'): + if desktop_session == 'deepin': + return _LinuxDesktopEnvironment.DEEPIN + elif desktop_session in ('mate', 'gnome'): + return _LinuxDesktopEnvironment.GNOME + elif desktop_session in ('kde4', 'kde-plasma'): + return _LinuxDesktopEnvironment.KDE4 + elif desktop_session == 'kde': + if 'KDE_SESSION_VERSION' in env: return _LinuxDesktopEnvironment.KDE4 - elif desktop_session == 'kde': - if 'KDE_SESSION_VERSION' in env: - return _LinuxDesktopEnvironment.KDE4 - else: - return _LinuxDesktopEnvironment.KDE3 - elif 'xfce' in desktop_session or desktop_session == 'xubuntu': - return _LinuxDesktopEnvironment.XFCE - elif desktop_session == 
'ukui': - return _LinuxDesktopEnvironment.UKUI else: - logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"') - + return _LinuxDesktopEnvironment.KDE3 + elif 'xfce' in desktop_session or desktop_session == 'xubuntu': + return _LinuxDesktopEnvironment.XFCE + elif desktop_session == 'ukui': + return _LinuxDesktopEnvironment.UKUI else: - if 'GNOME_DESKTOP_SESSION_ID' in env: - return _LinuxDesktopEnvironment.GNOME - elif 'KDE_FULL_SESSION' in env: - if 'KDE_SESSION_VERSION' in env: - return _LinuxDesktopEnvironment.KDE4 - else: - return _LinuxDesktopEnvironment.KDE3 + logger.debug(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"') + + if 'GNOME_DESKTOP_SESSION_ID' in env: + return _LinuxDesktopEnvironment.GNOME + elif 'KDE_FULL_SESSION' in env: + if 'KDE_SESSION_VERSION' in env: + return _LinuxDesktopEnvironment.KDE4 + else: + return _LinuxDesktopEnvironment.KDE3 + return _LinuxDesktopEnvironment.OTHER @@ -1336,7 +1340,7 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): if len(cookie_list) != self._ENTRY_LEN: raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}') cookie = self._CookieFileEntry(*cookie_list) - if cookie.expires_at and not cookie.expires_at.isdigit(): + if cookie.expires_at and not re.fullmatch(r'[0-9]+(?:\.[0-9]+)?', cookie.expires_at): raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}') return line diff --git a/plugins/youtube_download/yt_dlp/downloader/__init__.py b/plugins/youtube_download/yt_dlp/downloader/__init__.py index 9c34bd2..17458b9 100644 --- a/plugins/youtube_download/yt_dlp/downloader/__init__.py +++ b/plugins/youtube_download/yt_dlp/downloader/__init__.py @@ -99,7 +99,7 @@ def _get_suitable_downloader(info_dict, protocol, params, default): if external_downloader is None: if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params): return FFmpegFD - elif external_downloader.lower() != 'native': + elif external_downloader.lower() != 'native' and info_dict.get('impersonate') is None: ed = get_external_downloader(external_downloader) if ed.can_download(info_dict, external_downloader): return ed diff --git a/plugins/youtube_download/yt_dlp/downloader/common.py b/plugins/youtube_download/yt_dlp/downloader/common.py index bb9303f..122c479 100644 --- a/plugins/youtube_download/yt_dlp/downloader/common.py +++ b/plugins/youtube_download/yt_dlp/downloader/common.py @@ -62,7 +62,6 @@ class FileDownloader: test: Download only first bytes to test the downloader. min_filesize: Skip files smaller than this size max_filesize: Skip files larger than this size - xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. 
progress_delta: The minimum time between progress output, in seconds external_downloader_args: A dictionary of downloader keys (in lower case) and a list of additional command-line arguments for the @@ -455,14 +454,26 @@ class FileDownloader: self._finish_multiline_status() return True, False + sleep_note = '' if subtitle: sleep_interval = self.params.get('sleep_interval_subtitles') or 0 else: min_sleep_interval = self.params.get('sleep_interval') or 0 + max_sleep_interval = self.params.get('max_sleep_interval') or 0 + + if available_at := info_dict.get('available_at'): + forced_sleep_interval = available_at - int(time.time()) + if forced_sleep_interval > min_sleep_interval: + sleep_note = 'as required by the site' + min_sleep_interval = forced_sleep_interval + if forced_sleep_interval > max_sleep_interval: + max_sleep_interval = forced_sleep_interval + sleep_interval = random.uniform( - min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval) + min_sleep_interval, max_sleep_interval or min_sleep_interval) + if sleep_interval > 0: - self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...') + self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds {sleep_note}...') time.sleep(sleep_interval) ret = self.real_download(filename, info_dict) @@ -495,3 +506,14 @@ class FileDownloader: exe = os.path.basename(args[0]) self.write_debug(f'{exe} command line: {shell_quote(args)}') + + def _get_impersonate_target(self, info_dict): + impersonate = info_dict.get('impersonate') + if impersonate is None: + return None + available_target, requested_targets = self.ydl._parse_impersonate_targets(impersonate) + if available_target: + return available_target + elif requested_targets: + self.report_warning(self.ydl._unavailable_targets_message(requested_targets)) + return None diff --git a/plugins/youtube_download/yt_dlp/downloader/dash.py b/plugins/youtube_download/yt_dlp/downloader/dash.py index afc79b6..bf8652d 100644 --- a/plugins/youtube_download/yt_dlp/downloader/dash.py +++ b/plugins/youtube_download/yt_dlp/downloader/dash.py @@ -3,7 +3,7 @@ import urllib.parse from . 
import get_suitable_downloader from .fragment import FragmentFD -from ..utils import update_url_query, urljoin +from ..utils import ReExtractInfo, update_url_query, urljoin class DashSegmentsFD(FragmentFD): @@ -28,6 +28,11 @@ class DashSegmentsFD(FragmentFD): requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])] args = [] for fmt in requested_formats or [info_dict]: + # Re-extract if --load-info-json is used and 'fragments' was originally a generator + # See https://github.com/yt-dlp/yt-dlp/issues/13906 + if isinstance(fmt['fragments'], str): + raise ReExtractInfo('the stream needs to be re-extracted', expected=True) + try: fragment_count = 1 if self.params.get('test') else len(fmt['fragments']) except TypeError: diff --git a/plugins/youtube_download/yt_dlp/downloader/external.py b/plugins/youtube_download/yt_dlp/downloader/external.py index ee73ac0..3b8fd27 100644 --- a/plugins/youtube_download/yt_dlp/downloader/external.py +++ b/plugins/youtube_download/yt_dlp/downloader/external.py @@ -563,7 +563,7 @@ class FFmpegFD(ExternalFD): f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n' for cookie in cookies)]) if fmt.get('http_headers') and is_http: - # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: + # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg: # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())]) @@ -572,7 +572,21 @@ class FFmpegFD(ExternalFD): if end_time: args += ['-t', str(end_time - start_time)] - args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']] + url = fmt['url'] + if self.params.get('enable_file_urls') and url.startswith('file:'): + # The default protocol_whitelist is 'file,crypto,data' when reading local m3u8 URLs, + # so only local segments can be read unless we also include 'http,https,tcp,tls' + args += ['-protocol_whitelist', 'file,crypto,data,http,https,tcp,tls'] + # ffmpeg incorrectly handles 'file:' URLs by only removing the + # 'file:' prefix and treating the rest as if it's a normal filepath. 
+ # FFmpegPostProcessor also depends on this behavior, so we need to fixup the URLs: + # - On Windows/Cygwin, replace 'file:///' and 'file://localhost/' with 'file:' + # - On *nix, replace 'file://localhost/' with 'file:/' + # Ref: https://github.com/yt-dlp/yt-dlp/issues/13781 + # https://trac.ffmpeg.org/ticket/2702 + url = re.sub(r'^file://(?:localhost)?/', 'file:' if os.name == 'nt' else 'file:/', url) + + args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', url] if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): args += ['-c', 'copy'] @@ -640,10 +654,6 @@ class FFmpegFD(ExternalFD): return retval -class AVconvFD(FFmpegFD): - pass - - _BY_NAME = { klass.get_basename(): klass for name, klass in globals().items() diff --git a/plugins/youtube_download/yt_dlp/downloader/f4m.py b/plugins/youtube_download/yt_dlp/downloader/f4m.py index 22d0ebd..3c8f026 100644 --- a/plugins/youtube_download/yt_dlp/downloader/f4m.py +++ b/plugins/youtube_download/yt_dlp/downloader/f4m.py @@ -149,14 +149,14 @@ class FlvReader(io.BytesIO): segments_count = self.read_unsigned_char() segments = [] for _ in range(segments_count): - box_size, box_type, box_data = self.read_box_info() + _box_size, box_type, box_data = self.read_box_info() assert box_type == b'asrt' segment = FlvReader(box_data).read_asrt() segments.append(segment) fragments_run_count = self.read_unsigned_char() fragments = [] for _ in range(fragments_run_count): - box_size, box_type, box_data = self.read_box_info() + _box_size, box_type, box_data = self.read_box_info() assert box_type == b'afrt' fragments.append(FlvReader(box_data).read_afrt()) @@ -167,7 +167,7 @@ class FlvReader(io.BytesIO): } def read_bootstrap_info(self): - total_size, box_type, box_data = self.read_box_info() + _, box_type, box_data = self.read_box_info() assert box_type == b'abst' return FlvReader(box_data).read_abst() @@ -324,9 +324,9 @@ class F4mFD(FragmentFD): if requested_bitrate is None or len(formats) == 1: # get the best format formats = sorted(formats, key=lambda f: f[0]) - rate, media = formats[-1] + _, media = formats[-1] else: - rate, media = next(filter( + _, media = next(filter( lambda f: int(f[0]) == requested_bitrate, formats)) # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. 
diff --git a/plugins/youtube_download/yt_dlp/downloader/fragment.py b/plugins/youtube_download/yt_dlp/downloader/fragment.py index 98784e7..7852ae9 100644 --- a/plugins/youtube_download/yt_dlp/downloader/fragment.py +++ b/plugins/youtube_download/yt_dlp/downloader/fragment.py @@ -302,7 +302,7 @@ class FragmentFD(FileDownloader): elif to_file: self.try_rename(ctx['tmpfilename'], ctx['filename']) filetime = ctx.get('fragment_filetime') - if self.params.get('updatetime', True) and filetime: + if self.params.get('updatetime') and filetime: with contextlib.suppress(Exception): os.utime(ctx['filename'], (time.time(), filetime)) diff --git a/plugins/youtube_download/yt_dlp/downloader/hls.py b/plugins/youtube_download/yt_dlp/downloader/hls.py index 1f36a07..58cfbbf 100644 --- a/plugins/youtube_download/yt_dlp/downloader/hls.py +++ b/plugins/youtube_download/yt_dlp/downloader/hls.py @@ -94,12 +94,19 @@ class HlsFD(FragmentFD): can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None if can_download: has_ffmpeg = FFmpegFD.available() - no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s - if no_crypto and has_ffmpeg: - can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available' - elif no_crypto: - message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; ' - 'Decryption will be performed natively, but will be extremely slow') + if not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s: + # Even if pycryptodomex isn't available, force HlsFD for m3u8s that won't work with ffmpeg + ffmpeg_can_dl = not traverse_obj(info_dict, (( + 'extra_param_to_segment_url', 'extra_param_to_key_url', + 'hls_media_playlist_data', ('hls_aes', ('uri', 'key', 'iv')), + ), any)) + message = 'The stream has AES-128 encryption and {} available'.format( + 'neither ffmpeg nor pycryptodomex are' if ffmpeg_can_dl and not has_ffmpeg else + 'pycryptodomex is not') + if has_ffmpeg and ffmpeg_can_dl: + can_download = False + else: + message += '; decryption will be performed natively, but will be extremely slow' elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s): install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and ' message = ('Live HLS streams are not supported by the native downloader. 
If this is a livestream, ' @@ -198,7 +205,7 @@ class HlsFD(FragmentFD): line = line.strip() if line: if not line.startswith('#'): - if format_index and discontinuity_count != format_index: + if format_index is not None and discontinuity_count != format_index: continue if ad_frag_next: continue @@ -224,7 +231,7 @@ class HlsFD(FragmentFD): byte_range = {} elif line.startswith('#EXT-X-MAP'): - if format_index and discontinuity_count != format_index: + if format_index is not None and discontinuity_count != format_index: continue if frag_index > 0: self.report_error( diff --git a/plugins/youtube_download/yt_dlp/downloader/http.py b/plugins/youtube_download/yt_dlp/downloader/http.py index 9c6dd8b..6dcf7e7 100644 --- a/plugins/youtube_download/yt_dlp/downloader/http.py +++ b/plugins/youtube_download/yt_dlp/downloader/http.py @@ -13,12 +13,9 @@ from ..utils import ( ContentTooShortError, RetryManager, ThrottledDownload, - XAttrMetadataError, - XAttrUnavailableError, int_or_none, parse_http_range, try_call, - write_xattr, ) from ..utils.networking import HTTPHeaderDict @@ -27,6 +24,10 @@ class HttpFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] request_data = info_dict.get('request_data', None) + request_extensions = {} + impersonate_target = self._get_impersonate_target(info_dict) + if impersonate_target is not None: + request_extensions['impersonate'] = impersonate_target class DownloadContext(dict): __getattr__ = dict.get @@ -109,7 +110,7 @@ class HttpFD(FileDownloader): if try_call(lambda: range_end >= ctx.content_len): range_end = ctx.content_len - 1 - request = Request(url, request_data, headers) + request = Request(url, request_data, headers, extensions=request_extensions) has_range = range_start is not None if has_range: request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}' @@ -269,12 +270,6 @@ class HttpFD(FileDownloader): self.report_error(f'unable to open for writing: {err}') return False - if self.params.get('xattr_set_filesize', False) and data_len is not None: - try: - write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode()) - except (XAttrUnavailableError, XAttrMetadataError) as err: - self.report_error(f'unable to set filesize xattr: {err}') - try: ctx.stream.write(data_block) except OSError as err: @@ -348,7 +343,7 @@ class HttpFD(FileDownloader): self.try_rename(ctx.tmpfilename, ctx.filename) # Update file modification time - if self.params.get('updatetime', True): + if self.params.get('updatetime'): info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None)) self._hook_progress({ diff --git a/plugins/youtube_download/yt_dlp/downloader/niconico.py b/plugins/youtube_download/yt_dlp/downloader/niconico.py index 33cf15d..35a12b5 100644 --- a/plugins/youtube_download/yt_dlp/downloader/niconico.py +++ b/plugins/youtube_download/yt_dlp/downloader/niconico.py @@ -5,47 +5,46 @@ import time from .common import FileDownloader from .external import FFmpegFD from ..networking import Request -from ..utils import DownloadError, str_or_none, try_get +from ..networking.websocket import WebSocketResponse +from ..utils import DownloadError, str_or_none, truncate_string +from ..utils.traversal import traverse_obj class NiconicoLiveFD(FileDownloader): """ Downloads niconico live without being stopped """ def real_download(self, filename, info_dict): - video_id = info_dict['video_id'] - ws_url = info_dict['url'] - ws_extractor = info_dict['ws'] - ws_origin_host = 
info_dict['origin'] - live_quality = info_dict.get('live_quality', 'high') - live_latency = info_dict.get('live_latency', 'high') + video_id = info_dict['id'] + opts = info_dict['downloader_options'] + quality, ws_extractor, ws_url = opts['max_quality'], opts['ws'], opts['ws_url'] dl = FFmpegFD(self.ydl, self.params or {}) new_info_dict = info_dict.copy() - new_info_dict.update({ - 'protocol': 'm3u8', - }) + new_info_dict['protocol'] = 'm3u8' def communicate_ws(reconnect): - if reconnect: - ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'})) + # Support --load-info-json as if it is a reconnect attempt + if reconnect or not isinstance(ws_extractor, WebSocketResponse): + ws = self.ydl.urlopen(Request( + ws_url, headers={'Origin': 'https://live.nicovideo.jp'})) if self.ydl.params.get('verbose', False): - self.to_screen('[debug] Sending startWatching request') + self.write_debug('Sending startWatching request') ws.send(json.dumps({ - 'type': 'startWatching', 'data': { + 'reconnect': True, + 'room': { + 'commentable': True, + 'protocol': 'webSocket', + }, 'stream': { - 'quality': live_quality, - 'protocol': 'hls+fmp4', - 'latency': live_latency, 'accessRightMethod': 'single_cookie', 'chasePlay': False, + 'latency': 'high', + 'protocol': 'hls', + 'quality': quality, }, - 'room': { - 'protocol': 'webSocket', - 'commentable': True, - }, - 'reconnect': True, }, + 'type': 'startWatching', })) else: ws = ws_extractor @@ -58,7 +57,6 @@ class NiconicoLiveFD(FileDownloader): if not data or not isinstance(data, dict): continue if data.get('type') == 'ping': - # pong back ws.send(r'{"type":"pong"}') ws.send(r'{"type":"keepSeat"}') elif data.get('type') == 'disconnect': @@ -66,12 +64,10 @@ class NiconicoLiveFD(FileDownloader): return True elif data.get('type') == 'error': self.write_debug(data) - message = try_get(data, lambda x: x['body']['code'], str) or recv + message = traverse_obj(data, ('body', 'code', {str_or_none}), default=recv) return DownloadError(message) elif self.ydl.params.get('verbose', False): - if len(recv) > 100: - recv = recv[:100] + '...' 
- self.to_screen(f'[debug] Server said: {recv}') + self.write_debug(f'Server response: {truncate_string(recv, 100)}') def ws_main(): reconnect = False @@ -81,7 +77,8 @@ class NiconicoLiveFD(FileDownloader): if ret is True: return except BaseException as e: - self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e))) + self.to_screen( + f'[niconico:live] {video_id}: Connection error occured, reconnecting after 10 seconds: {e}') time.sleep(10) continue finally: diff --git a/plugins/youtube_download/yt_dlp/extractor/_extractors.py b/plugins/youtube_download/yt_dlp/extractor/_extractors.py index bb1c3db..072169d 100644 --- a/plugins/youtube_download/yt_dlp/extractor/_extractors.py +++ b/plugins/youtube_download/yt_dlp/extractor/_extractors.py @@ -58,13 +58,7 @@ from .adn import ( ADNSeasonIE, ) from .adobeconnect import AdobeConnectIE -from .adobetv import ( - AdobeTVChannelIE, - AdobeTVEmbedIE, - AdobeTVIE, - AdobeTVShowIE, - AdobeTVVideoIE, -) +from .adobetv import AdobeTVVideoIE from .adultswim import AdultSwimIE from .aenetworks import ( AENetworksCollectionIE, @@ -152,7 +146,6 @@ from .ard import ( ARDBetaMediathekIE, ARDMediathekCollectionIE, ) -from .arkena import ArkenaIE from .arnes import ArnesIE from .art19 import ( Art19IE, @@ -201,7 +194,6 @@ from .banbye import ( BanByeChannelIE, BanByeIE, ) -from .bandaichannel import BandaiChannelIE from .bandcamp import ( BandcampAlbumIE, BandcampIE, @@ -229,7 +221,6 @@ from .beatbump import ( from .beatport import BeatportIE from .beeg import BeegIE from .behindkink import BehindKinkIE -from .bellmedia import BellMediaIE from .berufetv import BerufeTVIE from .bet import BetIE from .bfi import BFIPlayerIE @@ -275,7 +266,10 @@ from .bitchute import ( BitChuteChannelIE, BitChuteIE, ) -from .blackboardcollaborate import BlackboardCollaborateIE +from .blackboardcollaborate import ( + BlackboardCollaborateIE, + BlackboardCollaborateLaunchIE, +) from .bleacherreport import ( BleacherReportCMSIE, BleacherReportIE, @@ -300,7 +294,6 @@ from .brainpop import ( BrainPOPIlIE, BrainPOPJrIE, ) -from .bravotv import BravoTVIE from .breitbart import BreitBartIE from .brightcove import ( BrightcoveLegacyIE, @@ -310,6 +303,7 @@ from .brilliantpala import ( BrilliantpalaClassesIE, BrilliantpalaElearnIE, ) +from .btvplus import BTVPlusIE from .bundesliga import BundesligaIE from .bundestag import BundestagIE from .bunnycdn import BunnyCdnIE @@ -338,12 +332,12 @@ from .canalc2 import Canalc2IE from .canalplus import CanalplusIE from .canalsurmas import CanalsurmasIE from .caracoltv import CaracolTvPlayIE -from .cartoonnetwork import CartoonNetworkIE from .cbc import ( CBCIE, CBCGemIE, CBCGemLiveIE, CBCGemPlaylistIE, + CBCListenIE, CBCPlayerIE, CBCPlayerPlaylistIE, ) @@ -405,16 +399,12 @@ from .cloudflarestream import CloudflareStreamIE from .cloudycdn import CloudyCDNIE from .clubic import ClubicIE from .clyp import ClypIE -from .cmt import CMTIE from .cnbc import CNBCVideoIE from .cnn import ( CNNIE, CNNIndonesiaIE, ) -from .comedycentral import ( - ComedyCentralIE, - ComedyCentralTVIE, -) +from .comedycentral import ComedyCentralIE from .commonmistakes import ( BlobIE, CommonMistakesIE, @@ -435,7 +425,6 @@ from .cpac import ( CPACPlaylistIE, ) from .cracked import CrackedIE -from .crackle import CrackleIE from .craftsy import CraftsyIE from .crooksandliars import CrooksAndLiarsIE from .crowdbunker import ( @@ -448,7 +437,6 @@ from .cspan import ( CSpanIE, ) from .ctsnews import 
CtsNewsIE -from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from .curiositystream import ( @@ -456,10 +444,6 @@ from .curiositystream import ( CuriosityStreamIE, CuriosityStreamSeriesIE, ) -from .cwtv import ( - CWTVIE, - CWTVMovieIE, -) from .cybrary import ( CybraryCourseIE, CybraryIE, @@ -572,10 +556,6 @@ from .dw import ( DWIE, DWArticleIE, ) -from .eagleplatform import ( - ClipYouEmbedIE, - EaglePlatformIE, -) from .ebaumsworld import EbaumsWorldIE from .ebay import EbayIE from .egghead import ( @@ -641,6 +621,10 @@ from .fancode import ( FancodeVodIE, ) from .fathom import FathomIE +from .faulio import ( + FaulioIE, + FaulioLiveIE, +) from .faz import FazIE from .fc2 import ( FC2IE, @@ -807,9 +791,7 @@ from .holodex import HolodexIE from .hotnewhiphop import HotNewHipHopIE from .hotstar import ( HotStarIE, - HotStarPlaylistIE, HotStarPrefixIE, - HotStarSeasonIE, HotStarSeriesIE, ) from .hrefli import HrefLiRedirectIE @@ -842,6 +824,13 @@ from .ichinanalive import ( IchinanaLiveIE, IchinanaLiveVODIE, ) +from .idagio import ( + IdagioAlbumIE, + IdagioPersonalPlaylistIE, + IdagioPlaylistIE, + IdagioRecordingIE, + IdagioTrackIE, +) from .idolplus import IdolPlusIE from .ign import ( IGNIE, @@ -923,17 +912,15 @@ from .japandiet import ( ShugiinItvVodIE, ) from .jeuxvideo import JeuxVideoIE -from .jiocinema import ( - JioCinemaIE, - JioCinemaSeriesIE, -) from .jiosaavn import ( JioSaavnAlbumIE, + JioSaavnArtistIE, JioSaavnPlaylistIE, + JioSaavnShowIE, + JioSaavnShowPlaylistIE, JioSaavnSongIE, ) from .joj import JojIE -from .joqrag import JoqrAgIE from .jove import JoveIE from .jstream import JStreamIE from .jtbc import ( @@ -1036,11 +1023,6 @@ from .likee import ( LikeeIE, LikeeUserIE, ) -from .limelight import ( - LimelightChannelIE, - LimelightChannelListIE, - LimelightMediaIE, -) from .linkedin import ( LinkedInEventsIE, LinkedInIE, @@ -1106,6 +1088,7 @@ from .markiza import ( from .massengeschmacktv import MassengeschmackTVIE from .masters import MastersIE from .matchtv import MatchTVIE +from .mave import MaveIE from .mbn import MBNIE from .mdr import MDRIE from .medaltv import MedalTVIE @@ -1151,6 +1134,7 @@ from .minds import ( MindsIE, ) from .minoto import MinotoIE +from .mir24tv import Mir24TvIE from .mirrativ import ( MirrativIE, MirrativUserIE, @@ -1160,7 +1144,6 @@ from .mit import ( OCWMITIE, TechTVMITIE, ) -from .mitele import MiTeleIE from .mixch import ( MixchArchiveIE, MixchIE, @@ -1171,6 +1154,10 @@ from .mixcloud import ( MixcloudPlaylistIE, MixcloudUserIE, ) +from .mixlr import ( + MixlrIE, + MixlrRecoringIE, +) from .mlb import ( MLBIE, MLBTVIE, @@ -1194,15 +1181,7 @@ from .moview import MoviewPlayIE from .moviezine import MoviezineIE from .movingimage import MovingImageIE from .msn import MSNIE -from .mtv import ( - MTVDEIE, - MTVIE, - MTVItaliaIE, - MTVItaliaProgrammaIE, - MTVJapanIE, - MTVServicesEmbeddedIE, - MTVVideoIE, -) +from .mtv import MTVIE from .muenchentv import MuenchenTVIE from .murrtube import ( MurrtubeIE, @@ -1260,6 +1239,7 @@ from .nba import ( ) from .nbc import ( NBCIE, + BravoTVIE, NBCNewsIE, NBCOlympicsIE, NBCOlympicsStreamIE, @@ -1267,6 +1247,7 @@ from .nbc import ( NBCSportsStreamIE, NBCSportsVPlayerIE, NBCStationsIE, + SyfyIE, ) from .ndr import ( NDRIE, @@ -1342,12 +1323,7 @@ from .nhk import ( NhkVodProgramIE, ) from .nhl import NHLIE -from .nick import ( - NickBrIE, - NickDeIE, - NickIE, - NickRuIE, -) +from .nick import NickIE from .niconico import ( NiconicoHistoryIE, NiconicoIE, @@ 
-1379,7 +1355,6 @@ from .nobelprize import NobelPrizeIE from .noice import NoicePodcastIE from .nonktube import NonkTubeIE from .noodlemagazine import NoodleMagazineIE -from .noovo import NoovoIE from .nosnl import NOSNLArticleIE from .nova import ( NovaEmbedIE, @@ -1460,6 +1435,7 @@ from .onet import ( OnetPlIE, ) from .onionstudios import OnionStudiosIE +from .onsen import OnsenIE from .opencast import ( OpencastIE, OpencastPlaylistIE, @@ -1493,10 +1469,6 @@ from .panopto import ( PanoptoListIE, PanoptoPlaylistIE, ) -from .paramountplus import ( - ParamountPlusIE, - ParamountPlusSeriesIE, -) from .parler import ParlerIE from .parlview import ParlviewIE from .parti import ( @@ -1550,16 +1522,12 @@ from .piramidetv import ( PiramideTVChannelIE, PiramideTVIE, ) -from .pixivsketch import ( - PixivSketchIE, - PixivSketchUserIE, -) -from .pladform import PladformIE from .planetmarathi import PlanetMarathiIE from .platzi import ( PlatziCourseIE, PlatziIE, ) +from .playerfm import PlayerFmIE from .playplustv import PlayPlusTVIE from .playsuisse import PlaySuisseIE from .playtvak import PlaytvakIE @@ -1570,6 +1538,7 @@ from .pluralsight import ( ) from .plutotv import PlutoTVIE from .plvideo import PlVideoIE +from .plyr import PlyrEmbedIE from .podbayfm import ( PodbayFMChannelIE, PodbayFMIE, @@ -1785,6 +1754,7 @@ from .rtve import ( RTVEALaCartaIE, RTVEAudioIE, RTVELiveIE, + RTVEProgramIE, RTVETelevisionIE, ) from .rtvs import RTVSIE @@ -1808,7 +1778,6 @@ from .rutube import ( RutubePlaylistIE, RutubeTagsIE, ) -from .rutv import RUTVIE from .ruutu import RuutuIE from .ruv import ( RuvIE, @@ -1826,6 +1795,7 @@ from .safari import ( from .saitosan import SaitosanIE from .samplefocus import SampleFocusIE from .sapo import SapoIE +from .sauceplus import SaucePlusIE from .sbs import SBSIE from .sbscokr import ( SBSCoKrAllvodProgramIE, @@ -1868,6 +1838,7 @@ from .shahid import ( from .sharepoint import SharePointIE from .sharevideos import ShareVideosEmbedIE from .shemaroome import ShemarooMeIE +from .shiey import ShieyIE from .showroomlive import ShowRoomLiveIE from .sibnet import SibnetEmbedIE from .simplecast import ( @@ -1876,7 +1847,6 @@ from .simplecast import ( SimplecastPodcastIE, ) from .sina import SinaIE -from .sixplay import SixPlayIE from .skeb import SkebIE from .sky import ( SkyNewsIE, @@ -1904,7 +1874,12 @@ from .skynewsau import SkyNewsAUIE from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE -from .smotrim import SmotrimIE +from .smotrim import ( + SmotrimAudioIE, + SmotrimIE, + SmotrimLiveIE, + SmotrimPlaylistIE, +) from .snapchat import SnapchatSpotlightIE from .snotr import SnotrIE from .softwhiteunderbelly import SoftWhiteUnderbellyIE @@ -1932,12 +1907,13 @@ from .soundgasm import ( SoundgasmProfileIE, ) from .southpark import ( + SouthParkComBrIE, + SouthParkCoUkIE, SouthParkDeIE, SouthParkDkIE, SouthParkEsIE, SouthParkIE, SouthParkLatIE, - SouthParkNlIE, ) from .sovietscloset import ( SovietsClosetIE, @@ -1948,23 +1924,14 @@ from .spankbang import ( SpankBangPlaylistIE, ) from .spiegel import SpiegelIE -from .spike import ( - BellatorIE, - ParamountNetworkIE, -) from .sport5 import Sport5IE from .sportbox import SportBoxIE from .sportdeutschland import SportDeutschlandIE -from .spotify import ( - SpotifyIE, - SpotifyShowIE, -) from .spreaker import ( SpreakerIE, SpreakerShowIE, ) from .springboardplatform import SpringboardPlatformIE -from .sprout import SproutIE from .sproutvideo import ( SproutVideoIE, VidsIoIE, @@ -1986,6 
+1953,7 @@ from .startrek import StarTrekIE from .startv import StarTVIE from .steam import ( SteamCommunityBroadcastIE, + SteamCommunityIE, SteamIE, ) from .stitcher import ( @@ -2015,13 +1983,11 @@ from .sverigesradio import ( SverigesRadioPublicationIE, ) from .svt import ( - SVTIE, SVTPageIE, SVTPlayIE, SVTSeriesIE, ) from .swearnet import SwearnetEpisodeIE -from .syfy import SyfyIE from .syvdk import SYVDKIE from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE @@ -2100,6 +2066,7 @@ from .theguardian import ( TheGuardianPodcastIE, TheGuardianPodcastPlaylistIE, ) +from .thehighwire import TheHighWireIE from .theholetv import TheHoleTvIE from .theintercept import TheInterceptIE from .theplatform import ( @@ -2146,6 +2113,7 @@ from .toggle import ( from .toggo import ToggoIE from .tonline import TOnlineIE from .toongoggles import ToonGogglesIE +from .toutiao import ToutiaoIE from .toutv import TouTvIE from .toypics import ( ToypicsIE, @@ -2168,7 +2136,6 @@ from .trtworld import TrtWorldIE from .trueid import TrueIDIE from .trunews import TruNewsIE from .truth import TruthIE -from .trutv import TruTVIE from .tube8 import Tube8IE from .tubetugraz import ( TubeTuGrazIE, @@ -2180,6 +2147,7 @@ from .tubitv import ( ) from .tumblr import TumblrIE from .tunein import ( + TuneInEmbedIE, TuneInPodcastEpisodeIE, TuneInPodcastIE, TuneInShortenerIE, @@ -2218,7 +2186,6 @@ from .tvc import ( from .tver import TVerIE from .tvigle import TvigleIE from .tviplayer import TVIPlayerIE -from .tvland import TVLandIE from .tvn24 import TVN24IE from .tvnoe import TVNoeIE from .tvopengr import ( @@ -2239,6 +2206,7 @@ from .tvplay import ( from .tvplayer import TVPlayerIE from .tvw import ( TvwIE, + TvwNewsIE, TvwTvChannelsIE, ) from .tweakers import TweakersIE @@ -2287,6 +2255,7 @@ from .uliza import ( ) from .umg import UMGDeIE from .unistra import UnistraIE +from .unitednations import UnitedNationsWebTvIE from .unity import UnityIE from .unsupported import ( KnownDRMIE, @@ -2313,7 +2282,6 @@ from .utreon import UtreonIE from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veo import VeoIE -from .vesti import VestiIE from .vevo import ( VevoIE, VevoPlaylistIE, @@ -2368,6 +2336,7 @@ from .vimeo import ( VHXEmbedIE, VimeoAlbumIE, VimeoChannelIE, + VimeoEventIE, VimeoGroupsIE, VimeoIE, VimeoLikesIE, @@ -2501,7 +2470,6 @@ from .wykop import ( WykopPostCommentIE, WykopPostIE, ) -from .xanimu import XanimuIE from .xboxclips import XboxClipsIE from .xhamster import ( XHamsterEmbedIE, diff --git a/plugins/youtube_download/yt_dlp/extractor/abc.py b/plugins/youtube_download/yt_dlp/extractor/abc.py index 7296be7..2e66178 100644 --- a/plugins/youtube_download/yt_dlp/extractor/abc.py +++ b/plugins/youtube_download/yt_dlp/extractor/abc.py @@ -21,7 +21,7 @@ from ..utils import ( class ABCIE(InfoExtractor): IE_NAME = 'abc.net.au' - _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/(?:news|btn)/(?:[^/]+/){1,4}(?P\d{5,})' + _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/(?:news|btn|listen)/(?:[^/?#]+/){1,4}(?P\d{5,})' _TESTS = [{ 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', @@ -53,8 +53,9 @@ class ABCIE(InfoExtractor): 'info_dict': { 'id': '6880080', 'ext': 'mp3', - 'title': 'NAB lifts interest rates, following Westpac and CBA', + 'title': 'NAB lifts interest rates, following Westpac and CBA - ABC listen', 'description': 'md5:f13d8edc81e462fce4a0437c7dc04728', + 'thumbnail': 
r're:https://live-production\.wcms\.abc-cdn\.net\.au/2193d7437c84b25eafd6360c82b5fa21', }, }, { 'url': 'http://www.abc.net.au/news/2015-10-19/6866214', @@ -64,8 +65,9 @@ class ABCIE(InfoExtractor): 'info_dict': { 'id': '10527914', 'ext': 'mp4', - 'title': 'WWI Centenary', - 'description': 'md5:c2379ec0ca84072e86b446e536954546', + 'title': 'WWI Centenary - Behind The News', + 'description': 'md5:fa4405939ff750fade46ff0cd4c66a52', + 'thumbnail': r're:https://live-production\.wcms\.abc-cdn\.net\.au/bcc3433c97bf992dff32ec5a768713c9', }, }, { 'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074', @@ -73,7 +75,8 @@ class ABCIE(InfoExtractor): 'id': '12342074', 'ext': 'mp4', 'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia', - 'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f', + 'description': 'md5:625257209f2d14ce23cb4e3785da9beb', + 'thumbnail': r're:https://live-production\.wcms\.abc-cdn\.net\.au/7ee6f190de6d7dbb04203e514bfae9ec', }, }, { 'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476', @@ -93,7 +96,16 @@ class ABCIE(InfoExtractor): 'title': 'Wagner Group retreating from Russia, leader Prigozhin to move to Belarus', 'ext': 'mp4', 'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.', - 'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485', + 'thumbnail': r're:https://live-production\.wcm\.abc-cdn\.net\.au/0c170f5b57f0105c432f366c0e8e267b', + }, + }, { + 'url': 'https://www.abc.net.au/listen/programs/the-followers-madness-of-two/presents-followers-madness-of-two/105697646', + 'info_dict': { + 'id': '105697646', + 'title': 'INTRODUCING — The Followers: Madness of Two - ABC listen', + 'ext': 'mp3', + 'description': 'md5:2310cd0d440a4e01656abea15db8d1f3', + 'thumbnail': r're:https://live-production\.wcms\.abc-cdn\.net\.au/90d7078214e5d66553ffb7fcf0da0cda', }, }] diff --git a/plugins/youtube_download/yt_dlp/extractor/adobepass.py b/plugins/youtube_download/yt_dlp/extractor/adobepass.py index f1b8779..eb45734 100644 --- a/plugins/youtube_download/yt_dlp/extractor/adobepass.py +++ b/plugins/youtube_download/yt_dlp/extractor/adobepass.py @@ -3,6 +3,7 @@ import json import re import time import urllib.parse +import uuid import xml.etree.ElementTree as etree from .common import InfoExtractor @@ -10,6 +11,7 @@ from ..networking.exceptions import HTTPError from ..utils import ( NO_DEFAULT, ExtractorError, + parse_qs, unescapeHTML, unified_timestamp, urlencode_postdata, @@ -45,6 +47,7 @@ MSO_INFO = { 'name': 'Comcast XFINITY', 'username_field': 'user', 'password_field': 'passwd', + 'login_hostname': 'login.xfinity.com', }, 'TWC': { 'name': 'Time Warner Cable | Spectrum', @@ -74,6 +77,12 @@ MSO_INFO = { 'name': 'Verizon FiOS', 'username_field': 'IDToken1', 'password_field': 'IDToken2', + 'login_hostname': 'ssoauth.verizon.com', + }, + 'Fubo': { + 'name': 'Fubo', + 'username_field': 'username', + 'password_field': 'password', }, 'Cablevision': { 'name': 'Optimum/Cablevision', @@ -1338,6 +1347,7 @@ MSO_INFO = { 'name': 'Sling TV', 'username_field': 'username', 'password_field': 'password', + 'login_hostname': 'identity.sling.com', }, 'Suddenlink': { 'name': 'Suddenlink', @@ -1355,7 +1365,6 @@ MSO_INFO = { class 
AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' - _MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0' _MVPD_CACHE = 'ap-mvpd' _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page' @@ -1367,6 +1376,11 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en return super()._download_webpage_handle( *args, **kwargs) + @staticmethod + def _get_mso_headers(mso_info): + # Not needed currently + return {} + @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): channel = etree.Element('channel') @@ -1382,7 +1396,13 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en resource_rating.text = rating return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>' - def _extract_mvpd_auth(self, url, video_id, requestor_id, resource): + def _extract_mvpd_auth(self, url, video_id, requestor_id, resource, software_statement): + mso_id = self.get_param('ap_mso') + if mso_id: + mso_info = MSO_INFO[mso_id] + else: + mso_info = {} + def xml_text(xml_str, tag): return self._search_regex( f'<{tag}>(.+?)</{tag}>', xml_str, tag) @@ -1391,15 +1411,27 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) return token_expires and token_expires <= int(time.time()) - def post_form(form_page_res, note, data={}): + def post_form(form_page_res, note, data={}, validate_url=False): form_page, urlh = form_page_res post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url') if not re.match(r'https?://', post_url): post_url = urllib.parse.urljoin(urlh.url, post_url) + if validate_url: + # This request is submitting credentials so we should validate it when possible + url_parsed = urllib.parse.urlparse(post_url) + expected_hostname = mso_info.get('login_hostname') + if expected_hostname and expected_hostname != url_parsed.hostname: + raise ExtractorError( + f'Unexpected login URL hostname; expected "{expected_hostname}" but got ' + f'"{url_parsed.hostname}". 
Aborting before submitting credentials') + if url_parsed.scheme != 'https': + self.write_debug('Upgrading login URL scheme to https') + post_url = urllib.parse.urlunparse(url_parsed._replace(scheme='https')) form_data = self._hidden_inputs(form_page) form_data.update(data) return self._download_webpage_handle( post_url, video_id, note, data=urlencode_postdata(form_data), headers={ + **self._get_mso_headers(mso_info), 'Content-Type': 'application/x-www-form-urlencoded', }) @@ -1432,40 +1464,72 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en } guid = xml_text(resource, 'guid') if '<' in resource else resource - count = 0 - while count < 2: + for _ in range(2): requestor_info = self.cache.load(self._MVPD_CACHE, requestor_id) or {} authn_token = requestor_info.get('authn_token') if authn_token and is_expired(authn_token, 'simpleTokenExpires'): authn_token = None if not authn_token: - mso_id = self.get_param('ap_mso') - if mso_id: - username, password = self._get_login_info('ap_username', 'ap_password', mso_id) - if not username or not password: - raise_mvpd_required() - mso_info = MSO_INFO[mso_id] - - provider_redirect_page_res = self._download_webpage_handle( - self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, - 'Downloading Provider Redirect Page', query={ - 'noflash': 'true', - 'mso_id': mso_id, - 'requestor_id': requestor_id, - 'no_iframe': 'false', - 'domain_name': 'adobe.com', - 'redirect_url': url, - }, headers={ - # yt-dlp's default user-agent is usually too old for Comcast_SSO - # See: https://github.com/yt-dlp/yt-dlp/issues/10848 - 'User-Agent': self._MODERN_USER_AGENT, - } if mso_id == 'Comcast_SSO' else None) - elif not self._cookies_passed: + if not mso_id: + raise_mvpd_required() + username, password = self._get_login_info('ap_username', 'ap_password', mso_id) + if not username or not password: raise_mvpd_required() - if not mso_id: - pass - elif mso_id == 'Comcast_SSO': + device_info, urlh = self._download_json_handle( + 'https://sp.auth.adobe.com/indiv/devices', + video_id, 'Registering device with Adobe', + data=json.dumps({'fingerprint': uuid.uuid4().hex}).encode(), + headers={'Content-Type': 'application/json; charset=UTF-8'}) + + device_id = device_info['deviceId'] + mvpd_headers['pass_sfp'] = urlh.get_header('pass_sfp') + mvpd_headers['Ap_21'] = device_id + + registration = self._download_json( + 'https://sp.auth.adobe.com/o/client/register', + video_id, 'Registering client with Adobe', + data=json.dumps({'software_statement': software_statement}).encode(), + headers={'Content-Type': 'application/json; charset=UTF-8'}) + + access_token = self._download_json( + 'https://sp.auth.adobe.com/o/client/token', video_id, + 'Obtaining access token', data=urlencode_postdata({ + 'grant_type': 'client_credentials', + 'client_id': registration['client_id'], + 'client_secret': registration['client_secret'], + }), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + })['access_token'] + mvpd_headers['Authorization'] = f'Bearer {access_token}' + + reg_code = self._download_json( + f'https://sp.auth.adobe.com/reggie/v1/{requestor_id}/regcode', + video_id, 'Obtaining registration code', + data=urlencode_postdata({ + 'requestor': requestor_id, + 'deviceId': device_id, + 'format': 'json', + }), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + 'Authorization': f'Bearer {access_token}', + })['code'] + + provider_redirect_page_res = self._download_webpage_handle( + 
self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, + 'Downloading Provider Redirect Page', query={ + 'noflash': 'true', + 'mso_id': mso_id, + 'requestor_id': requestor_id, + 'no_iframe': 'false', + 'domain_name': 'adobe.com', + 'redirect_url': url, + 'reg_code': reg_code, + }, headers=self._get_mso_headers(mso_info)) + + if mso_id == 'Comcast_SSO': # Comcast page flow varies by video site and whether you # are on Comcast's network. provider_redirect_page, urlh = provider_redirect_page_res @@ -1489,8 +1553,8 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en oauth_redirect_url = extract_redirect_url( provider_redirect_page, fatal=True) provider_login_page_res = self._download_webpage_handle( - oauth_redirect_url, video_id, - self._DOWNLOADING_LOGIN_PAGE) + oauth_redirect_url, video_id, self._DOWNLOADING_LOGIN_PAGE, + headers=self._get_mso_headers(mso_info)) else: provider_login_page_res = post_form( provider_redirect_page_res, @@ -1500,24 +1564,35 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en provider_login_page_res, 'Logging in', { mso_info['username_field']: username, mso_info['password_field']: password, - }) + }, validate_url=True) mvpd_confirm_page, urlh = mvpd_confirm_page_res if '' in mvpd_confirm_page: post_form(mvpd_confirm_page_res, 'Confirming Login') elif mso_id == 'Philo': # Philo has very unique authentication method - self._download_webpage( - 'https://idp.philo.com/auth/init/login_code', video_id, 'Requesting auth code', data=urlencode_postdata({ + self._request_webpage( + 'https://idp.philo.com/auth/init/login_code', video_id, + 'Requesting Philo auth code', data=json.dumps({ 'ident': username, 'device': 'web', 'send_confirm_link': False, 'send_token': True, - })) + 'device_ident': f'web-{uuid.uuid4().hex}', + 'include_login_link': True, + }).encode(), headers={ + 'Content-Type': 'application/json', + 'Accept': 'application/json', + }) + philo_code = getpass.getpass('Type auth code you have received [Return]: ') - self._download_webpage( - 'https://idp.philo.com/auth/update/login_code', video_id, 'Submitting token', data=urlencode_postdata({ - 'token': philo_code, - })) + self._request_webpage( + 'https://idp.philo.com/auth/update/login_code', video_id, + 'Submitting token', data=json.dumps({'token': philo_code}).encode(), + headers={ + 'Content-Type': 'application/json', + 'Accept': 'application/json', + }) + mvpd_confirm_page_res = self._download_webpage_handle('https://idp.philo.com/idp/submit', video_id, 'Confirming Philo Login') post_form(mvpd_confirm_page_res, 'Confirming Login') elif mso_id == 'Verizon': @@ -1539,7 +1614,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en provider_redirect_page_res, 'Logging in', { mso_info['username_field']: username, mso_info['password_field']: password, - }) + }, validate_url=True) saml_login_page, urlh = saml_login_page_res if 'Please try again.' in saml_login_page: raise ExtractorError( @@ -1560,7 +1635,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en [saml_login_page, saml_redirect_url], 'Logging in', { mso_info['username_field']: username, mso_info['password_field']: password, - }) + }, validate_url=True) if 'Please try again.' 
in saml_login_page: raise ExtractorError( 'Failed to login, incorrect User ID or Password.') @@ -1631,7 +1706,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en provider_login_page_res, 'Logging in', { mso_info['username_field']: username, mso_info['password_field']: password, - }) + }, validate_url=True) provider_refresh_redirect_url = extract_redirect_url( provider_association_redirect, url=urlh.url) @@ -1682,7 +1757,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en provider_login_page_res, 'Logging in', { mso_info['username_field']: username, mso_info['password_field']: password, - }) + }, validate_url=True) provider_refresh_redirect_url = extract_redirect_url( provider_association_redirect, url=urlh.url) @@ -1699,6 +1774,27 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en query=hidden_data) post_form(mvpd_confirm_page_res, 'Confirming Login') + elif mso_id == 'Fubo': + _, urlh = provider_redirect_page_res + + fubo_response = self._download_json( + 'https://api.fubo.tv/partners/tve/connect', video_id, + 'Authenticating with Fubo', 'Unable to authenticate with Fubo', + query=parse_qs(urlh.url), data=json.dumps({ + 'username': username, + 'password': password, + }).encode(), headers={ + 'Accept': 'application/json', + 'Content-Type': 'application/json', + }) + + self._request_webpage( + 'https://sp.auth.adobe.com/adobe-services/oauth2', video_id, + 'Authenticating with Adobe', 'Failed to authenticate with Adobe', + query={ + 'code': fubo_response['code'], + 'state': fubo_response['state'], + }) else: # Some providers (e.g. DIRECTV NOW) have another meta refresh # based redirect that should be followed. @@ -1717,7 +1813,8 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en } if mso_id in ('Cablevision', 'AlticeOne'): form_data['_eventId_proceed'] = '' - mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', form_data) + mvpd_confirm_page_res = post_form( + provider_login_page_res, 'Logging in', form_data, validate_url=True) if mso_id != 'Rogers': post_form(mvpd_confirm_page_res, 'Confirming Login') @@ -1727,6 +1824,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en 'Retrieving Session', data=urlencode_postdata({ '_method': 'GET', 'requestor_id': requestor_id, + 'reg_code': reg_code, }), headers=mvpd_headers) except ExtractorError as e: if not mso_id and isinstance(e.cause, HTTPError) and e.cause.status == 401: @@ -1734,7 +1832,6 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en raise if '\d+)' - _TEST = { - 'url': 'https://tv.adobe.com/embed/22/4153', - 'md5': 'c8c0461bf04d54574fc2b4d07ac6783a', - 'info_dict': { - 'id': '4153', - 'ext': 'flv', - 'title': 'Creating Graphics Optimized for BlackBerry', - 'description': 'md5:eac6e8dced38bdaae51cd94447927459', - 'thumbnail': r're:https?://.*\.jpg$', - 'upload_date': '20091109', - 'duration': 377, - 'view_count': int, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - video_data = self._call_api( - 'episode/' + video_id, video_id, {'disclosure': 'standard'})[0] - return self._parse_video_data(video_data) - - -class AdobeTVIE(AdobeTVBaseIE): +class AdobeTVVideoIE(InfoExtractor): IE_NAME = 'adobetv' - _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?watch/(?P[^/]+)/(?P[^/]+)' - - _TEST = { - 'url': 
'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/', - 'md5': '9bc5727bcdd55251f35ad311ca74fa1e', - 'info_dict': { - 'id': '10981', - 'ext': 'mp4', - 'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop', - 'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311', - 'thumbnail': r're:https?://.*\.jpg$', - 'upload_date': '20110914', - 'duration': 60, - 'view_count': int, - }, - } - - def _real_extract(self, url): - language, show_urlname, urlname = self._match_valid_url(url).groups() - if not language: - language = 'en' - - video_data = self._call_api( - 'episode/get', urlname, { - 'disclosure': 'standard', - 'language': language, - 'show_urlname': show_urlname, - 'urlname': urlname, - })[0] - return self._parse_video_data(video_data) - - -class AdobeTVPlaylistBaseIE(AdobeTVBaseIE): - _PAGE_SIZE = 25 - - def _fetch_page(self, display_id, query, page): - page += 1 - query['page'] = page - for element_data in self._call_api( - self._RESOURCE, display_id, query, f'Download Page {page}'): - yield self._process_data(element_data) - - def _extract_playlist_entries(self, display_id, query): - return OnDemandPagedList(functools.partial( - self._fetch_page, display_id, query), self._PAGE_SIZE) - - -class AdobeTVShowIE(AdobeTVPlaylistBaseIE): - IE_NAME = 'adobetv:show' - _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?show/(?P[^/]+)' - - _TEST = { - 'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost', - 'info_dict': { - 'id': '36', - 'title': 'The Complete Picture with Julieanne Kost', - 'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27', - }, - 'playlist_mincount': 136, - } - _RESOURCE = 'episode' - _process_data = AdobeTVBaseIE._parse_video_data - - def _real_extract(self, url): - language, show_urlname = self._match_valid_url(url).groups() - if not language: - language = 'en' - query = { - 'disclosure': 'standard', - 'language': language, - 'show_urlname': show_urlname, - } - - show_data = self._call_api( - 'show/get', show_urlname, query)[0] - - return self.playlist_result( - self._extract_playlist_entries(show_urlname, query), - str_or_none(show_data.get('id')), - show_data.get('show_name'), - show_data.get('show_description')) - - -class AdobeTVChannelIE(AdobeTVPlaylistBaseIE): - IE_NAME = 'adobetv:channel' - _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?channel/(?P[^/]+)(?:/(?P[^/]+))?' 
- - _TEST = { - 'url': 'http://tv.adobe.com/channel/development', - 'info_dict': { - 'id': 'development', - }, - 'playlist_mincount': 96, - } - _RESOURCE = 'show' - - def _process_data(self, show_data): - return self.url_result( - show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id'))) - - def _real_extract(self, url): - language, channel_urlname, category_urlname = self._match_valid_url(url).groups() - if not language: - language = 'en' - query = { - 'channel_urlname': channel_urlname, - 'language': language, - } - if category_urlname: - query['category_urlname'] = category_urlname - - return self.playlist_result( - self._extract_playlist_entries(channel_urlname, query), - channel_urlname) - - -class AdobeTVVideoIE(AdobeTVBaseIE): - IE_NAME = 'adobetv:video' _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P\d+)' - _EMBED_REGEX = [r']+src=[\'"](?P(?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]'] - - _TEST = { - # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners - 'url': 'https://video.tv.adobe.com/v/2456/', + _EMBED_REGEX = [r']+src=["\'](?P(?:https?:)?//video\.tv\.adobe\.com/v/\d+)'] + _TESTS = [{ + 'url': 'https://video.tv.adobe.com/v/2456', 'md5': '43662b577c018ad707a63766462b1e87', 'info_dict': { 'id': '2456', 'ext': 'mp4', 'title': 'New experience with Acrobat DC', 'description': 'New experience with Acrobat DC', - 'duration': 248.667, + 'duration': 248.522, + 'thumbnail': r're:https?://images-tv\.adobe\.com/.+\.jpg', }, - } + }, { + 'url': 'https://video.tv.adobe.com/v/3463980/adobe-acrobat', + 'info_dict': { + 'id': '3463980', + 'ext': 'mp4', + 'title': 'Adobe Acrobat: How to Customize the Toolbar for Faster PDF Editing', + 'description': 'md5:94368ab95ae24f9c1bee0cb346e03dc3', + 'duration': 97.514, + 'thumbnail': r're:https?://images-tv\.adobe\.com/.+\.jpg', + }, + }] + _WEBPAGE_TESTS = [{ + # https://video.tv.adobe.com/v/3442499 + 'url': 'https://business.adobe.com/dx-fragments/summit/2025/marquees/S335/ondemand.live.html', + 'info_dict': { + 'id': '3442499', + 'ext': 'mp4', + 'title': 'S335 - Beyond Personalization: Creating Intent-Based Experiences at Scale', + 'description': 'Beyond Personalization: Creating Intent-Based Experiences at Scale', + 'duration': 2906.8, + 'thumbnail': r're:https?://images-tv\.adobe\.com/.+\.jpg', + }, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - - video_data = self._parse_json(self._search_regex( - r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id) - title = video_data['title'] + video_data = self._search_json( + r'var\s+bridge\s*=', webpage, 'bridged data', video_id) formats = [] - sources = video_data.get('sources') or [] - for source in sources: - source_src = source.get('src') - if not source_src: - continue - formats.append({ - 'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000), - 'format_id': join_nonempty(source.get('format'), source.get('label')), - 'height': int_or_none(source.get('height') or None), - 'tbr': int_or_none(source.get('bitrate') or None), - 'width': int_or_none(source.get('width') or None), - 'url': source_src, - }) + for source in traverse_obj(video_data, ( + 'sources', lambda _, v: v['format'] != 'playlist' and url_or_none(v['src']), + )): + source_url = self._proto_relative_url(source['src']) + if determine_ext(source_url) == 'm3u8': + fmts = self._extract_m3u8_formats( + source_url, video_id, 'mp4', m3u8_id='hls', fatal=False) 
+ else: + fmts = [{'url': source_url}] - # For both metadata and downloaded files the duration varies among - # formats. I just pick the max one - duration = max(filter(None, [ - float_or_none(source.get('duration'), scale=1000) - for source in sources])) + for fmt in fmts: + fmt.update(traverse_obj(source, { + 'duration': ('duration', {float_or_none(scale=1000)}), + 'filesize': ('kilobytes', {float_or_none(invscale=1000)}), + 'format_id': (('format', 'label'), {str}, all, {lambda x: join_nonempty(*x)}), + 'height': ('height', {int_or_none}), + 'tbr': ('bitrate', {int_or_none}), + 'width': ('width', {int_or_none}), + })) + formats.extend(fmts) + + subtitles = {} + for translation in traverse_obj(video_data, ( + 'translations', lambda _, v: url_or_none(v['vttPath']), + )): + lang = translation.get('language_w3c') or ISO639Utils.long2short(translation.get('language_medium')) or 'und' + subtitles.setdefault(lang, []).append({ + 'ext': 'vtt', + 'url': self._proto_relative_url(translation['vttPath']), + }) return { 'id': video_id, 'formats': formats, - 'title': title, - 'description': video_data.get('description'), - 'thumbnail': video_data.get('video', {}).get('poster'), - 'duration': duration, - 'subtitles': self._parse_subtitles(video_data, 'vttPath'), + 'subtitles': subtitles, + **traverse_obj(video_data, { + 'title': ('title', {clean_html}), + 'description': ('description', {clean_html}, filter), + 'thumbnail': ('video', 'poster', {self._proto_relative_url}, {url_or_none}), + }), } diff --git a/plugins/youtube_download/yt_dlp/extractor/adultswim.py b/plugins/youtube_download/yt_dlp/extractor/adultswim.py index 2c83701..a399c3a 100644 --- a/plugins/youtube_download/yt_dlp/extractor/adultswim.py +++ b/plugins/youtube_download/yt_dlp/extractor/adultswim.py @@ -84,6 +84,8 @@ class AdultSwimIE(TurnerBaseIE): 'skip': '404 Not Found', }] + _SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwNjg5ZmU2My00OTc5LTQxZmQtYWYxNC1hYjVlNmJjNWVkZWIiLCJuYmYiOjE1MzcxOTA2NzQsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwNjc0fQ.Xl3AEduM0s1TxDQ6-XssdKIiLm261hhsEv1C1yo_nitIajZThSI9rXILqtIzO0aujoHhdzUnu_dUCq9ffiSBzEG632tTa1la-5tegHtce80cMhewBN4n2t8n9O5tiaPx8MPY8ALdm5wS7QzWE6DO_LTJKgE8Bl7Yv-CWJT4q4SywtNiQWLVOuhBRnDyfsRezxRwptw8qTn9dv5ZzUrVJaby5fDZ_nOncMKvegOgaKd5KEuCAGQ-mg-PSuValMjGuf6FwDguGaK7IyI5Y2oOrzXmD4Dj7q4WBg8w9QoZhtLeAU56mcsGILolku2R5FHlVLO9xhjResyt-pfmegOkpSw' + def _real_extract(self, url): show_path, episode_path = self._match_valid_url(url).groups() display_id = episode_path or show_path @@ -152,7 +154,7 @@ class AdultSwimIE(TurnerBaseIE): # CDN_TOKEN_APP_ID from: # https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js 'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE', - }, { + }, self._SOFTWARE_STATEMENT, { 'url': url, 'site_name': 'AdultSwim', 'auth_required': auth, diff --git a/plugins/youtube_download/yt_dlp/extractor/aenetworks.py b/plugins/youtube_download/yt_dlp/extractor/aenetworks.py index 8e25786..a4a5f40 100644 --- a/plugins/youtube_download/yt_dlp/extractor/aenetworks.py +++ b/plugins/youtube_download/yt_dlp/extractor/aenetworks.py @@ -1,3 +1,5 @@ +import json + from .theplatform import ThePlatformIE from ..utils import ( ExtractorError, @@ -6,7 +8,6 @@ from ..utils import ( remove_start, traverse_obj, update_url_query, - urlencode_postdata, ) @@ -20,13 +21,13 @@ class 
AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE _THEPLATFORM_KEY = '43jXaGRQud' _THEPLATFORM_SECRET = 'S10BPXHMlb' _DOMAIN_MAP = { - 'history.com': ('HISTORY', 'history'), - 'aetv.com': ('AETV', 'aetv'), - 'mylifetime.com': ('LIFETIME', 'lifetime'), - 'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'), - 'fyi.tv': ('FYI', 'fyi'), - 'historyvault.com': (None, 'historyvault'), - 'biography.com': (None, 'biography'), + 'history.com': ('HISTORY', 'history', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI1MzZlMTQ3ZS0zMzFhLTQxY2YtYTMwNC01MDA2NzNlOGYwYjYiLCJuYmYiOjE1Mzg2NjMzMDksImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM4NjYzMzA5fQ.n24-FVHLGXJe2D4atIQZ700aiXKIajKh5PWFoHJ40Az4itjtwwSFHnvufnoal3T8lYkwNLxce7H-IEGxIykRkZEdwq09pMKMT-ft9ASzE4vQ8fAWbf5ZgDME86x4Jq_YaxkRc9Ne0eShGhl8fgTJHvk07sfWcol61HJ7kU7K8FzzcHR0ucFQgA5VNd8RyjoGWY7c6VxnXR214LOpXsywmit04-vGJC102b_WA2EQfqI93UzG6M6l0EeV4n0_ijP3s8_i8WMJZ_uwnTafCIY6G_731i01dKXDLSFzG1vYglAwDa8DTcdrAAuIFFDF6QNGItCCmwbhjufjmoeVb7R1Gg'), + 'aetv.com': ('AETV', 'aetv', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI5Y2IwNjg2Yy03ODUxLTRiZDUtODcyMC00MjNlZTg1YTQ1NzMiLCJuYmYiOjE1Mzg2NjMyOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM4NjYzMjkwfQ.T5Elf0X4TndO4NEgqBas1gDxNHGPVk_daO2Ha5FBzVO6xi3zM7eavdAKfYMCN7gpWYJx03iADaVPtczO_t_aGZczDjpwJHgTUzDgvcLZAVsVDqtDIAMy3S846rPgT6UDbVoxurA7B2VTPm9phjrSXhejvd0LBO8MQL4AZ3sy2VmiPJ2noT1ily5PuHCYlkrT1fheO064duR__Cd9DQ5VTMnKjzY3Cx345CEwKDkUk5gwgxhXM-aY0eblehrq8VD81_aRM_O3tvh7nbTydHOnUpV-k_iKVi49gqz7Sf8zb6Zh5z2Uftn3vYCfE5NQuesitoRMnsH17nW7o_D59hkRgg'), + 'mylifetime.com': ('LIFETIME', 'lifetime', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJmODg0MDM1ZC1mZGRmLTRmYjgtYmRkMC05MzRhZDdiYTAwYTciLCJuYmYiOjE1NDkzOTI2NDQsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTQ5MzkyNjQ0fQ.vkTIaCpheKdKQd__2-3ec4qkcpbAhyCTvwe5iTl922ItSQfVhpEJG4wseVSNmBTrpBi0hvLedcw6Hj1_UuzBMVuVcCqLprU-pI8recEwL0u7G-eVkylsxe1OTUm1o3V6OykXQ9KlA-QQLL1neUhdhR1n5B1LZ4cmtBmiEpfgf4rFwXD1ScFylIcaWKLBqHoRBNUmxyTmoXXvn_A-GGSj9eCizFzY8W5uBwUcsoiw2Cr1skx7PbB2RSP1I5DsoIJKG-8XV1KS7MWl-fNLjE-hVAsI9znqfEEFcPBiv3LhCP4Nf4OIs7xAselMn0M0c8igRUZhURWX_hdygUAxkbKFtQ'), + 'fyi.tv': ('FYI', 'fyi', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIxOGZiOWM3Ny1mYmMzLTQxYTktYmE1Yi1lMzM0ZmUzNzU4NjEiLCJuYmYiOjE1ODc1ODAzNzcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTg3NTgwMzc3fQ.AYDuipKswmIfLBfOjHRsfc5fMV5NmJUmiJnkpiep4VEw9QiXkygFj4bN06Si5tFc5Mee5TDrGzDpV6iuKbVpLT5kuqXhAn-Wozf5zKPsg_IpdEKO7gsiCq4calt72ct44KTqtKD_hVcoxQU24_HaJsRgXzu3B-6Ff6UrmsXkyvYifYVC9v2DSkdCuA02_IrlllzVT2kRuefUXgL4vQRtTFf77uYa0RKSTG7uVkiQ_AU41eXevKlO2qgtc14Hk5cZ7-ZNrDyMCXYA5ngdIHP7Gs9PWaFXT36PFHI_rC4EfxUABPzjQFxjpP75aX5qn8SH__HbM9q3hoPWgaEaf76qIQ'), + 'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc', None), + 'historyvault.com': (None, 'historyvault', None), + 'biography.com': (None, 'biography', None), } def _extract_aen_smil(self, smil_url, video_id, auth=None): @@ -71,7 +72,7 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE } def _extract_aetn_info(self, domain, filter_key, filter_value, url): - requestor_id, brand = self._DOMAIN_MAP[domain] + requestor_id, brand, software_statement = self._DOMAIN_MAP[domain] result = self._download_json( f'https://feeds.video.aetnd.com/api/v2/{brand}/videos', filter_value, query={f'filter[{filter_key}]': filter_value}) @@ -95,7 +96,7 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'), traverse_obj(theplatform_metadata, ('ratings', 0, 'rating'))) auth = 
self._extract_mvpd_auth( - url, video_id, requestor_id, resource) + url, video_id, requestor_id, resource, software_statement) info.update(self._extract_aen_smil(media_url, video_id, auth)) info.update({ 'title': title, @@ -110,11 +111,9 @@ class AENetworksIE(AENetworksBaseIE): IE_NAME = 'aenetworks' IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault' _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P - shows/[^/]+/season-\d+/episode-\d+| - (?: - (?:movie|special)s/[^/]+| - (?:shows/[^/]+/)?videos - )/[^/?#&]+ + shows/[^/?#]+/season-\d+/episode-\d+| + (?Pmovie|special)s/[^/?#]+(?P/[^/?#]+)?| + (?:shows/[^/?#]+/)?videos/[^/?#]+ )''' _TESTS = [{ 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', @@ -127,20 +126,18 @@ class AENetworksIE(AENetworksBaseIE): 'upload_date': '20120529', 'uploader': 'AENE-NEW', 'duration': 2592.0, - 'thumbnail': r're:^https?://.*\.jpe?g$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'chapters': 'count:5', 'tags': 'count:14', 'categories': ['Mountain Men'], 'episode_number': 1, - 'episode': 'Episode 1', + 'episode': 'Winter Is Coming', 'season': 'Season 1', 'season_number': 1, 'series': 'Mountain Men', + 'age_limit': 0, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'params': {'skip_download': 'm3u8'}, 'add_ie': ['ThePlatform'], 'skip': 'Geo-restricted - This content is not available in your location.', }, { @@ -154,21 +151,64 @@ class AENetworksIE(AENetworksBaseIE): 'upload_date': '20160112', 'uploader': 'AENE-NEW', 'duration': 1277.695, - 'thumbnail': r're:^https?://.*\.jpe?g$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'chapters': 'count:4', 'tags': 'count:23', - 'episode': 'Episode 1', + 'episode': 'Inlawful Entry', 'episode_number': 1, 'season': 'Season 9', 'season_number': 9, 'series': 'Duck Dynasty', + 'age_limit': 0, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'params': {'skip_download': 'm3u8'}, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'https://play.mylifetime.com/movies/v-c-andrews-web-of-dreams', + 'info_dict': { + 'id': '1590627395981', + 'ext': 'mp4', + 'title': 'VC Andrews\' Web of Dreams', + 'description': 'md5:2a8ba13ae64271c79eb65c0577d312ce', + 'uploader': 'AENE-NEW', + 'age_limit': 14, + 'duration': 5253.665, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'chapters': 'count:8', + 'tags': ['lifetime', 'mylifetime', 'lifetime channel', "VC Andrews' Web of Dreams"], + 'series': '', + 'season': 'Season 0', + 'season_number': 0, + 'episode': 'VC Andrews\' Web of Dreams', + 'episode_number': 0, + 'timestamp': 1566489703.0, + 'upload_date': '20190822', + }, + 'params': {'skip_download': 'm3u8'}, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story', + 'info_dict': { + 'id': '1488235587551', + 'ext': 'mp4', + 'title': 'Hunting JonBenet\'s Killer: The Untold Story', + 'description': 'md5:209869425ee392d74fe29201821e48b4', + 'uploader': 'AENE-NEW', + 'age_limit': 14, + 'duration': 5003.903, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'chapters': 'count:10', + 'tags': 'count:11', + 'series': '', + 'season': 'Season 0', + 'season_number': 0, + 'episode': 'Hunting JonBenet\'s Killer: The Untold Story', + 'episode_number': 0, + 'timestamp': 1554987697.0, + 'upload_date': '20190411', + }, + 'params': {'skip_download': 'm3u8'}, 'add_ie': ['ThePlatform'], - 'skip': 'This video is only available for users of participating TV providers.', }, { 'url': 
'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', 'only_matching': True, @@ -196,25 +236,28 @@ class AENetworksIE(AENetworksBaseIE): }] def _real_extract(self, url): - domain, canonical = self._match_valid_url(url).groups() + domain, canonical, url_type, extra = self._match_valid_url(url).group('domain', 'id', 'type', 'extra') + if url_type in ('movie', 'special') and not extra: + canonical += f'/full-{url_type}' return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url) class AENetworksListBaseIE(AENetworksBaseIE): def _call_api(self, resource, slug, brand, fields): return self._download_json( - 'https://yoga.appsvcs.aetnd.com/graphql', - slug, query={'brand': brand}, data=urlencode_postdata({ + 'https://yoga.appsvcs.aetnd.com/graphql', slug, + query={'brand': brand}, headers={'Content-Type': 'application/json'}, + data=json.dumps({ 'query': '''{ %s(slug: "%s") { %s } }''' % (resource, slug, fields), # noqa: UP031 - }))['data'][resource] + }).encode())['data'][resource] def _real_extract(self, url): domain, slug = self._match_valid_url(url).groups() - _, brand = self._DOMAIN_MAP[domain] + _, brand, _ = self._DOMAIN_MAP[domain] playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS) base_url = f'http://watch.{domain}' diff --git a/plugins/youtube_download/yt_dlp/extractor/amcnetworks.py b/plugins/youtube_download/yt_dlp/extractor/amcnetworks.py index 15a86e2..3817f35 100644 --- a/plugins/youtube_download/yt_dlp/extractor/amcnetworks.py +++ b/plugins/youtube_download/yt_dlp/extractor/amcnetworks.py @@ -1,32 +1,24 @@ -import re - -from .theplatform import ThePlatformIE -from ..utils import ( - int_or_none, - parse_age_limit, - try_get, - update_url_query, -) +from .brightcove import BrightcoveNewIE +from .common import InfoExtractor +from ..utils.traversal import traverse_obj -class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE - _VALID_URL = r'https?://(?:www\.)?(?Pamc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)' +class AMCNetworksIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P(?:movies|shows(?:/[^/?#]+)+)/[^/?#&]+)' _TESTS = [{ - 'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631', + 'url': 'https://www.amc.com/shows/dark-winds/videos/dark-winds-a-look-at-season-3--1072027', 'info_dict': { - 'id': '4Lq1dzOnZGt0', + 'id': '6369261343112', 'ext': 'mp4', - 'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner", - 'description': "It turns out child stewardesses are very generous with the wine! 
All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.", - 'upload_date': '20201120', - 'timestamp': 1605904350, - 'uploader': 'AMCN', + 'title': 'Dark Winds: A Look at Season 3', + 'uploader_id': '6240731308001', + 'duration': 176.427, + 'thumbnail': r're:https://[^/]+\.boltdns\.net/.+/image\.jpg', + 'tags': [], + 'timestamp': 1740414792, + 'upload_date': '20250224', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'skip': '404 Not Found', + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge', 'only_matching': True, @@ -52,96 +44,18 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE 'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1', 'only_matching': True, }] - _REQUESTOR_ID_MAP = { - 'amc': 'AMC', - 'bbcamerica': 'BBCA', - 'ifc': 'IFC', - 'sundancetv': 'SUNDANCE', - 'wetv': 'WETV', - } def _real_extract(self, url): - site, display_id = self._match_valid_url(url).groups() - requestor_id = self._REQUESTOR_ID_MAP[site] - page_data = self._download_json( - f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}', - display_id)['data'] - properties = page_data.get('properties') or {} - query = { - 'mbr': 'true', - 'manifest': 'm3u', - } + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + initial_data = self._search_json( + r'window\.initialData\s*=\s*JSON\.parse\(String\.raw`', webpage, 'initial data', display_id) + video_id = traverse_obj(initial_data, ('initialData', 'properties', 'videoId', {str})) + if not video_id: # All locked videos are now DRM-protected + self.report_drm(display_id) + account_id = initial_data['config']['brightcove']['accountId'] + player_id = initial_data['config']['brightcove']['playerId'] - video_player_count = 0 - try: - for v in page_data['children']: - if v.get('type') == 'video-player': - release_pid = v['properties']['currentVideo']['meta']['releasePid'] - tp_path = 'M_UwQC/' + release_pid - media_url = 'https://link.theplatform.com/s/' + tp_path - video_player_count += 1 - except KeyError: - pass - if video_player_count > 1: - self.report_warning( - f'The JSON data has {video_player_count} video players. Only one will be extracted') - - # Fall back to videoPid if releasePid not found. - # TODO: Fall back to videoPid if releasePid manifest uses DRM. 
- if not video_player_count: - tp_path = 'M_UwQC/media/' + properties['videoPid'] - media_url = 'https://link.theplatform.com/s/' + tp_path - - theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id) - info = self._parse_theplatform_metadata(theplatform_metadata) - video_id = theplatform_metadata['pid'] - title = theplatform_metadata['title'] - rating = try_get( - theplatform_metadata, lambda x: x['ratings'][0]['rating']) - video_category = properties.get('videoCategory') - if video_category and video_category.endswith('-Auth'): - resource = self._get_mvpd_resource( - requestor_id, title, video_id, rating) - query['auth'] = self._extract_mvpd_auth( - url, video_id, requestor_id, resource) - media_url = update_url_query(media_url, query) - formats, subtitles = self._extract_theplatform_smil( - media_url, video_id) - - thumbnails = [] - thumbnail_urls = [properties.get('imageDesktop')] - if 'thumbnail' in info: - thumbnail_urls.append(info.pop('thumbnail')) - for thumbnail_url in thumbnail_urls: - if not thumbnail_url: - continue - mobj = re.search(r'(\d+)x(\d+)', thumbnail_url) - thumbnails.append({ - 'url': thumbnail_url, - 'width': int(mobj.group(1)) if mobj else None, - 'height': int(mobj.group(2)) if mobj else None, - }) - - info.update({ - 'age_limit': parse_age_limit(rating), - 'formats': formats, - 'id': video_id, - 'subtitles': subtitles, - 'thumbnails': thumbnails, - }) - ns_keys = theplatform_metadata.get('$xmlns', {}).keys() - if ns_keys: - ns = next(iter(ns_keys)) - episode = theplatform_metadata.get(ns + '$episodeTitle') or None - episode_number = int_or_none( - theplatform_metadata.get(ns + '$episode')) - season_number = int_or_none( - theplatform_metadata.get(ns + '$season')) - series = theplatform_metadata.get(ns + '$show') or None - info.update({ - 'episode': episode, - 'episode_number': episode_number, - 'season_number': season_number, - 'series': series, - }) - return info + return self.url_result( + f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}', + BrightcoveNewIE, video_id) diff --git a/plugins/youtube_download/yt_dlp/extractor/apa.py b/plugins/youtube_download/yt_dlp/extractor/apa.py index fed5970..db82b56 100644 --- a/plugins/youtube_download/yt_dlp/extractor/apa.py +++ b/plugins/youtube_download/yt_dlp/extractor/apa.py @@ -11,12 +11,11 @@ class APAIE(InfoExtractor): _EMBED_REGEX = [r']+\bsrc=(["\'])(?P(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1'] _TESTS = [{ 'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029', - 'md5': '2b12292faeb0a7d930c778c7a5b4759b', 'info_dict': { 'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029', 'ext': 'mp4', 'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029', - 'thumbnail': r're:^https?://.*\.jpg$', + 'thumbnail': r're:https?://kf-vn\.sf\.apa\.at/vn/.+\.jpg', }, }, { 'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78', @@ -28,6 +27,15 @@ class APAIE(InfoExtractor): 'url': 'http://uvp-kleinezeitung.sf.apa.at/embed/f1c44979-dba2-4ebf-b021-e4cf2cac3c81', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + 'url': 'https://www.vol.at/blue-man-group/5593454', + 'info_dict': { + 'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029', + 'ext': 'mp4', + 'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029', + 'thumbnail': r're:https?://kf-vn\.sf\.apa\.at/vn/.+\.jpg', + }, + }] def _real_extract(self, url): mobj = self._match_valid_url(url) diff --git 
a/plugins/youtube_download/yt_dlp/extractor/appleconnect.py b/plugins/youtube_download/yt_dlp/extractor/appleconnect.py index 433eb4e..7867194 100644 --- a/plugins/youtube_download/yt_dlp/extractor/appleconnect.py +++ b/plugins/youtube_download/yt_dlp/extractor/appleconnect.py @@ -1,47 +1,125 @@ +import time + from .common import InfoExtractor -from ..utils import ExtractorError, str_to_int +from ..utils import ( + ExtractorError, + extract_attributes, + float_or_none, + jwt_decode_hs256, + jwt_encode, + parse_resolution, + qualities, + unified_strdate, + update_url, + url_or_none, + urljoin, +) +from ..utils.traversal import ( + find_element, + require, + traverse_obj, +) class AppleConnectIE(InfoExtractor): - _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P[\w-]+)' + IE_NAME = 'apple:music:connect' + IE_DESC = 'Apple Music Connect' + + _BASE_URL = 'https://music.apple.com' + _QUALITIES = { + 'provisionalUploadVideo': None, + 'sdVideo': 480, + 'sdVideoWithPlusAudio': 480, + 'sd480pVideo': 480, + '720pHdVideo': 720, + '1080pHdVideo': 1080, + } + _VALID_URL = r'https?://music\.apple\.com/[\w-]+/post/(?P\d+)' _TESTS = [{ - 'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3', - 'md5': 'c1d41f72c8bcaf222e089434619316e4', + 'url': 'https://music.apple.com/us/post/1018290019', 'info_dict': { - 'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3', + 'id': '1018290019', 'ext': 'm4v', 'title': 'Energy', - 'uploader': 'Drake', - 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 177.911, + 'thumbnail': r're:https?://.+\.png', 'upload_date': '20150710', - 'timestamp': 1436545535, + 'uploader': 'Drake', }, }, { - 'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9', - 'only_matching': True, + 'url': 'https://music.apple.com/us/post/1016746627', + 'info_dict': { + 'id': '1016746627', + 'ext': 'm4v', + 'title': 'Body Shop (Madonna) - Chellous Lima (Acoustic Cover)', + 'duration': 210.278, + 'thumbnail': r're:https?://.+\.png', + 'upload_date': '20150706', + 'uploader': 'Chellous Lima', + }, }] + _jwt = None + + @staticmethod + def _jwt_is_expired(token): + return jwt_decode_hs256(token)['exp'] - time.time() < 120 + + def _get_token(self, webpage, video_id): + if self._jwt and not self._jwt_is_expired(self._jwt): + return self._jwt + + js_url = traverse_obj(webpage, ( + {find_element(tag='script', attr='crossorigin', value='', html=True)}, + {extract_attributes}, 'src', {urljoin(self._BASE_URL)}, {require('JS URL')})) + js = self._download_webpage( + js_url, video_id, 'Downloading token JS', 'Unable to download token JS') + + header = jwt_encode({}, '', headers={'alg': 'ES256', 'kid': 'WebPlayKid'}).split('.')[0] + self._jwt = self._search_regex( + fr'(["\'])(?P{header}(?:\.[\w-]+){{2}})\1', js, 'JSON Web Token', group='jwt') + if self._jwt_is_expired(self._jwt): + raise ExtractorError('The fetched token is already expired') + + return self._jwt + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - try: - video_json = self._html_search_regex( - r'class="auc-video-data">(\{.*?\})', webpage, 'json') - except ExtractorError: - raise ExtractorError('This post doesn\'t contain a video', expected=True) + videos = self._download_json( + 'https://amp-api.music.apple.com/v1/catalog/us/uploaded-videos', + video_id, headers={ + 'Authorization': f'Bearer {self._get_token(webpage, video_id)}', + 'Origin': self._BASE_URL, + }, query={'ids': video_id, 'l': 'en-US'}) + attributes = 
traverse_obj(videos, (
+            'data', ..., 'attributes', any, {require('video information')}))
 
-        video_data = self._parse_json(video_json, video_id)
-        timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
-        like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
+        formats = []
+        quality = qualities(list(self._QUALITIES.keys()))
+        for format_id, src_url in traverse_obj(attributes, (
+            'assetTokens', {dict.items}, lambda _, v: url_or_none(v[1]),
+        )):
+            formats.append({
+                'ext': 'm4v',
+                'format_id': format_id,
+                'height': self._QUALITIES.get(format_id),
+                'quality': quality(format_id),
+                'url': src_url,
+                **parse_resolution(update_url(src_url, query=None), lenient=True),
+            })
 
         return {
             'id': video_id,
-            'url': video_data['sslSrc'],
-            'title': video_data['title'],
-            'description': video_data['description'],
-            'uploader': video_data['artistName'],
-            'thumbnail': video_data['artworkUrl'],
-            'timestamp': timestamp,
-            'like_count': like_count,
+            'formats': formats,
+            'thumbnail': self._html_search_meta(
+                ['og:image', 'og:image:secure_url', 'twitter:image'], webpage),
+            **traverse_obj(attributes, {
+                'title': ('name', {str}),
+                'duration': ('durationInMilliseconds', {float_or_none(scale=1000)}),
+                'upload_date': ('uploadDate', {unified_strdate}),
+                'uploader': (('artistName', 'uploadingArtistName'), {str}, any),
+                'webpage_url': ('postUrl', {url_or_none}),
+            }),
         }
diff --git a/plugins/youtube_download/yt_dlp/extractor/applepodcasts.py b/plugins/youtube_download/yt_dlp/extractor/applepodcasts.py
index b99d24e..91a7028 100644
--- a/plugins/youtube_download/yt_dlp/extractor/applepodcasts.py
+++ b/plugins/youtube_download/yt_dlp/extractor/applepodcasts.py
@@ -1,5 +1,6 @@
 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
     clean_podcast_url,
     int_or_none,
     parse_iso8601,
@@ -17,7 +18,7 @@ class ApplePodcastsIE(InfoExtractor):
             'ext': 'mp3',
             'title': 'Ferreck Dawn - To The Break of Dawn 117',
             'episode': 'Ferreck Dawn - To The Break of Dawn 117',
-            'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc',
+            'description': 'md5:8c4f5c2c30af17ed6a98b0b9daf15b76',
             'upload_date': '20240812',
             'timestamp': 1723449600,
             'duration': 3596,
@@ -58,7 +59,7 @@ class ApplePodcastsIE(InfoExtractor):
            r'', fatal=fatal, default=default, **kw)
+    def _search_nextjs_v13_data(self, webpage, video_id, fatal=True):
+        """Parses Next.js app router flight data that was introduced in Next.js v13"""
+        nextjs_data = {}
+        if not fatal and not isinstance(webpage, str):
+            return nextjs_data
+
+        def flatten(flight_data):
+            if not isinstance(flight_data, list):
+                return
+            if len(flight_data) == 4 and flight_data[0] == '$':
+                _, name, _, data = flight_data
+                if not isinstance(data, dict):
+                    return
+                children = data.pop('children', None)
+                if data and isinstance(name, str) and re.fullmatch(r'\$L[0-9a-f]+', name):
+                    # It is useful hydration JSON data
+                    nextjs_data[name[2:]] = data
+                flatten(children)
+                return
+            for f in flight_data:
+                flatten(f)
+
+        flight_text = ''
+        # The pattern for the surrounding JS/tag should be strict as it's a hardcoded string in the next.js source
+        # Ref: https://github.com/vercel/next.js/blob/5a4a08fdc/packages/next/src/server/app-render/use-flight-response.tsx#L189
+        for flight_segment in re.findall(r'<script\b[^>]*>self\.__next_f\.push\((\[.+?\])\)</script>', webpage):
+            segment = self._parse_json(flight_segment, video_id, fatal=fatal, errnote=None if fatal else False)
+            # Some earlier versions of next.js "optimized" away this array structure; this is unsupported
+            # Ref: https://github.com/vercel/next.js/commit/0123a9d5c9a9a77a86f135b7ae30b46ca986d761
+            if not isinstance(segment, list) or len(segment) != 2:
+                self.write_debug(
+                    f'{video_id}: Unsupported next.js flight data structure detected', only_once=True)
+                continue
+            # Only use the relevant payload type (1 == data)
+            # Ref: https://github.com/vercel/next.js/blob/5a4a08fdc/packages/next/src/server/app-render/use-flight-response.tsx#L11-L14
+            payload_type, chunk = segment
+            if payload_type == 1:
+                flight_text += chunk
+
+        for f in flight_text.splitlines():
+            prefix, _, body = f.lstrip().partition(':')
+            if not re.fullmatch(r'[0-9a-f]+', prefix):
+                continue
+            # The body still isn't guaranteed to be valid JSON, so parsing should always be non-fatal
+            if body.startswith('[') and body.endswith(']'):
+                flatten(self._parse_json(body, video_id, fatal=False, errnote=False))
+            elif body.startswith('{') and body.endswith('}'):
+                data = self._parse_json(body, video_id, fatal=False, errnote=False)
+                if data is not None:
+                    nextjs_data[prefix] = data
+
+        return nextjs_data
+
     def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
         """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
         rectx = re.escape(context_name)
@@ -1790,11 +1848,68 @@ class InfoExtractor:
             return {}
 
         args = dict(zip(arg_keys.split(','), map(json.dumps, self._parse_json(
-            f'[{arg_vals}]', video_id, transform_source=js_to_json, fatal=fatal) or ())))
+            f'[{arg_vals}]', video_id, transform_source=js_to_json, fatal=fatal) or ()), strict=True))
 
         ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
         return traverse_obj(ret, traverse) or {}
 
+    def _resolve_nuxt_array(self, array, video_id, *, fatal=True, default=NO_DEFAULT):
+        """Resolves Nuxt rich JSON payload arrays"""
+        # Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
+        #      https://github.com/nuxt/nuxt/pull/19205
+        if default is not NO_DEFAULT:
+            fatal = False
+
+        if not isinstance(array, list) or not array:
+            error_msg = 'Unable to resolve Nuxt JSON data: invalid input'
+            if fatal:
+                raise ExtractorError(error_msg, video_id=video_id)
+            elif default is NO_DEFAULT:
+                self.report_warning(error_msg, video_id=video_id)
+            return {} if default is NO_DEFAULT else default
+
+        def indirect_reviver(data):
+            return data
+
+        def json_reviver(data):
+            return json.loads(data)
+
+        gen = devalue.parse_iter(array, revivers={
+            'NuxtError': indirect_reviver,
+            'EmptyShallowRef': json_reviver,
+            'EmptyRef': json_reviver,
+            'ShallowRef': indirect_reviver,
+            'ShallowReactive': indirect_reviver,
+            'Ref': indirect_reviver,
+            'Reactive': indirect_reviver,
+        })
+
+        while True:
+            try:
+                error_msg = f'Error resolving Nuxt JSON: {gen.send(None)}'
+                if fatal:
+                    raise ExtractorError(error_msg, video_id=video_id)
+                elif default is NO_DEFAULT:
+                    self.report_warning(error_msg, video_id=video_id, only_once=True)
+                else:
+                    self.write_debug(f'{video_id}: {error_msg}', only_once=True)
+            except StopIteration as error:
+                return error.value or ({} if default is NO_DEFAULT else default)
+
+    def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT):
+        """Parses metadata from Nuxt rich JSON payloads embedded in HTML"""
+        passed_default = default is not NO_DEFAULT
+
+        array = self._search_json(
+            r'<script\b[^>]+\bid="__NUXT_DATA__"[^>]*>', webpage,
+            'Nuxt JSON data', video_id, contains_pattern=r'\[(?s:.+)\]',
+            fatal=fatal, default=NO_DEFAULT if not passed_default else None)
+
+        if not array:
+            return default if passed_default else {}
+
+        return self._resolve_nuxt_array(array, video_id, fatal=fatal, default=default)
+
     @staticmethod
     def _hidden_inputs(html):
         html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
@@ -2068,21 +2183,33 @@ class InfoExtractor:
                 raise ExtractorError(errnote, video_id=video_id)
             self.report_warning(f'{errnote}{bug_reports_message()}')
             return [], {}
-
-        res = self._download_webpage_handle(
-            m3u8_url, video_id,
-            note='Downloading m3u8 information' if note is None else note,
-            errnote='Failed to download m3u8 information' if errnote is None else errnote,
+        if note is None:
+            note = 'Downloading m3u8 information'
+        if errnote is None:
+            errnote = 'Failed to download m3u8 information'
+        response = self._request_webpage(
+            m3u8_url, video_id, note=note, errnote=errnote,
             fatal=fatal, data=data, headers=headers, query=query)
-
-        if res is False:
+        if response is False:
             return [], {}
-        m3u8_doc, urlh = res
-        m3u8_url = urlh.url
+        with contextlib.closing(response):
+            prefix = response.read(512)
+            if not prefix.startswith(b'#EXTM3U'):
+                msg = 'Response data has no m3u header'
+                if fatal:
+                    raise ExtractorError(msg, video_id=video_id)
+                self.report_warning(f'{msg}{bug_reports_message()}', video_id=video_id)
+                return [], {}
+
+            content = self._webpage_read_content(
+                response, m3u8_url, video_id, note=note, errnote=errnote,
+                fatal=fatal, prefix=prefix, data=data)
+        if content is False:
+            return [], {}
 
         return self._parse_m3u8_formats_and_subtitles(
-            m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
+            content, response.url, ext=ext, entry_protocol=entry_protocol,
             preference=preference, quality=quality, m3u8_id=m3u8_id, note=note,
             errnote=errnote, fatal=fatal, live=live, data=data, headers=headers,
             query=query, video_id=video_id)
@@ -2842,7 +2969,7 @@ class InfoExtractor:
                 else:
                     codecs = parse_codecs(codec_str)
                 if content_type not in ('video', 'audio', 'text'):
-                    if mime_type == 'image/jpeg':
+                    if mime_type in ('image/avif', 'image/jpeg'):
                         content_type = mime_type
                     elif codecs.get('vcodec', 'none') != 'none':
                         content_type = 'video'
@@ -2902,14 +3029,14 @@ class InfoExtractor:
                         'manifest_url': mpd_url,
                         'filesize': filesize,
                     }
-                elif content_type == 'image/jpeg':
+                elif content_type in ('image/avif', 'image/jpeg'):
                     # See test case in VikiIE
                     # https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1
                     f = {
                         'format_id': format_id,
                         'ext': 'mhtml',
                         'manifest_url': mpd_url,
-                        'format_note': 'DASH storyboards (jpeg)',
+                        'format_note': f'DASH storyboards ({mimetype2ext(mime_type)})',
                         'acodec': 'none',
                         'vcodec': 'none',
                     }
@@ -2981,7 +3108,7 @@ class InfoExtractor:
                     else:
                         # $Number*$ or $Time$ in media template with S list available
                         # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
-                        # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
                         representation_ms_info['fragments'] = []
                         segment_time = 0
                         segment_d = None
@@ -3051,7 +3177,7 @@ class InfoExtractor:
                         'url': mpd_url or base_url,
                         'fragment_base_url': base_url,
                         'fragments': [],
-                        'protocol': 'http_dash_segments' if mime_type != 'image/jpeg' else 'mhtml',
+                        'protocol': 'mhtml' if mime_type in ('image/avif', 'image/jpeg') else 'http_dash_segments',
                     })
                 if 'initialization_url' in representation_ms_info:
                     initialization_url = representation_ms_info['initialization_url']
@@ -3066,7 +3192,7 @@ class InfoExtractor:
                 else:
                     # Assuming
direct URL to unfragmented media. f['url'] = base_url - if content_type in ('video', 'audio', 'image/jpeg'): + if content_type in ('video', 'audio', 'image/avif', 'image/jpeg'): f['manifest_stream_number'] = stream_numbers[f['url']] stream_numbers[f['url']] += 1 period_entry['formats'].append(f) diff --git a/plugins/youtube_download/yt_dlp/extractor/condenast.py b/plugins/youtube_download/yt_dlp/extractor/condenast.py index 0c84cfd..318fa89 100644 --- a/plugins/youtube_download/yt_dlp/extractor/condenast.py +++ b/plugins/youtube_download/yt_dlp/extractor/condenast.py @@ -96,6 +96,24 @@ class CondeNastIE(InfoExtractor): 'upload_date': '20150916', 'timestamp': 1442434920, }, + }, { + # FIXME: Subtitles + 'url': 'https://www.vanityfair.com/video/watch/vf-quiz-show-squid-game-s3', + 'info_dict': { + 'id': '6862f999c1afbc5ff06b4803', + 'ext': 'mp4', + 'title': '\'Squid Game\' Cast Tests How Well They Know Each Other', + 'categories': ['Arts & Culture', 'Hollywood'], + 'description': 'md5:7a9c668a1fc87648e77da13842ec1534', + 'duration': 955, + 'season': 'Season 1', + 'series': 'Quizzing Each Other', + 'tags': 'count:2', + 'thumbnail': r're:https?://dwgyu36up6iuz\.cloudfront\.net/.+\.jpg', + 'timestamp': 1751341306, + 'upload_date': '20250701', + 'uploader': 'vanityfair', + }, }, { 'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player', 'only_matching': True, diff --git a/plugins/youtube_download/yt_dlp/extractor/crackle.py b/plugins/youtube_download/yt_dlp/extractor/crackle.py deleted file mode 100644 index c4ceba9..0000000 --- a/plugins/youtube_download/yt_dlp/extractor/crackle.py +++ /dev/null @@ -1,243 +0,0 @@ -import hashlib -import hmac -import re -import time - -from .common import InfoExtractor -from ..networking.exceptions import HTTPError -from ..utils import ( - ExtractorError, - determine_ext, - float_or_none, - int_or_none, - orderedSet, - parse_age_limit, - parse_duration, - url_or_none, -) - - -class CrackleIE(InfoExtractor): - _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P\d+)' - _TESTS = [{ - # Crackle is available in the United States and territories - 'url': 'https://www.crackle.com/thanksgiving/2510064', - 'info_dict': { - 'id': '2510064', - 'ext': 'mp4', - 'title': 'Touch Football', - 'description': 'md5:cfbb513cf5de41e8b56d7ab756cff4df', - 'duration': 1398, - 'view_count': int, - 'average_rating': 0, - 'age_limit': 17, - 'genre': 'Comedy', - 'creator': 'Daniel Powell', - 'artist': 'Chris Elliott, Amy Sedaris', - 'release_year': 2016, - 'series': 'Thanksgiving', - 'episode': 'Touch Football', - 'season_number': 1, - 'episode_number': 1, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'expected_warnings': [ - 'Trying with a list of known countries', - ], - }, { - 'url': 'https://www.sonycrackle.com/thanksgiving/2510064', - 'only_matching': True, - }] - - _MEDIA_FILE_SLOTS = { - '360p.mp4': { - 'width': 640, - 'height': 360, - }, - '480p.mp4': { - 'width': 768, - 'height': 432, - }, - '480p_1mbps.mp4': { - 'width': 852, - 'height': 480, - }, - } - - def _download_json(self, url, *args, **kwargs): - # Authorization generation algorithm is reverse engineered from: - # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js - timestamp = time.strftime('%Y%m%d%H%M', time.gmtime()) - h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([url, timestamp]).encode(), hashlib.sha1).hexdigest().upper() - headers = { - 'Accept': 'application/json', - 'Authorization': 
'|'.join([h, timestamp, '117', '1']), - } - return InfoExtractor._download_json(self, url, *args, headers=headers, **kwargs) - - def _real_extract(self, url): - video_id = self._match_id(url) - - geo_bypass_country = self.get_param('geo_bypass_country', None) - countries = orderedSet((geo_bypass_country, 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI', '')) - num_countries, num = len(countries) - 1, 0 - - media = {} - for num, country in enumerate(countries): - if num == 1: # start hard-coded list - self.report_warning('%s. Trying with a list of known countries' % ( - f'Unable to obtain video formats from {geo_bypass_country} API' if geo_bypass_country - else 'No country code was given using --geo-bypass-country')) - elif num == num_countries: # end of list - geo_info = self._download_json( - 'https://web-api-us.crackle.com/Service.svc/geo/country', - video_id, fatal=False, note='Downloading geo-location information from crackle API', - errnote='Unable to fetch geo-location information from crackle') or {} - country = geo_info.get('CountryCode') - if country is None: - continue - self.to_screen(f'{self.IE_NAME} identified country as {country}') - if country in countries: - self.to_screen(f'Downloading from {country} API was already attempted. Skipping...') - continue - - if country is None: - continue - try: - media = self._download_json( - f'https://web-api-us.crackle.com/Service.svc/details/media/{video_id}/{country}?disableProtocols=true', - video_id, note=f'Downloading media JSON from {country} API', - errnote='Unable to download media JSON') - except ExtractorError as e: - # 401 means geo restriction, trying next country - if isinstance(e.cause, HTTPError) and e.cause.status == 401: - continue - raise - - status = media.get('status') - if status.get('messageCode') != '0': - raise ExtractorError( - '{} said: {} {} - {}'.format( - self.IE_NAME, status.get('messageCodeDescription'), status.get('messageCode'), status.get('message')), - expected=True) - - # Found video formats - if isinstance(media.get('MediaURLs'), list): - break - - ignore_no_formats = self.get_param('ignore_no_formats_error') - - if not media or (not media.get('MediaURLs') and not ignore_no_formats): - raise ExtractorError( - 'Unable to access the crackle API. Try passing your country code ' - 'to --geo-bypass-country. 
If it still does not work and the ' - 'video is available in your country') - title = media['Title'] - - formats, subtitles = [], {} - has_drm = False - for e in media.get('MediaURLs') or []: - if e.get('UseDRM'): - has_drm = True - format_url = url_or_none(e.get('DRMPath')) - else: - format_url = url_or_none(e.get('Path')) - if not format_url: - continue - ext = determine_ext(format_url) - if ext == 'm3u8': - fmts, subs = self._extract_m3u8_formats_and_subtitles( - format_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False) - formats.extend(fmts) - subtitles = self._merge_subtitles(subtitles, subs) - elif ext == 'mpd': - fmts, subs = self._extract_mpd_formats_and_subtitles( - format_url, video_id, mpd_id='dash', fatal=False) - formats.extend(fmts) - subtitles = self._merge_subtitles(subtitles, subs) - elif format_url.endswith('.ism/Manifest'): - fmts, subs = self._extract_ism_formats_and_subtitles( - format_url, video_id, ism_id='mss', fatal=False) - formats.extend(fmts) - subtitles = self._merge_subtitles(subtitles, subs) - else: - mfs_path = e.get('Type') - mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path) - if not mfs_info: - continue - formats.append({ - 'url': format_url, - 'format_id': 'http-' + mfs_path.split('.')[0], - 'width': mfs_info['width'], - 'height': mfs_info['height'], - }) - if not formats and has_drm: - self.report_drm(video_id) - - description = media.get('Description') - duration = int_or_none(media.get( - 'DurationInSeconds')) or parse_duration(media.get('Duration')) - view_count = int_or_none(media.get('CountViews')) - average_rating = float_or_none(media.get('UserRating')) - age_limit = parse_age_limit(media.get('Rating')) - genre = media.get('Genre') - release_year = int_or_none(media.get('ReleaseYear')) - creator = media.get('Directors') - artist = media.get('Cast') - - if media.get('MediaTypeDisplayValue') == 'Full Episode': - series = media.get('ShowName') - episode = title - season_number = int_or_none(media.get('Season')) - episode_number = int_or_none(media.get('Episode')) - else: - series = episode = season_number = episode_number = None - - cc_files = media.get('ClosedCaptionFiles') - if isinstance(cc_files, list): - for cc_file in cc_files: - if not isinstance(cc_file, dict): - continue - cc_url = url_or_none(cc_file.get('Path')) - if not cc_url: - continue - lang = cc_file.get('Locale') or 'en' - subtitles.setdefault(lang, []).append({'url': cc_url}) - - thumbnails = [] - images = media.get('Images') - if isinstance(images, list): - for image_key, image_url in images.items(): - mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key) - if not mobj: - continue - thumbnails.append({ - 'url': image_url, - 'width': int(mobj.group(1)), - 'height': int(mobj.group(2)), - }) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'view_count': view_count, - 'average_rating': average_rating, - 'age_limit': age_limit, - 'genre': genre, - 'creator': creator, - 'artist': artist, - 'release_year': release_year, - 'series': series, - 'episode': episode, - 'season_number': season_number, - 'episode_number': episode_number, - 'thumbnails': thumbnails, - 'subtitles': subtitles, - 'formats': formats, - } diff --git a/plugins/youtube_download/yt_dlp/extractor/crooksandliars.py b/plugins/youtube_download/yt_dlp/extractor/crooksandliars.py index abd3322..29bbc2f 100644 --- a/plugins/youtube_download/yt_dlp/extractor/crooksandliars.py +++ b/plugins/youtube_download/yt_dlp/extractor/crooksandliars.py @@ -8,7 
+8,6 @@ from ..utils import ( class CrooksAndLiarsIE(InfoExtractor): _VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P[A-Za-z0-9]+)' _EMBED_REGEX = [r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1'] - _TESTS = [{ 'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi', 'info_dict': { @@ -16,7 +15,7 @@ class CrooksAndLiarsIE(InfoExtractor): 'ext': 'mp4', 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!', 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f', - 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnail': r're:https?://crooksandliars\.com/files/.+', 'timestamp': 1428207000, 'upload_date': '20150405', 'uploader': 'Heather', @@ -26,6 +25,20 @@ class CrooksAndLiarsIE(InfoExtractor): 'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + 'url': 'https://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists', + 'info_dict': { + 'id': '8RUoRhRi', + 'ext': 'mp4', + 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!', + 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f', + 'duration': 236, + 'thumbnail': r're:https?://crooksandliars\.com/files/.+', + 'timestamp': 1428207000, + 'upload_date': '20150405', + 'uploader': 'Heather', + }, + }] def _real_extract(self, url): video_id = self._match_id(url) diff --git a/plugins/youtube_download/yt_dlp/extractor/ctv.py b/plugins/youtube_download/yt_dlp/extractor/ctv.py deleted file mode 100644 index a41dab1..0000000 --- a/plugins/youtube_download/yt_dlp/extractor/ctv.py +++ /dev/null @@ -1,49 +0,0 @@ -from .common import InfoExtractor - - -class CTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P(?:show|movie)s/[^/]+/[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88', - 'info_dict': { - 'id': '2102249', - 'ext': 'flv', - 'title': 'Wednesday, December 23, 2020', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.', - 'timestamp': 1608732000, - 'upload_date': '20201223', - 'series': 'Your Morning', - 'season': '2020-2021', - 'season_number': 5, - 'episode_number': 88, - 'tags': ['Your Morning'], - 'categories': ['Talk Show'], - 'duration': 7467.126, - }, - }, { - 'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - content = self._download_json( - 'https://www.ctv.ca/space-graphql/graphql', display_id, query={ - 'query': '''{ - resolvedPath(path: "/%s") { - lastSegment { - content { - ... 
on AxisContent { - axisId - videoPlayerDestCode - } - } - } - } -}''' % display_id, # noqa: UP031 - })['data']['resolvedPath']['lastSegment']['content'] - video_id = content['axisId'] - return self.url_result( - '9c9media:{}:{}'.format(content['videoPlayerDestCode'], video_id), - 'NineCNineMedia', video_id) diff --git a/plugins/youtube_download/yt_dlp/extractor/cwtv.py b/plugins/youtube_download/yt_dlp/extractor/cwtv.py deleted file mode 100644 index cdb29fc..0000000 --- a/plugins/youtube_download/yt_dlp/extractor/cwtv.py +++ /dev/null @@ -1,180 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - parse_age_limit, - parse_iso8601, - parse_qs, - smuggle_url, - str_or_none, - update_url_query, -) -from ..utils.traversal import traverse_obj - - -class CWTVIE(InfoExtractor): - IE_NAME = 'cwtv' - _VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch|guid)=(?P[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})' - _TESTS = [{ - 'url': 'https://www.cwtv.com/shows/continuum/a-stitch-in-time/?play=9149a1e1-4cb2-46d7-81b2-47d35bbd332b', - 'info_dict': { - 'id': '9149a1e1-4cb2-46d7-81b2-47d35bbd332b', - 'ext': 'mp4', - 'title': 'A Stitch in Time', - 'description': r're:(?s)City Protective Services officer Kiera Cameron is transported from 2077.+', - 'thumbnail': r're:https?://.+\.jpe?g', - 'duration': 2632, - 'timestamp': 1736928000, - 'uploader': 'CWTV', - 'chapters': 'count:5', - 'series': 'Continuum', - 'season_number': 1, - 'episode_number': 1, - 'age_limit': 14, - 'upload_date': '20250115', - 'season': 'Season 1', - 'episode': 'Episode 1', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63', - 'info_dict': { - 'id': '6b15e985-9345-4f60-baf8-56e96be57c63', - 'ext': 'mp4', - 'title': 'Legends of Yesterday', - 'description': r're:(?s)Oliver and Barry Allen take Kendra Saunders and Carter Hall to a remote.+', - 'duration': 2665, - 'series': 'Arrow', - 'season_number': 4, - 'season': '4', - 'episode_number': 8, - 'upload_date': '20151203', - 'timestamp': 1449122100, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'skip': 'redirect to http://cwtv.com/shows/arrow/', - }, { - 'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088', - 'info_dict': { - 'id': '24282b12-ead2-42f2-95ad-26770c2c6088', - 'ext': 'mp4', - 'title': 'Jeff Davis 4', - 'description': 'Jeff Davis is back to make you laugh.', - 'duration': 1263, - 'series': 'Whose Line Is It Anyway?', - 'season_number': 11, - 'episode_number': 20, - 'upload_date': '20151006', - 'timestamp': 1444107300, - 'age_limit': 14, - 'uploader': 'CWTV', - 'thumbnail': r're:https?://.+\.jpe?g', - 'chapters': 'count:4', - 'episode': 'Episode 20', - 'season': 'Season 11', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6', - 'only_matching': True, - }, { - 'url': 'http://cwtvpr.com/the-cw/video?watch=9eee3f60-ef4e-440b-b3b2-49428ac9c54e', - 'only_matching': True, - }, { - 'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?watch=6b15e985-9345-4f60-baf8-56e96be57c63', - 'only_matching': True, - }, { - 'url': 'http://www.cwtv.com/movies/play/?guid=0a8e8b5b-1356-41d5-9a6a-4eda1a6feb6c', - 'only_matching': True, - }] - - def 
_real_extract(self, url): - video_id = self._match_id(url) - data = self._download_json( - f'https://images.cwtv.com/feed/app-2/video-meta/apiversion_22/device_android/guid_{video_id}', video_id) - if traverse_obj(data, 'result') != 'ok': - raise ExtractorError(traverse_obj(data, (('error_msg', 'msg'), {str}, any)), expected=True) - video_data = data['video'] - title = video_data['title'] - mpx_url = update_url_query( - video_data.get('mpx_url') or f'https://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}', - {'formats': 'M3U+none'}) - - season = str_or_none(video_data.get('season')) - episode = str_or_none(video_data.get('episode')) - if episode and season: - episode = episode[len(season):] - - return { - '_type': 'url_transparent', - 'id': video_id, - 'title': title, - 'url': smuggle_url(mpx_url, {'force_smil_url': True}), - 'description': video_data.get('description_long'), - 'duration': int_or_none(video_data.get('duration_secs')), - 'series': video_data.get('series_name'), - 'season_number': int_or_none(season), - 'episode_number': int_or_none(episode), - 'timestamp': parse_iso8601(video_data.get('start_time')), - 'age_limit': parse_age_limit(video_data.get('rating')), - 'ie_key': 'ThePlatform', - 'thumbnail': video_data.get('large_thumbnail'), - } - - -class CWTVMovieIE(InfoExtractor): - IE_NAME = 'cwtv:movie' - _VALID_URL = r'https?://(?:www\.)?cwtv\.com/shows/(?P[\w-]+)/?\?(?:[^#]+&)?viewContext=Movies' - _TESTS = [{ - 'url': 'https://www.cwtv.com/shows/the-crush/?viewContext=Movies+Swimlane', - 'info_dict': { - 'id': '0a8e8b5b-1356-41d5-9a6a-4eda1a6feb6c', - 'ext': 'mp4', - 'title': 'The Crush', - 'upload_date': '20241112', - 'description': 'md5:1549acd90dff4a8273acd7284458363e', - 'chapters': 'count:9', - 'timestamp': 1731398400, - 'age_limit': 16, - 'duration': 5337, - 'series': 'The Crush', - 'season': 'Season 1', - 'uploader': 'CWTV', - 'season_number': 1, - 'episode': 'Episode 1', - 'episode_number': 1, - 'thumbnail': r're:https?://.+\.jpe?g', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }] - _UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}' - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - app_url = ( - self._html_search_meta('al:ios:url', webpage, default=None) - or self._html_search_meta('al:android:url', webpage, default=None)) - video_id = ( - traverse_obj(parse_qs(app_url), ('video_id', 0, {lambda x: re.fullmatch(self._UUID_RE, x)}, 0)) - or self._search_regex([ - rf'CWTV\.Site\.curPlayingGUID\s*=\s*["\']({self._UUID_RE})', - rf'CWTV\.Site\.viewInAppURL\s*=\s*["\']/shows/[\w-]+/watch-in-app/\?play=({self._UUID_RE})', - ], webpage, 'video ID')) - - return self.url_result( - f'https://www.cwtv.com/shows/{display_id}/{display_id}/?play={video_id}', CWTVIE, video_id) diff --git a/plugins/youtube_download/yt_dlp/extractor/dailymail.py b/plugins/youtube_download/yt_dlp/extractor/dailymail.py index 540676a..3058a0f 100644 --- a/plugins/youtube_download/yt_dlp/extractor/dailymail.py +++ b/plugins/youtube_download/yt_dlp/extractor/dailymail.py @@ -19,11 +19,22 @@ class DailyMailIE(InfoExtractor): 'ext': 'mp4', 'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'', 'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84', + 'thumbnail': r're:https?://i\.dailymail\.co\.uk/.+\.jpg', }, }, { 'url': 'http://www.dailymail.co.uk/embed/video/1295863.html', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + 'url': 
'https://www.daily-news.gr/lifestyle/%ce%b7-%cf%84%cf%81%ce%b1%ce%b3%ce%bf%cf%85%ce%b4%ce%af%cf%83%cf%84%cf%81%ce%b9%ce%b1-jessie-j-%ce%bc%ce%bf%ce%b9%cf%81%ce%ac%cf%83%cf%84%ce%b7%ce%ba%ce%b5-%cf%83%cf%85%ce%b3%ce%ba%ce%bb%ce%bf%ce%bd/', + 'info_dict': { + 'id': '3463585', + 'ext': 'mp4', + 'title': 'Jessie J reveals she has undergone surgery as she shares clips', + 'description': 'md5:9fa9a25feca5b656b0b4a39c922fad1e', + 'thumbnail': r're:https?://i\.dailymail\.co\.uk/.+\.jpg', + }, + }] def _real_extract(self, url): video_id = self._match_id(url) diff --git a/plugins/youtube_download/yt_dlp/extractor/dailymotion.py b/plugins/youtube_download/yt_dlp/extractor/dailymotion.py index a81f0a2..017dbd5 100644 --- a/plugins/youtube_download/yt_dlp/extractor/dailymotion.py +++ b/plugins/youtube_download/yt_dlp/extractor/dailymotion.py @@ -119,13 +119,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor): _EMBED_REGEX = [rf'(?ix)<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)["\'](?P{_VALID_URL[5:]})'] _TESTS = [{ 'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news', - 'md5': '074b95bdee76b9e3654137aee9c79dfe', 'info_dict': { 'id': 'x5kesuj', 'ext': 'mp4', 'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller', 'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller', 'duration': 187, + 'tags': 'count:5', + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'timestamp': 1493651285, 'upload_date': '20170501', 'uploader': 'Deadline', @@ -133,18 +134,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'age_limit': 0, 'view_count': int, 'like_count': int, - 'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'], - 'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1cmt4ZcZ9KiM/x1080', }, }, { 'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true', - 'md5': 'e2f9717c6604773f963f069ca53a07f8', 'info_dict': { 'id': 'x89eyek', 'ext': 'mp4', - 'title': "En quête d'esprit du 27/03/2022", + 'title': 'En quête d\'esprit du 27/03/2022', 'description': 'md5:66542b9f4df2eb23f314fc097488e553', 'duration': 2756, + 'tags': 'count:1', + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'timestamp': 1648383669, 'upload_date': '20220327', 'uploader': 'CNEWS', @@ -152,8 +152,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'age_limit': 0, 'view_count': int, 'like_count': int, - 'tags': ['en_quete_d_esprit'], - 'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1clTH6StrxMP/x1080', }, }, { 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames', @@ -163,8 +161,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'ext': 'mp4', 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News', 'description': 'Several come bundled with the Steam Controller.', - 'thumbnail': r're:^https?:.*\.(?:jpg|png)$', 'duration': 74, + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'timestamp': 1425657362, 'upload_date': '20150306', 'uploader': 'IGN', @@ -173,20 +171,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'view_count': int, }, 'skip': 'video gone', - }, { - # Vevo video - 'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi', - 'info_dict': { - 'title': 'Roar (Official)', - 'id': 'USUV71301934', - 'ext': 'mp4', - 'uploader': 'Katy Perry', - 'upload_date': '20130905', - }, - 'params': { - 
'skip_download': True, - }, - 'skip': 'VEVO is only available in some countries', }, { # age-restricted video 'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband', @@ -259,9 +243,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'uploader_id': 'x2vtgmm', 'age_limit': 0, 'tags': [], + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'view_count': int, 'like_count': int, - 'thumbnail': r're:https://\w+.dmcdn.net/v/WnEY61cmvMxt2Fi6d/x1080', }, }, { # https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj @@ -276,18 +260,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'info_dict': { 'id': 'x8u4owg', 'ext': 'mp4', + 'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne', 'like_count': int, 'uploader': 'Le Parisien', - 'thumbnail': 'https://www.leparisien.fr/resizer/ho_GwveeYftNkLwg_cEta--5Bv4=/1200x675/cloudfront-eu-central-1.images.arcpublishing.com/leparisien/BFXJNEBN75EUNHGYJLORUC3TX4.jpg', 'upload_date': '20240309', 'view_count': int, + 'tags': 'count:7', + 'thumbnail': r're:https?://www\.leparisien\.fr/.+\.jpg', 'timestamp': 1709997866, 'age_limit': 0, 'uploader_id': 'x32f7b', 'title': 'VIDÉO. Le «\xa0véloto\xa0», la voiture à pédales qui aimerait se faire une place sur les routes', 'duration': 428.0, - 'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne', - 'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'], }, }, { # https://geo.dailymotion.com/player/xry80.html?video=x8vu47w @@ -297,9 +281,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'ext': 'mp4', 'like_count': int, 'uploader': 'Metatube', - 'thumbnail': r're:https://\w+.dmcdn.net/v/W1G_S1coGSFTfkTeR/x1080', 'upload_date': '20240326', 'view_count': int, + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'timestamp': 1711496732, 'age_limit': 0, 'uploader_id': 'x2xpy74', @@ -308,6 +292,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'description': 'Que lindura', 'tags': [], }, + 'skip': 'Invalid URL', }, { # //geo.dailymotion.com/player/xysxq.html?video=k2Y4Mjp7krAF9iCuINM 'url': 'https://lcp.fr/programmes/avant-la-catastrophe-la-naissance-de-la-dictature-nazie-1933-1936-346819', @@ -322,11 +307,30 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'like_count': int, 'age_limit': 0, 'duration': 3220, - 'thumbnail': 'https://s1.dmcdn.net/v/Xvumk1djJBUZfjj2a/x1080', 'tags': [], + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'timestamp': 1739919947, 'upload_date': '20250218', }, + 'skip': 'Invalid URL', + }, { + 'url': 'https://forum.ionicframework.com/t/ionic-2-jw-player-dailymotion-player/83248', + 'info_dict': { + 'id': 'xwr14q', + 'ext': 'mp4', + 'title': 'Macklemore & Ryan Lewis - Thrift Shop (feat. Wanz)', + 'age_limit': 0, + 'description': 'md5:47fbe168b5a6ddc4a205e20dd6c841b2', + 'duration': 234, + 'like_count': int, + 'tags': 'count:5', + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', + 'timestamp': 1358177670, + 'upload_date': '20130114', + 'uploader': 'Macklemore Official', + 'uploader_id': 'x19qlwr', + 'view_count': int, + }, }] _GEO_BYPASS = False _COMMON_MEDIA_FIELDS = '''description @@ -540,7 +544,7 @@ class DailymotionSearchIE(DailymotionPlaylistBaseIE): 'id': 'king of turtles', 'title': 'king of turtles', }, - 'playlist_mincount': 90, + 'playlist_mincount': 0, }] _SEARCH_QUERY = 'query SEARCH_QUERY( $query: String! 
$page: Int $limit: Int ) { search { videos( query: $query first: $limit page: $page ) { edges { node { xid } } } } } ' @@ -584,7 +588,7 @@ class DailymotionUserIE(DailymotionPlaylistBaseIE): 'info_dict': { 'id': 'nqtv', }, - 'playlist_mincount': 152, + 'playlist_mincount': 148, }, { 'url': 'http://www.dailymotion.com/user/UnderProject', 'info_dict': { diff --git a/plugins/youtube_download/yt_dlp/extractor/dangalplay.py b/plugins/youtube_download/yt_dlp/extractor/dangalplay.py index f7b2432..3b0dc1f 100644 --- a/plugins/youtube_download/yt_dlp/extractor/dangalplay.py +++ b/plugins/youtube_download/yt_dlp/extractor/dangalplay.py @@ -11,8 +11,14 @@ from ..utils.traversal import traverse_obj class DangalPlayBaseIE(InfoExtractor): _NETRC_MACHINE = 'dangalplay' + _REGION = 'IN' _OTV_USER_ID = None - _LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage' + _LOGIN_HINT = ( + 'Pass credentials as -u "token" -p "USER_ID" ' + '(where USER_ID is the value of "otv_user_id" in your browser local storage). ' + 'Your login region can be optionally suffixed to the username as @REGION ' + '(where REGION is the two-letter "region" code found in your browser local storage), ' + 'e.g.: -u "token@IN" -p "USER_ID"') _API_BASE = 'https://ottapi.dangalplay.com' _AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js _SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above @@ -20,8 +26,12 @@ class DangalPlayBaseIE(InfoExtractor): def _perform_login(self, username, password): if self._OTV_USER_ID: return - if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password): + mobj = re.fullmatch(r'token(?:@(?P[A-Z]{2}))?', username) + if not mobj or not re.fullmatch(r'[\da-f]{32}', password): raise ExtractorError(self._LOGIN_HINT, expected=True) + if region := mobj.group('region'): + self._REGION = region + self.write_debug(f'Setting login region to "{self._REGION}"') self._OTV_USER_ID = password def _real_initialize(self): @@ -52,7 +62,7 @@ class DangalPlayBaseIE(InfoExtractor): f'{self._API_BASE}/{path}', display_id, note, fatal=fatal, headers={'Accept': 'application/json'}, query={ 'auth_token': self._AUTH_TOKEN, - 'region': 'IN', + 'region': self._REGION, **query, }) @@ -106,7 +116,7 @@ class DangalPlayIE(DangalPlayBaseIE): 'catalog_id': catalog_id, 'content_id': content_id, 'category': '', - 'region': 'IN', + 'region': self._REGION, 'auth_token': self._AUTH_TOKEN, 'id': self._OTV_USER_ID, 'md5': hashlib.md5(unhashed.encode()).hexdigest(), @@ -129,11 +139,14 @@ class DangalPlayIE(DangalPlayBaseIE): except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 422: error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {} - if error_info.get('code') == '1016': + error_code = error_info.get('code') + if error_code == '1016': self.raise_login_required( f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None) - elif msg := error_info.get('message'): - raise ExtractorError(msg) + elif error_code == '4028': + self.raise_login_required( + f'Your login region is unspecified or incorrect. 
{self._LOGIN_HINT}', method=None) + raise ExtractorError(join_nonempty(error_code, error_info.get('message'), delim=': ')) raise m3u8_url = traverse_obj(details, ( diff --git a/plugins/youtube_download/yt_dlp/extractor/dbtv.py b/plugins/youtube_download/yt_dlp/extractor/dbtv.py index 795fbac..ff93c64 100644 --- a/plugins/youtube_download/yt_dlp/extractor/dbtv.py +++ b/plugins/youtube_download/yt_dlp/extractor/dbtv.py @@ -12,13 +12,13 @@ class DBTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', 'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f', - 'thumbnail': r're:https?://.*\.jpg', + 'thumbnail': r're:https?://.+\.jpg', 'upload_date': '20160916', 'duration': 69, 'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ', 'uploader': 'Dagbladet', }, - 'add_ie': ['Youtube'], + 'skip': 'Invalid URL', }, { 'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false', 'only_matching': True, @@ -26,6 +26,20 @@ class DBTVIE(InfoExtractor): 'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + # FIXME: Embed detection + 'url': 'https://www.dagbladet.no/nyheter/rekordstort-russisk-angrep/83325693', + 'info_dict': { + 'id': '1HW7fYry', + 'ext': 'mp4', + 'title': 'Putin taler - så skjer dette', + 'description': 'md5:3e8bacee33de861a9663d9a3fcc54e5e', + 'display_id': 'putin-taler-sa-skjer-dette', + 'thumbnail': r're:https?://cdn\.jwplayer\.com/v2/media/.+', + 'timestamp': 1751043600, + 'upload_date': '20250627', + }, + }] def _real_extract(self, url): display_id, video_id = self._match_valid_url(url).groups() diff --git a/plugins/youtube_download/yt_dlp/extractor/digitalconcerthall.py b/plugins/youtube_download/yt_dlp/extractor/digitalconcerthall.py index 4c4fe47..be16f5e 100644 --- a/plugins/youtube_download/yt_dlp/extractor/digitalconcerthall.py +++ b/plugins/youtube_download/yt_dlp/extractor/digitalconcerthall.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, + determine_ext, jwt_decode_hs256, parse_codecs, try_get, @@ -222,11 +223,18 @@ class DigitalConcertHallIE(InfoExtractor): raise formats = [] - for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})): - formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) - for fmt in formats: - if fmt.get('format_note') and fmt.get('vcodec') == 'none': - fmt.update(parse_codecs(fmt['format_note'])) + for fmt_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})): + ext = determine_ext(fmt_url) + if ext == 'm3u8': + fmts = self._extract_m3u8_formats(fmt_url, video_id, 'mp4', m3u8_id='hls', fatal=False) + for fmt in fmts: + if fmt.get('format_note') and fmt.get('vcodec') == 'none': + fmt.update(parse_codecs(fmt['format_note'])) + formats.extend(fmts) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats(fmt_url, video_id, mpd_id='dash', fatal=False)) + else: + self.report_warning(f'Skipping unsupported format extension "{ext}"') yield { 'id': video_id, diff --git a/plugins/youtube_download/yt_dlp/extractor/douyutv.py b/plugins/youtube_download/yt_dlp/extractor/douyutv.py index e36eac9..68ace24 100644 --- a/plugins/youtube_download/yt_dlp/extractor/douyutv.py +++ b/plugins/youtube_download/yt_dlp/extractor/douyutv.py @@ -206,7 +206,7 @@ class DouyuTVIE(DouyuBaseIE): 'is_live': True, **traverse_obj(room, 
{ 'display_id': ('url', {str}, {lambda i: i[1:]}), - 'title': ('room_name', {unescapeHTML}), + 'title': ('room_name', {str}, {unescapeHTML}), 'description': ('show_details', {str}), 'uploader': ('nickname', {str}), 'thumbnail': ('room_src', {url_or_none}), diff --git a/plugins/youtube_download/yt_dlp/extractor/dreisat.py b/plugins/youtube_download/yt_dlp/extractor/dreisat.py index edd66e4..fb8a8e8 100644 --- a/plugins/youtube_download/yt_dlp/extractor/dreisat.py +++ b/plugins/youtube_download/yt_dlp/extractor/dreisat.py @@ -64,7 +64,7 @@ class DreiSatIE(ZDFBaseIE): 'title': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1', 'description': 'md5:bae51bfc22f15563ce3acbf97d2e8844', 'duration': 5399.0, - 'thumbnail': 'https://www.3sat.de/assets/buchmesse-kerkeling-100~original?cb=1743329640903', + 'thumbnail': 'https://www.3sat.de/assets/buchmesse-kerkeling-100~original?cb=1747256996338', 'chapters': 'count:24', 'episode': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1', 'episode_id': 'POS_1ef236cc-b390-401e-acd0-4fb4b04315fb', diff --git a/plugins/youtube_download/yt_dlp/extractor/dropout.py b/plugins/youtube_download/yt_dlp/extractor/dropout.py index a0d8aac..2cfb080 100644 --- a/plugins/youtube_download/yt_dlp/extractor/dropout.py +++ b/plugins/youtube_download/yt_dlp/extractor/dropout.py @@ -18,15 +18,15 @@ from ..utils import ( class DropoutIE(InfoExtractor): - _LOGIN_URL = 'https://www.dropout.tv/login' + _LOGIN_URL = 'https://watch.dropout.tv/login' _NETRC_MACHINE = 'dropout' - _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?:[^/]+/)*videos/(?P[^/]+)/?$' + _VALID_URL = r'https?://(?:watch\.)?dropout\.tv/(?:[^/?#]+/)*videos/(?P[^/?#]+)/?(?:[?#]|$)' _TESTS = [ { - 'url': 'https://www.dropout.tv/game-changer/season:2/videos/yes-or-no', + 'url': 'https://watch.dropout.tv/game-changer/season:2/videos/yes-or-no', 'note': 'Episode in a series', - 'md5': '5e000fdfd8d8fa46ff40456f1c2af04a', + 'md5': '4b76963f904f8bc4ba22dcf0e66ada06', 'info_dict': { 'id': '738153', 'display_id': 'yes-or-no', @@ -45,35 +45,35 @@ class DropoutIE(InfoExtractor): 'uploader_url': 'https://vimeo.com/user80538407', 'uploader': 'OTT Videos', }, - 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest', 'Failed to parse XML: not well-formed'], }, { - 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1/videos/episode-1', + 'url': 'https://watch.dropout.tv/tablepop-presents-megadungeon-live/season:1/videos/enter-through-the-gift-shop', 'note': 'Episode in a series (missing release_date)', - 'md5': '712caf7c191f1c47c8f1879520c2fa5c', + 'md5': 'b08fb03050585ea25cd7ee092db9134c', 'info_dict': { - 'id': '320562', - 'display_id': 'episode-1', + 'id': '624270', + 'display_id': 'enter-through-the-gift-shop', 'ext': 'mp4', - 'title': 'The Beginning Begins', - 'description': 'The cast introduces their PCs, including a neurotic elf, a goblin PI, and a corn-worshipping cleric.', - 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/4421ed0d-f630-4c88-9004-5251b2b8adfa.jpg', - 'series': 'Dimension 20: Fantasy High', + 'title': 'Enter Through the Gift Shop', + 'description': 'A new adventuring party explores a gift shop and runs into a friendly orc -- and some angry goblins.', + 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/a1d876c3-3dee-4cd0-87c6-27a851b1d0ec.jpg', + 'series': 'TablePop Presents: MEGADUNGEON LIVE!', 'season_number': 1, 'season': 'Season 1', 
'episode_number': 1, - 'episode': 'The Beginning Begins', - 'duration': 6838, + 'episode': 'Enter Through the Gift Shop', + 'duration': 7101, 'uploader_id': 'user80538407', 'uploader_url': 'https://vimeo.com/user80538407', 'uploader': 'OTT Videos', }, - 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest', 'Failed to parse XML: not well-formed'], }, { - 'url': 'https://www.dropout.tv/videos/misfits-magic-holiday-special', + 'url': 'https://watch.dropout.tv/videos/misfits-magic-holiday-special', 'note': 'Episode not in a series', - 'md5': 'c30fa18999c5880d156339f13c953a26', + 'md5': '1e6428f7756b02c93b573d39ddd789fe', 'info_dict': { 'id': '1915774', 'display_id': 'misfits-magic-holiday-special', @@ -87,7 +87,7 @@ class DropoutIE(InfoExtractor): 'uploader_url': 'https://vimeo.com/user80538407', 'uploader': 'OTT Videos', }, - 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest', 'Failed to parse XML: not well-formed'], }, ] @@ -125,7 +125,7 @@ class DropoutIE(InfoExtractor): display_id = self._match_id(url) webpage = None - if self._get_cookies('https://www.dropout.tv').get('_session'): + if self._get_cookies('https://watch.dropout.tv').get('_session'): webpage = self._download_webpage(url, display_id) if not webpage or '
[^\/$&?#]+)(?:/?$|/season:(?P[0-9]+)/?$)' + _VALID_URL = r'https?://(?:watch\.)?dropout\.tv/(?P[^\/$&?#]+)(?:/?$|/season:(?P[0-9]+)/?$)' _TESTS = [ { - 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1', + 'url': 'https://watch.dropout.tv/dimension-20-fantasy-high/season:1', 'note': 'Multi-season series with the season in the url', 'playlist_count': 24, 'info_dict': { @@ -179,7 +179,7 @@ class DropoutSeasonIE(InfoExtractor): }, }, { - 'url': 'https://www.dropout.tv/dimension-20-fantasy-high', + 'url': 'https://watch.dropout.tv/dimension-20-fantasy-high', 'note': 'Multi-season series with the season not in the url', 'playlist_count': 24, 'info_dict': { @@ -188,7 +188,7 @@ class DropoutSeasonIE(InfoExtractor): }, }, { - 'url': 'https://www.dropout.tv/dimension-20-shriek-week', + 'url': 'https://watch.dropout.tv/dimension-20-shriek-week', 'note': 'Single-season series', 'playlist_count': 4, 'info_dict': { @@ -197,7 +197,7 @@ class DropoutSeasonIE(InfoExtractor): }, }, { - 'url': 'https://www.dropout.tv/breaking-news-no-laugh-newsroom/season:3', + 'url': 'https://watch.dropout.tv/breaking-news-no-laugh-newsroom/season:3', 'note': 'Multi-season series with season in the url that requires pagination', 'playlist_count': 25, 'info_dict': { diff --git a/plugins/youtube_download/yt_dlp/extractor/dtube.py b/plugins/youtube_download/yt_dlp/extractor/dtube.py index 0d87820..2c47de8 100644 --- a/plugins/youtube_download/yt_dlp/extractor/dtube.py +++ b/plugins/youtube_download/yt_dlp/extractor/dtube.py @@ -1,5 +1,4 @@ import json -import socket from .common import InfoExtractor from ..utils import ( @@ -56,7 +55,7 @@ class DTubeIE(InfoExtractor): try: self.to_screen(f'{video_id}: Checking {format_id} video format URL') self._downloader._opener.open(video_url, timeout=5).close() - except socket.timeout: + except TimeoutError: self.to_screen( f'{video_id}: {format_id} URL is invalid, skipping') continue diff --git a/plugins/youtube_download/yt_dlp/extractor/eagleplatform.py b/plugins/youtube_download/yt_dlp/extractor/eagleplatform.py deleted file mode 100644 index 685f8c0..0000000 --- a/plugins/youtube_download/yt_dlp/extractor/eagleplatform.py +++ /dev/null @@ -1,215 +0,0 @@ -import functools -import re - -from .common import InfoExtractor -from ..networking.exceptions import HTTPError -from ..utils import ( - ExtractorError, - int_or_none, - smuggle_url, - unsmuggle_url, - url_or_none, -) - - -class EaglePlatformIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - eagleplatform:(?P[^/]+):| - https?://(?P.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id= - ) - (?P\d+) - ''' - _EMBED_REGEX = [r']+src=(["\'])(?P(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1'] - _TESTS = [{ - # http://lenta.ru/news/2015/03/06/navalny/ - 'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201', - # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used - 'info_dict': { - 'id': '227304', - 'ext': 'mp4', - 'title': 'Навальный вышел на свободу', - 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 87, - 'view_count': int, - 'age_limit': 0, - }, - }, { - # http://muz-tv.ru/play/7129/ - # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true - 'url': 'eagleplatform:media.clipyou.ru:12820', - 'md5': '358597369cf8ba56675c1df15e7af624', - 'info_dict': { - 'id': '12820', - 'ext': 'mp4', - 'title': "'O Sole Mio", - 
'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 216, - 'view_count': int, - }, - 'skip': 'Georestricted', - }, { - # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/) - 'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306', - 'only_matching': True, - }] - - @classmethod - def _extract_embed_urls(cls, url, webpage): - add_referer = functools.partial(smuggle_url, data={'referrer': url}) - - res = tuple(super()._extract_embed_urls(url, webpage)) - if res: - return map(add_referer, res) - - PLAYER_JS_RE = r''' - <script[^>]+ - src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs) - .+? - ''' - # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/) - mobj = re.search( - rf'''(?xs) - {PLAYER_JS_RE} - <div[^>]+ - class=(?P<qclass>["\'])eagleplayer(?P=qclass)[^>]+ - data-id=["\'](?P<id>\d+) - ''', webpage) - if mobj is not None: - return [add_referer('eagleplatform:{host}:{id}'.format(**mobj.groupdict()))] - # Generalization of "Javascript code usage", "Combined usage" and - # "Usage without attaching to DOM" embeddings (see - # http://dultonmedia.github.io/eplayer/) - mobj = re.search( - r'''(?xs) - %s - - ''' % PLAYER_JS_RE, webpage) # noqa: UP031 - if mobj is not None: - return [add_referer('eagleplatform:{host}:{id}'.format(**mobj.groupdict()))] - - @staticmethod - def _handle_error(response): - status = int_or_none(response.get('status', 200)) - if status != 200: - raise ExtractorError(' '.join(response['errors']), expected=True) - - def _download_json(self, url_or_request, video_id, *args, **kwargs): - try: - response = super()._download_json( - url_or_request, video_id, *args, **kwargs) - except ExtractorError as ee: - if isinstance(ee.cause, HTTPError): - response = self._parse_json(ee.cause.response.read().decode('utf-8'), video_id) - self._handle_error(response) - raise - return response - - def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'): - return self._download_json(url_or_request, video_id, note)['data'][0] - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - - mobj = self._match_valid_url(url) - host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id') - - headers = {} - query = { - 'id': video_id, - } - - referrer = smuggled_data.get('referrer') - if referrer: - headers['Referer'] = referrer - query['referrer'] = referrer - - player_data = self._download_json( - f'http://{host}/api/player_data', video_id, - headers=headers, query=query) - - media = player_data['data']['playlist']['viewports'][0]['medialist'][0] - - title = media['title'] - description = media.get('description') - thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:') - duration = int_or_none(media.get('duration')) - view_count = int_or_none(media.get('views')) - - age_restriction = media.get('age_restriction') - age_limit = None - if age_restriction: - age_limit = 0 if age_restriction == 'allow_all' else 18 - - secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:') - - formats = [] - - m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON') - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False) - formats.extend(m3u8_formats) - - m3u8_formats_dict = {} - for f in m3u8_formats: - if f.get('height') is not None: - m3u8_formats_dict[f['height']] = f - - mp4_data = self._download_json( - # 
Secure mp4 URL is constructed according to Player.prototype.mp4 from - # http://lentaru.media.eagleplatform.com/player/player.js - re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8), - video_id, 'Downloading mp4 JSON', fatal=False) - if mp4_data: - for format_id, format_url in mp4_data.get('data', {}).items(): - if not url_or_none(format_url): - continue - height = int_or_none(format_id) - if height is not None and m3u8_formats_dict.get(height): - f = m3u8_formats_dict[height].copy() - f.update({ - 'format_id': f['format_id'].replace('hls', 'http'), - 'protocol': 'http', - }) - else: - f = { - 'format_id': f'http-{format_id}', - 'height': int_or_none(format_id), - } - f['url'] = format_url - formats.append(f) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'view_count': view_count, - 'age_limit': age_limit, - 'formats': formats, - } - - -class ClipYouEmbedIE(InfoExtractor): - _VALID_URL = False - - @classmethod - def _extract_embed_urls(cls, url, webpage): - mobj = re.search( - r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage) - if mobj is not None: - yield smuggle_url('eagleplatform:{host}:{id}'.format(**mobj.groupdict()), {'referrer': url}) diff --git a/plugins/youtube_download/yt_dlp/extractor/ertgr.py b/plugins/youtube_download/yt_dlp/extractor/ertgr.py index 6f3f60f..ba68d53 100644 --- a/plugins/youtube_download/yt_dlp/extractor/ertgr.py +++ b/plugins/youtube_download/yt_dlp/extractor/ertgr.py @@ -64,14 +64,12 @@ class ERTFlixCodenameIE(ERTFlixBaseIE): _VALID_URL = r'ertflix:(?P<id>[\w-]+)' _TESTS = [{ 'url': 'ertflix:monogramma-praxitelis-tzanoylinos', - 'md5': '5b9c2cd171f09126167e4082fc1dd0ef', 'info_dict': { 'id': 'monogramma-praxitelis-tzanoylinos', 'ext': 'mp4', - 'title': 'md5:ef0b439902963d56c43ac83c3f41dd0e', + 'title': 'monogramma-praxitelis-tzanoylinos', }, - }, - ] + }] def _extract_formats_and_subs(self, video_id): media_info = self._call_api(video_id, codename=video_id) @@ -131,13 +129,14 @@ class ERTFlixIE(ERTFlixBaseIE): 'duration': 3166, 'age_limit': 8, }, + 'skip': 'Invalid URL', }, { 'url': 'https://www.ertflix.gr/series/ser.3448-monogramma', 'info_dict': { 'id': 'ser.3448', 'age_limit': 8, - 'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.', - 'title': 'Μονόγραμμα', + 'title': 'Monogramma', + 'description': 'md5:e30cc640e6463da87f210a8ed10b2439', }, 'playlist_mincount': 64, }, { @@ -145,28 +144,28 @@ 'info_dict': { 'id': 'ser.3448', 'age_limit': 8, - 'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.', - 'title': 'Μονόγραμμα', + 'title': 'Monogramma', + 'description': 'md5:e30cc640e6463da87f210a8ed10b2439', }, - 'playlist_count': 22, + 'playlist_mincount': 66, }, { 'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1&season=2021%20-%202022', 'info_dict': { 'id': 'ser.3448', 'age_limit': 8, - 'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.', - 'title': 'Μονόγραμμα', + 'title': 'Monogramma', + 'description': 'md5:e30cc640e6463da87f210a8ed10b2439', }, - 'playlist_mincount': 36, + 'playlist_mincount': 25, }, { 'url': 'https://www.ertflix.gr/series/ser.164991-to-diktuo-1?season=1-9', 'info_dict': { 'id': 'ser.164991', 'age_limit': 8, - 'description': 'Η πρώτη ελληνική εκπομπή με θεματολογία αποκλειστικά γύρω από το ίντερνετ.', - 'title': 'Το δίκτυο', + 'title': 'The Network', + 'description': 'The first Greek show featuring topics exclusively 
around the internet.', }, - 'playlist_mincount': 9, + 'playlist_mincount': 0, }, { 'url': 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari', 'only_matching': True, @@ -282,6 +281,16 @@ class ERTWebtvEmbedIE(InfoExtractor): 'ext': 'mp4', 'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg', }, + 'skip': 'Invalid URL', + }] + _WEBPAGE_TESTS = [{ + 'url': 'https://www.ertnews.gr/video/manolis-goyalles-o-anthropos-piso-apo-ti-diadiktyaki-vasilopita/', + 'info_dict': { + 'id': '2022/tv/news-themata-ianouarios/20220114-apotis6-gouales-pita.mp4', + 'ext': 'mp4', + 'title': 'VOD - 2022/tv/news-themata-ianouarios/20220114-apotis6-gouales-pita.mp4', + 'thumbnail': r're:https?://www\.ert\.gr/themata/photos/.+\.jpg', + }, }] def _real_extract(self, url): diff --git a/plugins/youtube_download/yt_dlp/extractor/espn.py b/plugins/youtube_download/yt_dlp/extractor/espn.py index 552f9af..ceba024 100644 --- a/plugins/youtube_download/yt_dlp/extractor/espn.py +++ b/plugins/youtube_download/yt_dlp/extractor/espn.py @@ -5,7 +5,6 @@ import urllib.parse from .adobepass import AdobePassIE from .common import InfoExtractor -from .once import OnceIE from ..utils import ( determine_ext, dict_get, @@ -16,7 +15,7 @@ from ..utils import ( ) -class ESPNIE(OnceIE): +class ESPNIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: @@ -131,9 +130,7 @@ class ESPNIE(OnceIE): return format_urls.add(source_url) ext = determine_ext(source_url) - if OnceIE.suitable(source_url): - formats.extend(self._extract_once_formats(source_url)) - elif ext == 'smil': + if ext == 'smil': formats.extend(self._extract_smil_formats( source_url, video_id, fatal=False)) elif ext == 'f4m': @@ -332,6 +329,7 @@ class WatchESPNIE(AdobePassIE): }] _API_KEY = 'ZXNwbiZicm93c2VyJjEuMC4w.ptUt7QxsteaRruuPmGZFaJByOoqKvDP2a5YkInHrc7c' + _SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIyZGJmZWM4My03OWE1LTQyNzEtYTVmZC04NTZjYTMxMjRjNjMiLCJuYmYiOjE1NDAyMTI3NjEsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTQwMjEyNzYxfQ.yaK3r4AI2uLVvsyN1GLzqzgzRlxMPtasSaiYYBV0wIstqih5tvjTmeoLmi8Xy9Kp_U7Md-bOffwiyK3srHkpUkhhwXLH2x6RPjmS1tPmhaG7-3LBcHTf2ySPvXhVf7cN4ngldawK4tdtLtsw6rF_JoZE2yaC6XbS2F51nXSFEDDnOQWIHEQRG3aYAj-38P2CLGf7g-Yfhbp5cKXeksHHQ90u3eOO4WH0EAjc9oO47h33U8KMEXxJbvjV5J8Va2G2fQSgLDZ013NBI3kQnE313qgqQh2feQILkyCENpB7g-TVBreAjOaH1fU471htSoGGYepcAXv-UDtpgitDiLy7CQ' def _call_bamgrid_api(self, path, video_id, payload=None, headers={}): if 'Authorization' not in headers: @@ -408,8 +406,8 @@ class WatchESPNIE(AdobePassIE): # TV Provider required else: - resource = self._get_mvpd_resource('ESPN', video_data['name'], video_id, None) - auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource).encode() + resource = self._get_mvpd_resource('espn1', video_data['name'], video_id, None) + auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource, self._SOFTWARE_STATEMENT).encode() asset = self._download_json( f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb', diff --git a/plugins/youtube_download/yt_dlp/extractor/facebook.py b/plugins/youtube_download/yt_dlp/extractor/facebook.py index 24ecb03..2c35013 100644 --- a/plugins/youtube_download/yt_dlp/extractor/facebook.py +++ b/plugins/youtube_download/yt_dlp/extractor/facebook.py @@ -81,13 +81,14 @@ class FacebookIE(InfoExtractor): 'description': 'md5:34675bda53336b1d16400265c2bb9b3b', 'uploader': 'RADIO KICKS FM', 'upload_date': '20230818', + 'thumbnail': 
r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'timestamp': 1692346159, - 'thumbnail': r're:^https?://.*', 'uploader_id': '100063551323670', 'duration': 3133.583, 'view_count': int, 'concurrent_view_count': 0, }, + 'expected_warnings': ['Cannot parse data'], }, { 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', 'md5': '6a40d33c0eccbb1af76cf0485a052659', @@ -106,17 +107,18 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '274175099429670', 'ext': 'mp4', - 'title': 'Asif', + 'title': '119 reactions · 1.4K shares | Asif Nawab Butt on Reels', 'description': '', 'uploader': 'Asif Nawab Butt', 'upload_date': '20140506', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'timestamp': 1399398998, - 'thumbnail': r're:^https?://.*', - 'uploader_id': 'pfbid05AzrFTXgY37tqwaSgbFTTEpCLBjjEJHkigogwGiRPtKEpAsJYJpzE94H1RxYXWEtl', + 'uploader_id': 'pfbid028xue38TBXRyNbiqBSV2LFs3QK3yopvKjupbqFoL6U9SKbx4p2SMdJjQSBvnjsHGWl', 'duration': 131.03, 'concurrent_view_count': int, 'view_count': int, }, + 'expected_warnings': ['Cannot parse data'], }, { 'note': 'Video with DASH manifest', 'url': 'https://www.facebook.com/video.php?v=957955867617029', @@ -158,7 +160,7 @@ class FacebookIE(InfoExtractor): 'id': '10153664894881749', 'ext': 'mp4', 'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'timestamp': 1456259628, 'upload_date': '20160223', 'uploader': 'Barack Obama', @@ -168,7 +170,7 @@ class FacebookIE(InfoExtractor): # have 1080P, but only up to 720p in swf params # data.video.story.attachments[].media 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', - 'md5': '1659aa21fb3dd1585874f668e81a72c8', + 'md5': '70b82ebf5f0e9b91b2a49d3db3563611', 'info_dict': { 'id': '10155529876156509', 'ext': 'mp4', @@ -177,7 +179,7 @@ class FacebookIE(InfoExtractor): 'timestamp': 1477818095, 'upload_date': '20161030', 'uploader': 'CNN', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'view_count': int, 'uploader_id': '100059479812265', 'concurrent_view_count': int, @@ -198,13 +200,11 @@ class FacebookIE(InfoExtractor): 'uploader': 'Yaroslav Korpan', 'uploader_id': 'pfbid06AScABAWcW91qpiuGrLt99Ef9tvwHoXP6t8KeFYEqkSfreMtfa9nTveh8b2ZEVSWl', 'concurrent_view_count': int, - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'view_count': int, 'duration': 11736.446, }, - 'params': { - 'skip_download': True, - }, + 'skip': 'Invalid URL', }, { # FIXME: Cannot parse data error 'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471', @@ -215,7 +215,7 @@ class FacebookIE(InfoExtractor): 'timestamp': 1477305000, 'upload_date': '20161024', 'uploader': 'La Guía Del Varón', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', }, 'skip': 'Requires logging in', }, { @@ -244,9 +244,10 @@ class FacebookIE(InfoExtractor): 'upload_date': '20171124', 'uploader': 'Vickie Gentry', 'uploader_id': 'pfbid0FkkycT95ySNNyfCw4Cho6u5G7WbbZEcxT496Hq8rtx1K3LcTCATpR3wnyYhmyGC5l', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'duration': 148.224, }, + 'skip': 'Invalid URL', }, { # data.node.comet_sections.content.story.attachments[].styles.attachment.media 'url': 
'https://www.facebook.com/attn/posts/pfbid0j1Czf2gGDVqeQ8KiMLFm3pWN8GxsQmeRrVhimWDzMuKQoR8r4b1knNsejELmUgyhl', @@ -260,7 +261,7 @@ class FacebookIE(InfoExtractor): 'duration': 132.675, 'uploader_id': '100064451419378', 'view_count': int, - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'timestamp': 1701975646, }, }, { @@ -271,9 +272,9 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': 'Lela Evans', 'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'uploader': 'Lela Evans', - 'uploader_id': 'pfbid0swT2y7t6TAsZVBvcyeYPdhTMefGaS26mzUwML3vd1ma6ndGZKxsyS4Ssu3jitZLXl', + 'uploader_id': 'pfbid02wjMpknobSMnyynK3TNKN4Ww1StcpAKXgowqTyge3bz7LwHZMQ68uiXzzbu7xeryBl', 'upload_date': '20231228', 'timestamp': 1703804085, 'duration': 394.347, @@ -326,28 +327,27 @@ class FacebookIE(InfoExtractor): 'uploader_id': '100066514874195', 'duration': 4524.001, 'view_count': int, - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'concurrent_view_count': int, }, - 'params': { - 'skip_download': True, - }, + 'params': {'skip_download': True}, }, { # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media 'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/', 'info_dict': { 'id': '106560053808006', 'ext': 'mp4', - 'title': 'Josef', - 'thumbnail': r're:^https?://.*', + 'title': 'Josef Novak on Reels', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'concurrent_view_count': int, - 'uploader_id': 'pfbid02gpfwRM2XvdEJfsERupwQiNmBiDArc38RMRYZnap372q6Vs7MtFTVy72mmFWpJBTKl', + 'uploader_id': 'pfbid0cjYJYXpePWqhZ9DgpB6gKXrN2q3obwducdKm4wT7K5nkhbfKg5cneocYbsdaji7fl', 'timestamp': 1549275572, 'duration': 3.283, 'uploader': 'Josef Novak', 'description': '', 'upload_date': '20190204', }, + 'expected_warnings': ['Cannot parse data'], }, { # data.video.story.attachments[].media 'url': 'https://www.facebook.com/watch/?v=647537299265662', @@ -406,7 +406,7 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': 'ANALISI IN CAMPO OSCURO " Coaguli nel sangue dei vaccinati"', 'description': 'Other event by Comitato Liberi Pensatori on Tuesday, October 18 2022', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'uploader': 'Comitato Liberi Pensatori', 'uploader_id': '100065709540881', }, @@ -414,6 +414,56 @@ class FacebookIE(InfoExtractor): 'url': 'https://www.facebook.com/groups/1513990329015294/posts/d41d8cd9/2013209885760000/?app=fbl', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + #