diff --git a/plugins/disk_usage/plugin.py b/plugins/disk_usage/plugin.py
index ec1c39d..65b9fa7 100644
--- a/plugins/disk_usage/plugin.py
+++ b/plugins/disk_usage/plugin.py
@@ -76,9 +76,10 @@ class Plugin(PluginBase):
        path    = self._fm_state.tab.get_current_directory()
        # NOTE: -h = human readable, -d = depth assigned to 1
        command = ["du", "-h", "-d", "1", path]
-       proc    = subprocess.Popen(command, stdout=subprocess.PIPE)
+       proc    = subprocess.Popen(command, stdout=subprocess.PIPE, encoding="utf-8")
        raw_data = proc.communicate()[0]
-       data     = raw_data.decode("utf-8").strip()  # NOTE: Will return data AFTER completion (if any)
+       # NOTE: communicate() returns data AFTER the process completes (if any)
+       data     = raw_data.strip()
        parts    = data.split("\n")

        # NOTE: Last entry is the current dir. Move to top of list and pop off...
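Passing `encoding="utf-8"` switches `Popen` into text mode, so `communicate()` hands back `str` and the separate `decode()` step disappears. A minimal standalone sketch of the pattern (the `du` invocation mirrors the plugin; the path is a placeholder):

```python
import subprocess

# Text-mode Popen: with encoding= (or text=True), communicate() returns
# str instead of bytes, so no manual .decode("utf-8") is needed.
proc = subprocess.Popen(["du", "-h", "-d", "1", "/tmp"],
                        stdout=subprocess.PIPE, encoding="utf-8")
data = proc.communicate()[0].strip()  # blocks until du exits

for line in data.split("\n"):
    size, path = line.split("\t", 1)  # du emits "size<TAB>path"
    print(size, path)
```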
diff --git a/plugins/searcher/mixins/file_search_mixin.py b/plugins/searcher/mixins/file_search_mixin.py
index a72be13..6837b20 100644
--- a/plugins/searcher/mixins/file_search_mixin.py
+++ b/plugins/searcher/mixins/file_search_mixin.py
@@ -4,6 +4,7 @@ import subprocess
 import signal
 import json
 import shlex
+from datetime import datetime

 # Lib imports
 import gi
@@ -38,8 +39,10 @@ class FileSearchMixin:
     @daemon_threaded
     def _handle_find_file_query(self, widget=None, eve=None, query=None):
         # NOTE: Freeze IPC consumption
-        self.pause_fifo_update = True
-        self.search_query = ""
+        self.pause_fifo_update  = True
+        self.search_query       = ""
+        dt = datetime.now()
+        self.fsearch_time_stamp = datetime.timestamp(dt)  # NOTE: Stamp when this query was issued

         # NOTE: Kill the former process
         if self._list_proc:
diff --git a/plugins/searcher/mixins/grep_search_mixin.py b/plugins/searcher/mixins/grep_search_mixin.py
index 9f7f1d9..3174cd0 100644
--- a/plugins/searcher/mixins/grep_search_mixin.py
+++ b/plugins/searcher/mixins/grep_search_mixin.py
@@ -5,6 +5,7 @@ import subprocess
 import signal
 import json
 import shlex
+from datetime import datetime

 libgcc_s = ctypes.CDLL('libgcc_s.so.1')

 # Lib imports
@@ -42,6 +43,8 @@ class GrepSearchMixin:
         # NOTE: Freeze IPC consumption
         self.pause_fifo_update = True
         self.grep_query        = ""
+        dt = datetime.now()
+        self.grep_time_stamp = datetime.timestamp(dt)  # NOTE: Stamp when this query was issued

         # NOTE: Kill the former process
         if self._grep_proc:
diff --git a/plugins/searcher/plugin.py b/plugins/searcher/plugin.py
index 774a113..391fe1b 100644
--- a/plugins/searcher/plugin.py
+++ b/plugins/searcher/plugin.py
@@ -37,23 +37,25 @@ class Plugin(IPCServer, FileSearchMixin, GrepSearchMixin, PluginBase):
     def __init__(self):
         super().__init__()

-        self.path = os.path.dirname(os.path.realpath(__file__))
-        self.name = "Search"  # NOTE: Need to remove after establishing private bidirectional 1-1 message bus
-                              # where self.name should not be needed for message comms
-        self._GLADE_FILE = f"{self.path}/search_dialog.glade"
+        self.path        = os.path.dirname(os.path.realpath(__file__))
+        self.name        = "Search"  # NOTE: Need to remove after establishing private bidirectional 1-1 message bus
+                                     # where self.name should not be needed for message comms
+        self._GLADE_FILE = f"{self.path}/search_dialog.glade"

-        self._search_dialog = None
-        self._active_path = None
-        self.file_list_parent = None
-        self.grep_list_parent = None
-        self._file_list = None
-        self._grep_list = None
-        self._grep_proc = None
-        self._list_proc = None
-        self.pause_fifo_update = False
         self.update_list_ui_buffer = ()
-        self.grep_query = ""
-        self.search_query = ""
+        self._search_dialog     = None
+        self._active_path       = None
+        self.file_list_parent   = None
+        self.grep_list_parent   = None
+        self._file_list         = None
+        self._grep_list         = None
+        self._grep_proc         = None
+        self._list_proc         = None
+        self.pause_fifo_update  = False
+        self.grep_time_stamp    = None
+        self.fsearch_time_stamp = None
+        self.grep_query         = ""
+        self.search_query       = ""

     def run(self):
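Both mixins now stamp the moment a query is issued, and `plugin.py` seeds `grep_time_stamp`/`fsearch_time_stamp` to `None`. The point of the stamp: a result is only trustworthy if the worker that produced it was launched after the most recent query. A minimal sketch of that ordering check, with hypothetical names (`latest_query_ts`, `on_result`) standing in for the plugin's real state:

```python
from datetime import datetime

latest_query_ts = None  # refreshed every time the user issues a query

def on_new_query():
    global latest_query_ts
    latest_query_ts = datetime.timestamp(datetime.now())

def on_result(worker_launch_ts: float, payload: str):
    # A worker serving the current query launches *after* the query was
    # stamped; output from anything older is stale and gets dropped.
    if latest_query_ts is not None and worker_launch_ts > latest_query_ts:
        print("fresh:", payload)
```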
diff --git a/plugins/searcher/utils/ipc_server.py b/plugins/searcher/utils/ipc_server.py
index e4ca2aa..b8b0b14 100644
--- a/plugins/searcher/utils/ipc_server.py
+++ b/plugins/searcher/utils/ipc_server.py
@@ -60,14 +60,22 @@
            msg = conn.recv()

            if "SEARCH|" in msg:
-               file = msg.split("SEARCH|")[1].strip()
-               if file:
-                   GLib.idle_add(self._load_file_ui, file, priority=GLib.PRIORITY_LOW)
+               try:
+                   ts, file  = msg.split("SEARCH|")[1].strip().split("|", 1)
+                   timestamp = float(ts)
+                   # NOTE: A stamp of None (no active query) raises here and the message is dropped.
+                   if timestamp > self.fsearch_time_stamp and file:
+                       GLib.idle_add(self._load_file_ui, file, priority=GLib.PRIORITY_LOW)
+               except Exception:
+                   ...

            if "GREP|" in msg:
-               data = msg.split("GREP|")[1].strip()
-               if data:
-                   GLib.idle_add(self._load_grep_ui, data, priority=GLib.PRIORITY_LOW)
+               try:
+                   ts, data  = msg.split("GREP|")[1].strip().split("|", 1)
+                   timestamp = float(ts)
+                   if timestamp > self.grep_time_stamp and data:
+                       GLib.idle_add(self._load_grep_ui, data, priority=GLib.PRIORITY_LOW)
+               except Exception:
+                   ...

            conn.close()
diff --git a/plugins/searcher/utils/search.py b/plugins/searcher/utils/search.py
index 5d833be..a61eb12 100755
--- a/plugins/searcher/utils/search.py
+++ b/plugins/searcher/utils/search.py
@@ -5,11 +5,11 @@ import os
 import traceback
 import argparse
-import threading
+import subprocess
 import json
 import base64
 import time
-import pickle
+from datetime import datetime

 from setproctitle import setproctitle
 from multiprocessing.connection import Client
@@ -26,18 +26,10 @@ _ipc_authkey = b'' + bytes(f'solarfm-search_grep-ipc', 'utf-8')
 filter = (".cpp", ".css", ".c", ".go", ".html", ".htm", ".java", ".js", ".json", ".lua", ".md", ".py", ".rs", ".toml", ".xml", ".pom") + \
          (".txt", ".text", ".sh", ".cfg", ".conf", ".log")

-# NOTE: Threads WILL NOT die with parent's destruction.
-def threaded(fn):
-    def wrapper(*args, **kwargs):
-        threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=False).start()
-    return wrapper
-
-# NOTE: Threads WILL die with parent's destruction.
-def daemon_threaded(fn):
-    def wrapper(*args, **kwargs):
-        threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=True).start()
-    return wrapper
+# NOTE: Timestamp of when this process launched. The IPC server compares it
+#       against the newest query's stamp so results from stale processes that
+#       a newer call didn't fully kill can be discarded.
+dt = datetime.now()
+ts = datetime.timestamp(dt)


 def send_ipc_message(message) -> None:
@@ -55,84 +47,51 @@ def file_search(path, query):
             for file in _files:
                 if query in file.lower():
                     target = os.path.join(_path, file)
-                    data = f"SEARCH|{json.dumps([target, file])}"
+                    data   = f"SEARCH|{ts}|{json.dumps([target, file])}"
                     send_ipc_message(data)
     except Exception as e:
         print("Couldn't traverse to path. Might be permissions related...")
         traceback.print_exc()

-def _search_for_string(file, query):
-    b64_file = base64.urlsafe_b64encode(file.encode('utf-8')).decode('utf-8')
-    grep_result_set = {}
-    padding = 15
-    with open(file, 'rb') as fp:
-        # NOTE: I know there's an issue if there's a very large file with content
-        #       all on one line will lower and dupe it. And, yes, it will only
-        #       return one instance from the file.
-        try:
-            for i, raw in enumerate(fp):
-                line = None
-                llower = raw.lower()
-                if not query in llower:
-                    continue
+def grep_search(target=None, query=None):
+    if not query or not target:
+        return

-                if len(raw) > 72:
-                    start  = 0
-                    end    = len(raw) - 1
-                    index  = llower.index(query)
-                    sindex = llower.index(query) - 15 if index >= 15 else abs(start - index) - index
-                    eindex = sindex + 15 if end > (index + 15) else abs(index - end) + index
-                    line   = raw[sindex:eindex]
-                else:
-                    line = raw
+    # NOTE: -n = provide line numbers, -R = search recursively in the given target,
+    #       -i = case insensitive, -F = no regex parsing (treat the query as a raw string)
+    command    = ["grep", "-n", "-R", "-i", "-F", query, target]
+    proc       = subprocess.Popen(command, stdout=subprocess.PIPE, encoding="utf-8")
+    raw_data   = proc.communicate()[0].strip()  # NOTE: Will return data AFTER completion (if any)
+    proc_data  = raw_data.split("\n")
+    collection = {}

-                b64_line = base64.urlsafe_b64encode(line).decode('utf-8')
-                if f"{b64_file}" in grep_result_set.keys():
-                    grep_result_set[f"{b64_file}"][f"{i+1}"] = b64_line
-                else:
-                    grep_result_set[f"{b64_file}"] = {}
-                    grep_result_set[f"{b64_file}"] = {f"{i+1}": b64_line}
+    for line in proc_data:
+        try:
+            file, line_no, data = line.split(":", 2)
+        except ValueError:
+            continue  # NOTE: Empty output, or a "Binary file ... matches" line

-        except Exception as e:
-            ...
+        b64_file = base64.urlsafe_b64encode(file.encode('utf-8')).decode('utf-8')
+        b64_data = base64.urlsafe_b64encode(data.encode('utf-8')).decode('utf-8')

-    try:
-        data = f"GREP|{json.dumps(grep_result_set)}"
-        send_ipc_message(data)
-    except Exception as e:
-        ...
-
-
-
-@daemon_threaded
-def _search_for_string_threaded(file, query):
-    _search_for_string(file, query)
-
-def grep_search(path, query):
+        if b64_file in collection:
+            collection[b64_file][f"{line_no}"] = b64_data
+        else:
+            collection[b64_file] = {f"{line_no}": b64_data}

     try:
-        for file in os.listdir(path):
-            target = os.path.join(path, file)
-            if os.path.isdir(target):
-                grep_search(target, query)
-            else:
-                if target.lower().endswith(filter):
-                    size = os.path.getsize(target)
-                    if not size > 5000:
-                        _search_for_string(target, query)
-                    else:
-                        _search_for_string_threaded(target, query)
-
+        data = f"GREP|{ts}|{json.dumps(collection, separators=(',', ':'), indent=4)}"
+        send_ipc_message(data)
     except Exception as e:
-        print("Couldn't traverse to path. Might be permissions related...")
-        traceback.print_exc()
+        ...
+
+    collection = {}
+

 def search(args):
     if args.type == "file_search":
         file_search(args.dir, args.query.lower())

     if args.type == "grep_search":
-        grep_search(args.dir, args.query.lower().encode("utf-8"))
+        grep_search(args.dir, args.query.encode("utf-8"))


 if __name__ == "__main__":
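With the timestamp spliced in, the wire format becomes `TAG|<launch timestamp>|<JSON payload>`, where grep payloads map urlsafe-base64 file paths to `{line_no: base64 text}` dicts. A small decoding sketch against a hand-built message (the timestamp and path are made-up example values):

```python
import base64
import json

def b64(text: str) -> str:
    return base64.urlsafe_b64encode(text.encode("utf-8")).decode("utf-8")

# Hand-built message shaped like grep_search() emits: tag, launch
# timestamp, then JSON keyed by base64 path -> {line_no: base64 text}.
payload = json.dumps({b64("/tmp/notes.txt"): {"3": b64("hello world")}})
msg = f"GREP|1644379651.52|{payload}"

ts, body = msg.split("GREP|", 1)[1].split("|", 1)
for b64_path, hits in json.loads(body).items():
    path = base64.urlsafe_b64decode(b64_path).decode("utf-8")
    for line_no, b64_text in hits.items():
        text = base64.urlsafe_b64decode(b64_text).decode("utf-8")
        print(f"{path}:{line_no}: {text} (worker launched at {float(ts)})")
```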
diff --git a/plugins/trasher/plugin.py b/plugins/trasher/plugin.py
index c0e4ca9..e1d8493 100644
--- a/plugins/trasher/plugin.py
+++ b/plugins/trasher/plugin.py
@@ -97,7 +97,6 @@ class Plugin(PluginBase):
         self.empty.hide()

     def delete_files(self, widget = None, eve = None):
-        self._event_system.emit("do_hide_context_menu")
         self._event_system.emit("get_current_state")
         state = self._fm_state
         uris  = state.selected_files
@@ -121,23 +120,19 @@ class Plugin(PluginBase):
                 break

     def trash_files(self, widget = None, eve = None, verbocity = False):
-        self._event_system.emit("do_hide_context_menu")
         self._event_system.emit("get_current_state")
         state = self._fm_state
         for uri in state.selected_files:
             self.trashman.trash(uri, verbocity)

     def restore_trash_files(self, widget = None, eve = None, verbocity = False):
-        self._event_system.emit("do_hide_context_menu")
         self._event_system.emit("get_current_state")
         state = self._fm_state
         for uri in state.selected_files:
             self.trashman.restore(filename=uri.split("/")[-1], verbose = verbocity)

     def empty_trash(self, widget = None, eve = None, verbocity = False):
-        self._event_system.emit("do_hide_context_menu")
         self.trashman.empty(verbose = verbocity)

     def go_to_trash(self, widget = None, eve = None, verbocity = False):
-        self._event_system.emit("do_hide_context_menu")
         self._event_system.emit("go_to_path", self.trash_files_path)
diff --git a/plugins/youtube_download/download.sh b/plugins/youtube_download/download.sh
index 073b354..9ff7786 100755
--- a/plugins/youtube_download/download.sh
+++ b/plugins/youtube_download/download.sh
@@ -10,8 +10,10 @@ function main() {
    cd "$(dirname "")"
    echo "Working Dir: " $(pwd)

-    LINK=`xclip -selection clipboard -o`
-    yt-dlp --write-sub --embed-sub --sub-langs en -o "${1}/%(title)s.%(ext)s" "${LINK}"
+    # NOTE: Keep reading the target link from the clipboard; the vendored
+    #       yt_dlp call below still expects it in ${LINK}.
+    LINK=$(xclip -selection clipboard -o)
+
+    python "${HOME}/.config/solarfm/plugins/youtube_download/yt_dlp/__main__.py" \
+           --cookies-from-browser firefox --write-sub --embed-sub --sub-langs en \
+           -o "${1}/%(title)s.%(ext)s" "${LINK}"
 }

 main "$@";
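The script now bypasses whatever `yt-dlp` happens to be on `PATH` and runs the copy vendored with the plugin. A hedged sketch of the same call from Python, assuming the install location used above; invoking the package with `-m` lets Python resolve `yt_dlp`'s imports instead of executing `__main__.py` by file path (the output directory and URL below are placeholders):

```python
import os
import subprocess

plugin_dir = os.path.expanduser("~/.config/solarfm/plugins/youtube_download")
env = dict(os.environ, PYTHONPATH=plugin_dir)  # expose the vendored package

# Flags mirror download.sh; swap in a real output path and video URL.
subprocess.run(
    ["python", "-m", "yt_dlp",
     "--cookies-from-browser", "firefox",
     "--write-sub", "--embed-sub", "--sub-langs", "en",
     "-o", "/tmp/%(title)s.%(ext)s",
     "https://example.com/watch?v=placeholder"],
    env=env, check=False,
)
```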
diff --git a/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/AUTHORS b/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/AUTHORS
new file mode 100644
index 0000000..e69de29
diff --git a/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/INSTALLER b/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/INSTALLER
new file mode 100644
index 0000000..a1b589e
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip
diff --git a/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/LICENSE b/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/LICENSE
new file mode 100644
index 0000000..68a49da
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/LICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <https://unlicense.org/>
diff --git a/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/METADATA b/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/METADATA
new file mode 100644
index 0000000..1feb95d
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/METADATA
@@ -0,0 +1,1992 @@
+Metadata-Version: 2.1
+Name: yt-dlp
+Version: 2022.2.4
+Summary: A youtube-dl fork with additional features and patches
+Home-page: https://github.com/yt-dlp/yt-dlp
+Maintainer: pukkandan
+Maintainer-email: pukkandan.ytdlp@gmail.com
+License: UNKNOWN
+Project-URL: Documentation, https://yt-dlp.readthedocs.io
+Project-URL: Source, https://github.com/yt-dlp/yt-dlp
+Project-URL: Tracker, https://github.com/yt-dlp/yt-dlp/issues
+Project-URL: Funding, https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators
+Platform: UNKNOWN
+Classifier: Topic :: Multimedia :: Video
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Environment :: Console
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: Implementation
+Classifier: Programming Language :: Python :: Implementation :: CPython
+Classifier: Programming Language :: Python :: Implementation :: PyPy
+Classifier: License :: Public Domain
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.6
+Description-Content-Type: text/markdown
+Requires-Dist: mutagen
+Requires-Dist: pycryptodomex
+Requires-Dist: websockets
+
+Official repository: <https://github.com/yt-dlp/yt-dlp>
+
+**PS**: Some links in this document will not work since this is a copy of the README.md from Github
+
+
+ +[![YT-DLP](https://raw.githubusercontent.com/yt-dlp/yt-dlp/master/.github/banner.svg)](#readme) + +[![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=blue&label=Download&style=for-the-badge)](#release-files "Release") +[![License: Unlicense](https://img.shields.io/badge/-Unlicense-brightgreen.svg?style=for-the-badge)](LICENSE "License") +[![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)](Collaborators.md#collaborators "Donate") +[![Docs](https://img.shields.io/badge/-Docs-blue.svg?color=blue&style=for-the-badge)](https://readthedocs.org/projects/yt-dlp/ "Docs") +[![Supported Sites](https://img.shields.io/badge/-Supported_Sites-brightgreen.svg?style=for-the-badge)](supportedsites.md "Supported Sites") +[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPi") +[![CI Status](https://img.shields.io/github/workflow/status/yt-dlp/yt-dlp/Core%20Tests/master?label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status") +[![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord") +[![Matrix](https://img.shields.io/matrix/yt-dlp:matrix.org?color=brightgreen&labelColor=555555&label=&logo=element&style=for-the-badge)](https://matrix.to/#/#yt-dlp:matrix.org "Matrix") +[![Commits](https://img.shields.io/github/commit-activity/m/yt-dlp/yt-dlp?label=commits&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") +[![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") + +
+ + +yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc). The main focus of this project is adding new features and patches while also keeping up to date with the original project + + + + +* [NEW FEATURES](#new-features) + * [Differences in default behavior](#differences-in-default-behavior) +* [INSTALLATION](#installation) + * [Update](#update) + * [Release Files](#release-files) + * [Dependencies](#dependencies) + * [Compile](#compile) +* [USAGE AND OPTIONS](#usage-and-options) + * [General Options](#general-options) + * [Network Options](#network-options) + * [Geo-restriction](#geo-restriction) + * [Video Selection](#video-selection) + * [Download Options](#download-options) + * [Filesystem Options](#filesystem-options) + * [Thumbnail Options](#thumbnail-options) + * [Internet Shortcut Options](#internet-shortcut-options) + * [Verbosity and Simulation Options](#verbosity-and-simulation-options) + * [Workarounds](#workarounds) + * [Video Format Options](#video-format-options) + * [Subtitle Options](#subtitle-options) + * [Authentication Options](#authentication-options) + * [Post-processing Options](#post-processing-options) + * [SponsorBlock Options](#sponsorblock-options) + * [Extractor Options](#extractor-options) +* [CONFIGURATION](#configuration) + * [Authentication with .netrc file](#authentication-with-netrc-file) +* [OUTPUT TEMPLATE](#output-template) + * [Output template and Windows batch files](#output-template-and-windows-batch-files) + * [Output template examples](#output-template-examples) +* [FORMAT SELECTION](#format-selection) + * [Filtering Formats](#filtering-formats) + * [Sorting Formats](#sorting-formats) + * [Format Selection examples](#format-selection-examples) +* [MODIFYING METADATA](#modifying-metadata) + * [Modifying metadata examples](#modifying-metadata-examples) +* [EXTRACTOR ARGUMENTS](#extractor-arguments) +* [PLUGINS](#plugins) +* [EMBEDDING YT-DLP](#embedding-yt-dlp) +* [DEPRECATED OPTIONS](#deprecated-options) +* [CONTRIBUTING](CONTRIBUTING.md#contributing-to-yt-dlp) + * [Opening an Issue](CONTRIBUTING.md#opening-an-issue) + * [Developer Instructions](CONTRIBUTING.md#developer-instructions) +* [MORE](#more) + + + +# NEW FEATURES + +* Based on **youtube-dl 2021.12.17 [commit/5add3f4](https://github.com/ytdl-org/youtube-dl/commit/5add3f4373287e6346ca3551239edab549284db3)** and **youtube-dlc 2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl) + +* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API + +* **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. 
This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples))
+
+* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that the NicoNico improvements are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
+
+* **Youtube improvements**:
+    * All Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`) and private playlists support downloading multiple pages of content
+    * Search (`ytsearch:`, `ytsearchdate:`), search URLs and in-channel search work
+    * Mixes support downloading multiple pages of content
+    * Some (but not all) age-gated content can be downloaded without cookies
+    * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326)
+    * Redirect channel's home URL automatically to `/video` to preserve the old behaviour
+    * `255kbps` audio is extracted (if available) from youtube music when premium cookies are given
+    * Youtube music Albums, channels etc can be downloaded ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723))
+    * Download livestreams from the start using `--live-from-start` (experimental)
+
+* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE]`
+
+* **Split video by chapters**: Videos can be split into multiple files based on chapters using `--split-chapters`
+
+* **Multi-threaded fragment downloads**: Download multiple fragments of m3u8/mpd videos in parallel. Use `--concurrent-fragments` (`-N`) option to set the number of threads used
+
+* **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats
+
+* **New and fixed extractors**: Many new extractors have been added and a lot of existing ones have been fixed. See the [changelog](Changelog.md) or the [list of supported sites](supportedsites.md)
+
+* **New MSOs**: Philo, Spectrum, SlingTV, Cablevision, RCN
+
+* **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details
+
+* **Multiple paths and output templates**: You can give different [output templates](#output-template) and download paths for different types of files. You can also set a temporary path where intermediary files are downloaded to using `--paths` (`-P`)
+
+* **Portable Configuration**: Configuration files are automatically loaded from the home and root directories. See [configuration](#configuration) for details
+
+* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details.
Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata` + +* **Other new options**: Many new options have been added such as `--concat-playlist`, `--print`, `--wait-for-video`, `--sleep-requests`, `--convert-thumbnails`, `--write-link`, `--force-download-archive`, `--force-overwrites`, `--break-on-reject` etc + +* **Improvements**: Regex and other operators in `--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc + +* **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details + +* **Self-updater**: The releases can be updated using `yt-dlp -U` + +See [changelog](Changelog.md) or [commits](https://github.com/yt-dlp/yt-dlp/commits) for the full list of changes + +### Differences in default behavior + +Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc: + +* The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details +* `avconv` is not supported as an alternative to `ffmpeg` +* The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s-%(id)s.%(ext)s`. Instead, you may use `--compat-options filename` +* The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order +* The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this +* Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both +* `--ignore-errors` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead +* When writing metadata files such as thumbnails, description or infojson, the same information (if available) is also written for playlists. Use `--no-write-playlist-metafiles` or `--compat-options no-playlist-metafiles` to not write these files +* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-info-json`. Use `--no-embed-info-json` or `--compat-options no-attach-info-json` to revert this +* Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this +* `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. 
See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior +* The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this +* All *experiences* of a funimation episode are considered as a single video. This behavior breaks existing archives. Use `--compat-options seperate-video-versions` to extract information from only the default player +* Youtube live chat (if available) is considered as a subtitle. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent live chat from downloading +* Youtube channel URLs are automatically redirected to `/video`. Append a `/featured` to the URL to download only the videos in the home page. If the channel does not have a videos tab, we try to download the equivalent `UU` playlist instead. For all other tabs, if the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections +* Unavailable videos are also listed for youtube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this +* If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this +* Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead +* Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this +* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. 
`--compat-options no-keep-subs` can be used to revert this
+
+For ease of use, a few more compat options are available:
+* `--compat-options all`: Use all compat options
+* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams`
+* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect`
+
+
+# INSTALLATION
+
+You can install yt-dlp using one of the following methods:
+
+### Using the release binary
+
+You can simply download the [correct binary file](#release-files) for your OS
+
+
+[![Windows](https://img.shields.io/badge/-Windows_x64-blue.svg?style=for-the-badge&logo=windows)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)
+[![Linux](https://img.shields.io/badge/-Linux/MacOS/BSD-red.svg?style=for-the-badge&logo=linux)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)
+[![Source Tarball](https://img.shields.io/badge/-Source_tar-green.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)
+[![Other variants](https://img.shields.io/badge/-Other-grey.svg?style=for-the-badge)](#release-files)
+[![All versions](https://img.shields.io/badge/-All_Versions-lightgrey.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases)
+
+
+Note: The manpages, shell completion files etc. are available in the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)
+
+
+In UNIX-like OSes (MacOS, Linux, BSD), you can also install the same in one of the following ways:
+
+```
+sudo curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp
+sudo chmod a+rx /usr/local/bin/yt-dlp
+```
+
+```
+sudo wget https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -O /usr/local/bin/yt-dlp
+sudo chmod a+rx /usr/local/bin/yt-dlp
+```
+
+```
+sudo aria2c https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp --dir /usr/local/bin -o yt-dlp
+sudo chmod a+rx /usr/local/bin/yt-dlp
+```
+
+
+### With [PIP](https://pypi.org/project/pip)
+
+You can install the [PyPI package](https://pypi.org/project/yt-dlp) with:
+```
+python3 -m pip install -U yt-dlp
+```
+
+You can install without any of the optional dependencies using:
+```
+python3 -m pip install --no-deps -U yt-dlp
+```
+
+If you want to be on the cutting edge, you can also install the master branch with:
+```
+python3 -m pip install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.zip
+```
+
+Note that on some systems, you may need to use `py` or `python` instead of `python3`
+
+
+### With [Homebrew](https://brew.sh)
+
+macOS or Linux users that are using Homebrew can also install it by:
+
+```
+brew install yt-dlp/taps/yt-dlp
+```
+
+## UPDATE
+You can use `yt-dlp -U` to update if you are [using the provided release](#using-the-release-binary)
+
+If you [installed with pip](#with-pip), simply re-run the same command that was used to install the program
+
+If you [installed using Homebrew](#with-homebrew), run `brew upgrade yt-dlp/taps/yt-dlp`
+
+
+## RELEASE FILES
+
+#### Recommended
+
+File|Description
+:---|:---
+[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independent binary. Needs Python (recommended for **UNIX-like systems**)
+[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (recommended for **Windows**)
+
+#### Alternatives
+
+File|Description
+:---|:---
+[yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS (10.15+) standalone executable
+[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Vista SP2+) standalone x86 (32-bit) binary
+[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`. Does not contain `pycryptodomex`, needs VC++14
+[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update)
+[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (no auto-update)
+
+#### Misc
+
+File|Description
+:---|:---
+[yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball. Also contains manpages, completions, etc
+[SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums
+[SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums
+
+
+## DEPENDENCIES
+Python versions 3.6+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly.
+
+
+While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended
+
+* [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html)
+* [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING)
+* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
+* [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licensed under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE)
+* [**secretstorage**](https://github.com/mitya57/secretstorage) - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
+* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
+* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu)
+* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rtsp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright)
+* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
+* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). Licensed under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md)
+* Any external downloader that you want to use with `--downloader`
+
+To use or redistribute the dependencies, you must agree to their respective licensing terms.
+
+The Windows and MacOS standalone release binaries are already built with the python interpreter, mutagen, pycryptodomex and websockets included.
+
+**Note**: There are some regressions in newer ffmpeg versions that cause various issues when used alongside yt-dlp.
Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds + + +## COMPILE + +**For Windows**: +To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodomex, websockets). Once you have all the necessary dependencies installed, (optionally) build lazy extractors using `devscripts/make_lazy_extractors.py`, and then just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. + + py -m pip install -U pyinstaller -r requirements.txt + py devscripts/make_lazy_extractors.py + py pyinst.py + +Note that pyinstaller [does not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment + +**For Unix**: +You will need the required build tools: `python`, `make` (GNU), `pandoc`, `zip`, `pytest` +Then simply run `make`. You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files + +**Note**: In either platform, `devscripts/update-version.py` can be used to automatically update the version number + +You can also fork the project on github and run your fork's [build workflow](.github/workflows/build.yml) to automatically build a release + +# USAGE AND OPTIONS + + + yt-dlp [OPTIONS] [--] URL [URL...] + +`Ctrl+F` is your friend :D + + + +## General Options: + -h, --help Print this help text and exit + --version Print program version and exit + -U, --update Update this program to latest version. Make + sure that you have sufficient permissions + (run with sudo if needed) + -i, --ignore-errors Ignore download and postprocessing errors. + The download will be considered successful + even if the postprocessing fails + --no-abort-on-error Continue with next video on download + errors; e.g. to skip unavailable videos in + a playlist (default) + --abort-on-error Abort downloading of further videos if an + error occurs (Alias: --no-ignore-errors) + --dump-user-agent Display the current user-agent and exit + --list-extractors List all supported extractors and exit + --extractor-descriptions Output descriptions of all supported + extractors and exit + --force-generic-extractor Force extraction to use the generic + extractor + --default-search PREFIX Use this prefix for unqualified URLs. For + example "gvsearch2:" downloads two videos + from google videos for the search term + "large apple". Use the value "auto" to let + yt-dlp guess ("auto_warning" to emit a + warning when guessing). "error" just throws + an error. The default value "fixup_error" + repairs broken URLs, but emits an error if + this is not possible instead of searching + --ignore-config Don't load any more configuration files + except those given by --config-locations. + For backward compatibility, if this option + is found inside the system configuration + file, the user configuration is not loaded. + (Alias: --no-config) + --no-config-locations Do not load any custom configuration files + (default). 
When given inside a + configuration file, ignore all previous + --config-locations defined in the current + file + --config-locations PATH Location of the main configuration file; + either the path to the config or its + containing directory. Can be used multiple + times and inside other configuration files + --flat-playlist Do not extract the videos of a playlist, + only list them + --no-flat-playlist Extract the videos of a playlist + --live-from-start Download livestreams from the start. + Currently only supported for YouTube + (Experimental) + --no-live-from-start Download livestreams from the current time + (default) + --wait-for-video MIN[-MAX] Wait for scheduled streams to become + available. Pass the minimum number of + seconds (or range) to wait between retries + --no-wait-for-video Do not wait for scheduled streams (default) + --mark-watched Mark videos watched (even with --simulate). + Currently only supported for YouTube + --no-mark-watched Do not mark videos watched (default) + --no-colors Do not emit color codes in output + --compat-options OPTS Options that can help keep compatibility + with youtube-dl or youtube-dlc + configurations by reverting some of the + changes made in yt-dlp. See "Differences in + default behavior" for details + +## Network Options: + --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. + To enable SOCKS proxy, specify a proper + scheme. For example + socks5://user:pass@127.0.0.1:1080/. Pass in + an empty string (--proxy "") for direct + connection + --socket-timeout SECONDS Time to wait before giving up, in seconds + --source-address IP Client-side IP address to bind to + -4, --force-ipv4 Make all connections via IPv4 + -6, --force-ipv6 Make all connections via IPv6 + +## Geo-restriction: + --geo-verification-proxy URL Use this proxy to verify the IP address for + some geo-restricted sites. The default + proxy specified by --proxy (or none, if the + option is not present) is used for the + actual downloading + --geo-bypass Bypass geographic restriction via faking + X-Forwarded-For HTTP header (default) + --no-geo-bypass Do not bypass geographic restriction via + faking X-Forwarded-For HTTP header + --geo-bypass-country CODE Force bypass geographic restriction with + explicitly provided two-letter ISO 3166-2 + country code + --geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction with + explicitly provided IP block in CIDR + notation + +## Video Selection: + --playlist-start NUMBER Playlist video to start at (default is 1) + --playlist-end NUMBER Playlist video to end at (default is last) + --playlist-items ITEM_SPEC Playlist video items to download. Specify + indices of the videos in the playlist + separated by commas like: "--playlist-items + 1,2,5,8" if you want to download videos + indexed 1, 2, 5, 8 in the playlist. You can + specify range: "--playlist-items + 1-3,7,10-13", it will download the videos + at index 1, 2, 3, 7, 10, 11, 12 and 13 + --min-filesize SIZE Do not download any videos smaller than + SIZE (e.g. 50k or 44.6m) + --max-filesize SIZE Do not download any videos larger than SIZE + (e.g. 50k or 44.6m) + --date DATE Download only videos uploaded on this date. + The date can be "YYYYMMDD" or in the format + "(now|today)[+-][0-9](day|week|month|year)( + s)?" + --datebefore DATE Download only videos uploaded on or before + this date. The date formats accepted is the + same as --date + --dateafter DATE Download only videos uploaded on or after + this date. 
The date formats accepted is the + same as --date + --match-filter FILTER Generic video filter. Any field (see + "OUTPUT TEMPLATE") can be compared with a + number or a string using the operators + defined in "Filtering formats". You can + also simply specify a field to match if the + field is present and "!field" to check if + the field is not present. In addition, + Python style regular expression matching + can be done using "~=", and multiple + filters can be checked with "&". Use a "\" + to escape "&" or quotes if needed. Eg: + --match-filter "!is_live & like_count>?100 + & description~='(?i)\bcats \& dogs\b'" + matches only videos that are not live, has + a like count more than 100 (or the like + field is not available), and also has a + description that contains the phrase "cats + & dogs" (ignoring case) + --no-match-filter Do not use generic video filter (default) + --no-playlist Download only the video, if the URL refers + to a video and a playlist + --yes-playlist Download the playlist, if the URL refers to + a video and a playlist + --age-limit YEARS Download only videos suitable for the given + age + --download-archive FILE Download only videos not listed in the + archive file. Record the IDs of all + downloaded videos in it + --no-download-archive Do not use archive file (default) + --max-downloads NUMBER Abort after downloading NUMBER files + --break-on-existing Stop the download process when encountering + a file that is in the archive + --break-on-reject Stop the download process when encountering + a file that has been filtered out + --break-per-input Make --break-on-existing and --break-on- + reject act only on the current input URL + --no-break-per-input --break-on-existing and --break-on-reject + terminates the entire download queue + --skip-playlist-after-errors N Number of allowed failures until the rest + of the playlist is skipped + +## Download Options: + -N, --concurrent-fragments N Number of fragments of a dash/hlsnative + video that should be downloaded + concurrently (default is 1) + -r, --limit-rate RATE Maximum download rate in bytes per second + (e.g. 50K or 4.2M) + --throttled-rate RATE Minimum download rate in bytes per second + below which throttling is assumed and the + video data is re-extracted (e.g. 100K) + -R, --retries RETRIES Number of retries (default is 10), or + "infinite" + --file-access-retries RETRIES Number of times to retry on file access + error (default is 10), or "infinite" + --fragment-retries RETRIES Number of retries for a fragment (default + is 10), or "infinite" (DASH, hlsnative and + ISM) + --skip-unavailable-fragments Skip unavailable fragments for DASH, + hlsnative and ISM (default) (Alias: --no- + abort-on-unavailable-fragment) + --abort-on-unavailable-fragment Abort downloading if a fragment is + unavailable (Alias: --no-skip-unavailable- + fragments) + --keep-fragments Keep downloaded fragments on disk after + downloading is finished + --no-keep-fragments Delete downloaded fragments after + downloading is finished (default) + --buffer-size SIZE Size of download buffer (e.g. 1024 or 16K) + (default is 1024) + --resize-buffer The buffer size is automatically resized + from an initial value of --buffer-size + (default) + --no-resize-buffer Do not automatically adjust the buffer size + --http-chunk-size SIZE Size of a chunk for chunk-based HTTP + downloading (e.g. 10485760 or 10M) (default + is disabled). 
May be useful for bypassing + bandwidth throttling imposed by a webserver + (experimental) + --playlist-reverse Download playlist videos in reverse order + --no-playlist-reverse Download playlist videos in default order + (default) + --playlist-random Download playlist videos in random order + --xattr-set-filesize Set file xattribute ytdl.filesize with + expected file size + --hls-use-mpegts Use the mpegts container for HLS videos; + allowing some players to play the video + while downloading, and reducing the chance + of file corruption if download is + interrupted. This is enabled by default for + live streams + --no-hls-use-mpegts Do not use the mpegts container for HLS + videos. This is default when not + downloading live streams + --downloader [PROTO:]NAME Name or path of the external downloader to + use (optionally) prefixed by the protocols + (http, ftp, m3u8, dash, rstp, rtmp, mms) to + use it for. Currently supports native, + aria2c, avconv, axel, curl, ffmpeg, httpie, + wget (Recommended: aria2c). You can use + this option multiple times to set different + downloaders for different protocols. For + example, --downloader aria2c --downloader + "dash,m3u8:native" will use aria2c for + http/ftp downloads, and the native + downloader for dash/m3u8 downloads (Alias: + --external-downloader) + --downloader-args NAME:ARGS Give these arguments to the external + downloader. Specify the downloader name and + the arguments separated by a colon ":". For + ffmpeg, arguments can be passed to + different positions using the same syntax + as --postprocessor-args. You can use this + option multiple times to give different + arguments to different downloaders (Alias: + --external-downloader-args) + +## Filesystem Options: + -a, --batch-file FILE File containing URLs to download ("-" for + stdin), one URL per line. Lines starting + with "#", ";" or "]" are considered as + comments and ignored + --no-batch-file Do not read URLs from batch file (default) + -P, --paths [TYPES:]PATH The paths where the files should be + downloaded. Specify the type of file and + the path separated by a colon ":". All the + same TYPES as --output are supported. + Additionally, you can also provide "home" + (default) and "temp" paths. All + intermediary files are first downloaded to + the temp path and then the final files are + moved over to the home path after download + is finished. This option is ignored if + --output is an absolute path + -o, --output [TYPES:]TEMPLATE Output filename template; see "OUTPUT + TEMPLATE" for details + --output-na-placeholder TEXT Placeholder value for unavailable meta + fields in output filename template + (default: "NA") + --restrict-filenames Restrict filenames to only ASCII + characters, and avoid "&" and spaces in + filenames + --no-restrict-filenames Allow Unicode characters, "&" and spaces in + filenames (default) + --windows-filenames Force filenames to be Windows-compatible + --no-windows-filenames Make filenames Windows-compatible only if + using Windows (default) + --trim-filenames LENGTH Limit the filename length (excluding + extension) to the specified number of + characters + -w, --no-overwrites Do not overwrite any files + --force-overwrites Overwrite all video and metadata files. + This option includes --no-continue + --no-force-overwrites Do not overwrite the video, but overwrite + related files (default) + -c, --continue Resume partially downloaded files/fragments + (default) + --no-continue Do not resume partially downloaded + fragments. 
If the file is not fragmented, + restart download of the entire file + --part Use .part files instead of writing directly + into output file (default) + --no-part Do not use .part files - write directly + into output file + --mtime Use the Last-modified header to set the + file modification time (default) + --no-mtime Do not use the Last-modified header to set + the file modification time + --write-description Write video description to a .description + file + --no-write-description Do not write video description (default) + --write-info-json Write video metadata to a .info.json file + (this may contain personal information) + --no-write-info-json Do not write video metadata (default) + --write-playlist-metafiles Write playlist metadata in addition to the + video metadata when using --write-info- + json, --write-description etc. (default) + --no-write-playlist-metafiles Do not write playlist metadata when using + --write-info-json, --write-description etc. + --clean-infojson Remove some private fields such as + filenames from the infojson. Note that it + could still contain some personal + information (default) + --no-clean-infojson Write all fields to the infojson + --write-comments Retrieve video comments to be placed in the + infojson. The comments are fetched even + without this option if the extraction is + known to be quick (Alias: --get-comments) + --no-write-comments Do not retrieve video comments unless the + extraction is known to be quick (Alias: + --no-get-comments) + --load-info-json FILE JSON file containing the video information + (created with the "--write-info-json" + option) + --cookies FILE Netscape formatted file to read cookies + from and dump cookie jar in + --no-cookies Do not read/dump cookies from/to file + (default) + --cookies-from-browser BROWSER[+KEYRING][:PROFILE] + The name of the browser and (optionally) + the name/path of the profile to load + cookies from, separated by a ":". Currently + supported browsers are: brave, chrome, + chromium, edge, firefox, opera, safari, + vivaldi. By default, the most recently + accessed profile is used. The keyring used + for decrypting Chromium cookies on Linux + can be (optionally) specified after the + browser name separated by a "+". Currently + supported keyrings are: basictext, + gnomekeyring, kwallet + --no-cookies-from-browser Do not load cookies from browser (default) + --cache-dir DIR Location in the filesystem where youtube-dl + can store some downloaded information (such + as client ids and signatures) permanently. + By default $XDG_CACHE_HOME/yt-dlp or + ~/.cache/yt-dlp + --no-cache-dir Disable filesystem caching + --rm-cache-dir Delete all filesystem cache files + +## Thumbnail Options: + --write-thumbnail Write thumbnail image to disk + --no-write-thumbnail Do not write thumbnail image to disk + (default) + --write-all-thumbnails Write all thumbnail image formats to disk + --list-thumbnails List available thumbnails of each video. + Simulate unless --no-simulate is used + +## Internet Shortcut Options: + --write-link Write an internet shortcut file, depending + on the current platform (.url, .webloc or + .desktop). The URL may be cached by the OS + --write-url-link Write a .url Windows internet shortcut. The + OS caches the URL based on the file path + --write-webloc-link Write a .webloc macOS internet shortcut + --write-desktop-link Write a .desktop Linux internet shortcut + +## Verbosity and Simulation Options: + -q, --quiet Activate quiet mode. 
If used with + --verbose, print the log to stderr + --no-warnings Ignore warnings + -s, --simulate Do not download the video and do not write + anything to disk + --no-simulate Download the video even if printing/listing + options are used + --ignore-no-formats-error Ignore "No video formats" error. Useful for + extracting metadata even if the videos are + not actually available for download + (experimental) + --no-ignore-no-formats-error Throw error when no downloadable video + formats are found (default) + --skip-download Do not download the video but write all + related files (Alias: --no-download) + -O, --print [WHEN:]TEMPLATE Field name or output template to print to + screen, optionally prefixed with when to + print it, separated by a ":". Supported + values of "WHEN" are the same as that of + --use-postprocessor, and "video" (default). + Implies --quiet and --simulate (unless + --no-simulate is used). This option can be + used multiple times + --print-to-file [WHEN:]TEMPLATE FILE + Append given template to the file. The + values of WHEN and TEMPLATE are same as + that of --print. FILE uses the same syntax + as the output template. This option can be + used multiple times + -j, --dump-json Quiet, but print JSON information for each + video. Simulate unless --no-simulate is + used. See "OUTPUT TEMPLATE" for a + description of available keys + -J, --dump-single-json Quiet, but print JSON information for each + url or infojson passed. Simulate unless + --no-simulate is used. If the URL refers to + a playlist, the whole playlist information + is dumped in a single line + --force-write-archive Force download archive entries to be + written as far as no errors occur, even if + -s or another simulation option is used + (Alias: --force-download-archive) + --newline Output progress bar as new lines + --no-progress Do not print progress bar + --progress Show progress bar, even if in quiet mode + --console-title Display progress in console titlebar + --progress-template [TYPES:]TEMPLATE + Template for progress outputs, optionally + prefixed with one of "download:" (default), + "download-title:" (the console title), + "postprocess:", or "postprocess-title:". + The video's fields are accessible under the + "info" key and the progress attributes are + accessible under "progress" key. E.g.: + --console-title --progress-template + "download- + title:%(info.id)s-%(progress.eta)s" + -v, --verbose Print various debugging information + --dump-pages Print downloaded pages encoded using base64 + to debug problems (very verbose) + --write-pages Write downloaded intermediary pages to + files in the current directory to debug + problems + --print-traffic Display sent and read HTTP traffic + +## Workarounds: + --encoding ENCODING Force the specified encoding (experimental) + --legacy-server-connect Explicitly allow HTTPS connection to + servers that do not support RFC 5746 secure + renegotiation + --no-check-certificates Suppress HTTPS certificate validation + --prefer-insecure Use an unencrypted connection to retrieve + information about the video (Currently + supported only for YouTube) + --user-agent UA Specify a custom user agent + --referer URL Specify a custom referer, use if the video + access is restricted to one domain + --add-header FIELD:VALUE Specify a custom HTTP header and its value, + separated by a colon ":". You can use this + option multiple times + --bidi-workaround Work around terminals that lack + bidirectional text support. 
Requires bidiv + or fribidi executable in PATH + --sleep-requests SECONDS Number of seconds to sleep between requests + during data extraction + --sleep-interval SECONDS Number of seconds to sleep before each + download. This is the minimum time to sleep + when used along with --max-sleep-interval + (Alias: --min-sleep-interval) + --max-sleep-interval SECONDS Maximum number of seconds to sleep. Can + only be used along with --min-sleep- + interval + --sleep-subtitles SECONDS Number of seconds to sleep before each + subtitle download + +## Video Format Options: + -f, --format FORMAT Video format code, see "FORMAT SELECTION" + for more details + -S, --format-sort SORTORDER Sort the formats by the fields given, see + "Sorting Formats" for more details + --format-sort-force Force user specified sort order to have + precedence over all fields, see "Sorting + Formats" for more details + --no-format-sort-force Some fields have precedence over the user + specified sort order (default), see + "Sorting Formats" for more details + --video-multistreams Allow multiple video streams to be merged + into a single file + --no-video-multistreams Only one video stream is downloaded for + each output file (default) + --audio-multistreams Allow multiple audio streams to be merged + into a single file + --no-audio-multistreams Only one audio stream is downloaded for + each output file (default) + --prefer-free-formats Prefer video formats with free containers + over non-free ones of same quality. Use + with "-S ext" to strictly prefer free + containers irrespective of quality + --no-prefer-free-formats Don't give any special preference to free + containers (default) + --check-formats Check that the selected formats are + actually downloadable + --check-all-formats Check all formats for whether they are + actually downloadable + --no-check-formats Do not check that the formats are actually + downloadable + -F, --list-formats List available formats of each video. + Simulate unless --no-simulate is used + --merge-output-format FORMAT If a merge is required (e.g. + bestvideo+bestaudio), output to given + container format. One of mkv, mp4, ogg, + webm, flv. Ignored if no merge is required + +## Subtitle Options: + --write-subs Write subtitle file + --no-write-subs Do not write subtitle file (default) + --write-auto-subs Write automatically generated subtitle file + (Alias: --write-automatic-subs) + --no-write-auto-subs Do not write auto-generated subtitles + (default) (Alias: --no-write-automatic- + subs) + --list-subs List available subtitles of each video. + Simulate unless --no-simulate is used + --sub-format FORMAT Subtitle format, accepts formats + preference, for example: "srt" or + "ass/srt/best" + --sub-langs LANGS Languages of the subtitles to download (can + be regex) or "all" separated by commas. + (Eg: --sub-langs "en.*,ja") You can prefix + the language code with a "-" to exempt it + from the requested languages. (Eg: --sub- + langs all,-live_chat) Use --list-subs for a + list of available language tags + +## Authentication Options: + -u, --username USERNAME Login with this account ID + -p, --password PASSWORD Account password. If this option is left + out, yt-dlp will ask interactively + -2, --twofactor TWOFACTOR Two-factor authentication code + -n, --netrc Use .netrc authentication data + --netrc-location PATH Location of .netrc authentication data; + either the path or its containing + directory. 
Defaults to ~/.netrc + --video-password PASSWORD Video password (vimeo, youku) + --ap-mso MSO Adobe Pass multiple-system operator (TV + provider) identifier, use --ap-list-mso for + a list of available MSOs + --ap-username USERNAME Multiple-system operator account login + --ap-password PASSWORD Multiple-system operator account password. + If this option is left out, yt-dlp will ask + interactively + --ap-list-mso List all supported multiple-system + operators + +## Post-Processing Options: + -x, --extract-audio Convert video files to audio-only files + (requires ffmpeg and ffprobe) + --audio-format FORMAT Specify audio format to convert the audio + to when -x is used. Currently supported + formats are: best (default) or one of + best|aac|flac|mp3|m4a|opus|vorbis|wav|alac + --audio-quality QUALITY Specify ffmpeg audio quality, insert a + value between 0 (best) and 10 (worst) for + VBR or a specific bitrate like 128K + (default 5) + --remux-video FORMAT Remux the video into another container if + necessary (currently supported: mp4|mkv|flv + |webm|mov|avi|mp3|mka|m4a|ogg|opus). If + target container does not support the + video/audio codec, remuxing will fail. You + can specify multiple rules; Eg. + "aac>m4a/mov>mp4/mkv" will remux aac to + m4a, mov to mp4 and anything else to mkv. + --recode-video FORMAT Re-encode the video into another format if + re-encoding is necessary. The syntax and + supported formats are the same as --remux- + video + --postprocessor-args NAME:ARGS Give these arguments to the postprocessors. + Specify the postprocessor/executable name + and the arguments separated by a colon ":" + to give the argument to the specified + postprocessor/executable. Supported PP are: + Merger, ModifyChapters, SplitChapters, + ExtractAudio, VideoRemuxer, VideoConvertor, + Metadata, EmbedSubtitle, EmbedThumbnail, + SubtitlesConvertor, ThumbnailsConvertor, + FixupStretched, FixupM4a, FixupM3u8, + FixupTimestamp and FixupDuration. The + supported executables are: AtomicParsley, + FFmpeg and FFprobe. You can also specify + "PP+EXE:ARGS" to give the arguments to the + specified executable only when being used + by the specified postprocessor. + Additionally, for ffmpeg/ffprobe, "_i"/"_o" + can be appended to the prefix optionally + followed by a number to pass the argument + before the specified input/output file. Eg: + --ppa "Merger+ffmpeg_i1:-v quiet". You can + use this option multiple times to give + different arguments to different + postprocessors. (Alias: --ppa) + -k, --keep-video Keep the intermediate video file on disk + after post-processing + --no-keep-video Delete the intermediate video file after + post-processing (default) + --post-overwrites Overwrite post-processed files (default) + --no-post-overwrites Do not overwrite post-processed files + --embed-subs Embed subtitles in the video (only for mp4, + webm and mkv videos) + --no-embed-subs Do not embed subtitles (default) + --embed-thumbnail Embed thumbnail in the video as cover art + --no-embed-thumbnail Do not embed thumbnail (default) + --embed-metadata Embed metadata to the video file. 
Also + embeds chapters/infojson if present unless + --no-embed-chapters/--no-embed-info-json + are used (Alias: --add-metadata) + --no-embed-metadata Do not add metadata to file (default) + (Alias: --no-add-metadata) + --embed-chapters Add chapter markers to the video file + (Alias: --add-chapters) + --no-embed-chapters Do not add chapter markers (default) + (Alias: --no-add-chapters) + --embed-info-json Embed the infojson as an attachment to + mkv/mka video files + --no-embed-info-json Do not embed the infojson as an attachment + to the video file + --parse-metadata FROM:TO Parse additional metadata like title/artist + from other fields; see "MODIFYING METADATA" + for details + --replace-in-metadata FIELDS REGEX REPLACE + Replace text in a metadata field using the + given regex. This option can be used + multiple times + --xattrs Write metadata to the video file's xattrs + (using dublin core and xdg standards) + --concat-playlist POLICY Concatenate videos in a playlist. One of + "never", "always", or "multi_video" + (default; only when the videos form a + single show). All the video files must have + same codecs and number of streams to be + concatable. The "pl_video:" prefix can be + used with "--paths" and "--output" to set + the output filename for the split files. + See "OUTPUT TEMPLATE" for details + --fixup POLICY Automatically correct known faults of the + file. One of never (do nothing), warn (only + emit a warning), detect_or_warn (the + default; fix file if we can, warn + otherwise), force (try fixing even if file + already exists) + --ffmpeg-location PATH Location of the ffmpeg binary; either the + path to the binary or its containing + directory + --exec [WHEN:]CMD Execute a command, optionally prefixed with + when to execute it (after_move if + unspecified), separated by a ":". Supported + values of "WHEN" are the same as that of + --use-postprocessor. Same syntax as the + output template can be used to pass any + field as arguments to the command. After + download, an additional field "filepath" + that contains the final path of the + downloaded file is also available, and if + no fields are passed, %(filepath)q is + appended to the end of the command. This + option can be used multiple times + --no-exec Remove any previously defined --exec + --convert-subs FORMAT Convert the subtitles to another format + (currently supported: srt|vtt|ass|lrc) + (Alias: --convert-subtitles) + --convert-thumbnails FORMAT Convert the thumbnails to another format + (currently supported: jpg|png) + --split-chapters Split video into multiple files based on + internal chapters. The "chapter:" prefix + can be used with "--paths" and "--output" + to set the output filename for the split + files. See "OUTPUT TEMPLATE" for details + --no-split-chapters Do not split video based on chapters + (default) + --remove-chapters REGEX Remove chapters whose title matches the + given regular expression. Time ranges + prefixed by a "*" can also be used in place + of chapters to remove the specified range. + Eg: --remove-chapters "*10:15-15:00" + --remove-chapters "intro". This option can + be used multiple times + --no-remove-chapters Do not remove any chapters from the file + (default) + --force-keyframes-at-cuts Force keyframes around the chapters before + removing/splitting them. 
Requires a re- + encode and thus is very slow, but the + resulting video may have fewer artifacts + around the cuts + --no-force-keyframes-at-cuts Do not force keyframes around the chapters + when cutting/splitting (default) + --use-postprocessor NAME[:ARGS] The (case sensitive) name of plugin + postprocessors to be enabled, and + (optionally) arguments to be passed to it, + separated by a colon ":". ARGS are a + semicolon ";" delimited list of NAME=VALUE. + The "when" argument determines when the + postprocessor is invoked. It can be one of + "pre_process" (after extraction), + "before_dl" (before video download), + "post_process" (after video download; + default), "after_move" (after moving file + to their final locations), "after_video" + (after downloading and processing all + formats of a video), or "playlist" (end of + playlist). This option can be used multiple + times to add different postprocessors + +## SponsorBlock Options: + Make chapter entries for, or remove various segments (sponsor, + introductions, etc.) from downloaded YouTube videos using the + SponsorBlock API (https://sponsor.ajay.app) + + --sponsorblock-mark CATS SponsorBlock categories to create chapters + for, separated by commas. Available + categories are all, default(=all), sponsor, + intro, outro, selfpromo, preview, filler, + interaction, music_offtopic, poi_highlight. + You can prefix the category with a "-" to + exempt it. See [1] for description of the + categories. Eg: --sponsorblock-mark + all,-preview [1] https://wiki.sponsor.ajay. + app/w/Segment_Categories + --sponsorblock-remove CATS SponsorBlock categories to be removed from + the video file, separated by commas. If a + category is present in both mark and + remove, remove takes precedence. The syntax + and available categories are the same as + for --sponsorblock-mark except that + "default" refers to "all,-filler" and + poi_highlight is not available + --sponsorblock-chapter-title TEMPLATE + The title template for SponsorBlock + chapters created by --sponsorblock-mark. + The same syntax as the output template is + used, but the only available fields are + start_time, end_time, category, categories, + name, category_names. Defaults to + "[SponsorBlock]: %(category_names)l" + --no-sponsorblock Disable both --sponsorblock-mark and + --sponsorblock-remove + --sponsorblock-api URL SponsorBlock API location, defaults to + https://sponsor.ajay.app + +## Extractor Options: + --extractor-retries RETRIES Number of retries for known extractor + errors (default is 3), or "infinite" + --allow-dynamic-mpd Process dynamic DASH manifests (default) + (Alias: --no-ignore-dynamic-mpd) + --ignore-dynamic-mpd Do not process dynamic DASH manifests + (Alias: --no-allow-dynamic-mpd) + --hls-split-discontinuity Split HLS playlists to different formats at + discontinuities such as ad breaks + --no-hls-split-discontinuity Do not split HLS playlists to different + formats at discontinuities such as ad + breaks (default) + --extractor-args KEY:ARGS Pass these arguments to the extractor. See + "EXTRACTOR ARGUMENTS" for details. You can + use this option multiple times to give + arguments for different extractors + +# CONFIGURATION + +You can configure yt-dlp by placing any supported command line option to a configuration file. The configuration is loaded from the following locations: + +1. **Main Configuration**: The file given by `--config-location` +1. **Portable Configuration**: `yt-dlp.conf` in the same directory as the bundled binary. 
If you are running from source-code (`<root-dir>/yt_dlp/__main__.py`), the root directory is used instead.
+1. **Home Configuration**: `yt-dlp.conf` in the home path given by `-P`, or in the current directory if no such path is given
+1. **User Configuration**:
+    * `${XDG_CONFIG_HOME}/yt-dlp/config` (recommended on Linux/macOS)
+    * `${XDG_CONFIG_HOME}/yt-dlp.conf`
+    * `%APPDATA%/yt-dlp/config` (recommended on Windows)
+    * `%APPDATA%/yt-dlp/config.txt`
+    * `~/yt-dlp.conf`
+    * `~/yt-dlp.conf.txt`
+
+    `${XDG_CONFIG_HOME}` defaults to `~/.config` if undefined. On Windows, `%APPDATA%` generally points to `C:\Users\<user name>\AppData\Roaming` and `~` points to `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\<user name>`), or `%HOMEDRIVE%%HOMEPATH%`
+1. **System Configuration**: `/etc/yt-dlp.conf`
+
+For example, with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under a `YouTube` directory in your home directory:
+```
+# Lines starting with # are comments
+
+# Always extract audio
+-x
+
+# Do not copy the mtime
+--no-mtime
+
+# Use this proxy
+--proxy 127.0.0.1:3128
+
+# Save all videos under YouTube directory in your home directory
+-o ~/YouTube/%(title)s.%(ext)s
+```
+
+Note that the options in a configuration file are just the same options, a.k.a. switches, used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
+
+You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded.
+
+### Authentication with `.netrc` file
+
+You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every yt-dlp execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you:
+```
+touch $HOME/.netrc
+chmod a-rwx,u+rw $HOME/.netrc
+```
+After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
+```
+machine <extractor> login <username> password <password>
+```
+For example:
+```
+machine youtube login myaccount@gmail.com password my_youtube_password
+machine twitch login my_twitch_account_name password my_twitch_password
+```
+To activate authentication with the `.netrc` file you should pass `--netrc` to yt-dlp or place it in the [configuration file](#configuration).
+
+The default location of the .netrc file is `$HOME` (`~`) on UNIX. On Windows, it is `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\<user name>`) or `%HOMEDRIVE%%HOMEPATH%`
+
+# OUTPUT TEMPLATE
+
+The `-o` option is used to indicate a template for the output file names while the `-P` option is used to specify the path each type of file should be saved to.
+
+
+**tl;dr:** [navigate me to examples](#output-template-examples).
+
+
+The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding the file extension like this is _not_ recommended and could break some post-processing).
+
+It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations.
+
+The field names themselves (the part inside the parentheses) can also have some special formatting:
+1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that not all of the fields that become available using this method are listed below. Use `-j` to see such fields
+
+1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
+
+1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`
+
+1. **Alternatives**: Alternate fields can be specified separated with a `,`. Eg: `%(release_date>%Y,upload_date>%Y|Unknown)s`
+
+1. **Replacement**: A replacement value can be specified using a `&` separator. If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty.
+
+1. **Default**: A literal default value can be specified for when the field is empty, using a `|` separator. This overrides `--output-na-placeholder`. Eg: `%(uploader|Unknown)s`
+
+1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q`, `D`, `S` can be used for converting to **B**ytes, **j**son (flag `#` for pretty-printing), a comma-separated **l**ist (flag `#` for `\n` newline-separated), a string **q**uoted for the terminal (flag `#` to split a list into different arguments), to add **D**ecimal suffixes (Eg: 10M) (flag `#` to use 1024 as factor), and to **S**anitize as filename (flag `#` for restricted), respectively
+
+1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. Eg: `%(title)+.100U` is NFKC
+
+To summarize, the general syntax for a field is:
+```
+%(name[.keys][addition][>strf][,alternate][&replacement][|default])[flags][width][.precision][length]type
+```
+
+Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. For example, `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates is empty, that type of file will not be written. Eg: `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video.
+
+The available fields are:
+
+ - `id` (string): Video identifier
+ - `title` (string): Video title
+ - `fulltitle` (string): Video title ignoring live timestamp and generic title
+ - `url` (string): Video URL
+ - `ext` (string): Video filename extension
+ - `alt_title` (string): A secondary title of the video
+ - `description` (string): The description of the video
+ - `display_id` (string): An alternative identifier for the video
+ - `uploader` (string): Full name of the video uploader
+ - `license` (string): License name the video is licensed under
+ - `creator` (string): The creator of the video
+ - `timestamp` (numeric): UNIX timestamp of the moment the video became available
+ - `upload_date` (string): Video upload date (YYYYMMDD)
+ - `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
+ - `release_date` (string): The date (YYYYMMDD) when the video was released
+ - `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified
+ - `modified_date` (string): The date (YYYYMMDD) when the video was last modified
+ - `uploader_id` (string): Nickname or id of the video uploader
+ - `channel` (string): Full name of the channel the video is uploaded on
+ - `channel_id` (string): Id of the channel
+ - `channel_follower_count` (numeric): Number of followers of the channel
+ - `location` (string): Physical location where the video was filmed
+ - `duration` (numeric): Length of the video in seconds
+ - `duration_string` (string): Length of the video (HH:mm:ss)
+ - `view_count` (numeric): How many users have watched the video on the platform
+ - `like_count` (numeric): Number of positive ratings of the video
+ - `dislike_count` (numeric): Number of negative ratings of the video
+ - `repost_count` (numeric): Number of reposts of the video
+ - `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
+ - `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used)
+ - `age_limit` (numeric): Age restriction for the video (years)
+ - `live_status` (string): One of "is_live", "was_live", "is_upcoming", "not_live"
+ - `is_live` (boolean): Whether this video is a live stream or a fixed-length video
+ - `was_live` (boolean): Whether this video was originally a live stream
+ - `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
+ - `availability` (string): Whether the video is "private", "premium_only", "subscriber_only", "needs_auth", "unlisted" or "public"
+ - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
+ - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
+ - `format` (string): A human-readable description of the format
+ - `format_id` (string): Format code specified by `--format`
+ - `format_note` (string): Additional info about the format
+ - `width` (numeric): Width of the video
+ - `height` (numeric): Height of the video
+ - `resolution` (string): Textual description of width and height
+ - `tbr` (numeric): Average bitrate of audio and video in KBit/s
+ - `abr` (numeric): Average audio bitrate in KBit/s
+ - `acodec` (string): Name of the audio codec in use
+ - `asr` (numeric): Audio sampling rate in Hertz
+ - `vbr` (numeric): Average video bitrate in KBit/s
+ - `fps` (numeric): Frame rate
+ - `dynamic_range` (string): The dynamic range of the video
+ - `vcodec` (string): Name of the video codec in use
+ - `container` (string): Name of the container format
+ - `filesize` (numeric): The number of bytes, if known in advance
+ - `filesize_approx` (numeric): An estimate for the number of bytes
+ - `protocol` (string): The protocol that will be used for the actual download
+ - `extractor` (string): Name of the extractor
+ - `extractor_key` (string): Key name of the extractor
+ - `epoch` (numeric): Unix epoch of when the information extraction was completed
+ - `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`
+ - `video_autonumber` (numeric): Number that will be increased with each video
+ - `n_entries` (numeric): Total number of extracted items in the playlist
+ - `playlist_id` (string): Identifier of the playlist that contains the video
+ - `playlist_title` (string): Name of the playlist that contains the video
+ - `playlist` (string): `playlist_id` or `playlist_title`
+ - `playlist_count` (numeric): Total number of items in the playlist. May not be known if the entire playlist is not extracted
+ - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the final index
+ - `playlist_autonumber` (numeric): Position of the video in the playlist download queue padded with leading zeros according to the total length of the playlist
+ - `playlist_uploader` (string): Full name of the playlist uploader
+ - `playlist_uploader_id` (string): Nickname or id of the playlist uploader
+ - `webpage_url` (string): A URL to the video webpage which, if given to yt-dlp, should allow you to get the same result again
+ - `webpage_url_basename` (string): The basename of the webpage URL
+ - `webpage_url_domain` (string): The domain of the webpage URL
+ - `original_url` (string): The URL given by the user (or the same as `webpage_url` for playlist entries)
+
+Available for the video that belongs to some logical chapter or section:
+
+ - `chapter` (string): Name or title of the chapter the video belongs to
+ - `chapter_number` (numeric): Number of the chapter the video belongs to
+ - `chapter_id` (string): Id of the chapter the video belongs to
+
+Available for the video that is an episode of some series or programme:
+
+ - `series` (string): Title of the series or programme the video episode belongs to
+ - `season` (string): Title of the season the video episode belongs to
+ - `season_number` (numeric): Number of the season the video episode belongs to
+ - `season_id` (string): Id of the season the video episode belongs to
+ - `episode` (string): Title of the video episode
+ - `episode_number` (numeric): Number of the video episode within a season
+ - `episode_id` (string): Id of the video episode
+
+Available for the media that is a track or a part of a music album:
+
+ - `track` (string): Title of the track
+ - `track_number` (numeric): Number of the track within an album or a disc
+ - `track_id` (string): Id of the track
+ - `artist` (string): Artist(s) of the track
+ - `genre` (string): Genre(s) of the track
+ - `album` (string): Title of the album the track belongs to
+ - `album_type` (string): Type of the album
+ - `album_artist` (string): List of all artists who appeared on the album
+ - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
+ - `release_year` (numeric): Year (YYYY) when the album was released
+
+Available for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
+
+ - `section_title` (string): Title of the chapter
+ - `section_number` (numeric): Number of the chapter within the file
+ - `section_start` (numeric): Start time of the chapter in seconds
+ - `section_end` (numeric): End time of the chapter in seconds
+
+Available only when used in `--print`:
+
+ - `urls` (string): The URLs of all requested formats, one in each line
+ - `filename` (string): Name of the video file. Note that the actual filename may be different due to post-processing. Use `--exec echo` to get the name after all postprocessing is complete
+ - `formats_table` (table): The video format table as printed by `--list-formats`
+ - `thumbnails_table` (table): The thumbnail format table as printed by `--list-thumbnails`
+ - `subtitles_table` (table): The subtitle format table as printed by `--list-subs`
+ - `automatic_captions_table` (table): The automatic subtitle format table as printed by `--list-subs`
+
+
+Available only in `--sponsorblock-chapter-title`:
+
+ - `start_time` (numeric): Start time of the chapter in seconds
+ - `end_time` (numeric): End time of the chapter in seconds
+ - `categories` (list): The SponsorBlock categories the chapter belongs to
+ - `category` (string): The smallest SponsorBlock category the chapter belongs to
+ - `category_names` (list): Friendly names of the categories
+ - `name` (string): Friendly name of the smallest category
+
+Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. For example, for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory.
+
+Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with the placeholder value provided with `--output-na-placeholder` (`NA` by default).
+
+**Tip**: Look at the `-j` output to identify which fields are available for the particular URL
+
+For numeric sequences you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting), for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
+
+Output templates can also contain an arbitrary hierarchical path, e.g. `-o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
+
+To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
+
+The current default template is `%(title)s [%(id)s].%(ext)s`.
+
+In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.
+
+
+#### Output template and Windows batch files
+
+If you are using an output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
+
+
+#### Output template examples
+
+```bash
+$ yt-dlp --get-filename -o "test video.%(ext)s" BaW_jenozKc
+test video.webm # Literal name with correct extension
+
+$ yt-dlp --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
+youtube-dl test video ''_ä↭𝕐.webm # All kinds of weird characters
+
+$ yt-dlp --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
+youtube-dl_test_video_.webm # Restricted file name
+
+# Download YouTube playlist videos in separate directory indexed by video order in a playlist
+$ yt-dlp -o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
+
+# Download YouTube playlist videos in separate directories according to their uploaded year
+$ yt-dlp -o "%(upload_date>%Y)s/%(title)s.%(ext)s" "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
+
+# Prefix playlist index with " - " separator, but only if it is available
+$ yt-dlp -o '%(playlist_index|)s%(playlist_index& - |)s%(title)s.%(ext)s' BaW_jenozKc "https://www.youtube.com/user/TheLinuxFoundation/playlists"
+
+# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
+$ yt-dlp -o "%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" "https://www.youtube.com/user/TheLinuxFoundation/playlists"
+
+# Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home
+$ yt-dlp -u user -p password -P "~/MyVideos" -o "%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s" "https://www.udemy.com/java-tutorial"
+
+# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
+$ yt-dlp -P "C:/MyVideos" -o "%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" "https://videomore.ru/kino_v_detalayah/5_sezon/367617"
+
+# Download video as "C:\MyVideos\uploader\title.ext", subtitles as "C:\MyVideos\subs\uploader\title.ext"
+# and put all temporary files in "C:\MyVideos\tmp"
+$ yt-dlp -P "C:/MyVideos" -P "temp:tmp" -P "subtitle:subs" -o "%(uploader)s/%(title)s.%(ext)s" BaW_jenozKc --write-subs
+
+# Download video as "C:\MyVideos\uploader\title.ext" and subtitles as "C:\MyVideos\uploader\subs\title.ext"
+$ yt-dlp -P "C:/MyVideos" -o "%(uploader)s/%(title)s.%(ext)s" -o "subtitle:%(uploader)s/subs/%(title)s.%(ext)s" BaW_jenozKc --write-subs
+
+# Stream the video being downloaded to stdout
+$ yt-dlp -o - BaW_jenozKc
+```
+
+# FORMAT SELECTION
+
+By default, yt-dlp tries to download the best available quality if you **don't** pass any options.
+This is generally equivalent to using `-f bestvideo*+bestaudio/best`. However, if multiple audio streams are enabled (`--audio-multistreams`), the default format changes to `-f bestvideo+bestaudio/best`. Similarly, if ffmpeg is unavailable, or if you use yt-dlp to stream to `stdout` (`-o -`), the default becomes `-f best/bestvideo+bestaudio`.
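+
+To make those defaults concrete, the following invocations spell them out explicitly (a sketch; the URL is a placeholder):
+
+```bash
+# Spelled-out equivalent of the default selection when ffmpeg is available
+$ yt-dlp -f "bestvideo*+bestaudio/best" "https://some/video"
+
+# Spelled-out equivalent of the default when streaming to stdout (-o -),
+# where merging formats is not possible
+$ yt-dlp -o - -f "best/bestvideo+bestaudio" "https://some/video"
+```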
+
+**Deprecation warning**: Latest versions of yt-dlp can stream multiple formats to stdout simultaneously using ffmpeg. So, in future versions, the default for this will be set to `-f bv*+ba/b` similar to normal downloads. If you want to preserve the `-f b/bv+ba` setting, it is recommended to explicitly specify it in the configuration options.
+
+The general syntax for format selection is `-f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes the format or formats you would like to download.
+
+
+**tl;dr:** [navigate me to examples](#format-selection-examples).
+
+
+The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for a particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
+
+You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
+
+You can use `-f -` to interactively provide the format selector *for each video*
+
+You can also use special names to select particular edge case formats:
+
+ - `all`: Select **all formats** separately
+ - `mergeall`: Select and **merge all formats** (Must be used with `--audio-multistreams`, `--video-multistreams` or both)
+ - `b*`, `best*`: Select the best quality format that **contains either** a video or an audio
+ - `b`, `best`: Select the best quality format that **contains both** video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]`
+ - `bv`, `bestvideo`: Select the best quality **video-only** format. Equivalent to `best*[acodec=none]`
+ - `bv*`, `bestvideo*`: Select the best quality format that **contains video**. It may also contain audio. Equivalent to `best*[vcodec!=none]`
+ - `ba`, `bestaudio`: Select the best quality **audio-only** format. Equivalent to `best*[vcodec=none]`
+ - `ba*`, `bestaudio*`: Select the best quality format that **contains audio**. It may also contain video. Equivalent to `best*[acodec!=none]`
+ - `w*`, `worst*`: Select the worst quality format that contains either a video or an audio
+ - `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]`
+ - `wv`, `worstvideo`: Select the worst quality video-only format. Equivalent to `worst*[acodec=none]`
+ - `wv*`, `worstvideo*`: Select the worst quality format that contains video. It may also contain audio. Equivalent to `worst*[vcodec!=none]`
+ - `wa`, `worstaudio`: Select the worst quality audio-only format. Equivalent to `worst*[vcodec=none]`
+ - `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]`
+
+For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps` instead of `-f worst`. See [sorting formats](#sorting-formats) for more details.
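+
+As a quick illustration of these special names (a sketch; the URL is a placeholder):
+
+```bash
+# Best audio-only format, if the site offers one
+$ yt-dlp -f "ba" "https://some/video"
+
+# Best format that contains video; it may or may not also contain audio
+$ yt-dlp -f "bv*" "https://some/video"
+```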
+
+You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream.
+
+If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left hand side are preferred, for example `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
+
+If you want to download several formats of the same video use a comma as a separator, e.g. `-f 22,17,18` will download all three of these formats, provided they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
+
+You can merge the video and audio of multiple formats into a single file using `-f <format1>+<format2>+...` (requires ffmpeg installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg.
+
+**Deprecation warning**: Since the *below* described behavior is complex and counter-intuitive, this will be removed and multistreams will be enabled by default in the future. A new operator will instead be added to limit formats to single audio/video
+
+Unless `--video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, unless `--audio-multistreams` is used, all formats with an audio stream except the first one are ignored. For example, `-f bestvideo+best+bestaudio --video-multistreams --audio-multistreams` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download and merge both formats while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`.
+
+## Filtering Formats
+
+You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).
+
+The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):
+
+ - `filesize`: The number of bytes, if known in advance
+ - `width`: Width of the video, if known
+ - `height`: Height of the video, if known
+ - `tbr`: Average bitrate of audio and video in KBit/s
+ - `abr`: Average audio bitrate in KBit/s
+ - `vbr`: Average video bitrate in KBit/s
+ - `asr`: Audio sampling rate in Hertz
+ - `fps`: Frame rate
+
+Filtering also works for the comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and the following string meta fields:
+
+ - `ext`: File extension
+ - `acodec`: Name of the audio codec in use
+ - `vcodec`: Name of the video codec in use
+ - `container`: Name of the container format
+ - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
+ - `format_id`: A short description of the format
+ - `language`: Language code
+
+Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
+
+Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by the particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.
+
+Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter. For example, `-f "all[vcodec=none]"` selects all audio-only formats.
+
+Format selectors can also be grouped using parentheses, for example if you want to download the best pre-merged mp4 and webm formats with a height lower than 480 you can use `-f "(mp4,webm)[height<480]"`.
+
+## Sorting Formats
+
+You can change the criteria for being considered the `best` by using `-S` (`--format-sort`). The general format for this is `--format-sort field1,field2...`.
+
+The available fields are:
+
+ - `hasvid`: Gives priority to formats that have a video stream
+ - `hasaud`: Gives priority to formats that have an audio stream
+ - `ie_pref`: The format preference
+ - `lang`: The language preference
+ - `quality`: The quality of the format
+ - `source`: The preference of the source
+ - `proto`: Protocol used for download (`https`/`ftps` > `http`/`ftp` > `m3u8_native`/`m3u8` > `http_dash_segments` > `websocket_frag` > `mms`/`rtsp` > `f4f`/`f4m`)
+ - `vcodec`: Video Codec (`av01` > `vp9.2` > `vp9` > `h265` > `h264` > `vp8` > `h263` > `theora` > other)
+ - `acodec`: Audio Codec (`flac`/`alac` > `wav`/`aiff` > `opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `eac3` > `ac3` > `dts` > other)
+ - `codec`: Equivalent to `vcodec,acodec`
+ - `vext`: Video Extension (`mp4` > `webm` > `flv` > other). If `--prefer-free-formats` is used, `webm` is preferred.
+ - `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `opus` > `ogg` > `webm` > `m4a` > `mp3` > `aac`.
+ - `ext`: Equivalent to `vext,aext`
+ - `filesize`: Exact filesize, if known in advance
+ - `fs_approx`: Approximate filesize calculated from the manifests
+ - `size`: Exact filesize if available, otherwise approximate filesize
+ - `height`: Height of video
+ - `width`: Width of video
+ - `res`: Video resolution, calculated as the smallest dimension.
+ - `fps`: Framerate of video
+ - `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `HLG` > `SDR`)
+ - `tbr`: Total average bitrate in KBit/s
+ - `vbr`: Average video bitrate in KBit/s
+ - `abr`: Average audio bitrate in KBit/s
+ - `br`: Equivalent to using `tbr,vbr,abr`
+ - `asr`: Audio sample rate in Hz
+
+**Deprecation warning**: Many of these fields have (currently undocumented) aliases that may be removed in a future version. It is recommended to use only the documented field names.
+
+All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers the format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p, and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and the audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the value nearest to the one provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with the filesize closest to 1 GiB.
+
+The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.
+
+Note that the default has `codec:vp9.2`; i.e. `av01` is not preferred. Similarly, the default for hdr is `hdr:12`; i.e. Dolby Vision is not preferred. These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats.
+
+If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`.
+
+**Tip**: You can use `-v -F` to see how the formats have been sorted (worst to best).
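+
+A minimal sketch of these precedence rules (the URL is a placeholder):
+
+```bash
+# hasvid and ie_pref normally outrank the user's sort order;
+# --format-sort-force gives the user-specified order full precedence,
+# so here even an audio-only format could win on smallest size
+$ yt-dlp -S "+size" --format-sort-force "https://some/video"
+```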
+ +## Format Selection examples + +```bash +# Download and merge the best video-only format and the best audio-only format, +# or download the best combined format if video-only format is not available +$ yt-dlp -f "bv+ba/b" + +# Download best format that contains video, +# and if it doesn't already have an audio stream, merge it with best audio-only format +$ yt-dlp -f "bv*+ba/b" + +# Same as above +$ yt-dlp + +# Download the best video-only format and the best audio-only format without merging them +# For this case, an output template should be used since +# by default, bestvideo and bestaudio will have the same file name. +$ yt-dlp -f "bv,ba" -o "%(title)s.f%(format_id)s.%(ext)s" + +# Download and merge the best format that has a video stream, +# and all audio-only formats into one file +$ yt-dlp -f "bv*+mergeall[vcodec=none]" --audio-multistreams + +# Download and merge the best format that has a video stream, +# and the best 2 audio-only formats into one file +$ yt-dlp -f "bv*+ba+ba.2" --audio-multistreams + + +# The following examples show the old method (without -S) of format selection +# and how to use -S to achieve a similar but (generally) better result + +# Download the worst video available (old method) +$ yt-dlp -f "wv*+wa/w" + +# Download the best video available but with the smallest resolution +$ yt-dlp -S "+res" + +# Download the smallest video available +$ yt-dlp -S "+size,+br" + + + +# Download the best mp4 video available, or the best video if no mp4 available +$ yt-dlp -f "bv*[ext=mp4]+ba[ext=m4a]/b[ext=mp4] / bv*+ba/b" + +# Download the best video with the best extension +# (For video, mp4 > webm > flv. For audio, m4a > aac > mp3 ...) +$ yt-dlp -S "ext" + + + +# Download the best video available but no better than 480p, +# or the worst video if there is no video under 480p +$ yt-dlp -f "bv*[height<=480]+ba/b[height<=480] / wv*+ba/w" + +# Download the best video available with the largest height but no better than 480p, +# or the best video with the smallest resolution if there is no video under 480p +$ yt-dlp -S "height:480" + +# Download the best video available with the largest resolution but no better than 480p, +# or the best video with the smallest resolution if there is no video under 480p +# Resolution is determined by using the smallest dimension. +# So this works correctly for vertical videos as well +$ yt-dlp -S "res:480" + + + +# Download the best video (that also has audio) but no bigger than 50 MB, +# or the worst video (that also has audio) if there is no video under 50 MB +$ yt-dlp -f "b[filesize<50M] / w" + +# Download largest video (that also has audio) but no bigger than 50 MB, +# or the smallest video (that also has audio) if there is no video under 50 MB +$ yt-dlp -f "b" -S "filesize:50M" + +# Download best video (that also has audio) that is closest in size to 50 MB +$ yt-dlp -f "b" -S "filesize~50M" + + + +# Download best video available via direct link over HTTP/HTTPS protocol, +# or the best video available via any protocol if there is no such video +$ yt-dlp -f "(bv*+ba/b)[protocol^=http][protocol!*=dash] / (bv*+ba/b)" + +# Download best video available via the best protocol +# (https/ftps > http/ftp > m3u8_native > m3u8 > http_dash_segments ...) 
+$ yt-dlp -S "proto"
+
+
+
+# Download the best video with h264 codec, or the best video if there is no such video
+$ yt-dlp -f "(bv*[vcodec^=avc1]+ba) / (bv*+ba/b)"
+
+# Download the best video with best codec no better than h264,
+# or the best video with worst codec if there is no such video
+$ yt-dlp -S "codec:h264"
+
+# Download the best video with worst codec no worse than h264,
+# or the best video with best codec if there is no such video
+$ yt-dlp -S "+codec:h264"
+
+
+
+# More complex examples
+
+# Download the best video no better than 720p preferring framerate greater than 30,
+# or the worst video (still preferring framerate greater than 30) if there is no such video
+$ yt-dlp -f "((bv*[fps>30]/bv*)[height<=720]/(wv*[fps>30]/wv*)) + ba / (b[fps>30]/b)[height<=720]/(w[fps>30]/w)"
+
+# Download the video with the largest resolution no better than 720p,
+# or the video with the smallest resolution available if there is no such video,
+# preferring larger framerate for formats with the same resolution
+$ yt-dlp -S "res:720,fps"
+
+
+
+# Download the video with smallest resolution no worse than 480p,
+# or the video with the largest resolution available if there is no such video,
+# preferring better codec and then larger total bitrate for the same resolution
+$ yt-dlp -S "+res:480,codec,br"
+```
+
+# MODIFYING METADATA
+
+The metadata obtained by the extractors can be modified by using `--parse-metadata` and `--replace-in-metadata`
+
+`--replace-in-metadata FIELDS REGEX REPLACE` is used to replace text in any metadata field using [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax). [Backreferences](https://docs.python.org/3/library/re.html?highlight=backreferences#re.sub) can be used in the replace string for advanced use.
+
+The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.
+
+Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--add-metadata`.
+
+This option also has a few special uses:
+* You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. Eg: `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)"` will download the first vimeo video found in the description
+* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to the `meta_description` field will be added to the `description` field in the file. For example, you can use this to set a different "description" and "synopsis". To modify the metadata of individual streams, use the `meta<n>_` prefix (Eg: `meta1_language`). Any value set to the `meta_` field will overwrite all default values.
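+
+As a minimal sketch of a `meta_` override (placeholder URL; the field choice is only illustrative), this stores the uploader name in the file's "artist" tag, overwriting the default mapping listed below:
+
+```bash
+# FROM is an output template, TO is %(meta_artist)s, so the whole
+# interpolated value is assigned to meta_artist before embedding
+$ yt-dlp --parse-metadata "%(uploader)s:%(meta_artist)s" --embed-metadata "https://some/video"
+```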
+ +For reference, these are the fields yt-dlp adds by default to the file metadata: + +Metadata fields|From +:---|:--- +`title`|`track` or `title` +`date`|`upload_date` +`description`, `synopsis`|`description` +`purl`, `comment`|`webpage_url` +`track`|`track_number` +`artist`|`artist`, `creator`, `uploader` or `uploader_id` +`genre`|`genre` +`album`|`album` +`album_artist`|`album_artist` +`disc`|`disc_number` +`show`|`series` +`season_number`|`season_number` +`episode_id`|`episode` or `episode_id` +`episode_sort`|`episode_number` +`language` of each stream|From the format's `language` +**Note**: The file format may not support some of these fields + + +## Modifying metadata examples + +```bash +# Interpret the title as "Artist - Title" +$ yt-dlp --parse-metadata "title:%(artist)s - %(title)s" + +# Regex example +$ yt-dlp --parse-metadata "description:Artist - (?P.+)" + +# Set title as "Series name S01E05" +$ yt-dlp --parse-metadata "%(series)s S%(season_number)02dE%(episode_number)02d:%(title)s" + +# Set "comment" field in video metadata using description instead of webpage_url +$ yt-dlp --parse-metadata "description:(?s)(?P.+)" --add-metadata + +# Remove "formats" field from the infojson by setting it to an empty string +$ yt-dlp --parse-metadata ":(?P)" -j + +# Replace all spaces and "_" in title and uploader with a `-` +$ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-" + +``` + +# EXTRACTOR ARGUMENTS + +Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. Eg: `--extractor-args "youtube:player-client=android_agegate,web;include_live_dash" --extractor-args "funimation:version=uncut"` + +The following extractors use this feature: + +#### youtube +* `skip`: `hls` or `dash` (or both) to skip download of the respective manifests +* `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients, and `default` for the default clients. +* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details +* `include_live_dash`: Include live dash formats even without `--live-from-start` (These formats don't download properly) +* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) +* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`. + * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total. +* `max_comment_depth` Maximum depth for nested comments. 
+
+#### youtubetab (YouTube playlists, channels, feeds, etc.)
+* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
+
+#### funimation
+* `language`: Languages to extract. Eg: `funimation:language=english,japanese`
+* `version`: The video version to extract - `uncut` or `simulcast`
+
+#### crunchyroll
+* `language`: Languages to extract. Eg: `crunchyroll:language=jaJp`
+* `hardsub`: Which hard-sub versions to extract. Eg: `crunchyroll:hardsub=None,enUS`
+
+#### crunchyroll:beta
+* `format`: Which stream type(s) to extract. Default is `adaptive_hls`. Eg: `crunchyrollbeta:format=vo_adaptive_hls`
+    * Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `trailer_hls`, `trailer_dash`
+* `hardsub`: Preference order for which hardsub versions to extract. Default is `None` (no hardsubs). Eg: `crunchyrollbeta:hardsub=en-US,None`
+
+#### vikichannel
+* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
+
+#### youtubewebarchive
+* `check_all`: Try to check more at the cost of more requests. One or more of `thumbnails`, `captures`
+
+#### gamejolt
+* `comment_sort`: `hot` (default), `you` (cookies needed), `top`, `new` - choose comment sorting mode (on GameJolt's side)
+
+#### hotstar
+* `res`: resolution to ignore - one or more of `sd`, `hd`, `fhd`
+* `vcodec`: vcodec to ignore - one or more of `h264`, `h265`, `dvh265`
+* `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv`
+
+#### tiktok
+* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`. (e.g. `20.2.1`)
+* `manifest_app_version`: Numeric app version to call mobile APIs with. (e.g. `221`)
+
+NOTE: These options may be changed/removed in the future without concern for backward compatibility
+
+
+
+
+# PLUGINS
+
+Plugins are loaded from `<root-dir>/ytdlp_plugins/<type>/__init__.py`; where `<root-dir>` is the directory of the binary (`<root-dir>/yt-dlp`), or the root directory of the module if you are running directly from source-code (`<root-dir>/yt_dlp/__main__.py`). Plugins are currently not supported for the `pip` version
+
+Plugins can be of `<type>`s `extractor` or `postprocessor`. Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it. Postprocessor plugins can be invoked using `--use-postprocessor NAME`.
+
+See [ytdlp_plugins](ytdlp_plugins) for example plugins.
+
+Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. Use plugins at your own risk and only if you trust the code
+
+If you are a plugin author, add [ytdlp-plugins](https://github.com/topics/ytdlp-plugins) as a topic to your repository for discoverability
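+
+As a minimal sketch of what such a plugin might look like (the extractor name and domain below are hypothetical placeholders, not an official example):
+
+```python
+# <root-dir>/ytdlp_plugins/extractor/__init__.py
+from yt_dlp.extractor.common import InfoExtractor
+
+
+# Extractor plugin classes must end in "IE" to be picked up
+class SamplePluginIE(InfoExtractor):
+    # example.com is a placeholder domain for this sketch
+    _VALID_URL = r'https?://(?:www\.)?example\.com/watch/(?P<id>\d+)'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'url': self._og_search_video_url(webpage),
+        }
+```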
+
+
+
+# EMBEDDING YT-DLP
+
+yt-dlp makes the best effort to be a good command-line program, and thus should be callable from any programming language.
+
+Your program should avoid parsing the normal stdout since it may change in future versions. Instead, it should use options such as `-J`, `--print`, `--progress-template`, `--exec` etc. to create console output that you can reliably reproduce and parse.
+
+From a Python program, you can embed yt-dlp in a more powerful fashion, like this:
+
+```python
+from yt_dlp import YoutubeDL
+
+ydl_opts = {'format': 'bestaudio'}
+with YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
+```
+
+Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L191).
+
+Here's a more complete example demonstrating various functionality:
+
+```python
+import json
+import yt_dlp
+
+
+class MyLogger:
+    def debug(self, msg):
+        # For compatibility with youtube-dl, both debug and info are passed into debug
+        # You can distinguish them by the prefix '[debug] '
+        if msg.startswith('[debug] '):
+            pass
+        else:
+            self.info(msg)
+
+    def info(self, msg):
+        pass
+
+    def warning(self, msg):
+        pass
+
+    def error(self, msg):
+        print(msg)
+
+
+# ℹ️ See the docstring of yt_dlp.postprocessor.common.PostProcessor
+class MyCustomPP(yt_dlp.postprocessor.PostProcessor):
+    # ℹ️ See docstring of yt_dlp.postprocessor.common.PostProcessor.run
+    def run(self, info):
+        self.to_screen('Doing stuff')
+        return [], info
+
+
+# ℹ️ See "progress_hooks" in the docstring of yt_dlp.YoutubeDL
+def my_hook(d):
+    if d['status'] == 'finished':
+        print('Done downloading, now converting ...')
+
+
+def format_selector(ctx):
+    """ Select the best video and the best audio that won't result in an mkv.
+    This is just an example and does not handle all cases """
+
+    # formats are already sorted worst to best
+    formats = ctx.get('formats')[::-1]
+
+    # acodec='none' means there is no audio
+    best_video = next(f for f in formats
+                      if f['vcodec'] != 'none' and f['acodec'] == 'none')
+
+    # find compatible audio extension
+    audio_ext = {'mp4': 'm4a', 'webm': 'webm'}[best_video['ext']]
+    # vcodec='none' means there is no video
+    best_audio = next(f for f in formats if (
+        f['acodec'] != 'none' and f['vcodec'] == 'none' and f['ext'] == audio_ext))
+
+    yield {
+        # These are the minimum required fields for a merged format
+        'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}',
+        'ext': best_video['ext'],
+        'requested_formats': [best_video, best_audio],
+        # Must be + separated list of protocols
+        'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}'
+    }
+
+
+# ℹ️ See docstring of yt_dlp.YoutubeDL for a description of the options
+ydl_opts = {
+    'format': format_selector,
+    'postprocessors': [{
+        # Embed metadata in video using ffmpeg.
+        # ℹ️ See yt_dlp.postprocessor.FFmpegMetadataPP for the arguments it accepts
+        'key': 'FFmpegMetadata',
+        'add_chapters': True,
+        'add_metadata': True,
+    }],
+    'logger': MyLogger(),
+    'progress_hooks': [my_hook],
+}
+
+
+# Add custom headers
+yt_dlp.utils.std_headers.update({'Referer': 'https://www.google.com'})
+
+# ℹ️ See the public functions in yt_dlp.YoutubeDL for other available functions.
+# Eg: "ydl.download", "ydl.download_with_info_file" +with yt_dlp.YoutubeDL(ydl_opts) as ydl: + ydl.add_post_processor(MyCustomPP()) + info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc') + + # ℹ️ ydl.sanitize_info makes the info json-serializable + print(json.dumps(ydl.sanitize_info(info))) +``` + +**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above + + + + +# DEPRECATED OPTIONS + +These are all the deprecated options and the current alternative to achieve the same effect + +#### Almost redundant options +While these options are almost the same as their new counterparts, there are some differences that prevents them being redundant + + -j, --dump-json --print "%()j" + -F, --list-formats --print formats_table + --list-thumbnails --print thumbnails_table --print playlist:thumbnails_table + --list-subs --print automatic_captions_table --print subtitles_table + +#### Redundant options +While these options are redundant, they are still expected to be used due to their ease of use + + --get-description --print description + --get-duration --print duration_string + --get-filename --print filename + --get-format --print format + --get-id --print id + --get-thumbnail --print thumbnail + -e, --get-title --print title + -g, --get-url --print urls + --match-title REGEX --match-filter "title ~= (?i)REGEX" + --reject-title REGEX --match-filter "title !~= (?i)REGEX" + --min-views COUNT --match-filter "view_count >=? COUNT" + --max-views COUNT --match-filter "view_count <=? COUNT" + + +#### Not recommended +While these options still work, their use is not recommended since there are other alternatives to achieve the same + + --exec-before-download CMD --exec "before_dl:CMD" + --no-exec-before-download --no-exec + --all-formats -f all + --all-subs --sub-langs all --write-subs + --print-json -j --no-simulate + --autonumber-size NUMBER Use string formatting. 
Eg: %(autonumber)03d + --autonumber-start NUMBER Use internal field formatting like %(autonumber+NUMBER)s + --id -o "%(id)s.%(ext)s" + --metadata-from-title FORMAT --parse-metadata "%(title)s:FORMAT" + --hls-prefer-native --downloader "m3u8:native" + --hls-prefer-ffmpeg --downloader "m3u8:ffmpeg" + --list-formats-old --compat-options list-formats (Alias: --no-list-formats-as-table) + --list-formats-as-table --compat-options -list-formats [Default] (Alias: --no-list-formats-old) + --youtube-skip-dash-manifest --extractor-args "youtube:skip=dash" (Alias: --no-youtube-include-dash-manifest) + --youtube-skip-hls-manifest --extractor-args "youtube:skip=hls" (Alias: --no-youtube-include-hls-manifest) + --youtube-include-dash-manifest Default (Alias: --no-youtube-skip-dash-manifest) + --youtube-include-hls-manifest Default (Alias: --no-youtube-skip-hls-manifest) + + +#### Developer options +These options are not intended to be used by the end-user + + --test Download only part of video for testing extractors + --youtube-print-sig-code For testing youtube signatures + --allow-unplayable-formats List unplayable formats also + --no-allow-unplayable-formats Default + + +#### Old aliases +These are aliases that are no longer documented for various reasons + + --avconv-location --ffmpeg-location + --cn-verification-proxy URL --geo-verification-proxy URL + --dump-headers --print-traffic + --dump-intermediate-pages --dump-pages + --force-write-download-archive --force-write-archive + --load-info --load-info-json + --no-split-tracks --no-split-chapters + --no-write-srt --no-write-subs + --prefer-unsecure --prefer-insecure + --rate-limit RATE --limit-rate RATE + --split-tracks --split-chapters + --srt-lang LANGS --sub-langs LANGS + --trim-file-names LENGTH --trim-filenames LENGTH + --write-srt --write-subs + --yes-overwrites --force-overwrites + +#### Sponskrub Options +Support for [SponSkrub](https://github.com/faissaloo/SponSkrub) has been deprecated in favor of the `--sponsorblock` options + + --sponskrub --sponsorblock-mark all + --no-sponskrub --no-sponsorblock + --sponskrub-cut --sponsorblock-remove all + --no-sponskrub-cut --sponsorblock-remove -all + --sponskrub-force Not applicable + --no-sponskrub-force Not applicable + --sponskrub-location Not applicable + --sponskrub-args Not applicable + +#### No longer supported +These options may no longer work as intended + + --prefer-avconv avconv is not officially supported by yt-dlp (Alias: --no-prefer-ffmpeg) + --prefer-ffmpeg Default (Alias: --no-prefer-avconv) + -C, --call-home Not implemented + --no-call-home Default + --include-ads No longer supported + --no-include-ads Default + --write-annotations No supported site has annotations now + --no-write-annotations Default + +#### Removed +These options were deprecated since 2014 and have now been entirely removed + + -A, --auto-number -o "%(autonumber)s-%(id)s.%(ext)s" + -t, --title -o "%(title)s-%(id)s.%(ext)s" + -l, --literal -o accepts literal names + +# CONTRIBUTING +See [CONTRIBUTING.md](CONTRIBUTING.md#contributing-to-yt-dlp) for instructions on [Opening an Issue](CONTRIBUTING.md#opening-an-issue) and [Contributing code to the project](CONTRIBUTING.md#developer-instructions) + +# MORE +For FAQ see the [youtube-dl README](https://github.com/ytdl-org/youtube-dl#faq) + + diff --git a/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/RECORD b/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/RECORD new file mode 100644 index 0000000..887c956 --- /dev/null +++ 
b/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/RECORD @@ -0,0 +1,1939 @@ +../../../bin/yt-dlp,sha256=SYTGI31VmAFW7J1J4Sxkg0fhpeR_rtKeudLYYQ7qwIg,253 +../../../share/bash-completion/completions/yt-dlp,sha256=4PqeC3e6N9m-2bz7u8WEUDTww0T5_1uieihQvgEGcCw,5786 +../../../share/doc/yt_dlp/README.txt,sha256=okCgHh8S5s0cyz4If2peWC8feh9afEAnWn_ZqwiRtKM,125275 +../../../share/fish/vendor_completions.d/yt-dlp.fish,sha256=4aZwMktq8Pcxk3tkqA01uIFgG3FgBuxxBJ-ClrYPhiw,45620 +../../../share/man/man1/yt-dlp.1,sha256=kiUdVkMal50AxlkrlGReZsDTodExRf3K0QnT0J4bZsU,120599 +../../../share/zsh/site-functions/_yt-dlp,sha256=0ts-BswcTtbV3GSaOLPceoufO2iz_p8k89Gnqm2h8jo,5892 +yt_dlp-2022.2.4.dist-info/AUTHORS,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +yt_dlp-2022.2.4.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +yt_dlp-2022.2.4.dist-info/LICENSE,sha256=fhLl30uuEsshWBuhV87SDhmGoFCN0Q0Oikq5pM-U6Fw,1211 +yt_dlp-2022.2.4.dist-info/METADATA,sha256=eiAzwCFl3q3RzmO1RO_sDaLDDj_uuO03xtplptDHnP0,134580 +yt_dlp-2022.2.4.dist-info/RECORD,, +yt_dlp-2022.2.4.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +yt_dlp-2022.2.4.dist-info/WHEEL,sha256=z9j0xAa_JmUKMpmz72K0ZGALSM_n-wQVmGbleXx2VHg,110 +yt_dlp-2022.2.4.dist-info/entry_points.txt,sha256=3waLDIrlirkbfXwyX_Og7UDVaS5-C_epiwQ35ypk6uA,40 +yt_dlp-2022.2.4.dist-info/top_level.txt,sha256=tOAf2ifpXPnc7BTwtlvcX1Bl-AATuzfiRgy2s3pgqzI,7 +yt_dlp/YoutubeDL.py,sha256=jLXvbaQlNPhzXqkDL2y8P62tRWD0784_Yp4hhm7dA80,181730 +yt_dlp/__init__.py,sha256=sHizyVWVlmYVVQznhKndCDjrmaoXK6xz2AMJ_fdzIek,39289 +yt_dlp/__main__.py,sha256=ENpO7OoHiWvkGEf_yEx9nEOLvMi1_LQx4NdW65VN77M,452 +yt_dlp/__pycache__/YoutubeDL.cpython-310.pyc,, +yt_dlp/__pycache__/__init__.cpython-310.pyc,, +yt_dlp/__pycache__/__main__.cpython-310.pyc,, +yt_dlp/__pycache__/aes.cpython-310.pyc,, +yt_dlp/__pycache__/cache.cpython-310.pyc,, +yt_dlp/__pycache__/compat.cpython-310.pyc,, +yt_dlp/__pycache__/cookies.cpython-310.pyc,, +yt_dlp/__pycache__/jsinterp.cpython-310.pyc,, +yt_dlp/__pycache__/minicurses.cpython-310.pyc,, +yt_dlp/__pycache__/options.cpython-310.pyc,, +yt_dlp/__pycache__/socks.cpython-310.pyc,, +yt_dlp/__pycache__/update.cpython-310.pyc,, +yt_dlp/__pycache__/utils.cpython-310.pyc,, +yt_dlp/__pycache__/version.cpython-310.pyc,, +yt_dlp/__pycache__/webvtt.cpython-310.pyc,, +yt_dlp/aes.py,sha256=aAr6JKDcDyzQ9yKBUn6ZgfQgIidXzcnd2eqXyv1Qt-k,21157 +yt_dlp/cache.py,sha256=O10MJs0xR1HSkYAz5v1aY78FOjxJYawTCDxhHQTcKDM,3128 +yt_dlp/compat.py,sha256=KHZGwCOf0PKGuhgjZuxYNiIF9WPRonG50NSi2JP9BpQ,8840 +yt_dlp/cookies.py,sha256=xDFLugSp72ooiXnoJmEioHd3QQP9JF9X6t-UiXKsmGA,37559 +yt_dlp/downloader/__init__.py,sha256=335F5hE8Mlw_tDZeEfngk1XGWgw5y7IvkUBSTJC79kI,4427 +yt_dlp/downloader/__pycache__/__init__.cpython-310.pyc,, +yt_dlp/downloader/__pycache__/common.cpython-310.pyc,, +yt_dlp/downloader/__pycache__/dash.cpython-310.pyc,, +yt_dlp/downloader/__pycache__/external.cpython-310.pyc,, +yt_dlp/downloader/__pycache__/f4m.cpython-310.pyc,, +yt_dlp/downloader/__pycache__/fragment.cpython-310.pyc,, +yt_dlp/downloader/__pycache__/hls.cpython-310.pyc,, +yt_dlp/downloader/__pycache__/http.cpython-310.pyc,, +yt_dlp/downloader/__pycache__/ism.cpython-310.pyc,, +yt_dlp/downloader/__pycache__/mhtml.cpython-310.pyc,, +yt_dlp/downloader/__pycache__/niconico.cpython-310.pyc,, +yt_dlp/downloader/__pycache__/rtmp.cpython-310.pyc,, +yt_dlp/downloader/__pycache__/rtsp.cpython-310.pyc,, +yt_dlp/downloader/__pycache__/websocket.cpython-310.pyc,, 
+yt_dlp/downloader/__pycache__/youtube_live_chat.cpython-310.pyc,, +yt_dlp/downloader/common.py,sha256=Eo4f_2ETroXezkLCAvyoidtYN3VrLnCCSfR8JzxtBHc,18377 +yt_dlp/downloader/dash.py,sha256=E9UNfZckVSs2uYi_SL7bpEZfZVAoGyX-b5lpZ8otGIg,3060 +yt_dlp/downloader/external.py,sha256=3_iyszcmW6u1b8xS1YmWnKfRTB6Uwkvm4CYtSm-mLKE,21400 +yt_dlp/downloader/f4m.py,sha256=HBIytHbivQn-l_YL9ApDLFeSk8StUvKsTcn4_Nva-2E,15548 +yt_dlp/downloader/fragment.py,sha256=owAGSCVUlyvcZbgsrX8ub2ReLDJYO4NQnP41SByxBRg,22149 +yt_dlp/downloader/hls.py,sha256=aiwLChW6lpvpqaBmLaFtN6OBhSwR0oLpdgUaYJ6EmNw,16270 +yt_dlp/downloader/http.py,sha256=XCfnAuBKwRTYUg2yo3CApTc6J0iN-eZHD_m2u9K4kLU,17301 +yt_dlp/downloader/ism.py,sha256=T513MbAvkTdd2cRpf6eePR1SzUCwV17hUFg1R4xmSXg,11729 +yt_dlp/downloader/mhtml.py,sha256=D9Kx3Vhyx4FNQ7ktokiIuwL-0uxZFhs519K4v9aMrBE,6441 +yt_dlp/downloader/niconico.py,sha256=c_q4g4XMh8WwlUn1JRoFi1wrzZLsZ30fIBgBArp19qY,2016 +yt_dlp/downloader/rtmp.py,sha256=-RbVr5ypQqCsqs_Hh29-qYivEw7iFkChk7IYTXa1Clg,9019 +yt_dlp/downloader/rtsp.py,sha256=gcAPy8-Ja7alg30Lj9nNZwX_JPUFfbaYlUIvKe01FXM,1569 +yt_dlp/downloader/websocket.py,sha256=MOvI2yKUzpJ8ZdYwWDI1ZDbZrbVVI6KsQwpbBwjh3Gc,1994 +yt_dlp/downloader/youtube_live_chat.py,sha256=h808kIygjIfo4oJGesHzt1AqWG-hfpglY_e7xxJTwLA,11285 +yt_dlp/extractor/__init__.py,sha256=iWItUfJy9wfIDuIoSxoakSqSu9y-pXFBIk0UCQn2du8,1486 +yt_dlp/extractor/__pycache__/__init__.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/abc.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/abcnews.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/abcotvs.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/academicearth.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/acast.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/adn.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/adobeconnect.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/adobepass.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/adobetv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/adultswim.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/aenetworks.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/afreecatv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/airmozilla.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/aliexpress.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/aljazeera.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/allocine.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/alphaporno.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/alura.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/amara.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/amazon.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/amcnetworks.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/americastestkitchen.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/amp.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/animelab.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/animeondemand.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/anvato.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/aol.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/apa.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/aparat.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/appleconnect.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/applepodcasts.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/appletrailers.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/archiveorg.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/arcpublishing.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ard.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/arkena.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/arnes.cpython-310.pyc,, 
+yt_dlp/extractor/__pycache__/arte.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/asiancrush.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/atresplayer.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/atttechchannel.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/atvat.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/audimedia.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/audioboom.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/audiomack.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/audius.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/awaan.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/aws.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/azmedien.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/baidu.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bandaichannel.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bandcamp.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bannedvideo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bbc.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/beatport.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/beeg.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/behindkink.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bellmedia.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bet.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bfi.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bfmtv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bibeltv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bigflix.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bild.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bilibili.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/biobiochiletv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/biqle.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bitchute.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bitwave.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/blackboardcollaborate.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bleacherreport.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/blinkx.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/blogger.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bloomberg.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bokecc.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bongacams.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bostonglobe.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/box.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bpb.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/br.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/bravotv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/breakcom.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/breitbart.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/brightcove.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/businessinsider.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/buzzfeed.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/byutv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/c56.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cableav.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/callin.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cam4.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/camdemy.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cammodels.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/camwithher.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/canalalpha.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/canalc2.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/canalplus.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/canvas.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/carambatv.cpython-310.pyc,, 
+yt_dlp/extractor/__pycache__/cartoonnetwork.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cbc.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cbs.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cbsinteractive.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cbslocal.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cbsnews.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cbssports.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ccc.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ccma.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cctv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cda.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ceskatelevize.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cgtn.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/channel9.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/charlierose.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/chaturbate.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/chilloutzone.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/chingari.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/chirbit.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cinchcast.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cinemax.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ciscolive.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ciscowebex.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cjsw.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cliphunter.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/clippit.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cliprs.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/clipsyndicate.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/closertotruth.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cloudflarestream.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cloudy.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/clubic.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/clyp.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cmt.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cnbc.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cnn.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/comedycentral.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/common.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/commonmistakes.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/commonprotocols.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/condenast.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/contv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/corus.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/coub.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cozytv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cracked.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/crackle.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/crooksandliars.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/crowdbunker.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/crunchyroll.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cspan.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ctsnews.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ctv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ctvnews.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cultureunplugged.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/curiositystream.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/cwtv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/daftsex.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/dailymail.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/dailymotion.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/damtomo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/daum.cpython-310.pyc,, 
+yt_dlp/extractor/__pycache__/dbtv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/dctp.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/deezer.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/defense.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/democracynow.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/dfb.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/dhm.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/digg.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/digitalconcerthall.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/digiteka.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/discovery.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/discoverygo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/discoveryvr.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/disney.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/dispeak.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/dlive.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/doodstream.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/dotsub.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/douyutv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/dplay.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/drbonanza.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/dreisat.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/drooble.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/dropbox.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/dropout.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/drtuber.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/drtv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/dtube.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/duboku.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/dumpert.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/dvtv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/dw.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/eagleplatform.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ebaumsworld.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/echomsk.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/egghead.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ehow.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/eighttracks.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/einthusan.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/eitb.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ellentube.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/elonet.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/elpais.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/embedly.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/engadget.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/epicon.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/eporner.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/eroprofile.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ertgr.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/escapist.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/espn.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/esri.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/europa.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/europeantour.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/euscreen.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/everyonesmixtape.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/expotv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/expressen.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/extractors.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/extremetube.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/eyedotv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/facebook.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/fancode.cpython-310.pyc,, 
+yt_dlp/extractor/__pycache__/faz.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/fc2.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/fczenit.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/filmmodu.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/filmon.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/filmweb.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/firsttv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/fivemin.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/fivetv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/flickr.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/folketinget.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/footyroom.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/formula1.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/fourtube.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/fox.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/fox9.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/foxgay.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/foxnews.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/foxsports.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/franceculture.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/franceinter.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/francetv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/freesound.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/freespeech.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/freshlive.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/frontendmasters.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/fujitv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/funimation.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/funk.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/fusion.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/fxnetworks.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gab.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gaia.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gameinformer.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gamejolt.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gamespot.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gamestar.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gaskrank.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gazeta.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gdcvault.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gedidigital.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/generic.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gettr.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gfycat.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/giantbomb.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/giga.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gigya.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/glide.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/globo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/glomex.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/go.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/godtube.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gofile.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/golem.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/googledrive.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/googlepodcasts.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/googlesearch.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gopro.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/goshgay.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gotostage.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gputechconf.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/gronkh.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/groupon.cpython-310.pyc,, 
+yt_dlp/extractor/__pycache__/hbo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/hearthisat.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/heise.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/hellporno.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/helsinki.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/hentaistigma.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/hgtv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/hidive.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/historicfilms.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/hitbox.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/hitrecord.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/hketv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/hornbunny.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/hotnewhiphop.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/hotstar.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/howcast.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/howstuffworks.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/hrfensehen.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/hrti.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/hse.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/huajiao.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/huffpost.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/hungama.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/hypem.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ichinanalive.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ign.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/iheart.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/imdb.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/imggaming.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/imgur.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ina.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/inc.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/indavideo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/infoq.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/instagram.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/internazionale.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/internetvideoarchive.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/iprima.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/iqiyi.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ir90tv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/itv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ivi.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ivideon.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/iwara.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/izlesene.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/jamendo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/jeuxvideo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/joj.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/jove.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/jwplatform.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/kakao.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/kaltura.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/kanalplay.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/kankan.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/karaoketv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/karrierevideos.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/keezmovies.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/kelbyone.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ketnet.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/khanacademy.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/kickstarter.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/kinja.cpython-310.pyc,, 
+yt_dlp/extractor/__pycache__/kinopoisk.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/konserthusetplay.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/koo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/krasview.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ku6.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/kusi.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/kuwo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/la7.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/laola1tv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/lazy_extractors.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/lbry.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/lci.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/lcp.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/lecture2go.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/lecturio.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/leeco.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/lego.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/lemonde.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/lenta.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/libraryofcongress.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/libsyn.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/lifenews.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/limelight.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/line.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/linkedin.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/linuxacademy.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/litv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/livejournal.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/livestream.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/lnkgo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/localnews8.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/lovehomeporn.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/lrt.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/lynda.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/m6.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/magentamusik360.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mailru.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mainstreaming.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/malltv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mangomolo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/manoto.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/manyvids.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/maoritv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/markiza.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/massengeschmacktv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/matchtv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mdr.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/medaltv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mediaite.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mediaklikk.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/medialaan.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mediaset.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mediasite.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/medici.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/megaphone.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/megatvcom.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/meipai.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/melonvod.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/meta.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/metacafe.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/metacritic.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mgoon.cpython-310.pyc,, 
+yt_dlp/extractor/__pycache__/mgtv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/miaopai.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/microsoftstream.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/microsoftvirtualacademy.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mildom.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/minds.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ministrygrid.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/minoto.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/miomio.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mirrativ.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mit.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mitele.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mixch.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mixcloud.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mlb.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mlssoccer.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mnet.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/moevideo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mofosex.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mojvideo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/morningstar.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/motherless.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/motorsport.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/movieclips.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/moviezine.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/movingimage.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/msn.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mtv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/muenchentv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/musescore.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/musicdex.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mwave.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mxplayer.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/mychannels.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/myspace.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/myspass.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/myvi.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/myvideoge.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/myvidster.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/n1.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nate.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nationalgeographic.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/naver.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nba.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nbc.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ndr.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ndtv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nebula.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nerdcubed.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/neteasemusic.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/netzkino.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/newgrounds.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/newstube.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/newsy.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nextmedia.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nexx.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nfhsnetwork.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nfl.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nhk.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nhl.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nick.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/niconico.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ninecninemedia.cpython-310.pyc,, 
+yt_dlp/extractor/__pycache__/ninegag.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ninenow.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nintendo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nitter.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/njpwworld.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nobelprize.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/noco.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nonktube.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/noodlemagazine.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/noovo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/normalboots.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nosvideo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nova.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/novaplay.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nowness.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/noz.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/npo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/npr.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nrk.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nrl.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ntvcojp.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ntvde.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ntvru.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nuevo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nuvid.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nytimes.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nzherald.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/nzz.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/odatv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/odnoklassniki.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/oktoberfesttv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/olympics.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/on24.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/once.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ondemandkorea.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/onefootball.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/onet.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/onionstudios.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ooyala.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/opencast.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/openload.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/openrec.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/ora.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/orf.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/outsidetv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/packtpub.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/palcomp3.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pandoratv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/paramountplus.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/parliamentliveuk.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/parlview.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/patreon.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pbs.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pearvideo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/peertube.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/peertv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/peloton.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/people.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/performgroup.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/periscope.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/philharmoniedeparis.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/phoenix.cpython-310.pyc,, 
+yt_dlp/extractor/__pycache__/photobucket.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/picarto.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/piksel.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pinkbike.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pinterest.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pixivsketch.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pladform.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/planetmarathi.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/platzi.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/playfm.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/playplustv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/plays.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/playstuff.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/playtvak.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/playvid.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/playwire.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pluralsight.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/plutotv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/podomatic.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pokemon.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pokergo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/polsatgo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/polskieradio.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/popcorntimes.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/popcorntv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/porn91.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/porncom.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pornez.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pornflip.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pornhd.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pornhub.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pornotube.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pornovoisines.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pornoxo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/presstv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/projectveritas.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/prosiebensat1.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/prx.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/puhutv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/puls4.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/pyvideo.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/qqmusic.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/r7.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/radiko.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/radiobremen.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/radiocanada.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/radiode.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/radiofrance.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/radiojavan.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/radiokapital.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/radiozet.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/radlive.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/rai.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/raywenderlich.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/rbmaradio.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/rcs.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/rcti.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/rds.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/redbulltv.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/reddit.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/redgifs.cpython-310.pyc,, +yt_dlp/extractor/__pycache__/redtube.cpython-310.pyc,, 
[... remainder of hunk omitted: auto-generated wheel RECORD manifest for a vendored yt_dlp install (PEP 376 format, one CSV row per file — "path,sha256=<digest>,size" for sources such as yt_dlp/extractor/abc.py, and "path,," for compiled yt_dlp/extractor/__pycache__/*.cpython-310.pyc bytecode). Machine-generated package metadata with no hand-written content to review. ...]
+yt_dlp/extractor/shahid.py,sha256=PMc0rrpKBfPTB74PX6b7KX2Nv96o5v8vCgU4EvOMHZw,8453 +yt_dlp/extractor/shared.py,sha256=56Xz0RnSt1S0fal3IE9jbbdcAoz6K3wBo9Jv4GxgukQ,4385 +yt_dlp/extractor/shemaroome.py,sha256=qeQf2NWWPJqaDj9DUSdW4yNiFNmcPIYsuAZXfY5h9cY,4334 +yt_dlp/extractor/showroomlive.py,sha256=QbxCbHGAaKmv-5e9R3ozfzxql3Te9dFqWDAK7nnJVME,3109 +yt_dlp/extractor/simplecast.py,sha256=3COjfbshPKzhCcYmt_KBrmCRduydunTOZA1E0v3l6Ww,6219 +yt_dlp/extractor/sina.py,sha256=gXn_K_QddCk9knwtd3T9m27xacfgcdQ00ah0s5I0_rc,4292 +yt_dlp/extractor/sixplay.py,sha256=1B5OCLFxK6IXCC78JrS8O3vuSytrXWxKSYMM62wAq0A,5153 +yt_dlp/extractor/skeb.py,sha256=x6qArwgy7ecC1v1aKUfNEgIrsINZpV1gTNALPdFKvuA,5632 +yt_dlp/extractor/sky.py,sha256=RG21Cb07smG3JnRB5FgKvn3HxRR0_KBMn-FNNJq-QHk,6571 +yt_dlp/extractor/skyit.py,sha256=a0E_yLYbeYHTuVeDQaaOALlHuwAcInUk6ZQNtaqapxM,8767 +yt_dlp/extractor/skylinewebcams.py,sha256=dJ9qTwZ7Y2n8SJ9PXRBCRww3ajUoiZFU9c6I0IT3UwA,1434 +yt_dlp/extractor/skynewsarabia.py,sha256=Fr2v4LrDOHIzOLrynf_KrhvKAEZp1cT_dn94sxZxcU0,5382 +yt_dlp/extractor/skynewsau.py,sha256=GQkFDiVTf6kOP-zZNkyKdc2BefmJ0ac_BG5gWMKng2k,1834 +yt_dlp/extractor/slideshare.py,sha256=J8FH7VnyoWiBEEMxHwiuy-bI81x-a90teQQNAeWw0EA,2118 +yt_dlp/extractor/slideslive.py,sha256=rse7tjkEdEyRnp6uUFOgzrPrUeCkA5AbmoOHl6MRSvE,4164 +yt_dlp/extractor/slutload.py,sha256=ZWBRokCYQ5J1Vh_0fGT7G46FMQ_57LWy-WKx-kIr4so,2350 +yt_dlp/extractor/snotr.py,sha256=iGHjOyp3ceV2IFGF2MWOh5yFxv1HmApEzUP_bEoM6NY,2487 +yt_dlp/extractor/sohu.py,sha256=6nx1N7HAZW-W_txThzkR--gL5s948MXWygsZeovemOQ,6907 +yt_dlp/extractor/sonyliv.py,sha256=V3RSr11OlHlHChEtbY2UurkS8wWUZg4ce8snm2HJyEo,9488 +yt_dlp/extractor/soundcloud.py,sha256=W8AxcHBWtF_HZssYJrQbUAxfxgeukokTZjSPVdLcACc,35875 +yt_dlp/extractor/soundgasm.py,sha256=NZ1-eAc0KScOOv7sngcjnDrzANG2kCsrqjQ5WDooRMo,2410 +yt_dlp/extractor/southpark.py,sha256=0ASqtk7fmzNmkv4HPhOqa3zekRsV6jM_ng9xxS1o6uA,5934 +yt_dlp/extractor/sovietscloset.py,sha256=de_-95yeobLo1jyJxgUMNbQyVy4HedoiJvtxEKzf0PQ,8285 +yt_dlp/extractor/spankbang.py,sha256=qxf8_0aNn2lKS3C_nF5AUhhyyP_quSbOqjWEjSknkvk,7332 +yt_dlp/extractor/spankwire.py,sha256=TcYgBPvp-HHVtT2yNYvdff19jmguIh0gCpUuhv03p2E,6434 +yt_dlp/extractor/spiegel.py,sha256=EgDUyICkkU_bgXU7KxEfde5c-9KFJzoYaT5k6yONfCk,2383 +yt_dlp/extractor/spiegeltv.py,sha256=7zq7XM1qw3OAFfGlr_51uUL_X4Igk653mdfdn56-Wss,504 +yt_dlp/extractor/spike.py,sha256=baMyaa6Kg_BZlVepxWdJlzoOeuxYeWKeM06odw5MQ0g,1702 +yt_dlp/extractor/sport5.py,sha256=WHbhI3jhK4u0tmHD1B2cTi8cPU5AazAmBlO5KfY5C9U,3241 +yt_dlp/extractor/sportbox.py,sha256=LEkNZvI-Apa4qQGrzcP0O46Bpl2GM805ct5VzMM2dOI,3421 +yt_dlp/extractor/sportdeutschland.py,sha256=LBNgJZ6tfh0KUYW4hxQsVRO1YQ0WvwZS010-Mdf5eCg,3990 +yt_dlp/extractor/spotify.py,sha256=JLC6GYO8NxmRkQUqIAW3uj9ne7BQIHDYhR3eKl2Qqpw,5739 +yt_dlp/extractor/spreaker.py,sha256=QPH_duizGh1F8jGTtHEBjwF1Q8xy_WV2duY_njLC3ZU,6030 +yt_dlp/extractor/springboardplatform.py,sha256=_-tgRbwvy-ZksEVsCbX2ocYiq47_DJpzsIfbHhtFYmE,4238 +yt_dlp/extractor/sprout.py,sha256=tdxLll4wGsf2j5sznBsCrjUNIOUDs9yq9ChP8uK_2xw,2326 +yt_dlp/extractor/srgssr.py,sha256=OAEgNmtZpsvf7Sb7QIi8Ji6fV9aAs0QKPA6TZu3ovbY,10031 +yt_dlp/extractor/srmediathek.py,sha256=MbwS2m5zHZ8_faf0yyOwpHDBlY94mAfjkTpPxiOY_gc,2324 +yt_dlp/extractor/stanfordoc.py,sha256=gvdQubKgRuIgqrcIdHEBrhysEEA7-Y9InJgzmq_YXO4,3526 +yt_dlp/extractor/startv.py,sha256=6abzbhklCF9azevdgJ3r-s9OfWeVBoZL_TLBqLPTjxk,3587 +yt_dlp/extractor/steam.py,sha256=Sd4MFj3i1mDU7rIAVt95FF4FTBXFNzM8RYyl2ac4uQA,5008 +yt_dlp/extractor/stitcher.py,sha256=xnSx4bY1ZyjzZ9V_xJFb23O8mgaXUd_fkn87VEYfOmk,5371 
+yt_dlp/extractor/storyfire.py,sha256=AtWm5l7iRunDjruobwrZUnOiO2q5GGiJnWhjlZYV1UU,4842 +yt_dlp/extractor/streamable.py,sha256=T6RMQR5bEAwmt_oAdlwslF5KITBnmWo1XO_ACgB1b44,4171 +yt_dlp/extractor/streamanity.py,sha256=6Z_-hXbf99chavjInoJkTFtTgN6iHALJjmhJquLwBow,1877 +yt_dlp/extractor/streamcloud.py,sha256=HwTjFC2YWqVtf40vHtHviJ2Ih6df5I9fJuGOq1D2cqA,2558 +yt_dlp/extractor/streamcz.py,sha256=soiFw8HOwO2F0sJPFk4f5GNNJtnSp5lB2cGC6Q_c25w,4335 +yt_dlp/extractor/streamff.py,sha256=cG51NMpkI0Ju8DlvUarsS0OKUn_Ciu8FJVNzVEC-1nI,1035 +yt_dlp/extractor/streetvoice.py,sha256=nlFc2xuZU1M6oE6UuelkO5zLCbeIvsLbY47Yy9pZ7i4,3624 +yt_dlp/extractor/stretchinternet.py,sha256=QCpA8rRYVStNPwXN4ValhSgqxhAwLyiQtqpk0g5-bD8,1339 +yt_dlp/extractor/stripchat.py,sha256=_QfJyk13caneke1Jcew3z_vkgRsHj6JcEfEaVIs7Xk0,2453 +yt_dlp/extractor/stv.py,sha256=nOHXIk2T1NIvYhI2Mu2AHYgwpoiKdv-dFc33x3MgxKY,3293 +yt_dlp/extractor/sunporno.py,sha256=-NOofT29vmPf8v-TVaGI0OL5NoSNmZ7WTWdGYlfNb2U,2653 +yt_dlp/extractor/sverigesradio.py,sha256=BcFOLpV4WcTkloyjzq0wojcT8P5Hz0VPaE0khb_Y1kM,3910 +yt_dlp/extractor/svt.py,sha256=PHE5gzyZslXks9DprfTqX_nY7LGjDekdWsRk1F-Eh08,15178 +yt_dlp/extractor/swrmediathek.py,sha256=gHyGGJ3Z_nIDUeyG4OjrMb0OqAWZVWuoZ6lf1zySAe8,4375 +yt_dlp/extractor/syfy.py,sha256=m0X0lV2xID1igP0AIXTWOu-NsLT5HElPOr7dUcX8YIE,2046 +yt_dlp/extractor/sztvhu.py,sha256=lZI5WJr6CERT-CrhzBMpfWcLh0fam6ERX0s4fL0IH5s,1673 +yt_dlp/extractor/tagesschau.py,sha256=0UGti1T7X49IFZs_Ez7gw2OT0-vgAD79Hz_TV9vGNNA,5845 +yt_dlp/extractor/tass.py,sha256=mgewmCf7AKgZJbnAKTnN1rIoDiveaiWhY14u1twvqQI,2016 +yt_dlp/extractor/tastytrade.py,sha256=s3PM4cHXXvP4BDO4LUKLeUpou1gH-L59d2kw1MuJg3I,1452 +yt_dlp/extractor/tbs.py,sha256=KzFWPQNtsB2eyfFp5wBkF1J1YN_ZEMXSLjVN46ju4YM,3544 +yt_dlp/extractor/tdslifeway.py,sha256=XdAGbQFWTo3eZc5EcRQimHSpetLiOoTPQrNurFg6p_U,2482 +yt_dlp/extractor/teachable.py,sha256=buje-lz0WJvy0wAyXcNzGIfFqPP4av7luM6659jtR4A,10491 +yt_dlp/extractor/teachertube.py,sha256=U1ckr-k91fGlQIZykzgXRYFrJgxJkywmd5kOQdHRwhU,4413 +yt_dlp/extractor/teachingchannel.py,sha256=B1HdMyCS2G-jWosPC49PiF25rgJlZ0_I7N4xGQ182-g,1062 +yt_dlp/extractor/teamcoco.py,sha256=v8MipSFQZxCXqFa2VkGQGF_yy1RdrkgHcBIWRnaq6nc,7310 +yt_dlp/extractor/teamtreehouse.py,sha256=m_ZZRONLNZU6drrmsyuss-8AHumDLtEpxyk70yYM6po,5504 +yt_dlp/extractor/techtalks.py,sha256=jkeuaVqojqoI89oQPAi55nvw5_Pn-Fd3LWJMFFGUIU0,2525 +yt_dlp/extractor/ted.py,sha256=RA0qy_m3TJ0ip9xu8cidw_kkZD5vk_4RYZNAlEIpTXE,9978 +yt_dlp/extractor/tele13.py,sha256=IladQcY9GAUrZbc1fLHoNjdeKmPFQgB2c5TI3te5o3s,3342 +yt_dlp/extractor/tele5.py,sha256=zepZYmhndXr0YmHcRRUxxidMJRvnCTdkvwxgN15eK4c,3646 +yt_dlp/extractor/telebruxelles.py,sha256=OVYdjBTyYBwv7sTZqLZT2vnxghnLv2WA5CivETndy1s,2909 +yt_dlp/extractor/telecinco.py,sha256=m41pTjkE2Aw70iqfHokkdWdkvw-eRgsdejrl4zJveGM,6226 +yt_dlp/extractor/telegraaf.py,sha256=XqfN0HDiYKS3b6ETlUGwQb8yeBev69e7-W6wa9Bq-F0,2995 +yt_dlp/extractor/telemb.py,sha256=BhZq2pKqwmkGjXw4PMcPzBG1tnfo-v6lsh8qygnrEgk,2963 +yt_dlp/extractor/telemundo.py,sha256=7flA_lSP_JN5jQcxXmzHlljWAkO96A8lIw8dNt2s_iU,2465 +yt_dlp/extractor/telequebec.py,sha256=KCorumS_-su68pARrbUcyYtlZsO7PpSaTB2SCTKCf0s,9044 +yt_dlp/extractor/teletask.py,sha256=KM53eddFquvkzy8G1ZDytCDCbkqu3TNcUp7LRRgzDOY,1739 +yt_dlp/extractor/telewebion.py,sha256=aUymG85PJ6_YlDO5HHMneabKI-jiSXLXb7Jbnyrp7cM,1843 +yt_dlp/extractor/tennistv.py,sha256=U7q0iEcX-PtpDkvhYlRp0KsgUqKsR8UB9PMMQX_5c08,3989 +yt_dlp/extractor/tenplay.py,sha256=Vap9xL_92kcH2hQZBWJNhXqt9z7NjHO2yDCFDAD-Ze4,3332 +yt_dlp/extractor/testurl.py,sha256=6Mmx1Hv1nJMAfBg3WIOPVkpcvffJXXC0PP2xuMct8Co,2107 
+yt_dlp/extractor/tf1.py,sha256=HKgyQZwjNLuony4x5Mq95gR0uZ8LIgWibtkbQgJp2Kg,3024 +yt_dlp/extractor/tfo.py,sha256=vohOTvff5RWo-4RvIgHVFrWfR443W7nBk3r-jtsf_3k,2038 +yt_dlp/extractor/theintercept.py,sha256=1_QlPe4dkBHsyGaohQXEBs8snAqfUxf2glzqzVKH1SE,1801 +yt_dlp/extractor/theplatform.py,sha256=gVeXx9_IkcaSVWdyvKlCYBGN2GGd5aU7dZMxYauf-pQ,17381 +yt_dlp/extractor/thescene.py,sha256=ynX1Emxc8aPg-yNq6d2MXe5n8xFwHA-oLaWExVsMnAg,1373 +yt_dlp/extractor/thestar.py,sha256=gr_84HjbU-mokGY1_8nPtuXXPlEOfknbFRlePIRlHKU,1404 +yt_dlp/extractor/thesun.py,sha256=oNQ7quHxhiifx-fti-BHo6gnkMmum1Cx4WSCrWtv09k,1402 +yt_dlp/extractor/theta.py,sha256=vxOeIJVEJcoKjnGbDX9ZzsuLO4m0CF_GaSnp-cExni8,3708 +yt_dlp/extractor/theweatherchannel.py,sha256=Xi5PVoGnpxac6RcWdEJ8GPoTGjgoZjHBhaZ-Kg2o30o,4001 +yt_dlp/extractor/thisamericanlife.py,sha256=2G1ThNlbG-tgAGsMxrn79i9TaWYehG_aJvrowSs6LJg,1550 +yt_dlp/extractor/thisav.py,sha256=zWom_V9DvGd9pWT6ik3vyOqKiO0oitXKu-lZv3sA_m0,2517 +yt_dlp/extractor/thisoldhouse.py,sha256=SFdMY8lqsC3XwpYrFK1IdaPqDMKcU3xJWVAA8_t8XkM,2738 +yt_dlp/extractor/threeqsdn.py,sha256=jiYB5xhT4L4vnrReVy9Zo27HwWLFq35ACH7ln5kO3VY,6130 +yt_dlp/extractor/threespeak.py,sha256=xSnjY6xxlZI0wsvmIq6xC1UlIG96Cvz216JWZPYvpFU,4098 +yt_dlp/extractor/tiktok.py,sha256=WGKT6UC9q7Pb10bEm4ZpcJQmGHREGaIoqMr4msPuQtg,36996 +yt_dlp/extractor/tinypic.py,sha256=WxGvzkAe-degM0o1BkKl2HaLAWh_dNqd-2YQZlaHCq4,1892 +yt_dlp/extractor/tmz.py,sha256=g5N5r7F0kwLL6vum4qtVnJzAFnvjdGEnr3aCsCbJ3T4,7685 +yt_dlp/extractor/tnaflix.py,sha256=GeWD50SKyWYsEb88FKOKAgkDb2MrCqixhRTOTQ0UCOs,12215 +yt_dlp/extractor/toggle.py,sha256=j5v1J--ZNsJYiWvFdgPUX3FMn2XpDFwv80NXCOZH7Uk,8905 +yt_dlp/extractor/toggo.py,sha256=dZtcOt1T-gY_DS0nuStGczyG4DeuKewqc4k9WkdsQbc,2940 +yt_dlp/extractor/tokentube.py,sha256=WkHecbiuO8fHAPJSG98OJsGlYXARf4dc6fA-m4d1A9U,5715 +yt_dlp/extractor/tonline.py,sha256=kEMRahWn6TMKDMM-h-DWsY_VfGVkyqvHiUlLw3lu_os,1925 +yt_dlp/extractor/toongoggles.py,sha256=uLq-zZFoqDFzp_Dermbn7-w4fSqa7hScPNnS9UVct7Q,3030 +yt_dlp/extractor/toutv.py,sha256=N4qOJ_MvTvhFiIaRu8_JIHjpirol4huhgXPlzMh87tk,3607 +yt_dlp/extractor/toypics.py,sha256=M18yaZZjSdTPHQQ2FqKLJYxJ0ZTJUblQXgADnFauvHg,2746 +yt_dlp/extractor/traileraddict.py,sha256=iiyzhehZxHB4zQzas_TqeKxTA5xC4wv9JaL9DiDuF_0,2688 +yt_dlp/extractor/trilulilu.py,sha256=Ml-ILj8TyekWMYUtrzjPY2jZH9HLNtRk7cSzSc-v-Us,3624 +yt_dlp/extractor/trovo.py,sha256=BOsSg2tPjutRDrPlx3ohpqfPJ6XornDfnK0ou4oNcCc,8706 +yt_dlp/extractor/trueid.py,sha256=6rmKfbuEqxz_RY96WoSK6vSCCcUtnnQsu89rTftD5CQ,6405 +yt_dlp/extractor/trunews.py,sha256=KuyHxMQ4sALYqf9Z4oIgzfoYBfzI9n0EImy7HuRDXp4,1485 +yt_dlp/extractor/trutv.py,sha256=qR9uqSwztZ9xk0yaDqbgy7q_7reax4XtqBfn8dELNa0,2518 +yt_dlp/extractor/tube8.py,sha256=EMOmB0K20cYXr4hOGi8hupmF1n5G0KyMOVt757D6zYY,3087 +yt_dlp/extractor/tubitv.py,sha256=bZ2L78TeXQ3KWPOLkDBQG9AFNy2tW_09ylUbg3ywfR0,5125 +yt_dlp/extractor/tudou.py,sha256=KT3Lu_X3dpNXtWeWt_PmCfe1iNr6X24MoXTFQgVbZ4M,1699 +yt_dlp/extractor/tumblr.py,sha256=zTqug0G6oZ9R39i0GOwaDUJ6SL1XW9SC6v7dEWoh1K8,8162 +yt_dlp/extractor/tunein.py,sha256=hBfCA9LVuqnLg7c5kJjuc0HiLPrFzU5PVKUlu3fdmus,5891 +yt_dlp/extractor/tunepk.py,sha256=ycdgW_wcCBhQudwSZnYugOlnP0rnVrkQAzs0CvesOEs,3200 +yt_dlp/extractor/turbo.py,sha256=s0exeOIriig_RdbOhIugNerx5308Sn-Jh5HEGL8y_Rw,2427 +yt_dlp/extractor/turner.py,sha256=xOK76TEnGPTkLK63DuvIdVyEMIjPoaOzMXZGtM5pU08,11162 +yt_dlp/extractor/tv2.py,sha256=bR1NRbpc9nSgy33A8yH6D05j-bsR_Ur7VaF_LS-HOoI,13628 +yt_dlp/extractor/tv2dk.py,sha256=f6dvArPaJLL1Zh_bPmS45Ias4B9QRhfhh2x3lAhttTs,5670 
+yt_dlp/extractor/tv2hu.py,sha256=9rvcMNT6gNjl9wJN34h2HgFkEN42HSseHuZEtm7B9ps,4151 +yt_dlp/extractor/tv4.py,sha256=Bhcgy5qcEjQ706yqYeq4GAsJ-qprT72d9ILmK-OJPJE,4930 +yt_dlp/extractor/tv5mondeplus.py,sha256=1xHVkKeytQm0Yjg5EUpsTOLUX2DQXE_3Xs23M0JoyAc,4645 +yt_dlp/extractor/tv5unis.py,sha256=YZg1h2M9Y_s-hItYH_TufELFj-501trNUEKtbOOVAn0,4076 +yt_dlp/extractor/tva.py,sha256=n-upHpJ0k2YhlmqASOQAVx_h_x-BPcip6-oDGub46AA,3158 +yt_dlp/extractor/tvanouvelles.py,sha256=f7s4Tspf2b_0GeUI4kKwqFydUNwKhcOcs9ZQxZ6NhX8,2401 +yt_dlp/extractor/tvc.py,sha256=OBCOylD9bVEb0W8jyAdT05uPOaZnLUUZGMAXOZfoUYc,3910 +yt_dlp/extractor/tver.py,sha256=N8yxw0yxP6-yzZynsdM3NGpHrOxFQdN3mYFTBsoR-z0,2472 +yt_dlp/extractor/tvigle.py,sha256=6GeFd2J5m2vVji-QApJZiJ_ggla_xTXZH672qLWSLeA,5036 +yt_dlp/extractor/tvland.py,sha256=gcgs3IMOHwvmGZ7T3YoON0BTsxR-jf_CSpwUzy9gQag,1580 +yt_dlp/extractor/tvn24.py,sha256=h63gtpUSfdSTu9RftEqSNuclymynOaifP-_rPsZ5oGg,3902 +yt_dlp/extractor/tvnet.py,sha256=EyaqqyUyj8lihwIrEE2Tuo4I71lWAlXAedavLkX4o7w,4877 +yt_dlp/extractor/tvnoe.py,sha256=50w6JIoLb_PnjKB1ZtC2TKseNqoyw159n9i1pIB8k4o,1612 +yt_dlp/extractor/tvnow.py,sha256=3T5VGnkUly-mObkryfvOrBWU9G_WMa_R2yrSCVRtoOg,24114 +yt_dlp/extractor/tvopengr.py,sha256=kmvzTuemObirjMbMxW6j0IiIEejXEl2eqjTvP5N1bMI,6042 +yt_dlp/extractor/tvp.py,sha256=B9YysEoc1UuqDhedWOJ_yJL9GYp4Y3hKGuN_mCUN2EI,20073 +yt_dlp/extractor/tvplay.py,sha256=Pg8o-Jmr7HvAzWL2jFbPsrS33FtSmK5AiasI2gHHmGw,20349 +yt_dlp/extractor/tvplayer.py,sha256=ObddgpjtPwRtHMxwW614H1f1OzSh0giZj4PYjl407Pg,2808 +yt_dlp/extractor/tweakers.py,sha256=vgDrlTVOM2gDhqzz2lgJM31MLU3VzsWOLbQyddJqfU0,2165 +yt_dlp/extractor/twentyfourvideo.py,sha256=LV3udCJEUcNDwb3Rq4fDO4d9monR8FOriEbyCfuf67Y,4742 +yt_dlp/extractor/twentymin.py,sha256=GOJ288OwQ_qM5vXAtVHPybmlzxKA8hRGI2WlHgyeBUQ,2860 +yt_dlp/extractor/twentythreevideo.py,sha256=NVfD77e_WRddtIIENZuC9uIdpqcEvHNo9VlLGGbn5Y4,3284 +yt_dlp/extractor/twitcasting.py,sha256=MRwREavQJ_KaJq0cca4sOHgRjrIOj7N8n9gL8-yXhtU,9752 +yt_dlp/extractor/twitch.py,sha256=gwCJrqxZPFmTlK7JjkErD3Ch6kZ-1TET5wTU8d1-JOY,36572 +yt_dlp/extractor/twitter.py,sha256=jSQJHAhL09KH0k4LlT9iSJbVudpN7vpv6S6zTkeVMCQ,29339 +yt_dlp/extractor/udemy.py,sha256=zIiOcuqB6mZBjgyweaIc6rsw8I1pOKcWV6PiUEdLB9w,19351 +yt_dlp/extractor/udn.py,sha256=Txl0apTktMHK6uVftWDh0z15OiAnH3Pxz0a3uA-wBgU,3575 +yt_dlp/extractor/ufctv.py,sha256=r_RcKNQODJlgTk4GjmyfE_THH6UqVTIQ2U994hAOFDs,511 +yt_dlp/extractor/ukcolumn.py,sha256=nqiPzLjVvj8D-HDQjHYnTXBS_0nJbaQfByShDfhCCfM,2479 +yt_dlp/extractor/uktvplay.py,sha256=6qdHuUcP3GlqJzmkLrCEuqczBfzhsiq7GZ5h8_uDMeA,1364 +yt_dlp/extractor/umg.py,sha256=MT-VK7tETQkXN38mJ6VFozj0K6qOdH_Pt48sFKSEVXE,3262 +yt_dlp/extractor/unistra.py,sha256=6fYrcVe3Xvp3DOmo5brk1Ue6Er-SK8JkAz6OyeOcnbo,2154 +yt_dlp/extractor/unity.py,sha256=vACiIJLvVHHzc_Pp2-pP2IdGzXzLjEQKy-s3cbsAXY8,1228 +yt_dlp/extractor/uol.py,sha256=_p_xm-XANk-JyJhYC7LbRoHNp8delXBVEWQxNYvOFnI,5503 +yt_dlp/extractor/uplynk.py,sha256=HmcGapHkHkQr4DdQ2kja8cAFsoop8q0Jgfv9LggohdM,2682 +yt_dlp/extractor/urort.py,sha256=vpYSn_pvMcTvJU_VOskUHTC_nv9QVR6hQ9EVK3JflSs,2246 +yt_dlp/extractor/urplay.py,sha256=-CcnOTxkqa4Ym6u-xibNOluC6uH4UKwiYICT0CAHTUw,4716 +yt_dlp/extractor/usanetwork.py,sha256=yZ-1KBXE7dC3w4MllGznrd4hl2K3K4mdfGdCA0qdZsg,814 +yt_dlp/extractor/usatoday.py,sha256=_LtMn5HA5yfuzvf9zvatmv2sZqVU4sF9W4yy2_4-GmU,2703 +yt_dlp/extractor/ustream.py,sha256=WcgtYkDIzEeUJRTPmZW0R8vGjCyxPuNCNMTXBAN4Tv4,10766 +yt_dlp/extractor/ustudio.py,sha256=4RABcQbCKyRFowqdzeV-YlGtKpEnla6ZyTQ-HbMc-88,4374 
+yt_dlp/extractor/utreon.py,sha256=UnZcOhNplMBAZy5lXJK2OBCt2aDxib4MPfLDWKfixlM,3514 +yt_dlp/extractor/varzesh3.py,sha256=IAIAObbtb2EkZzYhdKCJINakg68BRC60nMC68Yil8LM,3129 +yt_dlp/extractor/vbox7.py,sha256=vaeR4Zr1kHy0gP5ItE2txuA-gP3GN4EwTzPS7u7ToVo,3413 +yt_dlp/extractor/veehd.py,sha256=0phpRUSoldfAuGaYI5FFyXfqgUBv6r0zn1UgrpVoLno,4134 +yt_dlp/extractor/veo.py,sha256=hBlN8vYB-bnGsd6M8iUiC9qXs-HzlSlvMYXw6Ld9b6s,2358 +yt_dlp/extractor/veoh.py,sha256=PlrsuRyvP5QPgo1HUkR6tMUOmV61S7GhLuvaEtZoIUE,4790 +yt_dlp/extractor/vesti.py,sha256=N4thLQJ0fwtnog064AmL9dlOHCCB-p7-L08_tp-YDOI,4379 +yt_dlp/extractor/vevo.py,sha256=N3_x4ybBKlc7oekai50YxYik1xyqf56DsuikrzTPAA8,8856 +yt_dlp/extractor/vgtv.py,sha256=cSz3eNIY79w4A3XOzE3SO2MYO0VXLX1bmAjNr35zMIA,10830 +yt_dlp/extractor/vh1.py,sha256=MYB0OdpGm5VVOIo0rARO6moicB5hbyad2y5zrD5iR9o,1438 +yt_dlp/extractor/vice.py,sha256=_aXgqjYXWwjo2P5o5ICkaOdNfFUs9o7iHX7k1hl2iBo,12367 +yt_dlp/extractor/vidbit.py,sha256=43jtzjcpxfhPHJW2F12xqGKg_iIJwV5RCaLhSJQgD4M,2917 +yt_dlp/extractor/viddler.py,sha256=04_l79Q6kBHm18pX_tUR8D_yMJz3xn88Ui9INygXhfg,4876 +yt_dlp/extractor/videa.py,sha256=tmA1D1pLSDWEEqnIQmyqZBPUQCe2gWmlNC87fdNdN3c,6701 +yt_dlp/extractor/videodetective.py,sha256=fgoV7IFckQMOZiPHIoy6TEp17NcWti_YNz1ti1l1a5I,906 +yt_dlp/extractor/videofyme.py,sha256=d6T9_m7BcB0u6ZU5CsiyIFyBYzF_kh22HlcOsWsYsv8,1750 +yt_dlp/extractor/videomore.py,sha256=qA-Q5QcXE6X1MUJyYrEgNpnM8erc9_Xtqv2LMCj72nc,11649 +yt_dlp/extractor/videopress.py,sha256=l8ud1F4fg_LDnhtxtr3ymKtLSdjc1VVJV8oiHggG7ss,3359 +yt_dlp/extractor/vidio.py,sha256=sqlHupfd5z571mGc4fPaRVtUfUddN8DHvE9GKqRN8U0,12538 +yt_dlp/extractor/vidlii.py,sha256=z_RJk7NRShBOjwIlOdpFm5dUkGaGE_mXWogSqOY1Xnk,5831 +yt_dlp/extractor/vidzi.py,sha256=bwKHsEIf-KwvXE6tUuqWcdnOgXGuY2EzhnjLP4U71ys,2165 +yt_dlp/extractor/vier.py,sha256=Y8DT62_VorP50zG8EzCjvveXk8UBuC4xwVxlzkFoGVc,10000 +yt_dlp/extractor/viewlift.py,sha256=h5OKngr1JOc6bheyJBMsMLfnttWrcFLRoxT9tu0-ucM,14237 +yt_dlp/extractor/viidea.py,sha256=UA1RKYUPQ2LKrGw9xl8i4cxZQzX-Vc5YW1sXafG6o94,7442 +yt_dlp/extractor/viki.py,sha256=xwtjtw21LNN4lFDwgxU55WmbQ8AsQjcpd5Srt5X4Uoo,13686 +yt_dlp/extractor/vimeo.py,sha256=Bjz8Re1hbVeLDHkyt5BhpYLwwtKXRM5U6hG1bqLmsV8,54404 +yt_dlp/extractor/vimm.py,sha256=tSEQ-AuwqEd254ve-enPFmtc8vQkF3yaNJPTjEeVj_M,2242 +yt_dlp/extractor/vimple.py,sha256=dfP77hOY7EJURUoPRUycV02JRvYrryGZ4NmO6Y4FVFI,1968 +yt_dlp/extractor/vine.py,sha256=yt_PpFjAphNJ9up0ld7xUQF_TWEZZaTKyttvo8eUc0M,5378 +yt_dlp/extractor/viqeo.py,sha256=vlgfyQUeQJ5M7VgE0eZX6dBpoauCBaNnFHa5rcJdQ3E,3298 +yt_dlp/extractor/viu.py,sha256=sX9BDtaQVxvg7tPm4brODnz1rt9zJBr37BgVGJD30tk,14681 +yt_dlp/extractor/vk.py,sha256=mrNWwTD3F8QYfwTmC6p7D8C-f0TrQLm1dSxvdlhjg6Y,27164 +yt_dlp/extractor/vlive.py,sha256=n3rltRjbMTS9ltmKkwh2zV6Z8_1evBoIvNDBXFFkxNg,14160 +yt_dlp/extractor/vodlocker.py,sha256=-XT2R046SvkSiq8PUTmwMQFAJHqyIItvPLQff-l4PKw,2796 +yt_dlp/extractor/vodpl.py,sha256=e-_HzG-ClVQHIhNlZZLzlv0gm9eVhFGPHosVY_ZbeMc,1032 +yt_dlp/extractor/vodplatform.py,sha256=RgjUxzW-ZYQVs_eG8n17fMn121ZE6zhNVW9sbZV1aD8,1569 +yt_dlp/extractor/voicerepublic.py,sha256=WfeKnyHR4vz9SjK1FALi5xMG-i6NQqqQej8w3Wp9yF4,2302 +yt_dlp/extractor/voicy.py,sha256=HEc5Kls7hw6iFMYnTLhoy4-5eQnnOEp5_ZXqmaobDvk,5475 +yt_dlp/extractor/voot.py,sha256=MO4l-uo1AJGEx9wskFeHR02nEH7dzlMNrTJSnLp2Zm8,5954 +yt_dlp/extractor/voxmedia.py,sha256=kdShPoHkl_OXjfPpEppMl-z5bS7uPmL-FrKOrYiccU8,9844 +yt_dlp/extractor/vrak.py,sha256=C6F1rIjKRggflKqpaAwI9BtwxGjmi5ldOgVz6iKiIOc,2943 +yt_dlp/extractor/vrt.py,sha256=BmkqS4q48TOsgnLzn6_vX2HBatkTr0FDe2JGeReVk-M,3525 
+yt_dlp/extractor/vrv.py,sha256=rccFUKJfaG527jgUA7UY3eEwFeXjCqtdz7XCfVohu7U,11020 +yt_dlp/extractor/vshare.py,sha256=h29M1CnmGclJI90-s4hr8izNCFHq4fXvtW8CXr3E-cc,2281 +yt_dlp/extractor/vtm.py,sha256=7_nJ-e1OZlqO0jolp_VdCKU89MVNSSTu1irJZWVOy5w,1899 +yt_dlp/extractor/vube.py,sha256=sx11tAET9XZk3E5EnRlWKNOBQ5F-CKHGAqI7kVxoi_Y,6900 +yt_dlp/extractor/vuclip.py,sha256=ENGA1EbH1VfD-FKJFOTvlvYtE_PeHuopyswhy8bxH3k,2254 +yt_dlp/extractor/vupload.py,sha256=DFzid218NeqcMbvC2b68wxuDc0Q3tqpQ2TljXlpcPUY,2205 +yt_dlp/extractor/vvvvid.py,sha256=P3LbAREkYpWhSCDftzRiLzoyUU97i9A88CH3bRVxvFk,10292 +yt_dlp/extractor/vyborymos.py,sha256=8RcH7iYKWhpZ11Ot1E3kpjBt39aW6EMd8V3_TjmUbFc,2013 +yt_dlp/extractor/vzaar.py,sha256=l2GiRL_jGCl0H0Y0giZY6XQcJbD0HowJSR8tGJd6xdQ,3608 +yt_dlp/extractor/wakanim.py,sha256=QKARJCxFFg3__LdoH4wIZNWl8fb_I3Pj894wHnKrVkQ,2833 +yt_dlp/extractor/walla.py,sha256=skTPWb_-M-LmaxyrYUcJPh8j5tx2vZrb_3N28GV63QI,2813 +yt_dlp/extractor/washingtonpost.py,sha256=o9PQAmgYpmXaKO_tuJhVb3Q4vENkp0Jb3TbbDbT4JkM,5257 +yt_dlp/extractor/wat.py,sha256=iPUkU70k9a5QEgGAOnrpCKOuOT1_GsSR78ccMumuRW0,4300 +yt_dlp/extractor/watchbox.py,sha256=VG1mSjCJqP02eOrssWmnB1LQQYIclwJBVOb6r0PZAvc,5858 +yt_dlp/extractor/watchindianporn.py,sha256=fUvjtvChnVjLQKIViR7kLqIWYh2liieDlEjjv38DMV8,2293 +yt_dlp/extractor/wdr.py,sha256=H1-DD2_KjON7ROK0sUlu7GfUDlu_7xx6zDwcbvOzh4c,13551 +yt_dlp/extractor/webcaster.py,sha256=25k6gKI_RasQzrNU9W10ztJEP6Hh662dzIOSXNR-kI8,3777 +yt_dlp/extractor/webofstories.py,sha256=LPUtQWdHrE833OFSyAP1I3MKQFawar6kseO47BKKbFI,5550 +yt_dlp/extractor/weibo.py,sha256=EYrlrlb1sGrBEYh8CvL0JFk-FUd0CZiUQaeunD_9Kvs,4493 +yt_dlp/extractor/weiqitv.py,sha256=ghqlTyBeNl1ZGBOdBL2dqJ6ULa-uzsI0bR86-dLgXvs,1681 +yt_dlp/extractor/whowatch.py,sha256=8hWbTNtYiex5Ohup1VbYgj0J8xNmjAUkK4pUQ_ymCRM,4018 +yt_dlp/extractor/willow.py,sha256=pBnGfNnm0qKYV72JVfk34RwBHTk1lVQS3QzM96nf53g,2394 +yt_dlp/extractor/wimtv.py,sha256=wYDYqdD7VCdZizuPKialto2M8mfjPNKPzfbSdotTcoQ,5734 +yt_dlp/extractor/wistia.py,sha256=1W5mdXiUPEDWLHrwO0HGVKPqFZoJJidg3yYbDmTz5so,7227 +yt_dlp/extractor/worldstarhiphop.py,sha256=9XazCpluCOOXWEJg2ntt6l-zfZEVOohurSCwJ_EMbm4,1344 +yt_dlp/extractor/wppilot.py,sha256=kXiFJExy__S5ISD4aOlj6MDV7wEPcpZ5hIiBWtLkFlo,6203 +yt_dlp/extractor/wsj.py,sha256=97OiTsMSBeKh394rlle1At7lXeEAvHEXQSU8HbgotP0,4694 +yt_dlp/extractor/wwe.py,sha256=6TKA4KCG7Mo-TdIioHmirDBMPSarr9oaxXbYMPdjts0,4532 +yt_dlp/extractor/xbef.py,sha256=86k_CeL6cVVBgxA19jMbK_nNtcqDf4KeuYeoUSfUjDY,1447 +yt_dlp/extractor/xboxclips.py,sha256=-lIwJUAo_jPRq8gQIuIbe5yL2QL0-ozlgnnpEehMdYU,2372 +yt_dlp/extractor/xfileshare.py,sha256=bzpwXkBHULXi5gh9hlbBnebEfMwH72jftbnwsOiEZ1s,7377 +yt_dlp/extractor/xhamster.py,sha256=htQve-v7lZ2oiXRvX0gtQfARsqr6KLNYzI00FcEdroE,18932 +yt_dlp/extractor/xiami.py,sha256=0ER3KPS6J965XQC3m0HduPbY_si8kPUPzRgHvECJt7E,6816 +yt_dlp/extractor/ximalaya.py,sha256=_43O3EPfxTlbLzv3FkjMroYdaU3ddbyaeDXvx5a3WjM,9908 +yt_dlp/extractor/xminus.py,sha256=YBsGlieNPXF-GfDiUwXawbSFnp1VdE3wFP9msLG8wfc,2937 +yt_dlp/extractor/xnxx.py,sha256=kWUgyx72AXM8Hz7ynK1RP6eEMppnKu-h5nQhgym8p6Y,2886 +yt_dlp/extractor/xstream.py,sha256=u44Y1Rp7l4osmB1XPZFeLPVkYSeyaLJfIbFuJTzITRg,3974 +yt_dlp/extractor/xtube.py,sha256=eLE9oQa5vYAFlkFhpXtjcs9un3-iaOGRf0JPSYQZri0,7970 +yt_dlp/extractor/xuite.py,sha256=qGyahXGcz2Q5i49uM6v1WMdlGLcwvMjB01fSwWoB-Ac,5807 +yt_dlp/extractor/xvideos.py,sha256=eAvlLW3XuspGq4loRZcLO5qhVXENoB2pfxZhs4X3wNY,5985 +yt_dlp/extractor/xxxymovies.py,sha256=yPkxbXAdV01wQn6VXGxz-mjkHvqpWoFLsB1Z7mviTvY,2654 
+yt_dlp/extractor/yahoo.py,sha256=CqrDEyXcBUNdM5grOnR721mKpy4Jx3l6RPLlErgFhu0,23944 +yt_dlp/extractor/yandexdisk.py,sha256=YQ0XFS8A9fI_Lk4lHFZUEELwsnFkTj_Qy2-TYoh_FXE,5152 +yt_dlp/extractor/yandexmusic.py,sha256=GRfaY8qjgo5hYG7SJhxWB1iqvr2GMVC2gmiMiYKW4ic,17701 +yt_dlp/extractor/yandexvideo.py,sha256=yDp-Kj4irMGETx4dfSC1QtRDqPwRLOVsK0VL9Os3NXk,13386 +yt_dlp/extractor/yapfiles.py,sha256=3tGub_l6pl0X2HDpXBDigVwMQIQ2yPGEcd8XIyaznKw,3212 +yt_dlp/extractor/yesjapan.py,sha256=vKGITT87Ucy7hQ2jNUvgN1VZ7WZBiP6A93jq1LRJudg,2196 +yt_dlp/extractor/yinyuetai.py,sha256=WUkLfnoBatUPSoB2F5badCafa_fnLGgcul7vLkxT_Ig,1908 +yt_dlp/extractor/ynet.py,sha256=oJ1d-c6Ixy1GEXhdefyujGKDJOq3_KB7mXCEU7BsVTM,1807 +yt_dlp/extractor/youjizz.py,sha256=c4hN3FNSOAGMgUn5oMZvUMQI8Gn756awdXR6tXMw7P8,3112 +yt_dlp/extractor/youku.py,sha256=WT56nRk86tvPA7klJxyZullWIqnaLK_ccNRYAc29BWU,11391 +yt_dlp/extractor/younow.py,sha256=C45bWbtXeDMMnVWmmWrPbLI5HNLMgjmMFs8W8U-ftHo,7089 +yt_dlp/extractor/youporn.py,sha256=lZ8uEwSMl1pI_0_rB1jUDalctVCquMYK-9AIrg-_RnE,7290 +yt_dlp/extractor/yourporn.py,sha256=SpTte_H2TRaQ5YqCRUNb-1UDBeA3rpspzig2aNspLCM,2062 +yt_dlp/extractor/yourupload.py,sha256=5ZIEtNNgMJ7cnpIAoqymKA_WBxbwy-zFjD10TpN-hhI,1411 +yt_dlp/extractor/youtube.py,sha256=3FqKofRucD_Rc85Wkmij5xcRHJVROElH0Y1HUdTR0Ag,255036 +yt_dlp/extractor/zapiks.py,sha256=KHjDmba9mMC9SHRtv1lwR2A388IeBI6crIPRfH2O2II,3828 +yt_dlp/extractor/zaq1.py,sha256=oaAeg-ZzI9vDXmAXGWhnrTPVN4UiHuhnRxCPctR_lNw,3293 +yt_dlp/extractor/zattoo.py,sha256=L_0kyxBPcm5I30AzaNrJz5C1TulYGaYTcgi2qjRigic,14066 +yt_dlp/extractor/zdf.py,sha256=fVxKIyeTTO2cZHCe5Dw71-SKGV1iP2mDQkljzWfQTf8,15865 +yt_dlp/extractor/zee5.py,sha256=7MOYgwRolWrvPh8OZd9wrlO7roD9g7-0bGqerDc7ko0,10992 +yt_dlp/extractor/zhihu.py,sha256=AyVCjgIj9wNnDnpH80vKmJR1GIi9vqv8BoEXFsc37FM,2641 +yt_dlp/extractor/zingmp3.py,sha256=YtUmt3GlR-ROIhN6I0hYSxsXnL9zM1BbW6j1XjHBEGI,5552 +yt_dlp/extractor/zoom.py,sha256=L6L9JTphAIDOd0VJ0SNM2ZycLiwrLVmJzIl8HRjl324,2949 +yt_dlp/extractor/zype.py,sha256=Tx2SwLxSWUahj4ZKjAaYegHG9Q1oD3lQNIKGUaavBEo,5761 +yt_dlp/jsinterp.py,sha256=gmRaSOvCZqPIhWfmbPisee6Ax340BgUr0QIU-P_DvTA,21594 +yt_dlp/minicurses.py,sha256=xWEDlq6wrnrad2KYltvR8D8ZYKNnd5Fenb_Je1oiYt8,5241 +yt_dlp/options.py,sha256=5c1eGWFEVqG3ZVgqsdSq69HEpRF4qFZVDmQWJD8itZQ,84321 +yt_dlp/postprocessor/__init__.py,sha256=NF4ltYrH_1wqfOtULwQVyGDIxxlhSbvcOCoL24ZdDtw,1256 +yt_dlp/postprocessor/__pycache__/__init__.cpython-310.pyc,, +yt_dlp/postprocessor/__pycache__/common.cpython-310.pyc,, +yt_dlp/postprocessor/__pycache__/embedthumbnail.cpython-310.pyc,, +yt_dlp/postprocessor/__pycache__/exec.cpython-310.pyc,, +yt_dlp/postprocessor/__pycache__/ffmpeg.cpython-310.pyc,, +yt_dlp/postprocessor/__pycache__/metadataparser.cpython-310.pyc,, +yt_dlp/postprocessor/__pycache__/modify_chapters.cpython-310.pyc,, +yt_dlp/postprocessor/__pycache__/movefilesafterdownload.cpython-310.pyc,, +yt_dlp/postprocessor/__pycache__/sponskrub.cpython-310.pyc,, +yt_dlp/postprocessor/__pycache__/sponsorblock.cpython-310.pyc,, +yt_dlp/postprocessor/__pycache__/xattrpp.cpython-310.pyc,, +yt_dlp/postprocessor/common.py,sha256=Lz__95mhsXcBKZPV4A3-_uIXEwcP1fb5TwPxLP0GjFU,6510 +yt_dlp/postprocessor/embedthumbnail.py,sha256=JhAOizMuCdK_qoqlUn7ZPNCItqNEaabunnEyL_0uQmA,10591 +yt_dlp/postprocessor/exec.py,sha256=XCXzikjZs24t4jhxAeJXIPXexAXgj9F5qNyOSkykskU,1658 +yt_dlp/postprocessor/ffmpeg.py,sha256=4rnHseZEKa8MyrRSwjY1l6yxaT_2neNfQK9DsiE-gqc,48521 +yt_dlp/postprocessor/metadataparser.py,sha256=YqFIgVnqCEq1d8aFLIZAjDcWIKXgTbiLVdj-ZmAT2pE,4236 
+yt_dlp/postprocessor/modify_chapters.py,sha256=nUE42jNtTci5zF9IYGYcSsghqhi2e774Ei0vCjH4-Ps,17429 +yt_dlp/postprocessor/movefilesafterdownload.py,sha256=0nqxSRqjHTWDt3in8pRbo_UMMA8Dr4GFg4LKg33A5tw,2088 +yt_dlp/postprocessor/sponskrub.py,sha256=qbGy79cH1ZGp8SJYiSu6-39qWXCIJtpq5oys_nMvkIg,4253 +yt_dlp/postprocessor/sponsorblock.py,sha256=M1zn6G2YufuultUvIBCoerxXjDNcBr5w7ESHx8X5N6k,5014 +yt_dlp/postprocessor/xattrpp.py,sha256=5Ox8wwYm9iVMzwqc8v7OKrSk6HD75j6pWpjBFZT28zw,2870 +yt_dlp/socks.py,sha256=0GytSIZ-7ix5Kl6MO1DmUR9StnJBU7O6d8q-mTBzDOc,8861 +yt_dlp/update.py,sha256=toy_xDaQ86Msoymte9iImGv4GnI3wPuiv3TrXHZgNw8,10327 +yt_dlp/utils.py,sha256=GqXp2eGIg_D6295GRyQt-Jok7j_qWOd5WobnjgQPlc4,173935 +yt_dlp/version.py,sha256=5nDYPbSgswdISGzh930qgDpNYhUCS_apqYvZx2RSHhc,108 +yt_dlp/webvtt.py,sha256=nlyXcTEcxzfCDZGA4j__5Ky24L7v16xpx-lqeCiC7RA,11161 diff --git a/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/REQUESTED b/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/REQUESTED new file mode 100644 index 0000000..e69de29 diff --git a/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/WHEEL b/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/WHEEL new file mode 100644 index 0000000..0b18a28 --- /dev/null +++ b/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.37.1) +Root-Is-Purelib: true +Tag: py2-none-any +Tag: py3-none-any + diff --git a/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/entry_points.txt b/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/entry_points.txt new file mode 100644 index 0000000..134eb21 --- /dev/null +++ b/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/entry_points.txt @@ -0,0 +1,3 @@ +[console_scripts] +yt-dlp = yt_dlp:main + diff --git a/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/top_level.txt b/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/top_level.txt new file mode 100644 index 0000000..a866577 --- /dev/null +++ b/plugins/youtube_download/yt_dlp-2022.2.4.dist-info/top_level.txt @@ -0,0 +1 @@ +yt_dlp diff --git a/plugins/youtube_download/yt_dlp/YoutubeDL.py b/plugins/youtube_download/yt_dlp/YoutubeDL.py new file mode 100644 index 0000000..fd1584a --- /dev/null +++ b/plugins/youtube_download/yt_dlp/YoutubeDL.py @@ -0,0 +1,3874 @@ +#!/usr/bin/env python3 +# coding: utf-8 + +from __future__ import absolute_import, unicode_literals + +import collections +import contextlib +import datetime +import errno +import fileinput +import functools +import io +import itertools +import json +import locale +import operator +import os +import platform +import re +import shutil +import subprocess +import sys +import tempfile +import time +import tokenize +import traceback +import random +import unicodedata + +from enum import Enum +from string import ascii_letters + +from .compat import ( + compat_basestring, + compat_get_terminal_size, + compat_kwargs, + compat_numeric_types, + compat_os_name, + compat_pycrypto_AES, + compat_shlex_quote, + compat_str, + compat_tokenize_tokenize, + compat_urllib_error, + compat_urllib_request, + compat_urllib_request_DataHandler, + windows_enable_vt_mode, +) +from .cookies import load_cookies +from .utils import ( + age_restricted, + args_to_str, + ContentTooShortError, + date_from_str, + DateRange, + DEFAULT_OUTTMPL, + determine_ext, + determine_protocol, + DownloadCancelled, + DownloadError, + encode_compat_str, + encodeFilename, + EntryNotInPlaylist, + error_to_compat_str, + ExistingVideoReached, + expand_path, + ExtractorError, + float_or_none, + 
format_bytes,
+ format_field,
+ format_decimal_suffix,
+ formatSeconds,
+ GeoRestrictedError,
+ get_domain,
+ HEADRequest,
+ InAdvancePagedList,
+ int_or_none,
+ iri_to_uri,
+ ISO3166Utils,
+ join_nonempty,
+ LazyList,
+ LINK_TEMPLATES,
+ locked_file,
+ make_dir,
+ make_HTTPS_handler,
+ MaxDownloadsReached,
+ network_exceptions,
+ number_of_digits,
+ orderedSet,
+ OUTTMPL_TYPES,
+ PagedList,
+ parse_filesize,
+ PerRequestProxyHandler,
+ platform_name,
+ Popen,
+ POSTPROCESS_WHEN,
+ PostProcessingError,
+ preferredencoding,
+ prepend_extension,
+ ReExtractInfo,
+ register_socks_protocols,
+ RejectedVideoReached,
+ remove_terminal_sequences,
+ render_table,
+ replace_extension,
+ SameFileError,
+ sanitize_filename,
+ sanitize_path,
+ sanitize_url,
+ sanitized_Request,
+ std_headers,
+ STR_FORMAT_RE_TMPL,
+ STR_FORMAT_TYPES,
+ str_or_none,
+ strftime_or_none,
+ subtitles_filename,
+ supports_terminal_sequences,
+ timetuple_from_msec,
+ to_high_limit_path,
+ traverse_obj,
+ try_get,
+ UnavailableVideoError,
+ url_basename,
+ variadic,
+ version_tuple,
+ write_json_file,
+ write_string,
+ YoutubeDLCookieProcessor,
+ YoutubeDLHandler,
+ YoutubeDLRedirectHandler,
+)
+from .cache import Cache
+from .minicurses import format_text
+from .extractor import (
+ gen_extractor_classes,
+ get_info_extractor,
+ _LAZY_LOADER,
+ _PLUGIN_CLASSES as plugin_extractors
+)
+from .extractor.openload import PhantomJSwrapper
+from .downloader import (
+ FFmpegFD,
+ get_suitable_downloader,
+ shorten_protocol_name
+)
+from .downloader.rtmp import rtmpdump_version
+from .postprocessor import (
+ get_postprocessor,
+ EmbedThumbnailPP,
+ FFmpegFixupDuplicateMoovPP,
+ FFmpegFixupDurationPP,
+ FFmpegFixupM3u8PP,
+ FFmpegFixupM4aPP,
+ FFmpegFixupStretchedPP,
+ FFmpegFixupTimestampPP,
+ FFmpegMergerPP,
+ FFmpegPostProcessor,
+ MoveFilesAfterDownloadPP,
+ _PLUGIN_CLASSES as plugin_postprocessors
+)
+from .update import detect_variant
+from .version import __version__, RELEASE_GIT_HEAD
+
+if compat_os_name == 'nt':
+ import ctypes
+
+
+class YoutubeDL(object):
+ """YoutubeDL class.
+
+ YoutubeDL objects are the ones responsible for downloading the
+ actual video file and writing it to disk if the user has requested
+ it, among some other tasks. In most cases there should be one per
+ program. Given a video URL, the downloader does not know how to
+ extract all the needed information (a task that InfoExtractors do),
+ so it has to pass the URL to one of them.
+
+ For this, YoutubeDL objects have a method that allows
+ InfoExtractors to be registered in a given order. When it is passed
+ a URL, the YoutubeDL object hands it to the first InfoExtractor it
+ finds that reports being able to handle it. The InfoExtractor extracts
+ all the information about the video or videos the URL refers to, and
+ YoutubeDL processes the extracted information, possibly using a File
+ Downloader to download the video.
+
+ YoutubeDL objects accept a lot of parameters. In order not to saturate
+ the object constructor with arguments, it receives a dictionary of
+ options instead. These options are available through the params
+ attribute for the InfoExtractors to use. The YoutubeDL also
+ registers itself as the downloader in charge of the InfoExtractors
+ that are added to it, so this is a "mutual registration".
+
+ Available options:
+
+ username: Username for authentication purposes.
+ password: Password for authentication purposes.
+ videopassword: Password for accessing a video.
+ ap_mso: Adobe Pass multiple-system operator identifier.
+ ap_username: Multiple-system operator account username.
+ ap_password: Multiple-system operator account password.
+ usenetrc: Use netrc for authentication instead.
+ verbose: Print additional info to stdout.
+ quiet: Do not print messages to stdout.
+ no_warnings: Do not print out anything for warnings.
+ forceprint: A dict with keys WHEN mapped to a list of templates to
+ print to stdout. The allowed keys are video or any of the
+ items in utils.POSTPROCESS_WHEN.
+ For compatibility, a single list is also accepted
+ print_to_file: A dict with keys WHEN (same as forceprint) mapped to
+ a list of tuples with (template, filename)
+ forceurl: Force printing final URL. (Deprecated)
+ forcetitle: Force printing title. (Deprecated)
+ forceid: Force printing ID. (Deprecated)
+ forcethumbnail: Force printing thumbnail URL. (Deprecated)
+ forcedescription: Force printing description. (Deprecated)
+ forcefilename: Force printing final filename. (Deprecated)
+ forceduration: Force printing duration. (Deprecated)
+ forcejson: Force printing info_dict as JSON.
+ dump_single_json: Force printing the info_dict of the whole playlist
+ (or video) as a single JSON line.
+ force_write_download_archive: Force writing download archive regardless
+ of 'skip_download' or 'simulate'.
+ simulate: Do not download the video files. If unset (or None),
+ simulate only if listsubtitles, listformats or list_thumbnails is used
+ format: Video format code. See "FORMAT SELECTION" for more details.
+ You can also pass a function. The function takes 'ctx' as
+ argument and returns the formats to download.
+ See "build_format_selector" for an implementation
+ allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
+ ignore_no_formats_error: Ignore "No video formats" error. Useful for
+ extracting metadata even if the video is not actually
+ available for download (experimental)
+ format_sort: A list of fields by which to sort the video formats.
+ See "Sorting Formats" for more details.
+ format_sort_force: Force the given format_sort. See "Sorting Formats"
+ for more details.
+ allow_multiple_video_streams: Allow multiple video streams to be merged
+ into a single file
+ allow_multiple_audio_streams: Allow multiple audio streams to be merged
+ into a single file
+ check_formats: Whether to test if the formats are downloadable.
+ Can be True (check all), False (check none),
+ 'selected' (check selected formats),
+ or None (check only if requested by extractor)
+ paths: Dictionary of output paths. The allowed keys are 'home',
+ 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
+ outtmpl: Dictionary of templates for output names. Allowed keys
+ are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
+ For compatibility with youtube-dl, a single string can also be used
+ outtmpl_na_placeholder: Placeholder for unavailable meta fields.
+ restrictfilenames: Do not allow "&" and spaces in file names
+ trim_file_name: Limit length of filename (extension excluded)
+ windowsfilenames: Force the filenames to be Windows compatible
+ ignoreerrors: Do not stop on download/postprocessing errors.
+ Can be 'only_download' to ignore only download errors.
+ Default is 'only_download' for CLI, but False for API + skip_playlist_after_errors: Number of allowed failures until the rest of + the playlist is skipped + force_generic_extractor: Force downloader to use the generic extractor + overwrites: Overwrite all video and metadata files if True, + overwrite only non-video files if None + and don't overwrite any file if False + For compatibility with youtube-dl, + "nooverwrites" may also be used instead + playliststart: Playlist item to start at. + playlistend: Playlist item to end at. + playlist_items: Specific indices of playlist to download. + playlistreverse: Download playlist items in reverse order. + playlistrandom: Download playlist items in random order. + matchtitle: Download only matching titles. + rejecttitle: Reject downloads for matching titles. + logger: Log messages to a logging.Logger instance. + logtostderr: Log messages to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. + writedescription: Write the video description to a .description file + writeinfojson: Write the video description to a .info.json file + clean_infojson: Remove private fields from the infojson + getcomments: Extract video comments. This will not be written to disk + unless writeinfojson is also given + writeannotations: Write the video annotations to a .annotations.xml file + writethumbnail: Write the thumbnail image to a file + allow_playlist_files: Whether to write playlists' description, infojson etc + also to disk when using the 'write*' options + write_all_thumbnails: Write all thumbnail formats to files + writelink: Write an internet shortcut file, depending on the + current platform (.url/.webloc/.desktop) + writeurllink: Write a Windows internet shortcut file (.url) + writewebloclink: Write a macOS internet shortcut file (.webloc) + writedesktoplink: Write a Linux internet shortcut file (.desktop) + writesubtitles: Write the video subtitles to a file + writeautomaticsub: Write the automatically generated subtitles to a file + allsubtitles: Deprecated - Use subtitleslangs = ['all'] + Downloads all the subtitles of the video + (requires writesubtitles or writeautomaticsub) + listsubtitles: Lists all available subtitles for the video + subtitlesformat: The format code for subtitles + subtitleslangs: List of languages of the subtitles to download (can be regex). + The list may contain "all" to refer to all the available + subtitles. The language can be prefixed with a "-" to + exclude it from the requested languages. Eg: ['all', '-live_chat'] + keepvideo: Keep the video file after post-processing + daterange: A DateRange object, download only if the upload_date is in the range. + skip_download: Skip the actual download of the video file + cachedir: Location of the cache files in the filesystem. + False to disable filesystem cache. + noplaylist: Download single video instead of a playlist if in doubt. + age_limit: An integer representing the user's age in years. + Unsuitable videos for the given age are skipped. + min_views: An integer representing the minimum view count the video + must have in order to not be skipped. + Videos without view count information are always + downloaded. None for no limit. + max_views: An integer representing the maximum view count. + Videos that are more popular than that are not + downloaded. + Videos without view count information are always + downloaded. None for no limit. + download_archive: File name of a file where all downloads are recorded. 
+ Videos already present in the file are not downloaded
+ again.
+ break_on_existing: Stop the download process after attempting to download a
+ file that is in the archive.
+ break_on_reject: Stop the download process when encountering a video that
+ has been filtered out.
+ break_per_url: Whether break_on_reject and break_on_existing
+ should act on each input URL as opposed to for the entire queue
+ cookiefile: File name where cookies should be read from and dumped to
+ cookiesfrombrowser: A tuple containing the name of the browser, the profile
+ name/path from where cookies are loaded, and the name of the
+ keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
+ legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
+ support RFC 5746 secure renegotiation
+ nocheckcertificate: Do not verify SSL certificates
+ prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
+ At the moment, this is only supported by YouTube.
+ proxy: URL of the proxy server to use
+ geo_verification_proxy: URL of the proxy to use for IP address verification
+ on geo-restricted sites.
+ socket_timeout: Time to wait for unresponsive hosts, in seconds
+ bidi_workaround: Work around buggy terminals without bidirectional text
+ support, using fribidi
+ debug_printtraffic: Print out sent and received HTTP traffic
+ include_ads: Download ads as well (deprecated)
+ default_search: Prepend this string if an input URL is not valid.
+ 'auto' for elaborate guessing
+ encoding: Use this encoding instead of the system-specified.
+ extract_flat: Do not resolve URLs, return the immediate result.
+ Pass in 'in_playlist' to only show this behavior for
+ playlist items.
+ wait_for_video: If given, wait for scheduled streams to become available.
+ The value should be a tuple containing the range
+ (min_secs, max_secs) to wait between retries
+ postprocessors: A list of dictionaries, each with an entry
+ * key: The name of the postprocessor. See
+ yt_dlp/postprocessor/__init__.py for a list.
+ * when: When to run the postprocessor. Allowed values are
+ the entries of utils.POSTPROCESS_WHEN
+ Assumed to be 'post_process' if not given
+ post_hooks: Deprecated - Register a custom postprocessor instead
+ A list of functions that get called as the final step
+ for each video file, after all postprocessors have been
+ called. The filename will be passed as the only argument.
+ progress_hooks: A list of functions that get called on download
+ progress, with a dictionary with the entries
+ * status: One of "downloading", "error", or "finished".
+ Check this first and ignore unknown values.
+ * info_dict: The extracted info_dict
+
+ If status is one of "downloading" or "finished", the
+ following properties may also be present:
+ * filename: The final filename (always present)
+ * tmpfilename: The filename we're currently writing to
+ * downloaded_bytes: Bytes on disk
+ * total_bytes: Size of the whole file, None if unknown
+ * total_bytes_estimate: Guess of the eventual file size,
+ None if unavailable.
+ * elapsed: The number of seconds since download started.
+ * eta: The estimated time in seconds, None if unknown
+ * speed: The download speed in bytes/second, None if
+ unknown
+ * fragment_index: The counter of the currently
+ downloaded video fragment.
+ * fragment_count: The number of fragments (= individual
+ files that will be merged)
+
+ Progress hooks are guaranteed to be called at least once
+ (with status "finished") if the download is successful.
+ postprocessor_hooks: A list of functions that get called on postprocessing + progress, with a dictionary with the entries + * status: One of "started", "processing", or "finished". + Check this first and ignore unknown values. + * postprocessor: Name of the postprocessor + * info_dict: The extracted info_dict + + Progress hooks are guaranteed to be called at least twice + (with status "started" and "finished") if the processing is successful. + merge_output_format: Extension to use when merging formats. + final_ext: Expected final extension; used to detect when the file was + already downloaded and converted + fixup: Automatically correct known faults of the file. + One of: + - "never": do nothing + - "warn": only emit a warning + - "detect_or_warn": check whether we can do anything + about it, warn otherwise (default) + source_address: Client-side IP address to bind to. + call_home: Boolean, true iff we are allowed to contact the + yt-dlp servers for debugging. (BROKEN) + sleep_interval_requests: Number of seconds to sleep between requests + during extraction + sleep_interval: Number of seconds to sleep before each download when + used alone or a lower bound of a range for randomized + sleep before each download (minimum possible number + of seconds to sleep) when used along with + max_sleep_interval. + max_sleep_interval:Upper bound of a range for randomized sleep before each + download (maximum possible number of seconds to sleep). + Must only be used along with sleep_interval. + Actual sleep time will be a random float from range + [sleep_interval; max_sleep_interval]. + sleep_interval_subtitles: Number of seconds to sleep before each subtitle download + listformats: Print an overview of available video formats and exit. + list_thumbnails: Print a table of all thumbnails and exit. + match_filter: A function that gets called with the info_dict of + every video. + If it returns a message, the video is ignored. + If it returns None, the video is downloaded. + match_filter_func in utils.py is one example for this. + no_color: Do not emit color codes in output. + geo_bypass: Bypass geographic restriction via faking X-Forwarded-For + HTTP header + geo_bypass_country: + Two-letter ISO 3166-2 country code that will be used for + explicit geographic restriction bypassing via faking + X-Forwarded-For HTTP header + geo_bypass_ip_block: + IP range in CIDR notation that will be used similarly to + geo_bypass_country + + The following options determine which downloader is picked: + external_downloader: A dictionary of protocol keys and the executable of the + external downloader to use for it. The allowed protocols + are default|http|ftp|m3u8|dash|rtsp|rtmp|mms. + Set the value to 'native' to use the native downloader + hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'} + or {'m3u8': 'ffmpeg'} instead. + Use the native HLS downloader instead of ffmpeg/avconv + if True, otherwise use ffmpeg/avconv if False, otherwise + use downloader suggested by extractor if None. + compat_opts: Compatibility options. See "Differences in default behavior". + The following options do not work when used through the API: + filename, abort-on-error, multistreams, no-live-chat, format-sort + no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json. + Refer __init__.py for their implementation + progress_template: Dictionary of templates for progress outputs. + Allowed keys are 'download', 'postprocess', + 'download-title' (console title) and 'postprocess-title'. 
+ The template is mapped onto a dictionary with keys 'progress' and 'info'
+
+ The following parameters are not used by YoutubeDL itself; they are used by
+ the downloader (see yt_dlp/downloader/common.py):
+ nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
+ max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
+ continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
+ external_downloader_args, concurrent_fragment_downloads.
+
+ The following options are used by the post processors:
+ prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
+ otherwise prefer ffmpeg. (avconv support is deprecated)
+ ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
+ to the binary or its containing directory.
+ postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
+ and a list of additional command-line arguments for the
+ postprocessor/executable. The dict can also have "PP+EXE" keys
+ which are used when the given exe is used by the given PP.
+ Use 'default' as the name for arguments to be passed to all PP
+ For compatibility with youtube-dl, a single list of args
+ can also be used
+
+ The following options are used by the extractors:
+ extractor_retries: Number of times to retry for known errors
+ dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
+ hls_split_discontinuity: Split HLS playlists to different formats at
+ discontinuities such as ad breaks (default: False)
+ extractor_args: A dictionary of arguments to be passed to the extractors.
+ See "EXTRACTOR ARGUMENTS" for details.
+ Eg: {'youtube': {'skip': ['dash', 'hls']}}
+ mark_watched: Mark videos watched (even with --simulate). Only for YouTube
+ youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
+ If True (default), DASH manifests and related
+ data will be downloaded and processed by extractor.
+ You can reduce network I/O by disabling it if you don't
+ care about DASH. (only for youtube)
+ youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
+ If True (default), HLS manifests and related
+ data will be downloaded and processed by extractor.
+ You can reduce network I/O by disabling it if you don't
+ care about HLS. (only for youtube)
+ """
+
+ _NUMERIC_FIELDS = set((
+ 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
+ 'timestamp', 'release_timestamp',
+ 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
+ 'average_rating', 'comment_count', 'age_limit',
+ 'start_time', 'end_time',
+ 'chapter_number', 'season_number', 'episode_number',
+ 'track_number', 'disc_number', 'release_year',
+ ))
+
+ _format_selection_exts = {
+ 'audio': {'m4a', 'mp3', 'ogg', 'aac'},
+ 'video': {'mp4', 'flv', 'webm', '3gp'},
+ 'storyboards': {'mhtml'},
+ }
+
+ params = None
+ _ies = {}
+ _pps = {k: [] for k in POSTPROCESS_WHEN}
+ _printed_messages = set()
+ _first_webpage_request = True
+ _download_retcode = None
+ _num_downloads = None
+ _playlist_level = 0
+ _playlist_urls = set()
+ _screen_file = None
+
+ def __init__(self, params=None, auto_init=True):
+ """Create a FileDownloader object with the given options.
+ @param auto_init Whether to load the default extractors and print header (if verbose).
+ Set to 'no_verbose_header' to not print the header + """ + if params is None: + params = {} + self._ies = {} + self._ies_instances = {} + self._pps = {k: [] for k in POSTPROCESS_WHEN} + self._printed_messages = set() + self._first_webpage_request = True + self._post_hooks = [] + self._progress_hooks = [] + self._postprocessor_hooks = [] + self._download_retcode = 0 + self._num_downloads = 0 + self._num_videos = 0 + self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] + self._err_file = sys.stderr + self.params = params + self.cache = Cache(self) + + windows_enable_vt_mode() + self._allow_colors = { + 'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file), + 'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file), + } + + if sys.version_info < (3, 6): + self.report_warning( + 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2]) + + if self.params.get('allow_unplayable_formats'): + self.report_warning( + f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. ' + 'This is a developer option intended for debugging. \n' + ' If you experience any issues while using this option, ' + f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report') + + def check_deprecated(param, option, suggestion): + if self.params.get(param) is not None: + self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion)) + return True + return False + + if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'): + if self.params.get('geo_verification_proxy') is None: + self.params['geo_verification_proxy'] = self.params['cn_verification_proxy'] + + check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"') + check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') + check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"') + + for msg in self.params.get('_warnings', []): + self.report_warning(msg) + for msg in self.params.get('_deprecation_warnings', []): + self.deprecation_warning(msg) + + if 'list-formats' in self.params.get('compat_opts', []): + self.params['listformats_table'] = False + + if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None: + # nooverwrites was unnecessarily changed to overwrites + # in 0c3d0f51778b153f65c21906031c2e091fcfb641 + # This ensures compatibility with both keys + self.params['overwrites'] = not self.params['nooverwrites'] + elif self.params.get('overwrites') is None: + self.params.pop('overwrites', None) + else: + self.params['nooverwrites'] = not self.params['overwrites'] + + self.params.setdefault('forceprint', {}) + self.params.setdefault('print_to_file', {}) + + # Compatibility with older syntax + if not isinstance(params['forceprint'], dict): + self.params['forceprint'] = {'video': params['forceprint']} + + if self.params.get('bidi_workaround', False): + try: + import pty + master, slave = pty.openpty() + width = compat_get_terminal_size().columns + if width is None: + width_args = [] + else: + width_args = ['-w', str(width)] + sp_kwargs = dict( + stdin=subprocess.PIPE, + stdout=slave, + stderr=self._err_file) + try: + self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs) + except OSError: + self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) + self._output_channel = os.fdopen(master, 'rb') + except 
OSError as ose: + if ose.errno == errno.ENOENT: + self.report_warning( + 'Could not find fribidi executable, ignoring --bidi-workaround. ' + 'Make sure that fribidi is an executable file in one of the directories in your $PATH.') + else: + raise + + if (sys.platform != 'win32' + and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] + and not self.params.get('restrictfilenames', False)): + # Unicode filesystem API will throw errors (#1474, #13027) + self.report_warning( + 'Assuming --restrict-filenames since file system encoding ' + 'cannot encode all characters. ' + 'Set the LC_ALL environment variable to fix this.') + self.params['restrictfilenames'] = True + + self.outtmpl_dict = self.parse_outtmpl() + + # Creating format selector here allows us to catch syntax errors before the extraction + self.format_selector = ( + self.params.get('format') if self.params.get('format') in (None, '-') + else self.params['format'] if callable(self.params['format']) + else self.build_format_selector(self.params['format'])) + + self._setup_opener() + + if auto_init: + if auto_init != 'no_verbose_header': + self.print_debug_header() + self.add_default_info_extractors() + + hooks = { + 'post_hooks': self.add_post_hook, + 'progress_hooks': self.add_progress_hook, + 'postprocessor_hooks': self.add_postprocessor_hook, + } + for opt, fn in hooks.items(): + for ph in self.params.get(opt, []): + fn(ph) + + for pp_def_raw in self.params.get('postprocessors', []): + pp_def = dict(pp_def_raw) + when = pp_def.pop('when', 'post_process') + self.add_post_processor( + get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)), + when=when) + + register_socks_protocols() + + def preload_download_archive(fn): + """Preload the archive, if any is specified""" + if fn is None: + return False + self.write_debug(f'Loading archive file {fn!r}') + try: + with locked_file(fn, 'r', encoding='utf-8') as archive_file: + for line in archive_file: + self.archive.add(line.strip()) + except IOError as ioe: + if ioe.errno != errno.ENOENT: + raise + return False + return True + + self.archive = set() + preload_download_archive(self.params.get('download_archive')) + + def warn_if_short_id(self, argv): + # short YouTube ID starting with dash? + idxs = [ + i for i, a in enumerate(argv) + if re.match(r'^-[0-9A-Za-z_-]{10}$', a)] + if idxs: + correct_argv = ( + ['yt-dlp'] + + [a for i, a in enumerate(argv) if i not in idxs] + + ['--'] + [argv[i] for i in idxs] + ) + self.report_warning( + 'Long argument string detected. ' + 'Use -- to separate parameters and URLs, like this:\n%s' % + args_to_str(correct_argv)) + + def add_info_extractor(self, ie): + """Add an InfoExtractor object to the end of the list.""" + ie_key = ie.ie_key() + self._ies[ie_key] = ie + if not isinstance(ie, type): + self._ies_instances[ie_key] = ie + ie.set_downloader(self) + + def _get_info_extractor_class(self, ie_key): + ie = self._ies.get(ie_key) + if ie is None: + ie = get_info_extractor(ie_key) + self.add_info_extractor(ie) + return ie + + def get_info_extractor(self, ie_key): + """ + Get an instance of an IE with name ie_key, it will try to get one from + the _ies list, if there's no instance it will create a new one and add + it to the extractor list. 
+ """ + ie = self._ies_instances.get(ie_key) + if ie is None: + ie = get_info_extractor(ie_key)() + self.add_info_extractor(ie) + return ie + + def add_default_info_extractors(self): + """ + Add the InfoExtractors returned by gen_extractors to the end of the list + """ + for ie in gen_extractor_classes(): + self.add_info_extractor(ie) + + def add_post_processor(self, pp, when='post_process'): + """Add a PostProcessor object to the end of the chain.""" + self._pps[when].append(pp) + pp.set_downloader(self) + + def add_post_hook(self, ph): + """Add the post hook""" + self._post_hooks.append(ph) + + def add_progress_hook(self, ph): + """Add the download progress hook""" + self._progress_hooks.append(ph) + + def add_postprocessor_hook(self, ph): + """Add the postprocessing progress hook""" + self._postprocessor_hooks.append(ph) + for pps in self._pps.values(): + for pp in pps: + pp.add_progress_hook(ph) + + def _bidi_workaround(self, message): + if not hasattr(self, '_output_channel'): + return message + + assert hasattr(self, '_output_process') + assert isinstance(message, compat_str) + line_count = message.count('\n') + 1 + self._output_process.stdin.write((message + '\n').encode('utf-8')) + self._output_process.stdin.flush() + res = ''.join(self._output_channel.readline().decode('utf-8') + for _ in range(line_count)) + return res[:-len('\n')] + + def _write_string(self, message, out=None, only_once=False): + if only_once: + if message in self._printed_messages: + return + self._printed_messages.add(message) + write_string(message, out=out, encoding=self.params.get('encoding')) + + def to_stdout(self, message, skip_eol=False, quiet=False): + """Print message to stdout""" + if self.params.get('logger'): + self.params['logger'].debug(message) + elif not quiet or self.params.get('verbose'): + self._write_string( + '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')), + self._err_file if quiet else self._screen_file) + + def to_stderr(self, message, only_once=False): + """Print message to stderr""" + assert isinstance(message, compat_str) + if self.params.get('logger'): + self.params['logger'].error(message) + else: + self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once) + + def to_console_title(self, message): + if not self.params.get('consoletitle', False): + return + message = remove_terminal_sequences(message) + if compat_os_name == 'nt': + if ctypes.windll.kernel32.GetConsoleWindow(): + # c_wchar_p() might not be necessary if `message` is + # already of type unicode() + ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) + elif 'TERM' in os.environ: + self._write_string('\033]0;%s\007' % message, self._screen_file) + + def save_console_title(self): + if not self.params.get('consoletitle', False): + return + if self.params.get('simulate'): + return + if compat_os_name != 'nt' and 'TERM' in os.environ: + # Save the title on stack + self._write_string('\033[22;0t', self._screen_file) + + def restore_console_title(self): + if not self.params.get('consoletitle', False): + return + if self.params.get('simulate'): + return + if compat_os_name != 'nt' and 'TERM' in os.environ: + # Restore the title from stack + self._write_string('\033[23;0t', self._screen_file) + + def __enter__(self): + self.save_console_title() + return self + + def __exit__(self, *args): + self.restore_console_title() + + if self.params.get('cookiefile') is not None: + self.cookiejar.save(ignore_discard=True, ignore_expires=True) + + def trouble(self, 
message=None, tb=None, is_error=True):
+ """Determine action to take when a download problem appears.
+
+ Depending on whether the downloader has been configured to ignore
+ download errors or not, this method may throw an exception or
+ not when errors are found, after printing the message.
+
+ @param tb If given, is additional traceback information
+ @param is_error Whether to raise error according to ignoreerrors
+ """
+ if message is not None:
+ self.to_stderr(message)
+ if self.params.get('verbose'):
+ if tb is None:
+ if sys.exc_info()[0]: # if .trouble has been called from an except block
+ tb = ''
+ if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
+ tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
+ tb += encode_compat_str(traceback.format_exc())
+ else:
+ tb_data = traceback.format_list(traceback.extract_stack())
+ tb = ''.join(tb_data)
+ if tb:
+ self.to_stderr(tb)
+ if not is_error:
+ return
+ if not self.params.get('ignoreerrors'):
+ if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
+ exc_info = sys.exc_info()[1].exc_info
+ else:
+ exc_info = sys.exc_info()
+ raise DownloadError(message, exc_info)
+ self._download_retcode = 1
+
+ def to_screen(self, message, skip_eol=False):
+ """Print message to stdout if not in quiet mode"""
+ self.to_stdout(
+ message, skip_eol, quiet=self.params.get('quiet', False))
+
+ class Styles(Enum):
+ HEADERS = 'yellow'
+ EMPHASIS = 'light blue'
+ ID = 'green'
+ DELIM = 'blue'
+ ERROR = 'red'
+ WARNING = 'yellow'
+ SUPPRESS = 'light black'
+
+ def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
+ if test_encoding:
+ original_text = text
+ encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
+ text = text.encode(encoding, 'ignore').decode(encoding)
+ if fallback is not None and text != original_text:
+ text = fallback
+ if isinstance(f, self.Styles):
+ f = f.value
+ return format_text(text, f) if allow_colors else text if fallback is None else fallback
+
+ def _format_screen(self, *args, **kwargs):
+ return self._format_text(
+ self._screen_file, self._allow_colors['screen'], *args, **kwargs)
+
+ def _format_err(self, *args, **kwargs):
+ return self._format_text(
+ self._err_file, self._allow_colors['err'], *args, **kwargs)
+
+ def report_warning(self, message, only_once=False):
+ '''
+ Print the message to stderr, it will be prefixed with 'WARNING:'
+ If stderr is a tty file the 'WARNING:' will be colored
+ '''
+ if self.params.get('logger') is not None:
+ self.params['logger'].warning(message)
+ else:
+ if self.params.get('no_warnings'):
+ return
+ self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
+
+ def deprecation_warning(self, message):
+ if self.params.get('logger') is not None:
+ self.params['logger'].warning(f'DeprecationWarning: {message}')
+ else:
+ self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
+
+ def report_error(self, message, *args, **kwargs):
+ '''
+ Do the same as trouble, but prefixes the message with 'ERROR:', colored
+ in red if stderr is a tty file.
+ ''' + self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs) + + def write_debug(self, message, only_once=False): + '''Log debug message or Print message to stderr''' + if not self.params.get('verbose', False): + return + message = '[debug] %s' % message + if self.params.get('logger'): + self.params['logger'].debug(message) + else: + self.to_stderr(message, only_once) + + def report_file_already_downloaded(self, file_name): + """Report file has already been fully downloaded.""" + try: + self.to_screen('[download] %s has already been downloaded' % file_name) + except UnicodeEncodeError: + self.to_screen('[download] The file has already been downloaded') + + def report_file_delete(self, file_name): + """Report that existing file will be deleted.""" + try: + self.to_screen('Deleting existing file %s' % file_name) + except UnicodeEncodeError: + self.to_screen('Deleting existing file') + + def raise_no_formats(self, info, forced=False): + has_drm = info.get('__has_drm') + msg = 'This video is DRM protected' if has_drm else 'No video formats found!' + expected = self.params.get('ignore_no_formats_error') + if forced or not expected: + raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'], + expected=has_drm or expected) + else: + self.report_warning(msg) + + def parse_outtmpl(self): + outtmpl_dict = self.params.get('outtmpl', {}) + if not isinstance(outtmpl_dict, dict): + outtmpl_dict = {'default': outtmpl_dict} + # Remove spaces in the default template + if self.params.get('restrictfilenames'): + sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-') + else: + sanitize = lambda x: x + outtmpl_dict.update({ + k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() + if outtmpl_dict.get(k) is None}) + for key, val in outtmpl_dict.items(): + if isinstance(val, bytes): + self.report_warning( + 'Parameter outtmpl is bytes, but should be a unicode string. ' + 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.') + return outtmpl_dict + + def get_output_path(self, dir_type='', filename=None): + paths = self.params.get('paths', {}) + assert isinstance(paths, dict) + path = os.path.join( + expand_path(paths.get('home', '').strip()), + expand_path(paths.get(dir_type, '').strip()) if dir_type else '', + filename or '') + + # Temporary fix for #4787 + # 'Treat' all problem characters by passing filename through preferredencoding + # to workaround encoding issues with subprocess on python2 @ Windows + if sys.version_info < (3, 0) and sys.platform == 'win32': + path = encodeFilename(path, True).decode(preferredencoding()) + return sanitize_path(path, force=self.params.get('windowsfilenames')) + + @staticmethod + def _outtmpl_expandpath(outtmpl): + # expand_path translates '%%' into '%' and '$$' into '$' + # correspondingly that is not what we want since we need to keep + # '%%' intact for template dict substitution step. Working around + # with boundary-alike separator hack. + sep = ''.join([random.choice(ascii_letters) for _ in range(32)]) + outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep)) + + # outtmpl should be expand_path'ed before template dict substitution + # because meta fields may contain env variables we don't want to + # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and + # title "Hello $PATH", we don't want `$PATH` to be expanded. 
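+ # Illustrative trace (hypothetical sep value, not from a real run): with
+ # sep = 'kQwZ...', '%%(title)s' has already become '%kQwZ...%(title)s' above;
+ # expand_path finds no environment variable by that random name and leaves
+ # it untouched, and .replace(sep, '') below restores the literal '%%(title)s'.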
+ return expand_path(outtmpl).replace(sep, '')
+
+ @staticmethod
+ def escape_outtmpl(outtmpl):
+ ''' Escape any remaining strings like %s, %abc% etc. '''
+ return re.sub(
+ STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
+ lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
+ outtmpl)
+
+ @classmethod
+ def validate_outtmpl(cls, outtmpl):
+ ''' @return None or Exception object '''
+ outtmpl = re.sub(
+ STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
+ lambda mobj: f'{mobj.group(0)[:-1]}s',
+ cls._outtmpl_expandpath(outtmpl))
+ try:
+ cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
+ return None
+ except ValueError as err:
+ return err
+
+ @staticmethod
+ def _copy_infodict(info_dict):
+ info_dict = dict(info_dict)
+ for key in ('__original_infodict', '__postprocessors'):
+ info_dict.pop(key, None)
+ return info_dict
+
+ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
+ """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
+ @param sanitize Whether to sanitize the output as a filename.
+ For backward compatibility, a function can also be passed
+ """
+
+ info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
+
+ info_dict = self._copy_infodict(info_dict)
+ info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
+ formatSeconds(info_dict['duration'], '-' if sanitize else ':')
+ if info_dict.get('duration', None) is not None
+ else None)
+ info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
+ info_dict['video_autonumber'] = self._num_videos
+ if info_dict.get('resolution') is None:
+ info_dict['resolution'] = self.format_resolution(info_dict, default=None)
+
+ # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
+ # of %(field)s to %(field)0Nd for backward compatibility
+ field_size_compat_map = {
+ 'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
+ 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
+ 'autonumber': self.params.get('autonumber_size') or 5,
+ }
+
+ TMPL_DICT = {}
+ EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
+ MATH_FUNCTIONS = {
+ '+': float.__add__,
+ '-': float.__sub__,
+ }
+ # Field is of the form key1.key2...
+ # where keys (except first) can be string, int or slice
+ FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
+ MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
+ MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
+ INTERNAL_FORMAT_RE = re.compile(r'''(?x)
+ (?P<negate>-)?
+ (?P<fields>{field})
+ (?P<maths>(?:{math_op}{math_field})*)
+ (?:>(?P<strf_format>.+?))?
+ (?P<alternate>(?<!\\),[^|&)]+)?
+ (?:&(?P<replacement>.*?))?
+ (?:\|(?P<default>.*?))?
+ $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE)) + + def _traverse_infodict(k): + k = k.split('.') + if k[0] == '': + k.pop(0) + return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True) + + def get_value(mdict): + # Object traversal + value = _traverse_infodict(mdict['fields']) + # Negative + if mdict['negate']: + value = float_or_none(value) + if value is not None: + value *= -1 + # Do maths + offset_key = mdict['maths'] + if offset_key: + value = float_or_none(value) + operator = None + while offset_key: + item = re.match( + MATH_FIELD_RE if operator else MATH_OPERATORS_RE, + offset_key).group(0) + offset_key = offset_key[len(item):] + if operator is None: + operator = MATH_FUNCTIONS[item] + continue + item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1) + offset = float_or_none(item) + if offset is None: + offset = float_or_none(_traverse_infodict(item)) + try: + value = operator(value, multiplier * offset) + except (TypeError, ZeroDivisionError): + return None + operator = None + # Datetime formatting + if mdict['strf_format']: + value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ',')) + + return value + + na = self.params.get('outtmpl_na_placeholder', 'NA') + + def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')): + return sanitize_filename(str(value), restricted=restricted, + is_id=re.search(r'(^|[_.])id(\.|$)', key)) + + sanitizer = sanitize if callable(sanitize) else filename_sanitizer + sanitize = bool(sanitize) + + def _dumpjson_default(obj): + if isinstance(obj, (set, LazyList)): + return list(obj) + return repr(obj) + + def create_key(outer_mobj): + if not outer_mobj.group('has_key'): + return outer_mobj.group(0) + key = outer_mobj.group('key') + mobj = re.match(INTERNAL_FORMAT_RE, key) + initial_field = mobj.group('fields') if mobj else '' + value, replacement, default = None, None, na + while mobj: + mobj = mobj.groupdict() + default = mobj['default'] if mobj['default'] is not None else default + value = get_value(mobj) + replacement = mobj['replacement'] + if value is None and mobj['alternate']: + mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:]) + else: + break + + fmt = outer_mobj.group('format') + if fmt == 's' and value is not None and key in field_size_compat_map.keys(): + fmt = '0{:d}d'.format(field_size_compat_map[key]) + + value = default if value is None else value if replacement is None else replacement + + flags = outer_mobj.group('conversion') or '' + str_fmt = f'{fmt[:-1]}s' + if fmt[-1] == 'l': # list + delim = '\n' if '#' in flags else ', ' + value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt + elif fmt[-1] == 'j': # json + value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt + elif fmt[-1] == 'q': # quoted + value = map(str, variadic(value) if '#' in flags else [value]) + value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt + elif fmt[-1] == 'B': # bytes + value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8') + value, fmt = value.decode('utf-8', 'ignore'), 's' + elif fmt[-1] == 'U': # unicode normalized + value, fmt = unicodedata.normalize( + # "+" = compatibility equivalence, "#" = NFD + 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'), + value), str_fmt + elif fmt[-1] == 'D': # decimal suffix + num_fmt, fmt = fmt[:-1].replace('#', ''), 's' + value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt 
else '%d%s',
+ factor=1024 if '#' in flags else 1000)
+ elif fmt[-1] == 'S': # filename sanitization
+ value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
+ elif fmt[-1] == 'c':
+ if value:
+ value = str(value)[0]
+ else:
+ fmt = str_fmt
+ elif fmt[-1] not in 'rs': # numeric
+ value = float_or_none(value)
+ if value is None:
+ value, fmt = default, 's'
+
+ if sanitize:
+ if fmt[-1] == 'r':
+ # If value is an object, sanitize might convert it to a string
+ # So we convert it to repr first
+ value, fmt = repr(value), str_fmt
+ if fmt[-1] in 'csr':
+ value = sanitizer(initial_field, value)
+
+ key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
+ TMPL_DICT[key] = value
+ return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
+
+ return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
+
+ def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
+ outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
+ return self.escape_outtmpl(outtmpl) % info_dict
+
+ def _prepare_filename(self, info_dict, tmpl_type='default'):
+ try:
+ outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
+ filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
+ if not filename:
+ return None
+
+ if tmpl_type in ('default', 'temp'):
+ final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
+ if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
+ filename = replace_extension(filename, ext, final_ext)
+ else:
+ force_ext = OUTTMPL_TYPES[tmpl_type]
+ if force_ext:
+ filename = replace_extension(filename, force_ext, info_dict.get('ext'))
+
+ # https://github.com/blackjack4494/youtube-dlc/issues/85
+ trim_file_name = self.params.get('trim_file_name', False)
+ if trim_file_name:
+ no_ext, *ext = filename.rsplit('.', 2)
+ filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
+
+ return filename
+ except ValueError as err:
+ self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
+ return None
+
+ def prepare_filename(self, info_dict, dir_type='', warn=False):
+ """Generate the output filename."""
+
+ filename = self._prepare_filename(info_dict, dir_type or 'default')
+ if not filename and dir_type not in ('', 'temp'):
+ return ''
+
+ if warn:
+ if not self.params.get('paths'):
+ pass
+ elif filename == '-':
+ self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
+ elif os.path.isabs(filename):
+ self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
+ if filename == '-' or not filename:
+ return filename
+
+ return self.get_output_path(dir_type, filename)
+
+ def _match_entry(self, info_dict, incomplete=False, silent=False):
+ """ Returns None if the file should be downloaded """
+
+ video_title = info_dict.get('title', info_dict.get('id', 'video'))
+
+ def check_filter():
+ if 'title' in info_dict:
+ # This can happen when we're just evaluating the playlist
+ title = info_dict['title']
+ matchtitle = self.params.get('matchtitle', False)
+ if matchtitle:
+ if not re.search(matchtitle, title, re.IGNORECASE):
+ return '"' + title + '" title did not match pattern "' + matchtitle + '"'
+ rejecttitle = self.params.get('rejecttitle', False)
+ if rejecttitle:
+ if re.search(rejecttitle, title, re.IGNORECASE):
+ return '"' + title + '" title matched reject pattern "' +
rejecttitle + '"' + date = info_dict.get('upload_date') + if date is not None: + dateRange = self.params.get('daterange', DateRange()) + if date not in dateRange: + return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) + view_count = info_dict.get('view_count') + if view_count is not None: + min_views = self.params.get('min_views') + if min_views is not None and view_count < min_views: + return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views) + max_views = self.params.get('max_views') + if max_views is not None and view_count > max_views: + return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) + if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): + return 'Skipping "%s" because it is age restricted' % video_title + + match_filter = self.params.get('match_filter') + if match_filter is not None: + try: + ret = match_filter(info_dict, incomplete=incomplete) + except TypeError: + # For backward compatibility + ret = None if incomplete else match_filter(info_dict) + if ret is not None: + return ret + return None + + if self.in_download_archive(info_dict): + reason = '%s has already been recorded in the archive' % video_title + break_opt, break_err = 'break_on_existing', ExistingVideoReached + else: + reason = check_filter() + break_opt, break_err = 'break_on_reject', RejectedVideoReached + if reason is not None: + if not silent: + self.to_screen('[download] ' + reason) + if self.params.get(break_opt, False): + raise break_err() + return reason + + @staticmethod + def add_extra_info(info_dict, extra_info): + '''Set the keys from extra_info in info dict if they are missing''' + for key, value in extra_info.items(): + info_dict.setdefault(key, value) + + def extract_info(self, url, download=True, ie_key=None, extra_info=None, + process=True, force_generic_extractor=False): + """ + Return a list with a dictionary for each video extracted. + + Arguments: + url -- URL to extract + + Keyword arguments: + download -- whether to download videos during extraction + ie_key -- extractor key hint + extra_info -- dictionary containing the extra values to add to each result + process -- whether to resolve all unresolved references (URLs, playlist items), + must be True for download to work. 
+ force_generic_extractor -- force using the generic extractor + """ + + if extra_info is None: + extra_info = {} + + if not ie_key and force_generic_extractor: + ie_key = 'Generic' + + if ie_key: + ies = {ie_key: self._get_info_extractor_class(ie_key)} + else: + ies = self._ies + + for ie_key, ie in ies.items(): + if not ie.suitable(url): + continue + + if not ie.working(): + self.report_warning('The program functionality for this site has been marked as broken, ' + 'and will probably not work.') + + temp_id = ie.get_temp_id(url) + if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}): + self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive') + if self.params.get('break_on_existing', False): + raise ExistingVideoReached() + break + return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process) + else: + self.report_error('no suitable InfoExtractor for URL %s' % url) + + def __handle_extraction_exceptions(func): + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + while True: + try: + return func(self, *args, **kwargs) + except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError): + raise + except ReExtractInfo as e: + if e.expected: + self.to_screen(f'{e}; Re-extracting data') + else: + self.to_stderr('\r') + self.report_warning(f'{e}; Re-extracting data') + continue + except GeoRestrictedError as e: + msg = e.msg + if e.countries: + msg += '\nThis video is available in %s.' % ', '.join( + map(ISO3166Utils.short2full, e.countries)) + msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' + self.report_error(msg) + except ExtractorError as e: # An error we somewhat expected + self.report_error(str(e), e.format_traceback()) + except Exception as e: + if self.params.get('ignoreerrors'): + self.report_error(str(e), tb=encode_compat_str(traceback.format_exc())) + else: + raise + break + return wrapper + + def _wait_for_video(self, ie_result): + if (not self.params.get('wait_for_video') + or ie_result.get('_type', 'video') != 'video' + or ie_result.get('formats') or ie_result.get('url')): + return + + format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1] + last_msg = '' + + def progress(msg): + nonlocal last_msg + self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True) + last_msg = msg + + min_wait, max_wait = self.params.get('wait_for_video') + diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time()) + if diff is None and ie_result.get('live_status') == 'is_upcoming': + diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait) + self.report_warning('Release time of video is not known') + elif (diff or 0) <= 0: + self.report_warning('Video should already be available according to extracted info') + diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf')) + self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now') + + wait_till = time.time() + diff + try: + while True: + diff = wait_till - time.time() + if diff <= 0: + progress('') + raise ReExtractInfo('[wait] Wait period ended', expected=True) + progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}') + time.sleep(1) + except KeyboardInterrupt: + progress('') + raise ReExtractInfo('[wait] Interrupted by user', expected=True) + except BaseException as e: + if not isinstance(e, ReExtractInfo): + 
self.to_screen('') + raise + + @__handle_extraction_exceptions + def __extract_info(self, url, ie, download, extra_info, process): + ie_result = ie.extract(url) + if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) + return + if isinstance(ie_result, list): + # Backwards compatibility: old IE result format + ie_result = { + '_type': 'compat_list', + 'entries': ie_result, + } + if extra_info.get('original_url'): + ie_result.setdefault('original_url', extra_info['original_url']) + self.add_default_extra_info(ie_result, ie, url) + if process: + self._wait_for_video(ie_result) + return self.process_ie_result(ie_result, download, extra_info) + else: + return ie_result + + def add_default_extra_info(self, ie_result, ie, url): + if url is not None: + self.add_extra_info(ie_result, { + 'webpage_url': url, + 'original_url': url, + 'webpage_url_basename': url_basename(url), + 'webpage_url_domain': get_domain(url), + }) + if ie is not None: + self.add_extra_info(ie_result, { + 'extractor': ie.IE_NAME, + 'extractor_key': ie.ie_key(), + }) + + def process_ie_result(self, ie_result, download=True, extra_info=None): + """ + Take the result of the ie(may be modified) and resolve all unresolved + references (URLs, playlist items). + + It will also download the videos if 'download'. + Returns the resolved ie_result. + """ + if extra_info is None: + extra_info = {} + result_type = ie_result.get('_type', 'video') + + if result_type in ('url', 'url_transparent'): + ie_result['url'] = sanitize_url(ie_result['url']) + if ie_result.get('original_url'): + extra_info.setdefault('original_url', ie_result['original_url']) + + extract_flat = self.params.get('extract_flat', False) + if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) + or extract_flat is True): + info_copy = ie_result.copy() + ie = try_get(ie_result.get('ie_key'), self.get_info_extractor) + if ie and not ie_result.get('id'): + info_copy['id'] = ie.get_temp_id(ie_result['url']) + self.add_default_extra_info(info_copy, ie, ie_result['url']) + self.add_extra_info(info_copy, extra_info) + info_copy, _ = self.pre_process(info_copy) + self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True) + if self.params.get('force_write_download_archive', False): + self.record_download_archive(info_copy) + return ie_result + + if result_type == 'video': + self.add_extra_info(ie_result, extra_info) + ie_result = self.process_video_result(ie_result, download=download) + additional_urls = (ie_result or {}).get('additional_urls') + if additional_urls: + # TODO: Improve MetadataParserPP to allow setting a list + if isinstance(additional_urls, compat_str): + additional_urls = [additional_urls] + self.to_screen( + '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls))) + self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls)) + ie_result['additional_entries'] = [ + self.extract_info( + url, download, extra_info=extra_info, + force_generic_extractor=self.params.get('force_generic_extractor')) + for url in additional_urls + ] + return ie_result + elif result_type == 'url': + # We have to add extra_info to the results because it may be + # contained in a playlist + return self.extract_info( + ie_result['url'], download, + ie_key=ie_result.get('ie_key'), + extra_info=extra_info) + elif result_type == 'url_transparent': + # Use the information from the embedding page + info = self.extract_info( + ie_result['url'], 
ie_key=ie_result.get('ie_key'),
+ extra_info=extra_info, download=False, process=False)
+
+ # extract_info may return None when ignoreerrors is enabled and
+ # extraction failed with an error, don't crash and return early
+ # in this case
+ if not info:
+ return info
+
+ force_properties = dict(
+ (k, v) for k, v in ie_result.items() if v is not None)
+ for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
+ if f in force_properties:
+ del force_properties[f]
+ new_result = info.copy()
+ new_result.update(force_properties)
+
+ # Extracted info may not be a video result (i.e.
+ # info.get('_type', 'video') != video) but rather an url or
+ # url_transparent. In such cases outer metadata (from ie_result)
+ # should be propagated to inner one (info). For this to happen
+ # _type of info should be overridden with url_transparent. This
+ # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
+ if new_result.get('_type') == 'url':
+ new_result['_type'] = 'url_transparent'
+
+ return self.process_ie_result(
+ new_result, download=download, extra_info=extra_info)
+ elif result_type in ('playlist', 'multi_video'):
+ # Protect from infinite recursion due to recursively nested playlists
+ # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
+ webpage_url = ie_result['webpage_url']
+ if webpage_url in self._playlist_urls:
+ self.to_screen(
+ '[download] Skipping already downloaded playlist: %s'
+ % (ie_result.get('title') or ie_result.get('id')))
+ return
+
+ self._playlist_level += 1
+ self._playlist_urls.add(webpage_url)
+ self._sanitize_thumbnails(ie_result)
+ try:
+ return self.__process_playlist(ie_result, download)
+ finally:
+ self._playlist_level -= 1
+ if not self._playlist_level:
+ self._playlist_urls.clear()
+ elif result_type == 'compat_list':
+ self.report_warning(
+ 'Extractor %s returned a compat_list result. '
+ 'It needs to be updated.'
% ie_result.get('extractor')) + + def _fixup(r): + self.add_extra_info(r, { + 'extractor': ie_result['extractor'], + 'webpage_url': ie_result['webpage_url'], + 'webpage_url_basename': url_basename(ie_result['webpage_url']), + 'webpage_url_domain': get_domain(ie_result['webpage_url']), + 'extractor_key': ie_result['extractor_key'], + }) + return r + ie_result['entries'] = [ + self.process_ie_result(_fixup(r), download, extra_info) + for r in ie_result['entries'] + ] + return ie_result + else: + raise Exception('Invalid result type: %s' % result_type) + + def _ensure_dir_exists(self, path): + return make_dir(path, self.report_error) + + @staticmethod + def _playlist_infodict(ie_result, **kwargs): + return { + **ie_result, + 'playlist': ie_result.get('title') or ie_result.get('id'), + 'playlist_id': ie_result.get('id'), + 'playlist_title': ie_result.get('title'), + 'playlist_uploader': ie_result.get('uploader'), + 'playlist_uploader_id': ie_result.get('uploader_id'), + 'playlist_index': 0, + **kwargs, + } + + def __process_playlist(self, ie_result, download): + # We process each entry in the playlist + playlist = ie_result.get('title') or ie_result.get('id') + self.to_screen('[download] Downloading playlist: %s' % playlist) + + if 'entries' not in ie_result: + raise EntryNotInPlaylist('There are no entries') + + MissingEntry = object() + incomplete_entries = bool(ie_result.get('requested_entries')) + if incomplete_entries: + def fill_missing_entries(entries, indices): + ret = [MissingEntry] * max(indices) + for i, entry in zip(indices, entries): + ret[i - 1] = entry + return ret + ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries']) + + playlist_results = [] + + playliststart = self.params.get('playliststart', 1) + playlistend = self.params.get('playlistend') + # For backwards compatibility, interpret -1 as whole list + if playlistend == -1: + playlistend = None + + playlistitems_str = self.params.get('playlist_items') + playlistitems = None + if playlistitems_str is not None: + def iter_playlistitems(format): + for string_segment in format.split(','): + if '-' in string_segment: + start, end = string_segment.split('-') + for item in range(int(start), int(end) + 1): + yield int(item) + else: + yield int(string_segment) + playlistitems = orderedSet(iter_playlistitems(playlistitems_str)) + + ie_entries = ie_result['entries'] + if isinstance(ie_entries, list): + playlist_count = len(ie_entries) + msg = f'Collected {playlist_count} videos; downloading %d of them' + ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count + + def get_entry(i): + return ie_entries[i - 1] + else: + msg = 'Downloading %d videos' + if not isinstance(ie_entries, (PagedList, LazyList)): + ie_entries = LazyList(ie_entries) + elif isinstance(ie_entries, InAdvancePagedList): + if ie_entries._pagesize == 1: + playlist_count = ie_entries._pagecount + + def get_entry(i): + return YoutubeDL.__handle_extraction_exceptions( + lambda self, i: ie_entries[i - 1] + )(self, i) + + entries, broken = [], False + items = playlistitems if playlistitems is not None else itertools.count(playliststart) + for i in items: + if i == 0: + continue + if playlistitems is None and playlistend is not None and playlistend < i: + break + entry = None + try: + entry = get_entry(i) + if entry is MissingEntry: + raise EntryNotInPlaylist() + except (IndexError, EntryNotInPlaylist): + if incomplete_entries: + raise EntryNotInPlaylist(f'Entry {i} cannot be found') + elif not playlistitems: 
+ break + entries.append(entry) + try: + if entry is not None: + self._match_entry(entry, incomplete=True, silent=True) + except (ExistingVideoReached, RejectedVideoReached): + broken = True + break + ie_result['entries'] = entries + + # Save playlist_index before re-ordering + entries = [ + ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry) + for i, entry in enumerate(entries, 1) + if entry is not None] + n_entries = len(entries) + + if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend): + ie_result['playlist_count'] = n_entries + + if not playlistitems and (playliststart != 1 or playlistend): + playlistitems = list(range(playliststart, playliststart + n_entries)) + ie_result['requested_entries'] = playlistitems + + _infojson_written = False + write_playlist_files = self.params.get('allow_playlist_files', True) + if write_playlist_files and self.params.get('list_thumbnails'): + self.list_thumbnails(ie_result) + if write_playlist_files and not self.params.get('simulate'): + ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries) + _infojson_written = self._write_info_json( + 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson')) + if _infojson_written is None: + return + if self._write_description('playlist', ie_result, + self.prepare_filename(ie_copy, 'pl_description')) is None: + return + # TODO: This should be passed to ThumbnailsConvertor if necessary + self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail')) + + if self.params.get('playlistreverse', False): + entries = entries[::-1] + if self.params.get('playlistrandom', False): + random.shuffle(entries) + + x_forwarded_for = ie_result.get('__x_forwarded_for_ip') + + self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries)) + failures = 0 + max_failures = self.params.get('skip_playlist_after_errors') or float('inf') + for i, entry_tuple in enumerate(entries, 1): + playlist_index, entry = entry_tuple + if 'playlist-index' in self.params.get('compat_opts', []): + playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1 + self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) + # This __x_forwarded_for_ip thing is a bit ugly but requires + # minimal changes + if x_forwarded_for: + entry['__x_forwarded_for_ip'] = x_forwarded_for + extra = { + 'n_entries': n_entries, + '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries), + 'playlist_count': ie_result.get('playlist_count'), + 'playlist_index': playlist_index, + 'playlist_autonumber': i, + 'playlist': playlist, + 'playlist_id': ie_result.get('id'), + 'playlist_title': ie_result.get('title'), + 'playlist_uploader': ie_result.get('uploader'), + 'playlist_uploader_id': ie_result.get('uploader_id'), + 'extractor': ie_result['extractor'], + 'webpage_url': ie_result['webpage_url'], + 'webpage_url_basename': url_basename(ie_result['webpage_url']), + 'webpage_url_domain': get_domain(ie_result['webpage_url']), + 'extractor_key': ie_result['extractor_key'], + } + + if self._match_entry(entry, incomplete=True) is not None: + continue + + entry_result = self.__process_iterable_entry(entry, download, extra) + if not entry_result: + failures += 1 + if failures >= max_failures: + self.report_error( + 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures)) + break + playlist_results.append(entry_result) + ie_result['entries'] = 
playlist_results
+
+ # Write the updated info to json
+ if _infojson_written and self._write_info_json(
+ 'updated playlist', ie_result,
+ self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
+ return
+
+ ie_result = self.run_all_pps('playlist', ie_result)
+ self.to_screen(f'[download] Finished downloading playlist: {playlist}')
+ return ie_result
+
+ @__handle_extraction_exceptions
+ def __process_iterable_entry(self, entry, download, extra_info):
+ return self.process_ie_result(
+ entry, download=download, extra_info=extra_info)
+
+ def _build_format_filter(self, filter_spec):
+ " Returns a function to filter the formats according to the filter_spec "
+
+ OPERATORS = {
+ '<': operator.lt,
+ '<=': operator.le,
+ '>': operator.gt,
+ '>=': operator.ge,
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
+ (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
+ ''' % '|'.join(map(re.escape, OPERATORS.keys())))
+ m = operator_rex.fullmatch(filter_spec)
+ if m:
+ try:
+ comparison_value = int(m.group('value'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('value'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('value') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid value %r in format specification %r' % (
+ m.group('value'), filter_spec))
+ op = OPERATORS[m.group('op')]
+
+ if not m:
+ STR_OPERATORS = {
+ '=': operator.eq,
+ '^=': lambda attr, value: attr.startswith(value),
+ '$=': lambda attr, value: attr.endswith(value),
+ '*=': lambda attr, value: value in attr,
+ }
+ str_operator_rex = re.compile(r'''(?x)\s*
+ (?P<key>[a-zA-Z0-9._-]+)\s*
+ (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?P<value>[a-zA-Z0-9._-]+)\s*
+ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
+ m = str_operator_rex.fullmatch(filter_spec)
+ if m:
+ comparison_value = m.group('value')
+ str_op = STR_OPERATORS[m.group('op')]
+ if m.group('negation'):
+ op = lambda attr, value: not str_op(attr, value)
+ else:
+ op = str_op
+
+ if not m:
+ raise SyntaxError('Invalid filter specification %r' % filter_spec)
+
+ def _filter(f):
+ actual_value = f.get(m.group('key'))
+ if actual_value is None:
+ return m.group('none_inclusive')
+ return op(actual_value, comparison_value)
+ return _filter
+
+ def _check_formats(self, formats):
+ for f in formats:
+ self.to_screen('[info] Testing format %s' % f['format_id'])
+ path = self.get_output_path('temp')
+ if not self._ensure_dir_exists(f'{path}/'):
+ continue
+ temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
+ temp_file.close()
+ try:
+ success, _ = self.dl(temp_file.name, f, test=True)
+ except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
+ success = False
+ finally:
+ if os.path.exists(temp_file.name):
+ try:
+ os.remove(temp_file.name)
+ except OSError:
+ self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
+ if success:
+ yield f
+ else:
+ self.to_screen('[info] Unable to download format %s. Skipping...'
% f['format_id']) + + def _default_format_spec(self, info_dict, download=True): + + def can_merge(): + merger = FFmpegMergerPP(self) + return merger.available and merger.can_merge() + + prefer_best = ( + not self.params.get('simulate') + and download + and ( + not can_merge() + or info_dict.get('is_live', False) + or self.outtmpl_dict['default'] == '-')) + compat = ( + prefer_best + or self.params.get('allow_multiple_audio_streams', False) + or 'format-spec' in self.params.get('compat_opts', [])) + + return ( + 'best/bestvideo+bestaudio' if prefer_best + else 'bestvideo*+bestaudio/best' if not compat + else 'bestvideo+bestaudio/best') + + def build_format_selector(self, format_spec): + def syntax_error(note, start): + message = ( + 'Invalid format specification: ' + '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1])) + return SyntaxError(message) + + PICKFIRST = 'PICKFIRST' + MERGE = 'MERGE' + SINGLE = 'SINGLE' + GROUP = 'GROUP' + FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters']) + + allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False), + 'video': self.params.get('allow_multiple_video_streams', False)} + + check_formats = self.params.get('check_formats') == 'selected' + + def _parse_filter(tokens): + filter_parts = [] + for type, string, start, _, _ in tokens: + if type == tokenize.OP and string == ']': + return ''.join(filter_parts) + else: + filter_parts.append(string) + + def _remove_unused_ops(tokens): + # Remove operators that we don't use and join them with the surrounding strings + # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' + ALLOWED_OPS = ('/', '+', ',', '(', ')') + last_string, last_start, last_end, last_line = None, None, None, None + for type, string, start, end, line in tokens: + if type == tokenize.OP and string == '[': + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + last_string = None + yield type, string, start, end, line + # everything inside brackets will be handled by _parse_filter + for type, string, start, end, line in tokens: + yield type, string, start, end, line + if type == tokenize.OP and string == ']': + break + elif type == tokenize.OP and string in ALLOWED_OPS: + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + last_string = None + yield type, string, start, end, line + elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: + if not last_string: + last_string = string + last_start = start + last_end = end + else: + last_string += string + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + + def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False): + selectors = [] + current_selector = None + for type, string, start, _, _ in tokens: + # ENCODING is only defined in python 3.x + if type == getattr(tokenize, 'ENCODING', None): + continue + elif type in [tokenize.NAME, tokenize.NUMBER]: + current_selector = FormatSelector(SINGLE, string, []) + elif type == tokenize.OP: + if string == ')': + if not inside_group: + # ')' will be handled by the parentheses group + tokens.restore_last_token() + break + elif inside_merge and string in ['/', ',']: + tokens.restore_last_token() + break + elif inside_choice and string == ',': + tokens.restore_last_token() + break + elif string == ',': + if not current_selector: + raise syntax_error('"," must follow a format selector', start) + 
selectors.append(current_selector) + current_selector = None + elif string == '/': + if not current_selector: + raise syntax_error('"/" must follow a format selector', start) + first_choice = current_selector + second_choice = _parse_format_selection(tokens, inside_choice=True) + current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), []) + elif string == '[': + if not current_selector: + current_selector = FormatSelector(SINGLE, 'best', []) + format_filter = _parse_filter(tokens) + current_selector.filters.append(format_filter) + elif string == '(': + if current_selector: + raise syntax_error('Unexpected "("', start) + group = _parse_format_selection(tokens, inside_group=True) + current_selector = FormatSelector(GROUP, group, []) + elif string == '+': + if not current_selector: + raise syntax_error('Unexpected "+"', start) + selector_1 = current_selector + selector_2 = _parse_format_selection(tokens, inside_merge=True) + if not selector_2: + raise syntax_error('Expected a selector', start) + current_selector = FormatSelector(MERGE, (selector_1, selector_2), []) + else: + raise syntax_error('Operator not recognized: "{0}"'.format(string), start) + elif type == tokenize.ENDMARKER: + break + if current_selector: + selectors.append(current_selector) + return selectors + + def _merge(formats_pair): + format_1, format_2 = formats_pair + + formats_info = [] + formats_info.extend(format_1.get('requested_formats', (format_1,))) + formats_info.extend(format_2.get('requested_formats', (format_2,))) + + if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']: + get_no_more = {'video': False, 'audio': False} + for (i, fmt_info) in enumerate(formats_info): + if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none': + formats_info.pop(i) + continue + for aud_vid in ['audio', 'video']: + if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none': + if get_no_more[aud_vid]: + formats_info.pop(i) + break + get_no_more[aud_vid] = True + + if len(formats_info) == 1: + return formats_info[0] + + video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none'] + audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none'] + + the_only_video = video_fmts[0] if len(video_fmts) == 1 else None + the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None + + output_ext = self.params.get('merge_output_format') + if not output_ext: + if the_only_video: + output_ext = the_only_video['ext'] + elif the_only_audio and not video_fmts: + output_ext = the_only_audio['ext'] + else: + output_ext = 'mkv' + + filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info)) + + new_dict = { + 'requested_formats': formats_info, + 'format': '+'.join(filtered('format')), + 'format_id': '+'.join(filtered('format_id')), + 'ext': output_ext, + 'protocol': '+'.join(map(determine_protocol, formats_info)), + 'language': '+'.join(orderedSet(filtered('language'))) or None, + 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None, + 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None, + 'tbr': sum(filtered('tbr', 'vbr', 'abr')), + } + + if the_only_video: + new_dict.update({ + 'width': the_only_video.get('width'), + 'height': the_only_video.get('height'), + 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video), + 'fps': the_only_video.get('fps'), + 'dynamic_range': the_only_video.get('dynamic_range'), + 'vcodec': 
the_only_video.get('vcodec'),
+ 'vbr': the_only_video.get('vbr'),
+ 'stretched_ratio': the_only_video.get('stretched_ratio'),
+ })
+
+ if the_only_audio:
+ new_dict.update({
+ 'acodec': the_only_audio.get('acodec'),
+ 'abr': the_only_audio.get('abr'),
+ 'asr': the_only_audio.get('asr'),
+ })
+
+ return new_dict
+
+ def _check_formats(formats):
+ if not check_formats:
+ yield from formats
+ return
+ yield from self._check_formats(formats)
+
+ def _build_selector_function(selector):
+ if isinstance(selector, list): # ,
+ fs = [_build_selector_function(s) for s in selector]
+
+ def selector_function(ctx):
+ for f in fs:
+ yield from f(ctx)
+ return selector_function
+
+ elif selector.type == GROUP: # ()
+ selector_function = _build_selector_function(selector.selector)
+
+ elif selector.type == PICKFIRST: # /
+ fs = [_build_selector_function(s) for s in selector.selector]
+
+ def selector_function(ctx):
+ for f in fs:
+ picked_formats = list(f(ctx))
+ if picked_formats:
+ return picked_formats
+ return []
+
+ elif selector.type == MERGE: # +
+ selector_1, selector_2 = map(_build_selector_function, selector.selector)
+
+ def selector_function(ctx):
+ for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
+ yield _merge(pair)
+
+ elif selector.type == SINGLE: # atom
+ format_spec = selector.selector or 'best'
+
+ # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
+ if format_spec == 'all':
+ def selector_function(ctx):
+ yield from _check_formats(ctx['formats'][::-1])
+ elif format_spec == 'mergeall':
+ def selector_function(ctx):
+ formats = list(_check_formats(ctx['formats']))
+ if not formats:
+ return
+ merged_format = formats[-1]
+ for f in formats[-2::-1]:
+ merged_format = _merge((merged_format, f))
+ yield merged_format
+
+ else:
+ format_fallback, format_reverse, format_idx = False, True, 1
+ mobj = re.match(
+ r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
+ format_spec)
+ if mobj is not None:
+ format_idx = int_or_none(mobj.group('n'), default=1)
+ format_reverse = mobj.group('bw')[0] == 'b'
+ format_type = (mobj.group('type') or [None])[0]
+ not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
+ format_modified = mobj.group('mod') is not None
+
+ format_fallback = not format_type and not format_modified # for b, w
+ _filter_f = (
+ (lambda f: f.get('%scodec' % format_type) != 'none')
+ if format_type and format_modified # bv*, ba*, wv*, wa*
+ else (lambda f: f.get('%scodec' % not_format_type) == 'none')
+ if format_type # bv, ba, wv, wa
+ else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
+ if not format_modified # b, w
+ else lambda f: True) # b*, w*
+ filter_f = lambda f: _filter_f(f) and (
+ f.get('vcodec') != 'none' or f.get('acodec') != 'none')
+ else:
+ if format_spec in self._format_selection_exts['audio']:
+ filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
+ elif format_spec in self._format_selection_exts['video']:
+ filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
+ elif format_spec in self._format_selection_exts['storyboards']:
+ filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
+ else:
+ filter_f = lambda f: f.get('format_id') == format_spec # id
+
+ def selector_function(ctx):
+ formats = list(ctx['formats'])
+ matches = list(filter(filter_f, formats)) if filter_f is not None else formats
+ if format_fallback and
ctx['incomplete_formats'] and not matches: + # for extractors with incomplete formats (audio only (soundcloud) + # or video only (imgur)) best/worst will fallback to + # best/worst {video,audio}-only format + matches = formats + matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1])) + try: + yield matches[format_idx - 1] + except IndexError: + return + + filters = [self._build_format_filter(f) for f in selector.filters] + + def final_selector(ctx): + ctx_copy = dict(ctx) + for _filter in filters: + ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats'])) + return selector_function(ctx_copy) + return final_selector + + stream = io.BytesIO(format_spec.encode('utf-8')) + try: + tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline))) + except tokenize.TokenError: + raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) + + class TokenIterator(object): + def __init__(self, tokens): + self.tokens = tokens + self.counter = 0 + + def __iter__(self): + return self + + def __next__(self): + if self.counter >= len(self.tokens): + raise StopIteration() + value = self.tokens[self.counter] + self.counter += 1 + return value + + next = __next__ + + def restore_last_token(self): + self.counter -= 1 + + parsed_selector = _parse_format_selection(iter(TokenIterator(tokens))) + return _build_selector_function(parsed_selector) + + def _calc_headers(self, info_dict): + res = std_headers.copy() + res.update(info_dict.get('http_headers') or {}) + + cookies = self._calc_cookies(info_dict) + if cookies: + res['Cookie'] = cookies + + if 'X-Forwarded-For' not in res: + x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip') + if x_forwarded_for_ip: + res['X-Forwarded-For'] = x_forwarded_for_ip + + return res + + def _calc_cookies(self, info_dict): + pr = sanitized_Request(info_dict['url']) + self.cookiejar.add_cookie_header(pr) + return pr.get_header('Cookie') + + def _sort_thumbnails(self, thumbnails): + thumbnails.sort(key=lambda t: ( + t.get('preference') if t.get('preference') is not None else -1, + t.get('width') if t.get('width') is not None else -1, + t.get('height') if t.get('height') is not None else -1, + t.get('id') if t.get('id') is not None else '', + t.get('url'))) + + def _sanitize_thumbnails(self, info_dict): + thumbnails = info_dict.get('thumbnails') + if thumbnails is None: + thumbnail = info_dict.get('thumbnail') + if thumbnail: + info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] + if not thumbnails: + return + + def check_thumbnails(thumbnails): + for t in thumbnails: + self.to_screen(f'[info] Testing thumbnail {t["id"]}') + try: + self.urlopen(HEADRequest(t['url'])) + except network_exceptions as err: + self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. 
Skipping...') + continue + yield t + + self._sort_thumbnails(thumbnails) + for i, t in enumerate(thumbnails): + if t.get('id') is None: + t['id'] = '%d' % i + if t.get('width') and t.get('height'): + t['resolution'] = '%dx%d' % (t['width'], t['height']) + t['url'] = sanitize_url(t['url']) + + if self.params.get('check_formats') is True: + info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True) + else: + info_dict['thumbnails'] = thumbnails + + def process_video_result(self, info_dict, download=True): + assert info_dict.get('_type', 'video') == 'video' + self._num_videos += 1 + + if 'id' not in info_dict: + raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor']) + elif not info_dict.get('id'): + raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor']) + + info_dict['fulltitle'] = info_dict.get('title') + if 'title' not in info_dict: + raise ExtractorError('Missing "title" field in extractor result', + video_id=info_dict['id'], ie=info_dict['extractor']) + elif not info_dict.get('title'): + self.report_warning('Extractor failed to obtain "title". Creating a generic title instead') + info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}' + + def report_force_conversion(field, field_not, conversion): + self.report_warning( + '"%s" field is not %s - forcing %s conversion, there is an error in extractor' + % (field, field_not, conversion)) + + def sanitize_string_field(info, string_field): + field = info.get(string_field) + if field is None or isinstance(field, compat_str): + return + report_force_conversion(string_field, 'a string', 'string') + info[string_field] = compat_str(field) + + def sanitize_numeric_fields(info): + for numeric_field in self._NUMERIC_FIELDS: + field = info.get(numeric_field) + if field is None or isinstance(field, compat_numeric_types): + continue + report_force_conversion(numeric_field, 'numeric', 'int') + info[numeric_field] = int_or_none(field) + + sanitize_string_field(info_dict, 'id') + sanitize_numeric_fields(info_dict) + + if 'playlist' not in info_dict: + # It isn't part of a playlist + info_dict['playlist'] = None + info_dict['playlist_index'] = None + + self._sanitize_thumbnails(info_dict) + + thumbnail = info_dict.get('thumbnail') + thumbnails = info_dict.get('thumbnails') + if thumbnail: + info_dict['thumbnail'] = sanitize_url(thumbnail) + elif thumbnails: + info_dict['thumbnail'] = thumbnails[-1]['url'] + + if info_dict.get('display_id') is None and 'id' in info_dict: + info_dict['display_id'] = info_dict['id'] + + if info_dict.get('duration') is not None: + info_dict['duration_string'] = formatSeconds(info_dict['duration']) + + for ts_key, date_key in ( + ('timestamp', 'upload_date'), + ('release_timestamp', 'release_date'), + ('modified_timestamp', 'modified_date'), + ): + if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None: + # Working around out-of-range timestamp values (e.g. 
negative ones on Windows, + # see http://bugs.python.org/issue1646728) + try: + upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key]) + info_dict[date_key] = upload_date.strftime('%Y%m%d') + except (ValueError, OverflowError, OSError): + pass + + live_keys = ('is_live', 'was_live') + live_status = info_dict.get('live_status') + if live_status is None: + for key in live_keys: + if info_dict.get(key) is False: + continue + if info_dict.get(key): + live_status = key + break + if all(info_dict.get(key) is False for key in live_keys): + live_status = 'not_live' + if live_status: + info_dict['live_status'] = live_status + for key in live_keys: + if info_dict.get(key) is None: + info_dict[key] = (live_status == key) + + # Auto generate title fields corresponding to the *_number fields when missing + # in order to always have clean titles. This is very common for TV series. + for field in ('chapter', 'season', 'episode'): + if info_dict.get('%s_number' % field) is not None and not info_dict.get(field): + info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) + + for cc_kind in ('subtitles', 'automatic_captions'): + cc = info_dict.get(cc_kind) + if cc: + for _, subtitle in cc.items(): + for subtitle_format in subtitle: + if subtitle_format.get('url'): + subtitle_format['url'] = sanitize_url(subtitle_format['url']) + if subtitle_format.get('ext') is None: + subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() + + automatic_captions = info_dict.get('automatic_captions') + subtitles = info_dict.get('subtitles') + + info_dict['requested_subtitles'] = self.process_subtitles( + info_dict['id'], subtitles, automatic_captions) + + if info_dict.get('formats') is None: + # There's only one format available + formats = [info_dict] + else: + formats = info_dict['formats'] + + info_dict['__has_drm'] = any(f.get('has_drm') for f in formats) + if not self.params.get('allow_unplayable_formats'): + formats = [f for f in formats if not f.get('has_drm')] + + if info_dict.get('is_live'): + get_from_start = bool(self.params.get('live_from_start')) + formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start] + if not get_from_start: + info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + + if not formats: + self.raise_no_formats(info_dict) + + def is_wellformed(f): + url = f.get('url') + if not url: + self.report_warning( + '"url" field is missing or empty - skipping format, ' + 'there is an error in extractor') + return False + if isinstance(url, bytes): + sanitize_string_field(f, 'url') + return True + + # Filter out malformed formats for better extraction robustness + formats = list(filter(is_wellformed, formats)) + + formats_dict = {} + + # We check that all the formats have the format and format_id fields + for i, format in enumerate(formats): + sanitize_string_field(format, 'format_id') + sanitize_numeric_fields(format) + format['url'] = sanitize_url(format['url']) + if not format.get('format_id'): + format['format_id'] = compat_str(i) + else: + # Sanitize format_id from characters used in format selector expression + format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id']) + format_id = format['format_id'] + if format_id not in formats_dict: + formats_dict[format_id] = [] + formats_dict[format_id].append(format) + + # Make sure all formats have unique format_id + common_exts = set(itertools.chain(*self._format_selection_exts.values())) + for format_id, ambiguous_formats in 
formats_dict.items(): + ambigious_id = len(ambiguous_formats) > 1 + for i, format in enumerate(ambiguous_formats): + if ambigious_id: + format['format_id'] = '%s-%d' % (format_id, i) + if format.get('ext') is None: + format['ext'] = determine_ext(format['url']).lower() + # Ensure there is no conflict between id and ext in format selection + # See https://github.com/yt-dlp/yt-dlp/issues/1282 + if format['format_id'] != format['ext'] and format['format_id'] in common_exts: + format['format_id'] = 'f%s' % format['format_id'] + + for i, format in enumerate(formats): + if format.get('format') is None: + format['format'] = '{id} - {res}{note}'.format( + id=format['format_id'], + res=self.format_resolution(format), + note=format_field(format, 'format_note', ' (%s)'), + ) + if format.get('protocol') is None: + format['protocol'] = determine_protocol(format) + if format.get('resolution') is None: + format['resolution'] = self.format_resolution(format, default=None) + if format.get('dynamic_range') is None and format.get('vcodec') != 'none': + format['dynamic_range'] = 'SDR' + if (info_dict.get('duration') and format.get('tbr') + and not format.get('filesize') and not format.get('filesize_approx')): + format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8) + + # Add HTTP headers, so that external programs can use them from the + # json output + full_format_info = info_dict.copy() + full_format_info.update(format) + format['http_headers'] = self._calc_headers(full_format_info) + # Remove private housekeeping stuff + if '__x_forwarded_for_ip' in info_dict: + del info_dict['__x_forwarded_for_ip'] + + # TODO Central sorting goes here + + if self.params.get('check_formats') is True: + formats = LazyList(self._check_formats(formats[::-1]), reverse=True) + + if not formats or formats[0] is not info_dict: + # only set the 'formats' fields if the original info_dict list them + # otherwise we end up with a circular reference, the first (and unique) + # element in the 'formats' field in info_dict is info_dict itself, + # which can't be exported to json + info_dict['formats'] = formats + + info_dict, _ = self.pre_process(info_dict) + + # The pre-processors may have modified the formats + formats = info_dict.get('formats', [info_dict]) + + list_only = self.params.get('simulate') is None and ( + self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')) + interactive_format_selection = not list_only and self.format_selector == '-' + if self.params.get('list_thumbnails'): + self.list_thumbnails(info_dict) + if self.params.get('listsubtitles'): + if 'automatic_captions' in info_dict: + self.list_subtitles( + info_dict['id'], automatic_captions, 'automatic captions') + self.list_subtitles(info_dict['id'], subtitles, 'subtitles') + if self.params.get('listformats') or interactive_format_selection: + self.list_formats(info_dict) + if list_only: + # Without this printing, -F --print-json will not work + self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True) + return + + format_selector = self.format_selector + if format_selector is None: + req_format = self._default_format_spec(info_dict, download=download) + self.write_debug('Default format spec: %s' % req_format) + format_selector = self.build_format_selector(req_format) + + while True: + if interactive_format_selection: + req_format = input( + self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS)) + try: + format_selector = 
self.build_format_selector(req_format) + except SyntaxError as err: + self.report_error(err, tb=False, is_error=False) + continue + + # While in format selection we may need to have an access to the original + # format set in order to calculate some metrics or do some processing. + # For now we need to be able to guess whether original formats provided + # by extractor are incomplete or not (i.e. whether extractor provides only + # video-only or audio-only formats) for proper formats selection for + # extractors with such incomplete formats (see + # https://github.com/ytdl-org/youtube-dl/pull/5556). + # Since formats may be filtered during format selection and may not match + # the original formats the results may be incorrect. Thus original formats + # or pre-calculated metrics should be passed to format selection routines + # as well. + # We will pass a context object containing all necessary additional data + # instead of just formats. + # This fixes incorrect format selection issue (see + # https://github.com/ytdl-org/youtube-dl/issues/10083). + incomplete_formats = ( + # All formats are video-only or + all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) + # all formats are audio-only + or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)) + + ctx = { + 'formats': formats, + 'incomplete_formats': incomplete_formats, + } + + formats_to_download = list(format_selector(ctx)) + if interactive_format_selection and not formats_to_download: + self.report_error('Requested format is not available', tb=False, is_error=False) + continue + break + + if not formats_to_download: + if not self.params.get('ignore_no_formats_error'): + raise ExtractorError('Requested format is not available', expected=True, + video_id=info_dict['id'], ie=info_dict['extractor']) + self.report_warning('Requested format is not available') + # Process what we can, even without any available formats. 
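The selector context described in the comment above is easy to exercise in isolation. Below is a minimal standalone sketch of the incomplete-formats rule; the sample format dicts and the is_incomplete helper are invented for illustration and simply mirror the all(...) expression used here:

    # A format list is "incomplete" when every entry is video-only or
    # every entry is audio-only; best/worst may then fall back to it.
    def is_incomplete(formats):
        return (all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
                or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

    video_only = [{'vcodec': 'vp9', 'acodec': 'none'}, {'vcodec': 'avc1', 'acodec': 'none'}]
    mixed = video_only + [{'vcodec': 'none', 'acodec': 'opus'}]
    print(is_incomplete(video_only), is_incomplete(mixed))  # True False
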
+ formats_to_download = [{}] + + best_format = formats_to_download[-1] + if download: + if best_format: + self.to_screen( + f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): ' + + ', '.join([f['format_id'] for f in formats_to_download])) + max_downloads_reached = False + for i, fmt in enumerate(formats_to_download): + formats_to_download[i] = new_info = dict(info_dict) + # Save a reference to the original info_dict so that it can be modified in process_info if needed + new_info.update(fmt) + new_info['__original_infodict'] = info_dict + try: + self.process_info(new_info) + except MaxDownloadsReached: + max_downloads_reached = True + new_info.pop('__original_infodict') + # Remove copied info + for key, val in tuple(new_info.items()): + if info_dict.get(key) == val: + new_info.pop(key) + if max_downloads_reached: + break + + write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download) + assert write_archive.issubset({True, False, 'ignore'}) + if True in write_archive and False not in write_archive: + self.record_download_archive(info_dict) + + info_dict['requested_downloads'] = formats_to_download + info_dict = self.run_all_pps('after_video', info_dict) + if max_downloads_reached: + raise MaxDownloadsReached() + + # We update the info dict with the selected best quality format (backwards compatibility) + info_dict.update(best_format) + return info_dict + + def process_subtitles(self, video_id, normal_subtitles, automatic_captions): + """Select the requested subtitles and their format""" + available_subs = {} + if normal_subtitles and self.params.get('writesubtitles'): + available_subs.update(normal_subtitles) + if automatic_captions and self.params.get('writeautomaticsub'): + for lang, cap_info in automatic_captions.items(): + if lang not in available_subs: + available_subs[lang] = cap_info + + if (not self.params.get('writesubtitles') and not + self.params.get('writeautomaticsub') or not + available_subs): + return None + + all_sub_langs = available_subs.keys() + if self.params.get('allsubtitles', False): + requested_langs = all_sub_langs + elif self.params.get('subtitleslangs', False): + # A list is used so that the order of languages will be the same as + # given in subtitleslangs. 
See https://github.com/yt-dlp/yt-dlp/issues/1041 + requested_langs = [] + for lang_re in self.params.get('subtitleslangs'): + if lang_re == 'all': + requested_langs.extend(all_sub_langs) + continue + discard = lang_re[0] == '-' + if discard: + lang_re = lang_re[1:] + current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs) + if discard: + for lang in current_langs: + while lang in requested_langs: + requested_langs.remove(lang) + else: + requested_langs.extend(current_langs) + requested_langs = orderedSet(requested_langs) + elif 'en' in available_subs: + requested_langs = ['en'] + else: + requested_langs = [list(all_sub_langs)[0]] + if requested_langs: + self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs)) + + formats_query = self.params.get('subtitlesformat', 'best') + formats_preference = formats_query.split('/') if formats_query else [] + subs = {} + for lang in requested_langs: + formats = available_subs.get(lang) + if formats is None: + self.report_warning('%s subtitles not available for %s' % (lang, video_id)) + continue + for ext in formats_preference: + if ext == 'best': + f = formats[-1] + break + matches = list(filter(lambda f: f['ext'] == ext, formats)) + if matches: + f = matches[-1] + break + else: + f = formats[-1] + self.report_warning( + 'No subtitle format found matching "%s" for language %s, ' + 'using %s' % (formats_query, lang, f['ext'])) + subs[lang] = f + return subs + + def _forceprint(self, key, info_dict): + if info_dict is None: + return + info_copy = info_dict.copy() + info_copy['formats_table'] = self.render_formats_table(info_dict) + info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict) + info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles')) + info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions')) + + def format_tmpl(tmpl): + mobj = re.match(r'\w+(=?)$', tmpl) + if mobj and mobj.group(1): + return f'{tmpl[:-1]} = %({tmpl[:-1]})r' + elif mobj: + return f'%({tmpl})s' + return tmpl + + for tmpl in self.params['forceprint'].get(key, []): + self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy)) + + for tmpl, file_tmpl in self.params['print_to_file'].get(key, []): + filename = self.evaluate_outtmpl(file_tmpl, info_dict) + tmpl = format_tmpl(tmpl) + self.to_screen(f'[info] Writing {tmpl!r} to: {filename}') + with io.open(filename, 'a', encoding='utf-8') as f: + f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n') + + def __forced_printings(self, info_dict, filename, incomplete): + def print_mandatory(field, actual_field=None): + if actual_field is None: + actual_field = field + if (self.params.get('force%s' % field, False) + and (not incomplete or info_dict.get(actual_field) is not None)): + self.to_stdout(info_dict[actual_field]) + + def print_optional(field): + if (self.params.get('force%s' % field, False) + and info_dict.get(field) is not None): + self.to_stdout(info_dict[field]) + + info_dict = info_dict.copy() + if filename is not None: + info_dict['filename'] = filename + if info_dict.get('requested_formats') is not None: + # For RTMP URLs, also include the playpath + info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats']) + elif 'url' in info_dict: + info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '') + + if (self.params.get('forcejson') + or self.params['forceprint'].get('video') + or 
self.params['print_to_file'].get('video')): + self.post_extract(info_dict) + self._forceprint('video', info_dict) + + print_mandatory('title') + print_mandatory('id') + print_mandatory('url', 'urls') + print_optional('thumbnail') + print_optional('description') + print_optional('filename') + if self.params.get('forceduration') and info_dict.get('duration') is not None: + self.to_stdout(formatSeconds(info_dict['duration'])) + print_mandatory('format') + + if self.params.get('forcejson'): + self.to_stdout(json.dumps(self.sanitize_info(info_dict))) + + def dl(self, name, info, subtitle=False, test=False): + if not info.get('url'): + self.raise_no_formats(info, True) + + if test: + verbose = self.params.get('verbose') + params = { + 'test': True, + 'quiet': self.params.get('quiet') or not verbose, + 'verbose': verbose, + 'noprogress': not verbose, + 'nopart': True, + 'skip_unavailable_fragments': False, + 'keep_fragments': False, + 'overwrites': True, + '_no_ytdl_file': True, + } + else: + params = self.params + fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params) + if not test: + for ph in self._progress_hooks: + fd.add_progress_hook(ph) + urls = '", "'.join( + (f['url'].split(',')[0] + ',' if f['url'].startswith('data:') else f['url']) + for f in info.get('requested_formats', []) or [info]) + self.write_debug('Invoking downloader on "%s"' % urls) + + # Note: Ideally info should be a deep-copied so that hooks cannot modify it. + # But it may contain objects that are not deep-copyable + new_info = self._copy_infodict(info) + if new_info.get('http_headers') is None: + new_info['http_headers'] = self._calc_headers(new_info) + return fd.download(name, new_info, subtitle) + + def existing_file(self, filepaths, *, default_overwrite=True): + existing_files = list(filter(os.path.exists, orderedSet(filepaths))) + if existing_files and not self.params.get('overwrites', default_overwrite): + return existing_files[0] + + for file in existing_files: + self.report_file_delete(file) + os.remove(file) + return None + + def process_info(self, info_dict): + """Process a single resolved IE result. 
(Modified it in-place)""" + + assert info_dict.get('_type', 'video') == 'video' + original_infodict = info_dict + + if 'format' not in info_dict and 'ext' in info_dict: + info_dict['format'] = info_dict['ext'] + + if self._match_entry(info_dict) is not None: + info_dict['__write_download_archive'] = 'ignore' + return + + self.post_extract(info_dict) + self._num_downloads += 1 + + # info_dict['_filename'] needs to be set for backward compatibility + info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True) + temp_filename = self.prepare_filename(info_dict, 'temp') + files_to_move = {} + + # Forced printings + self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict)) + + if self.params.get('simulate'): + info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') + return + + if full_filename is None: + return + if not self._ensure_dir_exists(encodeFilename(full_filename)): + return + if not self._ensure_dir_exists(encodeFilename(temp_filename)): + return + + if self._write_description('video', info_dict, + self.prepare_filename(info_dict, 'description')) is None: + return + + sub_files = self._write_subtitles(info_dict, temp_filename) + if sub_files is None: + return + files_to_move.update(dict(sub_files)) + + thumb_files = self._write_thumbnails( + 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail')) + if thumb_files is None: + return + files_to_move.update(dict(thumb_files)) + + infofn = self.prepare_filename(info_dict, 'infojson') + _infojson_written = self._write_info_json('video', info_dict, infofn) + if _infojson_written: + info_dict['infojson_filename'] = infofn + # For backward compatibility, even though it was a private field + info_dict['__infojson_filename'] = infofn + elif _infojson_written is None: + return + + # Note: Annotations are deprecated + annofn = None + if self.params.get('writeannotations', False): + annofn = self.prepare_filename(info_dict, 'annotation') + if annofn: + if not self._ensure_dir_exists(encodeFilename(annofn)): + return + if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)): + self.to_screen('[info] Video annotations are already present') + elif not info_dict.get('annotations'): + self.report_warning('There are no annotations to write.') + else: + try: + self.to_screen('[info] Writing video annotations to: ' + annofn) + with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: + annofile.write(info_dict['annotations']) + except (KeyError, TypeError): + self.report_warning('There are no annotations to write.') + except (OSError, IOError): + self.report_error('Cannot write annotations file: ' + annofn) + return + + # Write internet shortcut files + def _write_link_file(link_type): + if 'webpage_url' not in info_dict: + self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information') + return False + linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext')) + if not self._ensure_dir_exists(encodeFilename(linkfn)): + return False + if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)): + self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present') + return True + try: + self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}') + with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', + newline='\r\n' if 
link_type == 'url' else '\n') as linkfile: + template_vars = {'url': iri_to_uri(info_dict['webpage_url'])} + if link_type == 'desktop': + template_vars['filename'] = linkfn[:-(len(link_type) + 1)] + linkfile.write(LINK_TEMPLATES[link_type] % template_vars) + except (OSError, IOError): + self.report_error(f'Cannot write internet shortcut {linkfn}') + return False + return True + + write_links = { + 'url': self.params.get('writeurllink'), + 'webloc': self.params.get('writewebloclink'), + 'desktop': self.params.get('writedesktoplink'), + } + if self.params.get('writelink'): + link_type = ('webloc' if sys.platform == 'darwin' + else 'desktop' if sys.platform.startswith('linux') + else 'url') + write_links[link_type] = True + + if any(should_write and not _write_link_file(link_type) + for link_type, should_write in write_links.items()): + return + + def replace_info_dict(new_info): + nonlocal info_dict + if new_info == info_dict: + return + info_dict.clear() + info_dict.update(new_info) + + try: + new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) + replace_info_dict(new_info) + except PostProcessingError as err: + self.report_error('Preprocessing: %s' % str(err)) + return + + if self.params.get('skip_download'): + info_dict['filepath'] = temp_filename + info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) + info_dict['__files_to_move'] = files_to_move + replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)) + info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') + else: + # Download + info_dict.setdefault('__postprocessors', []) + try: + + def existing_video_file(*filepaths): + ext = info_dict.get('ext') + converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext) + file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)), + default_overwrite=False) + if file: + info_dict['ext'] = os.path.splitext(file)[1][1:] + return file + + success = True + if info_dict.get('requested_formats') is not None: + + def compatible_formats(formats): + # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them. 
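Before the closure body that follows, here is a standalone sketch of the container-compatibility rule it implements; the helper name and sample dicts are invented, and only the extension check is modelled (the codec check is still a TODO, as noted above):

    # Formats can be merged without remuxing only when their containers
    # fall in the same family; otherwise the merge target becomes mkv.
    MP4_FAMILY = {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'}

    def exts_compatible(formats):
        exts = {f.get('ext') for f in formats}
        return exts <= MP4_FAMILY or exts <= {'webm'}

    print(exts_compatible([{'ext': 'mp4'}, {'ext': 'm4a'}]))   # True
    print(exts_compatible([{'ext': 'mp4'}, {'ext': 'webm'}]))  # False -> merged into mkv
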
+ video_formats = [format for format in formats if format.get('vcodec') != 'none'] + audio_formats = [format for format in formats if format.get('acodec') != 'none'] + if len(video_formats) > 2 or len(audio_formats) > 2: + return False + + # Check extension + exts = set(format.get('ext') for format in formats) + COMPATIBLE_EXTS = ( + set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')), + set(('webm',)), + ) + for ext_sets in COMPATIBLE_EXTS: + if ext_sets.issuperset(exts): + return True + # TODO: Check acodec/vcodec + return False + + requested_formats = info_dict['requested_formats'] + old_ext = info_dict['ext'] + if self.params.get('merge_output_format') is None: + if not compatible_formats(requested_formats): + info_dict['ext'] = 'mkv' + self.report_warning( + 'Requested formats are incompatible for merge and will be merged into mkv') + if (info_dict['ext'] == 'webm' + and info_dict.get('thumbnails') + # check with type instead of pp_key, __name__, or isinstance + # since we dont want any custom PPs to trigger this + and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): + info_dict['ext'] = 'mkv' + self.report_warning( + 'webm doesn\'t support embedding a thumbnail, mkv will be used') + new_ext = info_dict['ext'] + + def correct_ext(filename, ext=new_ext): + if filename == '-': + return filename + filename_real_ext = os.path.splitext(filename)[1][1:] + filename_wo_ext = ( + os.path.splitext(filename)[0] + if filename_real_ext in (old_ext, new_ext) + else filename) + return '%s.%s' % (filename_wo_ext, ext) + + # Ensure filename always has a correct extension for successful merge + full_filename = correct_ext(full_filename) + temp_filename = correct_ext(temp_filename) + dl_filename = existing_video_file(full_filename, temp_filename) + info_dict['__real_download'] = False + + downloaded = [] + merger = FFmpegMergerPP(self) + + fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-') + if dl_filename is not None: + self.report_file_already_downloaded(dl_filename) + elif fd: + for f in requested_formats if fd != FFmpegFD else []: + f['filepath'] = fname = prepend_extension( + correct_ext(temp_filename, info_dict['ext']), + 'f%s' % f['format_id'], info_dict['ext']) + downloaded.append(fname) + info_dict['url'] = '\n'.join(f['url'] for f in requested_formats) + success, real_download = self.dl(temp_filename, info_dict) + info_dict['__real_download'] = real_download + else: + if self.params.get('allow_unplayable_formats'): + self.report_warning( + 'You have requested merging of multiple formats ' + 'while also allowing unplayable formats to be downloaded. ' + 'The formats won\'t be merged to prevent data corruption.') + elif not merger.available: + self.report_warning( + 'You have requested merging of multiple formats but ffmpeg is not installed. ' + 'The formats won\'t be merged.') + + if temp_filename == '-': + reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params) + else 'but the formats are incompatible for simultaneous download' if merger.available + else 'but ffmpeg is not installed') + self.report_warning( + f'You have requested downloading multiple formats to stdout {reason}. 
' + 'The formats will be streamed one after the other') + fname = temp_filename + for f in requested_formats: + new_info = dict(info_dict) + del new_info['requested_formats'] + new_info.update(f) + if temp_filename != '-': + fname = prepend_extension( + correct_ext(temp_filename, new_info['ext']), + 'f%s' % f['format_id'], new_info['ext']) + if not self._ensure_dir_exists(fname): + return + f['filepath'] = fname + downloaded.append(fname) + partial_success, real_download = self.dl(fname, new_info) + info_dict['__real_download'] = info_dict['__real_download'] or real_download + success = success and partial_success + + if downloaded and merger.available and not self.params.get('allow_unplayable_formats'): + info_dict['__postprocessors'].append(merger) + info_dict['__files_to_merge'] = downloaded + # Even if there were no downloads, it is being merged only now + info_dict['__real_download'] = True + else: + for file in downloaded: + files_to_move[file] = None + else: + # Just a single file + dl_filename = existing_video_file(full_filename, temp_filename) + if dl_filename is None or dl_filename == temp_filename: + # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part. + # So we should try to resume the download + success, real_download = self.dl(temp_filename, info_dict) + info_dict['__real_download'] = real_download + else: + self.report_file_already_downloaded(dl_filename) + + dl_filename = dl_filename or temp_filename + info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) + + except network_exceptions as err: + self.report_error('unable to download video data: %s' % error_to_compat_str(err)) + return + except (OSError, IOError) as err: + raise UnavailableVideoError(err) + except (ContentTooShortError, ) as err: + self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) + return + + if success and full_filename != '-': + + def fixup(): + do_fixup = True + fixup_policy = self.params.get('fixup') + vid = info_dict['id'] + + if fixup_policy in ('ignore', 'never'): + return + elif fixup_policy == 'warn': + do_fixup = False + elif fixup_policy != 'force': + assert fixup_policy in ('detect_or_warn', None) + if not info_dict.get('__real_download'): + do_fixup = False + + def ffmpeg_fixup(cndn, msg, cls): + if not cndn: + return + if not do_fixup: + self.report_warning(f'{vid}: {msg}') + return + pp = cls(self) + if pp.available: + info_dict['__postprocessors'].append(pp) + else: + self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically') + + stretched_ratio = info_dict.get('stretched_ratio') + ffmpeg_fixup( + stretched_ratio not in (1, None), + f'Non-uniform pixel ratio {stretched_ratio}', + FFmpegFixupStretchedPP) + + ffmpeg_fixup( + (info_dict.get('requested_formats') is None + and info_dict.get('container') == 'm4a_dash' + and info_dict.get('ext') == 'm4a'), + 'writing DASH m4a. 
Only some players support this container', + FFmpegFixupM4aPP) + + downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None + downloader = downloader.__name__ if downloader else None + + if info_dict.get('requested_formats') is None: # Not necessary if doing merger + ffmpeg_fixup(downloader == 'HlsFD', + 'Possible MPEG-TS in MP4 container or malformed AAC timestamps', + FFmpegFixupM3u8PP) + ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD', + 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP) + + ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP) + ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP) + + fixup() + try: + replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move)) + except PostProcessingError as err: + self.report_error('Postprocessing: %s' % str(err)) + return + try: + for ph in self._post_hooks: + ph(info_dict['filepath']) + except Exception as err: + self.report_error('post hooks: %s' % str(err)) + return + info_dict['__write_download_archive'] = True + + if self.params.get('force_write_download_archive'): + info_dict['__write_download_archive'] = True + + # Make sure the info_dict was modified in-place + assert info_dict is original_infodict + + max_downloads = self.params.get('max_downloads') + if max_downloads is not None and self._num_downloads >= int(max_downloads): + raise MaxDownloadsReached() + + def __download_wrapper(self, func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + res = func(*args, **kwargs) + except UnavailableVideoError as e: + self.report_error(e) + except MaxDownloadsReached as e: + self.to_screen(f'[info] {e}') + raise + except DownloadCancelled as e: + self.to_screen(f'[info] {e}') + if not self.params.get('break_per_url'): + raise + else: + if self.params.get('dump_single_json', False): + self.post_extract(res) + self.to_stdout(json.dumps(self.sanitize_info(res))) + return wrapper + + def download(self, url_list): + """Download a given list of URLs.""" + url_list = variadic(url_list) # Passing a single URL is a common mistake + outtmpl = self.outtmpl_dict['default'] + if (len(url_list) > 1 + and outtmpl != '-' + and '%' not in outtmpl + and self.params.get('max_downloads') != 1): + raise SameFileError(outtmpl) + + for url in url_list: + self.__download_wrapper(self.extract_info)( + url, force_generic_extractor=self.params.get('force_generic_extractor', False)) + + return self._download_retcode + + def download_with_info_file(self, info_filename): + with contextlib.closing(fileinput.FileInput( + [info_filename], mode='r', + openhook=fileinput.hook_encoded('utf-8'))) as f: + # FileInput doesn't have a read method, we can't call json.load + info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True)) + try: + self.__download_wrapper(self.process_ie_result)(info, download=True) + except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e: + if not isinstance(e, EntryNotInPlaylist): + self.to_stderr('\r') + webpage_url = info.get('webpage_url') + if webpage_url is not None: + self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}') + return self.download([webpage_url]) + else: + raise + return self._download_retcode + + @staticmethod + def sanitize_info(info_dict, remove_private_keys=False): + ''' Sanitize the infodict for converting to json ''' + if info_dict is None: 
+ return info_dict + info_dict.setdefault('epoch', int(time.time())) + info_dict.setdefault('_type', 'video') + remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict + keep_keys = ['_type'] # Always keep this to facilitate load-info-json + if remove_private_keys: + remove_keys |= { + 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries', + 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber', + } + reject = lambda k, v: k not in keep_keys and ( + k.startswith('_') or k in remove_keys or v is None) + else: + reject = lambda k, v: k in remove_keys + + def filter_fn(obj): + if isinstance(obj, dict): + return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)} + elif isinstance(obj, (list, tuple, set, LazyList)): + return list(map(filter_fn, obj)) + elif obj is None or isinstance(obj, (str, int, float, bool)): + return obj + else: + return repr(obj) + + return filter_fn(info_dict) + + @staticmethod + def filter_requested_info(info_dict, actually_filter=True): + ''' Alias of sanitize_info for backward compatibility ''' + return YoutubeDL.sanitize_info(info_dict, actually_filter) + + @staticmethod + def post_extract(info_dict): + def actual_post_extract(info_dict): + if info_dict.get('_type') in ('playlist', 'multi_video'): + for video_dict in info_dict.get('entries', {}): + actual_post_extract(video_dict or {}) + return + + post_extractor = info_dict.get('__post_extractor') or (lambda: {}) + extra = post_extractor().items() + info_dict.update(extra) + info_dict.pop('__post_extractor', None) + + original_infodict = info_dict.get('__original_infodict') or {} + original_infodict.update(extra) + original_infodict.pop('__post_extractor', None) + + actual_post_extract(info_dict or {}) + + def run_pp(self, pp, infodict): + files_to_delete = [] + if '__files_to_move' not in infodict: + infodict['__files_to_move'] = {} + try: + files_to_delete, infodict = pp.run(infodict) + except PostProcessingError as e: + # Must be True and not 'only_download' + if self.params.get('ignoreerrors') is True: + self.report_error(e) + return infodict + raise + + if not files_to_delete: + return infodict + if self.params.get('keepvideo', False): + for f in files_to_delete: + infodict['__files_to_move'].setdefault(f, '') + else: + for old_filename in set(files_to_delete): + self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename) + try: + os.remove(encodeFilename(old_filename)) + except (IOError, OSError): + self.report_warning('Unable to remove downloaded original file') + if old_filename in infodict['__files_to_move']: + del infodict['__files_to_move'][old_filename] + return infodict + + def run_all_pps(self, key, info, *, additional_pps=None): + self._forceprint(key, info) + for pp in (additional_pps or []) + self._pps[key]: + info = self.run_pp(pp, info) + return info + + def pre_process(self, ie_info, key='pre_process', files_to_move=None): + info = dict(ie_info) + info['__files_to_move'] = files_to_move or {} + info = self.run_all_pps(key, info) + return info, info.pop('__files_to_move', None) + + def post_process(self, filename, info, files_to_move=None): + """Run all the postprocessors on the given file.""" + info['filepath'] = filename + info['__files_to_move'] = files_to_move or {} + info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors')) + info = self.run_pp(MoveFilesAfterDownloadPP(self), info) + del info['__files_to_move'] + return 
self.run_all_pps('after_move', info) + + def _make_archive_id(self, info_dict): + video_id = info_dict.get('id') + if not video_id: + return + # Future-proof against any change in case + # and backwards compatibility with prior versions + extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist + if extractor is None: + url = str_or_none(info_dict.get('url')) + if not url: + return + # Try to find matching extractor for the URL and take its ie_key + for ie_key, ie in self._ies.items(): + if ie.suitable(url): + extractor = ie_key + break + else: + return + return '%s %s' % (extractor.lower(), video_id) + + def in_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return False + + vid_id = self._make_archive_id(info_dict) + if not vid_id: + return False # Incomplete video information + + return vid_id in self.archive + + def record_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return + vid_id = self._make_archive_id(info_dict) + assert vid_id + self.write_debug(f'Adding to archive: {vid_id}') + with locked_file(fn, 'a', encoding='utf-8') as archive_file: + archive_file.write(vid_id + '\n') + self.archive.add(vid_id) + + @staticmethod + def format_resolution(format, default='unknown'): + if format.get('vcodec') == 'none' and format.get('acodec') != 'none': + return 'audio only' + if format.get('resolution') is not None: + return format['resolution'] + if format.get('width') and format.get('height'): + return '%dx%d' % (format['width'], format['height']) + elif format.get('height'): + return '%sp' % format['height'] + elif format.get('width'): + return '%dx?' % format['width'] + return default + + def _list_format_headers(self, *headers): + if self.params.get('listformats_table', True) is not False: + return [self._format_screen(header, self.Styles.HEADERS) for header in headers] + return headers + + def _format_note(self, fdict): + res = '' + if fdict.get('ext') in ['f4f', 'f4m']: + res += '(unsupported)' + if fdict.get('language'): + if res: + res += ' ' + res += '[%s]' % fdict['language'] + if fdict.get('format_note') is not None: + if res: + res += ' ' + res += fdict['format_note'] + if fdict.get('tbr') is not None: + if res: + res += ', ' + res += '%4dk' % fdict['tbr'] + if fdict.get('container') is not None: + if res: + res += ', ' + res += '%s container' % fdict['container'] + if (fdict.get('vcodec') is not None + and fdict.get('vcodec') != 'none'): + if res: + res += ', ' + res += fdict['vcodec'] + if fdict.get('vbr') is not None: + res += '@' + elif fdict.get('vbr') is not None and fdict.get('abr') is not None: + res += 'video@' + if fdict.get('vbr') is not None: + res += '%4dk' % fdict['vbr'] + if fdict.get('fps') is not None: + if res: + res += ', ' + res += '%sfps' % fdict['fps'] + if fdict.get('acodec') is not None: + if res: + res += ', ' + if fdict['acodec'] == 'none': + res += 'video only' + else: + res += '%-5s' % fdict['acodec'] + elif fdict.get('abr') is not None: + if res: + res += ', ' + res += 'audio' + if fdict.get('abr') is not None: + res += '@%3dk' % fdict['abr'] + if fdict.get('asr') is not None: + res += ' (%5dHz)' % fdict['asr'] + if fdict.get('filesize') is not None: + if res: + res += ', ' + res += format_bytes(fdict['filesize']) + elif fdict.get('filesize_approx') is not None: + if res: + res += ', ' + res += '~' + format_bytes(fdict['filesize_approx']) + return res + + def render_formats_table(self, info_dict): + if not 
info_dict.get('formats') and not info_dict.get('url'): + return None + + formats = info_dict.get('formats', [info_dict]) + if not self.params.get('listformats_table', True) is not False: + table = [ + [ + format_field(f, 'format_id'), + format_field(f, 'ext'), + self.format_resolution(f), + self._format_note(f) + ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] + return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1) + + delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True) + table = [ + [ + self._format_screen(format_field(f, 'format_id'), self.Styles.ID), + format_field(f, 'ext'), + format_field(f, func=self.format_resolution, ignore=('audio only', 'images')), + format_field(f, 'fps', '\t%d'), + format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), + delim, + format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes), + format_field(f, 'tbr', '\t%dk'), + shorten_protocol_name(f.get('protocol', '')), + delim, + format_field(f, 'vcodec', default='unknown').replace( + 'none', 'images' if f.get('acodec') == 'none' + else self._format_screen('audio only', self.Styles.SUPPRESS)), + format_field(f, 'vbr', '\t%dk'), + format_field(f, 'acodec', default='unknown').replace( + 'none', '' if f.get('vcodec') == 'none' + else self._format_screen('video only', self.Styles.SUPPRESS)), + format_field(f, 'abr', '\t%dk'), + format_field(f, 'asr', '\t%dHz'), + join_nonempty( + self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None, + format_field(f, 'language', '[%s]'), + join_nonempty(format_field(f, 'format_note'), + format_field(f, 'container', ignore=(None, f.get('ext'))), + delim=', '), + delim=' '), + ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] + header_line = self._list_format_headers( + 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO', + delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO') + + return render_table( + header_line, table, hide_empty=True, + delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)) + + def render_thumbnails_table(self, info_dict): + thumbnails = list(info_dict.get('thumbnails') or []) + if not thumbnails: + return None + return render_table( + self._list_format_headers('ID', 'Width', 'Height', 'URL'), + [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]) + + def render_subtitles_table(self, video_id, subtitles): + def _row(lang, formats): + exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats))) + if len(set(names)) == 1: + names = [] if names[0] == 'unknown' else names[:1] + return [lang, ', '.join(names), ', '.join(exts)] + + if not subtitles: + return None + return render_table( + self._list_format_headers('Language', 'Name', 'Formats'), + [_row(lang, formats) for lang, formats in subtitles.items()], + hide_empty=True) + + def __list_table(self, video_id, name, func, *args): + table = func(*args) + if not table: + self.to_screen(f'{video_id} has no {name}') + return + self.to_screen(f'[info] Available {name} for {video_id}:') + self.to_stdout(table) + + def list_formats(self, info_dict): + self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict) + + def list_thumbnails(self, info_dict): + self.__list_table(info_dict['id'], 'thumbnails', 
self.render_thumbnails_table, info_dict) + + def list_subtitles(self, video_id, subtitles, name='subtitles'): + self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles) + + def urlopen(self, req): + """ Start an HTTP download """ + if isinstance(req, compat_basestring): + req = sanitized_Request(req) + return self._opener.open(req, timeout=self._socket_timeout) + + def print_debug_header(self): + if not self.params.get('verbose'): + return + + def get_encoding(stream): + ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__) + if not supports_terminal_sequences(stream): + from .compat import WINDOWS_VT_MODE + ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)' + return ret + + encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % ( + locale.getpreferredencoding(), + sys.getfilesystemencoding(), + get_encoding(self._screen_file), get_encoding(self._err_file), + self.get_encoding()) + + logger = self.params.get('logger') + if logger: + write_debug = lambda msg: logger.debug(f'[debug] {msg}') + write_debug(encoding_str) + else: + write_string(f'[debug] {encoding_str}\n', encoding=None) + write_debug = lambda msg: self._write_string(f'[debug] {msg}\n') + + source = detect_variant() + write_debug(join_nonempty( + 'yt-dlp version', __version__, + f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '', + '' if source == 'unknown' else f'({source})', + delim=' ')) + if not _LAZY_LOADER: + if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): + write_debug('Lazy loading extractors is forcibly disabled') + else: + write_debug('Lazy loading extractors is disabled') + if plugin_extractors or plugin_postprocessors: + write_debug('Plugins: %s' % [ + '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}') + for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())]) + if self.params.get('compat_opts'): + write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts'))) + + if source == 'source': + try: + sp = Popen( + ['git', 'rev-parse', '--short', 'HEAD'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + cwd=os.path.dirname(os.path.abspath(__file__))) + out, err = sp.communicate_or_kill() + out = out.decode().strip() + if re.match('[0-9a-f]+', out): + write_debug('Git HEAD: %s' % out) + except Exception: + try: + sys.exc_clear() + except Exception: + pass + + def python_implementation(): + impl_name = platform.python_implementation() + if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'): + return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3] + return impl_name + + write_debug('Python version %s (%s %s) - %s' % ( + platform.python_version(), + python_implementation(), + platform.architecture()[0], + platform_name())) + + exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self) + ffmpeg_features = {key for key, val in ffmpeg_features.items() if val} + if ffmpeg_features: + exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features) + + exe_versions['rtmpdump'] = rtmpdump_version() + exe_versions['phantomjs'] = PhantomJSwrapper._version() + exe_str = ', '.join( + f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v + ) or 'none' + write_debug('exe versions: %s' % exe_str) + + from .downloader.websocket import has_websockets + from .postprocessor.embedthumbnail import has_mutagen + from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE + + lib_str = join_nonempty( + compat_pycrypto_AES and 
compat_pycrypto_AES.__name__.split('.')[0], + SECRETSTORAGE_AVAILABLE and 'secretstorage', + has_mutagen and 'mutagen', + SQLITE_AVAILABLE and 'sqlite', + has_websockets and 'websockets', + delim=', ') or 'none' + write_debug('Optional libraries: %s' % lib_str) + + proxy_map = {} + for handler in self._opener.handlers: + if hasattr(handler, 'proxies'): + proxy_map.update(handler.proxies) + write_debug(f'Proxy map: {proxy_map}') + + # Not implemented + if False and self.params.get('call_home'): + ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8') + write_debug('Public IP address: %s' % ipaddr) + latest_version = self.urlopen( + 'https://yt-dl.org/latest/version').read().decode('utf-8') + if version_tuple(latest_version) > version_tuple(__version__): + self.report_warning( + 'You are using an outdated version (newest version: %s)! ' + 'See https://yt-dl.org/update if you need help updating.' % + latest_version) + + def _setup_opener(self): + timeout_val = self.params.get('socket_timeout') + self._socket_timeout = 20 if timeout_val is None else float(timeout_val) + + opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser') + opts_cookiefile = self.params.get('cookiefile') + opts_proxy = self.params.get('proxy') + + self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self) + + cookie_processor = YoutubeDLCookieProcessor(self.cookiejar) + if opts_proxy is not None: + if opts_proxy == '': + proxies = {} + else: + proxies = {'http': opts_proxy, 'https': opts_proxy} + else: + proxies = compat_urllib_request.getproxies() + # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805) + if 'http' in proxies and 'https' not in proxies: + proxies['https'] = proxies['http'] + proxy_handler = PerRequestProxyHandler(proxies) + + debuglevel = 1 if self.params.get('debug_printtraffic') else 0 + https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) + ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) + redirect_handler = YoutubeDLRedirectHandler() + data_handler = compat_urllib_request_DataHandler() + + # When passing our own FileHandler instance, build_opener won't add the + # default FileHandler and allows us to disable the file protocol, which + # can be used for malicious purposes (see + # https://github.com/ytdl-org/youtube-dl/issues/8227) + file_handler = compat_urllib_request.FileHandler() + + def file_open(*args, **kwargs): + raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons') + file_handler.file_open = file_open + + opener = compat_urllib_request.build_opener( + proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler) + + # Delete the default user-agent header, which would otherwise apply in + # cases where our custom HTTP handler doesn't come into play + # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details) + opener.addheaders = [] + self._opener = opener + + def encode(self, s): + if isinstance(s, bytes): + return s # Already encoded + + try: + return s.encode(self.get_encoding()) + except UnicodeEncodeError as err: + err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.' 
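The get_encoding() defined just below resolves the output encoding with a simple precedence: an explicit 'encoding' parameter wins, otherwise the locale default applies. A minimal standalone sketch of that lookup (the params dict is invented, and locale.getpreferredencoding() only approximates preferredencoding() from yt_dlp.utils):

    import locale

    def get_encoding(params):
        # explicit 'encoding' option first, then the locale default
        return params.get('encoding') or locale.getpreferredencoding()

    print(get_encoding({}))                     # e.g. 'UTF-8'
    print(get_encoding({'encoding': 'ascii'}))  # 'ascii'
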
+ raise + + def get_encoding(self): + encoding = self.params.get('encoding') + if encoding is None: + encoding = preferredencoding() + return encoding + + def _write_info_json(self, label, ie_result, infofn, overwrite=None): + ''' Write infojson and returns True = written, False = skip, None = error ''' + if overwrite is None: + overwrite = self.params.get('overwrites', True) + if not self.params.get('writeinfojson'): + return False + elif not infofn: + self.write_debug(f'Skipping writing {label} infojson') + return False + elif not self._ensure_dir_exists(infofn): + return None + elif not overwrite and os.path.exists(infofn): + self.to_screen(f'[info] {label.title()} metadata is already present') + else: + self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}') + try: + write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn) + except (OSError, IOError): + self.report_error(f'Cannot write {label} metadata to JSON file {infofn}') + return None + return True + + def _write_description(self, label, ie_result, descfn): + ''' Write description and returns True = written, False = skip, None = error ''' + if not self.params.get('writedescription'): + return False + elif not descfn: + self.write_debug(f'Skipping writing {label} description') + return False + elif not self._ensure_dir_exists(descfn): + return None + elif not self.params.get('overwrites', True) and os.path.exists(descfn): + self.to_screen(f'[info] {label.title()} description is already present') + elif ie_result.get('description') is None: + self.report_warning(f'There\'s no {label} description to write') + return False + else: + try: + self.to_screen(f'[info] Writing {label} description to: {descfn}') + with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: + descfile.write(ie_result['description']) + except (OSError, IOError): + self.report_error(f'Cannot write {label} description file {descfn}') + return None + return True + + def _write_subtitles(self, info_dict, filename): + ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' + ret = [] + subtitles = info_dict.get('requested_subtitles') + if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): + # subtitles download errors are already managed as troubles in relevant IE + # that way it will silently go on when used with unsupporting IE + return ret + + sub_filename_base = self.prepare_filename(info_dict, 'subtitle') + if not sub_filename_base: + self.to_screen('[info] Skipping writing video subtitles') + return ret + for sub_lang, sub_info in subtitles.items(): + sub_format = sub_info['ext'] + sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) + sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext')) + existing_sub = self.existing_file((sub_filename_final, sub_filename)) + if existing_sub: + self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present') + sub_info['filepath'] = existing_sub + ret.append((existing_sub, sub_filename_final)) + continue + + self.to_screen(f'[info] Writing video subtitles to: {sub_filename}') + if sub_info.get('data') is not None: + try: + # Use newline='' to prevent conversion of newline characters + # See https://github.com/ytdl-org/youtube-dl/issues/10268 + with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile: + subfile.write(sub_info['data']) + 
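The newline='' in the open() call above is what keeps subtitle data byte-identical: it disables the text layer's newline translation, which on Windows would otherwise rewrite every '\n' as '\r\n' and corrupt line-ending-sensitive formats. A standalone sketch (file name and payload invented):

    import io
    data = 'WEBVTT\n\n00:00:00.000 --> 00:00:01.000\nhello\n'
    # newline='' writes the string as-is on every platform; without it,
    # Windows would emit '\r\n' for each '\n' in the cue block.
    with io.open('sample.en.vtt', 'w', encoding='utf-8', newline='') as fh:
        fh.write(data)
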
sub_info['filepath'] = sub_filename + ret.append((sub_filename, sub_filename_final)) + continue + except (OSError, IOError): + self.report_error(f'Cannot write video subtitles file {sub_filename}') + return None + + try: + sub_copy = sub_info.copy() + sub_copy.setdefault('http_headers', info_dict.get('http_headers')) + self.dl(sub_filename, sub_copy, subtitle=True) + sub_info['filepath'] = sub_filename + ret.append((sub_filename, sub_filename_final)) + except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: + if self.params.get('ignoreerrors') is not True: # False or 'only_download' + raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err) + self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}') + return ret + + def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None): + ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) ''' + write_all = self.params.get('write_all_thumbnails', False) + thumbnails, ret = [], [] + if write_all or self.params.get('writethumbnail', False): + thumbnails = info_dict.get('thumbnails') or [] + multiple = write_all and len(thumbnails) > 1 + + if thumb_filename_base is None: + thumb_filename_base = filename + if thumbnails and not thumb_filename_base: + self.write_debug(f'Skipping writing {label} thumbnail') + return ret + + for idx, t in list(enumerate(thumbnails))[::-1]: + thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg') + thumb_display_id = f'{label} thumbnail {t["id"]}' + thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext')) + thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext')) + + existing_thumb = self.existing_file((thumb_filename_final, thumb_filename)) + if existing_thumb: + self.to_screen('[info] %s is already present' % ( + thumb_display_id if multiple else f'{label} thumbnail').capitalize()) + t['filepath'] = existing_thumb + ret.append((existing_thumb, thumb_filename_final)) + else: + self.to_screen(f'[info] Downloading {thumb_display_id} ...') + try: + uf = self.urlopen(t['url']) + self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}') + with open(encodeFilename(thumb_filename), 'wb') as thumbf: + shutil.copyfileobj(uf, thumbf) + ret.append((thumb_filename, thumb_filename_final)) + t['filepath'] = thumb_filename + except network_exceptions as err: + thumbnails.pop(idx) + self.report_warning(f'Unable to download {thumb_display_id}: {err}') + if ret and not write_all: + break + return ret diff --git a/plugins/youtube_download/yt_dlp/__init__.py b/plugins/youtube_download/yt_dlp/__init__.py new file mode 100644 index 0000000..b93f47e --- /dev/null +++ b/plugins/youtube_download/yt_dlp/__init__.py @@ -0,0 +1,871 @@ +#!/usr/bin/env python3 +# coding: utf-8 + +f'You are using an unsupported version of Python. 
Only Python versions 3.6 and above are supported by yt-dlp' # noqa: F541 + +__license__ = 'Public Domain' + +import codecs +import io +import itertools +import os +import random +import re +import sys + +from .options import ( + parseOpts, +) +from .compat import ( + compat_getpass, + compat_os_name, + compat_shlex_quote, + workaround_optparse_bug9161, +) +from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS +from .utils import ( + DateRange, + decodeOption, + DownloadCancelled, + DownloadError, + error_to_compat_str, + expand_path, + GeoUtils, + float_or_none, + int_or_none, + match_filter_func, + parse_duration, + preferredencoding, + read_batch_urls, + render_table, + SameFileError, + setproctitle, + std_headers, + write_string, +) +from .update import run_update +from .downloader import ( + FileDownloader, +) +from .extractor import gen_extractors, list_extractors +from .extractor.common import InfoExtractor +from .extractor.adobepass import MSO_INFO +from .postprocessor import ( + FFmpegExtractAudioPP, + FFmpegSubtitlesConvertorPP, + FFmpegThumbnailsConvertorPP, + FFmpegVideoConvertorPP, + FFmpegVideoRemuxerPP, + MetadataFromFieldPP, + MetadataParserPP, +) +from .YoutubeDL import YoutubeDL + + +def _real_main(argv=None): + # Compatibility fixes for Windows + if sys.platform == 'win32': + # https://github.com/ytdl-org/youtube-dl/issues/820 + codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) + + workaround_optparse_bug9161() + + setproctitle('yt-dlp') + + parser, opts, args = parseOpts(argv) + warnings, deprecation_warnings = [], [] + + # Set user agent + if opts.user_agent is not None: + std_headers['User-Agent'] = opts.user_agent + + # Set referer + if opts.referer is not None: + std_headers['Referer'] = opts.referer + + # Custom HTTP headers + std_headers.update(opts.headers) + + # Dump user agent + if opts.dump_user_agent: + write_string(std_headers['User-Agent'] + '\n', out=sys.stdout) + sys.exit(0) + + # Batch file verification + batch_urls = [] + if opts.batchfile is not None: + try: + if opts.batchfile == '-': + write_string('Reading URLs from stdin - EOF (%s) to end:\n' % ( + 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D')) + batchfd = sys.stdin + else: + batchfd = io.open( + expand_path(opts.batchfile), + 'r', encoding='utf-8', errors='ignore') + batch_urls = read_batch_urls(batchfd) + if opts.verbose: + write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') + except IOError: + sys.exit('ERROR: batch file %s could not be read' % opts.batchfile) + all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already stripped in read_batch_urls + _enc = preferredencoding() + all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] + + if opts.list_extractors: + for ie in list_extractors(opts.age_limit): + write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n', out=sys.stdout) + matchedUrls = [url for url in all_urls if ie.suitable(url)] + for mu in matchedUrls: + write_string(' ' + mu + '\n', out=sys.stdout) + sys.exit(0) + if opts.list_extractor_descriptions: + for ie in list_extractors(opts.age_limit): + if not ie.working(): + continue + desc = getattr(ie, 'IE_DESC', ie.IE_NAME) + if desc is False: + continue + if getattr(ie, 'SEARCH_KEY', None) is not None: + _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow') + _COUNTS = ('', '5', '10', 'all') +
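+                # For example, with an extractor whose SEARCH_KEY is 'ytsearch', the line
+                # below renders roughly: ; "ytsearch:" prefix (Example: "ytsearch5:cute kittens")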
desc += f'; "{ie.SEARCH_KEY}:" prefix (Example: "{ie.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(_SEARCHES)}")' + write_string(desc + '\n', out=sys.stdout) + sys.exit(0) + if opts.ap_list_mso: + table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()] + write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout) + sys.exit(0) + + # Conflicting, missing and erroneous options + if opts.format == 'best': + warnings.append('.\n '.join(( + '"-f best" selects the best pre-merged format which is often not the best option', + 'To let yt-dlp download and merge the best available formats, simply do not pass any format selection', + 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning'))) + if opts.exec_cmd.get('before_dl') and opts.exec_before_dl_cmd: + parser.error('using "--exec-before-download" conflicts with "--exec before_dl:"') + if opts.usenetrc and (opts.username is not None or opts.password is not None): + parser.error('using .netrc conflicts with giving username/password') + if opts.password is not None and opts.username is None: + parser.error('account username missing\n') + if opts.ap_password is not None and opts.ap_username is None: + parser.error('TV Provider account username missing\n') + if opts.autonumber_size is not None: + if opts.autonumber_size <= 0: + parser.error('auto number size must be positive') + if opts.autonumber_start is not None: + if opts.autonumber_start < 0: + parser.error('auto number start must be positive or 0') + if opts.username is not None and opts.password is None: + opts.password = compat_getpass('Type account password and press [Return]: ') + if opts.ap_username is not None and opts.ap_password is None: + opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ') + if opts.ratelimit is not None: + numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) + if numeric_limit is None: + parser.error('invalid rate limit specified') + opts.ratelimit = numeric_limit + if opts.throttledratelimit is not None: + numeric_limit = FileDownloader.parse_bytes(opts.throttledratelimit) + if numeric_limit is None: + parser.error('invalid rate limit specified') + opts.throttledratelimit = numeric_limit + if opts.min_filesize is not None: + numeric_limit = FileDownloader.parse_bytes(opts.min_filesize) + if numeric_limit is None: + parser.error('invalid min_filesize specified') + opts.min_filesize = numeric_limit + if opts.max_filesize is not None: + numeric_limit = FileDownloader.parse_bytes(opts.max_filesize) + if numeric_limit is None: + parser.error('invalid max_filesize specified') + opts.max_filesize = numeric_limit + if opts.sleep_interval is not None: + if opts.sleep_interval < 0: + parser.error('sleep interval must be positive or 0') + if opts.max_sleep_interval is not None: + if opts.max_sleep_interval < 0: + parser.error('max sleep interval must be positive or 0') + if opts.sleep_interval is None: + parser.error('min sleep interval must be specified, use --min-sleep-interval') + if opts.max_sleep_interval < opts.sleep_interval: + parser.error('max sleep interval must be greater than or equal to min sleep interval') + else: + opts.max_sleep_interval = opts.sleep_interval + if opts.sleep_interval_subtitles is not None: + if opts.sleep_interval_subtitles < 0: + parser.error('subtitles sleep interval must be positive or 0') + if opts.sleep_interval_requests is not None: + if 
opts.sleep_interval_requests < 0: + parser.error('requests sleep interval must be positive or 0') + if opts.ap_mso and opts.ap_mso not in MSO_INFO: + parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers') + if opts.overwrites: # --yes-overwrites implies --no-continue + opts.continue_dl = False + if opts.concurrent_fragment_downloads <= 0: + parser.error('Concurrent fragments must be positive') + if opts.wait_for_video is not None: + min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None]) + if min_wait is None or (max_wait is None and '-' in opts.wait_for_video): + parser.error('Invalid time range to wait') + elif max_wait is not None and max_wait < min_wait: + parser.error('Minimum time range to wait must not be longer than the maximum') + opts.wait_for_video = (min_wait, max_wait) + + def parse_retries(retries, name=''): + if retries in ('inf', 'infinite'): + parsed_retries = float('inf') + else: + try: + parsed_retries = int(retries) + except (TypeError, ValueError): + parser.error('invalid %sretry count specified' % name) + return parsed_retries + if opts.retries is not None: + opts.retries = parse_retries(opts.retries) + if opts.file_access_retries is not None: + opts.file_access_retries = parse_retries(opts.file_access_retries, 'file access ') + if opts.fragment_retries is not None: + opts.fragment_retries = parse_retries(opts.fragment_retries, 'fragment ') + if opts.extractor_retries is not None: + opts.extractor_retries = parse_retries(opts.extractor_retries, 'extractor ') + if opts.buffersize is not None: + numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) + if numeric_buffersize is None: + parser.error('invalid buffer size specified') + opts.buffersize = numeric_buffersize + if opts.http_chunk_size is not None: + numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size) + if not numeric_chunksize: + parser.error('invalid http chunk size specified') + opts.http_chunk_size = numeric_chunksize + if opts.playliststart <= 0: + raise parser.error('Playlist start must be positive') + if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart: + raise parser.error('Playlist end must be greater than playlist start') + if opts.extractaudio: + opts.audioformat = opts.audioformat.lower() + if opts.audioformat not in ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS): + parser.error('invalid audio format specified') + if opts.audioquality: + opts.audioquality = opts.audioquality.strip('k').strip('K') + audioquality = int_or_none(float_or_none(opts.audioquality)) # int_or_none prevents inf, nan + if audioquality is None or audioquality < 0: + parser.error('invalid audio quality specified') + if opts.recodevideo is not None: + opts.recodevideo = opts.recodevideo.replace(' ', '') + if not re.match(FFmpegVideoConvertorPP.FORMAT_RE, opts.recodevideo): + parser.error('invalid video remux format specified') + if opts.remuxvideo is not None: + opts.remuxvideo = opts.remuxvideo.replace(' ', '') + if not re.match(FFmpegVideoRemuxerPP.FORMAT_RE, opts.remuxvideo): + parser.error('invalid video remux format specified') + if opts.convertsubtitles is not None: + if opts.convertsubtitles not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS: + parser.error('invalid subtitle format specified') + if opts.convertthumbnails is not None: + if opts.convertthumbnails not in FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS: + parser.error('invalid thumbnail format specified') + if opts.cookiesfrombrowser 
is not None: + mobj = re.match(r'(?P<name>[^+:]+)(\s*\+\s*(?P<keyring>[^:]+))?(\s*:(?P<profile>.+))?', opts.cookiesfrombrowser) + if mobj is None: + parser.error(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}') + browser_name, keyring, profile = mobj.group('name', 'keyring', 'profile') + browser_name = browser_name.lower() + if browser_name not in SUPPORTED_BROWSERS: + parser.error(f'unsupported browser specified for cookies: "{browser_name}". ' + f'Supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}') + if keyring is not None: + keyring = keyring.upper() + if keyring not in SUPPORTED_KEYRINGS: + parser.error(f'unsupported keyring specified for cookies: "{keyring}". ' + f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}') + opts.cookiesfrombrowser = (browser_name, profile, keyring) + geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country + if geo_bypass_code is not None: + try: + GeoUtils.random_ipv4(geo_bypass_code) + except Exception: + parser.error('unsupported geo-bypass country or ip-block') + + if opts.date is not None: + date = DateRange.day(opts.date) + else: + date = DateRange(opts.dateafter, opts.datebefore) + + compat_opts = opts.compat_opts + + def report_conflict(arg1, arg2): + warnings.append(f'{arg2} is ignored since {arg1} was given') + + def _unused_compat_opt(name): + if name not in compat_opts: + return False + compat_opts.discard(name) + compat_opts.update(['*%s' % name]) + return True + + def set_default_compat(compat_name, opt_name, default=True, remove_compat=True): + attr = getattr(opts, opt_name) + if compat_name in compat_opts: + if attr is None: + setattr(opts, opt_name, not default) + return True + else: + if remove_compat: + _unused_compat_opt(compat_name) + return False + elif attr is None: + setattr(opts, opt_name, default) + return None + + set_default_compat('abort-on-error', 'ignoreerrors', 'only_download') + set_default_compat('no-playlist-metafiles', 'allow_playlist_files') + set_default_compat('no-clean-infojson', 'clean_infojson') + if 'no-attach-info-json' in compat_opts: + if opts.embed_infojson: + _unused_compat_opt('no-attach-info-json') + else: + opts.embed_infojson = False + if 'format-sort' in compat_opts: + opts.format_sort.extend(InfoExtractor.FormatSort.ytdl_default) + _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) + _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False) + if _video_multistreams_set is False and _audio_multistreams_set is False: + _unused_compat_opt('multistreams') + outtmpl_default = opts.outtmpl.get('default') + if outtmpl_default == '': + outtmpl_default, opts.skip_download = None, True + del opts.outtmpl['default'] + if opts.useid: + if outtmpl_default is None: + outtmpl_default = opts.outtmpl['default'] = '%(id)s.%(ext)s' + else: + report_conflict('--output', '--id') + if 'filename' in compat_opts: + if outtmpl_default is None: + outtmpl_default = opts.outtmpl['default'] = '%(title)s-%(id)s.%(ext)s' + else: + _unused_compat_opt('filename') + + def validate_outtmpl(tmpl, msg): + err = YoutubeDL.validate_outtmpl(tmpl) + if err: + parser.error('invalid %s %r: %s' % (msg, tmpl, error_to_compat_str(err))) + + for k, tmpl in opts.outtmpl.items(): + validate_outtmpl(tmpl, f'{k} output template') + for type_, tmpl_list in opts.forceprint.items(): + for tmpl in tmpl_list: + validate_outtmpl(tmpl, f'{type_} print template') + for type_, tmpl_list in
opts.print_to_file.items(): + for tmpl, file in tmpl_list: + validate_outtmpl(tmpl, f'{type_} print-to-file template') + validate_outtmpl(file, f'{type_} print-to-file filename') + validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title') + for k, tmpl in opts.progress_template.items(): + k = f'{k[:-6]} console title' if '-title' in k else f'{k} progress' + validate_outtmpl(tmpl, f'{k} template') + + if opts.extractaudio and not opts.keepvideo and opts.format is None: + opts.format = 'bestaudio/best' + + if outtmpl_default is not None and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio: + parser.error('Cannot download a video and extract audio into the same' + ' file! Use "{0}.%(ext)s" instead of "{0}" as the output' + ' template'.format(outtmpl_default)) + + for f in opts.format_sort: + if re.match(InfoExtractor.FormatSort.regex, f) is None: + parser.error('invalid format sort string "%s" specified' % f) + + def metadataparser_actions(f): + if isinstance(f, str): + cmd = '--parse-metadata %s' % compat_shlex_quote(f) + try: + actions = [MetadataFromFieldPP.to_action(f)] + except Exception as err: + parser.error(f'{cmd} is invalid; {err}') + else: + cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f)) + actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(',')) + + for action in actions: + try: + MetadataParserPP.validate_action(*action) + except Exception as err: + parser.error(f'{cmd} is invalid; {err}') + yield action + + if opts.parse_metadata is None: + opts.parse_metadata = [] + if opts.metafromtitle is not None: + opts.parse_metadata.append('title:%s' % opts.metafromtitle) + opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, opts.parse_metadata))) + + any_getting = (any(opts.forceprint.values()) or opts.dumpjson or opts.dump_single_json + or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail + or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration) + + any_printing = opts.print_json + download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive + + # If JSON is not printed anywhere, but comments are requested, save it to file + printing_json = opts.dumpjson or opts.print_json or opts.dump_single_json + if opts.getcomments and not printing_json: + opts.writeinfojson = True + + if opts.no_sponsorblock: + opts.sponsorblock_mark = set() + opts.sponsorblock_remove = set() + sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove + + opts.remove_chapters = opts.remove_chapters or [] + + if (opts.remove_chapters or sponsorblock_query) and opts.sponskrub is not False: + if opts.sponskrub: + if opts.remove_chapters: + report_conflict('--remove-chapters', '--sponskrub') + if opts.sponsorblock_mark: + report_conflict('--sponsorblock-mark', '--sponskrub') + if opts.sponsorblock_remove: + report_conflict('--sponsorblock-remove', '--sponskrub') + opts.sponskrub = False + if opts.sponskrub_cut and opts.split_chapters and opts.sponskrub is not False: + report_conflict('--split-chapter', '--sponskrub-cut') + opts.sponskrub_cut = False + + if opts.remuxvideo and opts.recodevideo: + report_conflict('--recode-video', '--remux-video') + opts.remuxvideo = False + + if opts.allow_unplayable_formats: + def report_unplayable_conflict(opt_name, arg, default=False, allowed=None): + val = getattr(opts, opt_name) + if (not allowed and val) or (allowed and not allowed(val)): + 
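+                # i.e. warn whenever the option holds anything other than a value that is
+                # safe to combine with --allow-unplayable-formats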
report_conflict('--allow-unplayable-formats', arg) + setattr(opts, opt_name, default) + + report_unplayable_conflict('extractaudio', '--extract-audio') + report_unplayable_conflict('remuxvideo', '--remux-video') + report_unplayable_conflict('recodevideo', '--recode-video') + report_unplayable_conflict('addmetadata', '--embed-metadata') + report_unplayable_conflict('addchapters', '--embed-chapters') + report_unplayable_conflict('embed_infojson', '--embed-info-json') + opts.embed_infojson = False + report_unplayable_conflict('embedsubtitles', '--embed-subs') + report_unplayable_conflict('embedthumbnail', '--embed-thumbnail') + report_unplayable_conflict('xattrs', '--xattrs') + report_unplayable_conflict('fixup', '--fixup', default='never', allowed=lambda x: x in (None, 'never', 'ignore')) + opts.fixup = 'never' + report_unplayable_conflict('remove_chapters', '--remove-chapters', default=[]) + report_unplayable_conflict('sponsorblock_remove', '--sponsorblock-remove', default=set()) + report_unplayable_conflict('sponskrub', '--sponskrub', default=set()) + opts.sponskrub = False + + if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: + opts.addchapters = True + + # PostProcessors + postprocessors = list(opts.add_postprocessors) + if sponsorblock_query: + postprocessors.append({ + 'key': 'SponsorBlock', + 'categories': sponsorblock_query, + 'api': opts.sponsorblock_api, + # Run this immediately after extraction is complete + 'when': 'pre_process' + }) + if opts.parse_metadata: + postprocessors.append({ + 'key': 'MetadataParser', + 'actions': opts.parse_metadata, + # Run this immediately after extraction is complete + 'when': 'pre_process' + }) + if opts.convertsubtitles: + postprocessors.append({ + 'key': 'FFmpegSubtitlesConvertor', + 'format': opts.convertsubtitles, + # Run this before the actual video download + 'when': 'before_dl' + }) + if opts.convertthumbnails: + postprocessors.append({ + 'key': 'FFmpegThumbnailsConvertor', + 'format': opts.convertthumbnails, + # Run this before the actual video download + 'when': 'before_dl' + }) + if opts.extractaudio: + postprocessors.append({ + 'key': 'FFmpegExtractAudio', + 'preferredcodec': opts.audioformat, + 'preferredquality': opts.audioquality, + 'nopostoverwrites': opts.nopostoverwrites, + }) + if opts.remuxvideo: + postprocessors.append({ + 'key': 'FFmpegVideoRemuxer', + 'preferedformat': opts.remuxvideo, + }) + if opts.recodevideo: + postprocessors.append({ + 'key': 'FFmpegVideoConvertor', + 'preferedformat': opts.recodevideo, + }) + # If ModifyChapters is going to remove chapters, subtitles must already be in the container. + if opts.embedsubtitles: + already_have_subtitle = opts.writesubtitles and 'no-keep-subs' not in compat_opts + postprocessors.append({ + 'key': 'FFmpegEmbedSubtitle', + # already_have_subtitle = True prevents the file from being deleted after embedding + 'already_have_subtitle': already_have_subtitle + }) + if not opts.writeautomaticsub and 'no-keep-subs' not in compat_opts: + opts.writesubtitles = True + # --all-sub automatically sets --write-sub if --write-auto-sub is not given + # this was the old behaviour if only --all-sub was given. 
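+    # e.g. `--all-subs` alone implies --write-subs, whereas `--all-subs --write-auto-sub`
+    # leaves opts.writesubtitles unchanged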
+ if opts.allsubtitles and not opts.writeautomaticsub: + opts.writesubtitles = True + # ModifyChapters must run before FFmpegMetadataPP + remove_chapters_patterns, remove_ranges = [], [] + for regex in opts.remove_chapters: + if regex.startswith('*'): + dur = list(map(parse_duration, regex[1:].split('-'))) + if len(dur) == 2 and all(t is not None for t in dur): + remove_ranges.append(tuple(dur)) + continue + parser.error(f'invalid --remove-chapters time range {regex!r}. Must be of the form *start-end') + try: + remove_chapters_patterns.append(re.compile(regex)) + except re.error as err: + parser.error(f'invalid --remove-chapters regex {regex!r} - {err}') + if opts.remove_chapters or sponsorblock_query: + postprocessors.append({ + 'key': 'ModifyChapters', + 'remove_chapters_patterns': remove_chapters_patterns, + 'remove_sponsor_segments': opts.sponsorblock_remove, + 'remove_ranges': remove_ranges, + 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title, + 'force_keyframes': opts.force_keyframes_at_cuts + }) + # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and + # FFmpegExtractAudioPP as containers before conversion may not support + # metadata (3gp, webm, etc.) + # By default ffmpeg preserves metadata applicable for both + # source and target containers. From this point the container won't change, + # so metadata can be added here. + if opts.addmetadata or opts.addchapters or opts.embed_infojson: + if opts.embed_infojson is None: + opts.embed_infojson = 'if_exists' + postprocessors.append({ + 'key': 'FFmpegMetadata', + 'add_chapters': opts.addchapters, + 'add_metadata': opts.addmetadata, + 'add_infojson': opts.embed_infojson, + }) + # Deprecated + # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment + # but must be below EmbedSubtitle and FFmpegMetadata + # See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29 + # If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found + if opts.sponskrub is not False: + postprocessors.append({ + 'key': 'SponSkrub', + 'path': opts.sponskrub_path, + 'args': opts.sponskrub_args, + 'cut': opts.sponskrub_cut, + 'force': opts.sponskrub_force, + 'ignoreerror': opts.sponskrub is None, + '_from_cli': True, + }) + if opts.embedthumbnail: + postprocessors.append({ + 'key': 'EmbedThumbnail', + # already_have_thumbnail = True prevents the file from being deleted after embedding + 'already_have_thumbnail': opts.writethumbnail + }) + if not opts.writethumbnail: + opts.writethumbnail = True + opts.outtmpl['pl_thumbnail'] = '' + if opts.split_chapters: + postprocessors.append({ + 'key': 'FFmpegSplitChapters', + 'force_keyframes': opts.force_keyframes_at_cuts, + }) + # XAttrMetadataPP should be run after post-processors that may change file contents + if opts.xattrs: + postprocessors.append({'key': 'XAttrMetadata'}) + if opts.concat_playlist != 'never': + postprocessors.append({ + 'key': 'FFmpegConcat', + 'only_multi_video': opts.concat_playlist != 'always', + 'when': 'playlist', + }) + # Exec must be the last PP of each category + if opts.exec_before_dl_cmd: + opts.exec_cmd.setdefault('before_dl', opts.exec_before_dl_cmd) + for when, exec_cmd in opts.exec_cmd.items(): + postprocessors.append({ + 'key': 'Exec', + 'exec_cmd': exec_cmd, + # Run this only after the files have been moved to their final locations + 'when': when, + }) + + def report_args_compat(arg, name): + warnings.append('%s given without specifying name. 
The arguments will be given to all %s' % (arg, name)) + + if 'default' in opts.external_downloader_args: + report_args_compat('--downloader-args', 'external downloaders') + + if 'default-compat' in opts.postprocessor_args and 'default' not in opts.postprocessor_args: + report_args_compat('--post-processor-args', 'post-processors') + opts.postprocessor_args.setdefault('sponskrub', []) + opts.postprocessor_args['default'] = opts.postprocessor_args['default-compat'] + + def report_deprecation(val, old, new=None): + if not val: + return + deprecation_warnings.append( + f'{old} is deprecated and may be removed in a future version. Use {new} instead' if new + else f'{old} is deprecated and may not work as expected') + + report_deprecation(opts.sponskrub, '--sponskrub', '--sponsorblock-mark or --sponsorblock-remove') + report_deprecation(not opts.prefer_ffmpeg, '--prefer-avconv', 'ffmpeg') + report_deprecation(opts.include_ads, '--include-ads') + # report_deprecation(opts.call_home, '--call-home') # We may re-implement this in future + # report_deprecation(opts.writeannotations, '--write-annotations') # It's just that no website has it + + final_ext = ( + opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS + else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS + else opts.audioformat if (opts.extractaudio and opts.audioformat != 'best') + else None) + + match_filter = ( + None if opts.match_filter is None + else match_filter_func(opts.match_filter)) + + ydl_opts = { + 'usenetrc': opts.usenetrc, + 'netrc_location': opts.netrc_location, + 'username': opts.username, + 'password': opts.password, + 'twofactor': opts.twofactor, + 'videopassword': opts.videopassword, + 'ap_mso': opts.ap_mso, + 'ap_username': opts.ap_username, + 'ap_password': opts.ap_password, + 'quiet': (opts.quiet or any_getting or any_printing), + 'no_warnings': opts.no_warnings, + 'forceurl': opts.geturl, + 'forcetitle': opts.gettitle, + 'forceid': opts.getid, + 'forcethumbnail': opts.getthumbnail, + 'forcedescription': opts.getdescription, + 'forceduration': opts.getduration, + 'forcefilename': opts.getfilename, + 'forceformat': opts.getformat, + 'forceprint': opts.forceprint, + 'print_to_file': opts.print_to_file, + 'forcejson': opts.dumpjson or opts.print_json, + 'dump_single_json': opts.dump_single_json, + 'force_write_download_archive': opts.force_write_download_archive, + 'simulate': (any_getting or None) if opts.simulate is None else opts.simulate, + 'skip_download': opts.skip_download, + 'format': opts.format, + 'allow_unplayable_formats': opts.allow_unplayable_formats, + 'ignore_no_formats_error': opts.ignore_no_formats_error, + 'format_sort': opts.format_sort, + 'format_sort_force': opts.format_sort_force, + 'allow_multiple_video_streams': opts.allow_multiple_video_streams, + 'allow_multiple_audio_streams': opts.allow_multiple_audio_streams, + 'check_formats': opts.check_formats, + 'listformats': opts.listformats, + 'listformats_table': opts.listformats_table, + 'outtmpl': opts.outtmpl, + 'outtmpl_na_placeholder': opts.outtmpl_na_placeholder, + 'paths': opts.paths, + 'autonumber_size': opts.autonumber_size, + 'autonumber_start': opts.autonumber_start, + 'restrictfilenames': opts.restrictfilenames, + 'windowsfilenames': opts.windowsfilenames, + 'ignoreerrors': opts.ignoreerrors, + 'force_generic_extractor': opts.force_generic_extractor, + 'ratelimit': opts.ratelimit, + 'throttledratelimit': opts.throttledratelimit, + 'overwrites': opts.overwrites, + 'retries': 
opts.retries, + 'file_access_retries': opts.file_access_retries, + 'fragment_retries': opts.fragment_retries, + 'extractor_retries': opts.extractor_retries, + 'skip_unavailable_fragments': opts.skip_unavailable_fragments, + 'keep_fragments': opts.keep_fragments, + 'concurrent_fragment_downloads': opts.concurrent_fragment_downloads, + 'buffersize': opts.buffersize, + 'noresizebuffer': opts.noresizebuffer, + 'http_chunk_size': opts.http_chunk_size, + 'continuedl': opts.continue_dl, + 'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress, + 'progress_with_newline': opts.progress_with_newline, + 'progress_template': opts.progress_template, + 'playliststart': opts.playliststart, + 'playlistend': opts.playlistend, + 'playlistreverse': opts.playlist_reverse, + 'playlistrandom': opts.playlist_random, + 'noplaylist': opts.noplaylist, + 'logtostderr': outtmpl_default == '-', + 'consoletitle': opts.consoletitle, + 'nopart': opts.nopart, + 'updatetime': opts.updatetime, + 'writedescription': opts.writedescription, + 'writeannotations': opts.writeannotations, + 'writeinfojson': opts.writeinfojson, + 'allow_playlist_files': opts.allow_playlist_files, + 'clean_infojson': opts.clean_infojson, + 'getcomments': opts.getcomments, + 'writethumbnail': opts.writethumbnail is True, + 'write_all_thumbnails': opts.writethumbnail == 'all', + 'writelink': opts.writelink, + 'writeurllink': opts.writeurllink, + 'writewebloclink': opts.writewebloclink, + 'writedesktoplink': opts.writedesktoplink, + 'writesubtitles': opts.writesubtitles, + 'writeautomaticsub': opts.writeautomaticsub, + 'allsubtitles': opts.allsubtitles, + 'listsubtitles': opts.listsubtitles, + 'subtitlesformat': opts.subtitlesformat, + 'subtitleslangs': opts.subtitleslangs, + 'matchtitle': decodeOption(opts.matchtitle), + 'rejecttitle': decodeOption(opts.rejecttitle), + 'max_downloads': opts.max_downloads, + 'prefer_free_formats': opts.prefer_free_formats, + 'trim_file_name': opts.trim_file_name, + 'verbose': opts.verbose, + 'dump_intermediate_pages': opts.dump_intermediate_pages, + 'write_pages': opts.write_pages, + 'test': opts.test, + 'keepvideo': opts.keepvideo, + 'min_filesize': opts.min_filesize, + 'max_filesize': opts.max_filesize, + 'min_views': opts.min_views, + 'max_views': opts.max_views, + 'daterange': date, + 'cachedir': opts.cachedir, + 'youtube_print_sig_code': opts.youtube_print_sig_code, + 'age_limit': opts.age_limit, + 'download_archive': download_archive_fn, + 'break_on_existing': opts.break_on_existing, + 'break_on_reject': opts.break_on_reject, + 'break_per_url': opts.break_per_url, + 'skip_playlist_after_errors': opts.skip_playlist_after_errors, + 'cookiefile': opts.cookiefile, + 'cookiesfrombrowser': opts.cookiesfrombrowser, + 'legacyserverconnect': opts.legacy_server_connect, + 'nocheckcertificate': opts.no_check_certificate, + 'prefer_insecure': opts.prefer_insecure, + 'proxy': opts.proxy, + 'socket_timeout': opts.socket_timeout, + 'bidi_workaround': opts.bidi_workaround, + 'debug_printtraffic': opts.debug_printtraffic, + 'prefer_ffmpeg': opts.prefer_ffmpeg, + 'include_ads': opts.include_ads, + 'default_search': opts.default_search, + 'dynamic_mpd': opts.dynamic_mpd, + 'extractor_args': opts.extractor_args, + 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest, + 'youtube_include_hls_manifest': opts.youtube_include_hls_manifest, + 'encoding': opts.encoding, + 'extract_flat': opts.extract_flat, + 'live_from_start': opts.live_from_start, + 'wait_for_video': opts.wait_for_video, + 'mark_watched': 
opts.mark_watched, + 'merge_output_format': opts.merge_output_format, + 'final_ext': final_ext, + 'postprocessors': postprocessors, + 'fixup': opts.fixup, + 'source_address': opts.source_address, + 'call_home': opts.call_home, + 'sleep_interval_requests': opts.sleep_interval_requests, + 'sleep_interval': opts.sleep_interval, + 'max_sleep_interval': opts.max_sleep_interval, + 'sleep_interval_subtitles': opts.sleep_interval_subtitles, + 'external_downloader': opts.external_downloader, + 'list_thumbnails': opts.list_thumbnails, + 'playlist_items': opts.playlist_items, + 'xattr_set_filesize': opts.xattr_set_filesize, + 'match_filter': match_filter, + 'no_color': opts.no_color, + 'ffmpeg_location': opts.ffmpeg_location, + 'hls_prefer_native': opts.hls_prefer_native, + 'hls_use_mpegts': opts.hls_use_mpegts, + 'hls_split_discontinuity': opts.hls_split_discontinuity, + 'external_downloader_args': opts.external_downloader_args, + 'postprocessor_args': opts.postprocessor_args, + 'cn_verification_proxy': opts.cn_verification_proxy, + 'geo_verification_proxy': opts.geo_verification_proxy, + 'geo_bypass': opts.geo_bypass, + 'geo_bypass_country': opts.geo_bypass_country, + 'geo_bypass_ip_block': opts.geo_bypass_ip_block, + '_warnings': warnings, + '_deprecation_warnings': deprecation_warnings, + 'compat_opts': compat_opts, + } + + with YoutubeDL(ydl_opts) as ydl: + actual_use = all_urls or opts.load_info_filename + + # Remove cache dir + if opts.rm_cachedir: + ydl.cache.remove() + + # Update version + if opts.update_self: + # If updater returns True, exit. Required for windows + if run_update(ydl): + if actual_use: + sys.exit('ERROR: The program must exit for the update to complete') + sys.exit() + + # Maybe do nothing + if not actual_use: + if opts.update_self or opts.rm_cachedir: + sys.exit() + + ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv) + parser.error( + 'You must provide at least one URL.\n' + 'Type yt-dlp --help to see a list of all options.') + + try: + if opts.load_info_filename is not None: + retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename)) + else: + retcode = ydl.download(all_urls) + except DownloadCancelled: + ydl.to_screen('Aborting remaining downloads') + retcode = 101 + + sys.exit(retcode) + + +def main(argv=None): + try: + _real_main(argv) + except DownloadError: + sys.exit(1) + except SameFileError as e: + sys.exit(f'ERROR: {e}') + except KeyboardInterrupt: + sys.exit('\nERROR: Interrupted by user') + except BrokenPipeError as e: + # https://docs.python.org/3/library/signal.html#note-on-sigpipe + devnull = os.open(os.devnull, os.O_WRONLY) + os.dup2(devnull, sys.stdout.fileno()) + sys.exit(f'\nERROR: {e}') + + +__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors'] diff --git a/plugins/youtube_download/yt_dlp/__main__.py b/plugins/youtube_download/yt_dlp/__main__.py new file mode 100644 index 0000000..c9f4147 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/__main__.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +from __future__ import unicode_literals + +# Execute with +# $ python yt_dlp/__main__.py (2.6+) +# $ python -m yt_dlp (2.7+) + +import sys + +if __package__ is None and not hasattr(sys, 'frozen'): + # direct call of __main__.py + import os.path + path = os.path.realpath(os.path.abspath(__file__)) + sys.path.insert(0, os.path.dirname(os.path.dirname(path))) + +import yt_dlp + +if __name__ == '__main__': + yt_dlp.main() diff --git a/plugins/youtube_download/yt_dlp/aes.py b/plugins/youtube_download/yt_dlp/aes.py new file 
mode 100644 index 0000000..b37f0dd --- /dev/null +++ b/plugins/youtube_download/yt_dlp/aes.py @@ -0,0 +1,522 @@ +from __future__ import unicode_literals + +from math import ceil + +from .compat import ( + compat_b64decode, + compat_ord, + compat_pycrypto_AES, +) +from .utils import ( + bytes_to_intlist, + intlist_to_bytes, +) + + +if compat_pycrypto_AES: + def aes_cbc_decrypt_bytes(data, key, iv): + """ Decrypt bytes with AES-CBC using pycryptodome """ + return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_CBC, iv).decrypt(data) + + def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): + """ Decrypt bytes with AES-GCM using pycryptodome """ + return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) + +else: + def aes_cbc_decrypt_bytes(data, key, iv): + """ Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """ + return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv)))) + + def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): + """ Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """ + return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce)))) + + +def unpad_pkcs7(data): + return data[:-compat_ord(data[-1])] + + +BLOCK_SIZE_BYTES = 16 + + +def aes_ecb_encrypt(data, key, iv=None): + """ + Encrypt with aes in ECB mode + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv Unused for this mode + @returns {int[]} encrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + encrypted_data += aes_encrypt(block, expanded_key) + encrypted_data = encrypted_data[:len(data)] + + return encrypted_data + + +def aes_ecb_decrypt(data, key, iv=None): + """ + Decrypt with aes in ECB mode + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv Unused for this mode + @returns {int[]} decrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + encrypted_data += aes_decrypt(block, expanded_key) + encrypted_data = encrypted_data[:len(data)] + + return encrypted_data + + +def aes_ctr_decrypt(data, key, iv): + """ + Decrypt with aes in counter mode + + @param {int[]} data cipher + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv 16-Byte initialization vector + @returns {int[]} decrypted data + """ + return aes_ctr_encrypt(data, key, iv) + + +def aes_ctr_encrypt(data, key, iv): + """ + Encrypt with aes in counter mode + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv 16-Byte initialization vector + @returns {int[]} encrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + counter = iter_vector(iv) + + encrypted_data = [] + for i in range(block_count): + counter_block = next(counter) + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + block += [0] * (BLOCK_SIZE_BYTES - len(block)) + + cipher_counter_block = aes_encrypt(counter_block, expanded_key) + encrypted_data += xor(block, cipher_counter_block) + encrypted_data = 
encrypted_data[:len(data)] + + return encrypted_data + + +def aes_cbc_decrypt(data, key, iv): + """ + Decrypt with aes in CBC mode + + @param {int[]} data cipher + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv 16-Byte IV + @returns {int[]} decrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + decrypted_data = [] + previous_cipher_block = iv + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + block += [0] * (BLOCK_SIZE_BYTES - len(block)) + + decrypted_block = aes_decrypt(block, expanded_key) + decrypted_data += xor(decrypted_block, previous_cipher_block) + previous_cipher_block = block + decrypted_data = decrypted_data[:len(data)] + + return decrypted_data + + +def aes_cbc_encrypt(data, key, iv): + """ + Encrypt with aes in CBC mode using PKCS#7 padding + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv 16-Byte IV + @returns {int[]} encrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + previous_cipher_block = iv + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + remaining_length = BLOCK_SIZE_BYTES - len(block) + block += [remaining_length] * remaining_length + mixed_block = xor(block, previous_cipher_block) + + encrypted_block = aes_encrypt(mixed_block, expanded_key) + encrypted_data += encrypted_block + + previous_cipher_block = encrypted_block + + return encrypted_data + + +def aes_gcm_decrypt_and_verify(data, key, tag, nonce): + """ + Decrypt with aes in GCM mode and check authenticity using tag + + @param {int[]} data cipher + @param {int[]} key 16-Byte cipher key + @param {int[]} tag authentication tag + @param {int[]} nonce IV (recommended 12-Byte) + @returns {int[]} decrypted data + """ + + # XXX: check aes, gcm param + + hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key)) + + if len(nonce) == 12: + j0 = nonce + [0, 0, 0, 1] + else: + fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8 + ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big')) + j0 = ghash(hash_subkey, ghash_in) + + # TODO: add nonce support to aes_ctr_decrypt + + # nonce_ctr = j0[:12] + iv_ctr = inc(j0) + + decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr))) + pad_len = len(data) // 16 * 16 + s_tag = ghash( + hash_subkey, + data + + [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad + + bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data + + ((len(data) * 8).to_bytes(8, 'big'))) # length of data + ) + + if tag != aes_ctr_encrypt(s_tag, key, j0): + raise ValueError("Mismatching authentication tag") + + return decrypted_data + + +def aes_encrypt(data, expanded_key): + """ + Encrypt one block with aes + + @param {int[]} data 16-Byte state + @param {int[]} expanded_key 176/208/240-Byte expanded key + @returns {int[]} 16-Byte cipher + """ + rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 + + data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) + for i in range(1, rounds + 1): + data = sub_bytes(data) + data = shift_rows(data) + if i != rounds: + data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX)) + data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]) + + return data + + +def aes_decrypt(data, expanded_key): + """ + Decrypt one block
with aes + + @param {int[]} data 16-Byte cipher + @param {int[]} expanded_key 176/208/240-Byte expanded key + @returns {int[]} 16-Byte state + """ + rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 + + for i in range(rounds, 0, -1): + data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]) + if i != rounds: + data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV)) + data = shift_rows_inv(data) + data = sub_bytes_inv(data) + data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) + + return data + + +def aes_decrypt_text(data, password, key_size_bytes): + """ + Decrypt text + - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter + - The cipher key is retrieved by encrypting the first 16 Byte of 'password' + with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's) + - Mode of operation is 'counter' + + @param {str} data Base64 encoded string + @param {str,unicode} password Password (will be encoded with utf-8) + @param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit + @returns {str} Decrypted data + """ + NONCE_LENGTH_BYTES = 8 + + data = bytes_to_intlist(compat_b64decode(data)) + password = bytes_to_intlist(password.encode('utf-8')) + + key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) + key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) + + nonce = data[:NONCE_LENGTH_BYTES] + cipher = data[NONCE_LENGTH_BYTES:] + + decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)) + plaintext = intlist_to_bytes(decrypted_data) + + return plaintext + + +RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) +SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, + 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, + 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, + 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, + 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, + 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, + 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, + 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, + 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, + 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, + 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, + 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, + 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, + 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, + 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, + 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16) +SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d) +MIX_COLUMN_MATRIX = ((0x2, 0x3, 0x1, 0x1), + (0x1, 0x2, 0x3, 0x1), + (0x1, 0x1, 0x2, 0x3), + (0x3, 0x1, 0x1, 0x2)) +MIX_COLUMN_MATRIX_INV = ((0xE, 0xB, 0xD, 0x9), + (0x9, 0xE, 0xB, 0xD), + (0xD, 0x9, 0xE, 0xB), + (0xB, 0xD, 0x9, 0xE)) +RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35, + 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA, + 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31, + 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD, + 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88, + 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A, + 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3, + 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0, + 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41, + 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75, + 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80, + 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54, + 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA, + 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E, + 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17, + 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01) +RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03, + 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1, + 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78, + 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 
0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e, + 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38, + 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10, + 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba, + 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57, + 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8, + 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0, + 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7, + 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d, + 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1, + 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab, + 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5, + 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07) + + +def key_expansion(data): + """ + Generate key schedule + + @param {int[]} data 16/24/32-Byte cipher key + @returns {int[]} 176/208/240-Byte expanded key + """ + data = data[:] # copy + rcon_iteration = 1 + key_size_bytes = len(data) + expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES + + while len(data) < expanded_key_size_bytes: + temp = data[-4:] + temp = key_schedule_core(temp, rcon_iteration) + rcon_iteration += 1 + data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) + + for _ in range(3): + temp = data[-4:] + data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) + + if key_size_bytes == 32: + temp = data[-4:] + temp = sub_bytes(temp) + data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) + + for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0): + temp = data[-4:] + data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) + data = data[:expanded_key_size_bytes] + + return data + + +def iter_vector(iv): + while True: + yield iv + iv = inc(iv) + + +def sub_bytes(data): + return [SBOX[x] for x in data] + + +def sub_bytes_inv(data): + return [SBOX_INV[x] for x in data] + + +def rotate(data): + return data[1:] + [data[0]] + + +def key_schedule_core(data, rcon_iteration): + data = rotate(data) + data = sub_bytes(data) + data[0] = data[0] ^ RCON[rcon_iteration] + + return data + + +def xor(data1, data2): + return [x ^ y for x, y in zip(data1, data2)] + + +def iter_mix_columns(data, matrix): + for i in (0, 4, 8, 12): + for row in matrix: + mixed = 0 + for j in range(4): + # xor is (+) and (-) + mixed ^= (0 if data[i:i + 4][j] == 0 or row[j] == 0 else + RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[data[i + j]] + RIJNDAEL_LOG_TABLE[row[j]]) % 0xFF]) + yield mixed + + +def shift_rows(data): + return [data[((column + row) & 0b11) * 4 + row] for column in range(4) for row in range(4)] + + +def shift_rows_inv(data): + return [data[((column - row) & 0b11) * 4 + row] for column in range(4) for row in range(4)] + + +def shift_block(data): + data_shifted = [] + + bit = 0 + for n in data: + if bit: + n |= 0x100 + bit = n & 1 + n >>= 1 + data_shifted.append(n) + + return data_shifted + + +def inc(data): + data = data[:] # copy + for i in range(len(data) - 1, -1, -1): + if data[i] == 255: + data[i] = 0 + else: + data[i] = 
data[i] + 1 + break + return data + + +def block_product(block_x, block_y): + # NIST SP 800-38D, Algorithm 1 + + if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES: + raise ValueError("Length of blocks need to be %d bytes" % BLOCK_SIZE_BYTES) + + block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1) + block_v = block_y[:] + block_z = [0] * BLOCK_SIZE_BYTES + + for i in block_x: + for bit in range(7, -1, -1): + if i & (1 << bit): + block_z = xor(block_z, block_v) + + do_xor = block_v[-1] & 1 + block_v = shift_block(block_v) + if do_xor: + block_v = xor(block_v, block_r) + + return block_z + + +def ghash(subkey, data): + # NIST SP 800-38D, Algorithm 2 + + if len(data) % BLOCK_SIZE_BYTES: + raise ValueError("Length of data should be %d bytes" % BLOCK_SIZE_BYTES) + + last_y = [0] * BLOCK_SIZE_BYTES + for i in range(0, len(data), BLOCK_SIZE_BYTES): + block = data[i : i + BLOCK_SIZE_BYTES] # noqa: E203 + last_y = block_product(xor(last_y, block), subkey) + + return last_y + + +__all__ = [ + 'aes_ctr_decrypt', + 'aes_cbc_decrypt', + 'aes_cbc_decrypt_bytes', + 'aes_decrypt_text', + 'aes_encrypt', + 'aes_gcm_decrypt_and_verify', + 'aes_gcm_decrypt_and_verify_bytes', + 'key_expansion', + 'unpad_pkcs7', +] diff --git a/plugins/youtube_download/yt_dlp/cache.py b/plugins/youtube_download/yt_dlp/cache.py new file mode 100644 index 0000000..e5cb193 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/cache.py @@ -0,0 +1,98 @@ +from __future__ import unicode_literals + +import errno +import io +import json +import os +import re +import shutil +import traceback + +from .compat import compat_getenv +from .utils import ( + expand_path, + write_json_file, +) + + +class Cache(object): + def __init__(self, ydl): + self._ydl = ydl + + def _get_root_dir(self): + res = self._ydl.params.get('cachedir') + if res is None: + cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache') + res = os.path.join(cache_root, 'yt-dlp') + return expand_path(res) + + def _get_cache_fn(self, section, key, dtype): + assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ + 'invalid section %r' % section + assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key + return os.path.join( + self._get_root_dir(), section, '%s.%s' % (key, dtype)) + + @property + def enabled(self): + return self._ydl.params.get('cachedir') is not False + + def store(self, section, key, data, dtype='json'): + assert dtype in ('json',) + + if not self.enabled: + return + + fn = self._get_cache_fn(section, key, dtype) + try: + try: + os.makedirs(os.path.dirname(fn)) + except OSError as ose: + if ose.errno != errno.EEXIST: + raise + self._ydl.write_debug(f'Saving {section}.{key} to cache') + write_json_file(data, fn) + except Exception: + tb = traceback.format_exc() + self._ydl.report_warning( + 'Writing cache to %r failed: %s' % (fn, tb)) + + def load(self, section, key, dtype='json', default=None): + assert dtype in ('json',) + + if not self.enabled: + return default + + cache_fn = self._get_cache_fn(section, key, dtype) + try: + try: + with io.open(cache_fn, 'r', encoding='utf-8') as cachef: + self._ydl.write_debug(f'Loading {section}.{key} from cache') + return json.load(cachef) + except ValueError: + try: + file_size = os.path.getsize(cache_fn) + except (OSError, IOError) as oe: + file_size = str(oe) + self._ydl.report_warning( + 'Cache retrieval from %s failed (%s)' % (cache_fn, file_size)) + except IOError: + pass # No cache available + + return default + + def remove(self): + if not self.enabled: + self._ydl.to_screen('Cache is disabled 
(Did you combine --no-cache-dir and --rm-cache-dir?)') + return + + cachedir = self._get_root_dir() + if not any((term in cachedir) for term in ('cache', 'tmp')): + raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir) + + self._ydl.to_screen( + 'Removing cache dir %s .' % cachedir, skip_eol=True) + if os.path.exists(cachedir): + self._ydl.to_screen('.', skip_eol=True) + shutil.rmtree(cachedir) + self._ydl.to_screen('.') diff --git a/plugins/youtube_download/yt_dlp/compat.py b/plugins/youtube_download/yt_dlp/compat.py new file mode 100644 index 0000000..b97d451 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/compat.py @@ -0,0 +1,311 @@ +# coding: utf-8 + +import asyncio +import base64 +import collections +import ctypes +import getpass +import html +import html.parser +import http +import http.client +import http.cookiejar +import http.cookies +import http.server +import itertools +import optparse +import os +import re +import shlex +import shutil +import socket +import struct +import subprocess +import sys +import tokenize +import urllib +import xml.etree.ElementTree as etree +from subprocess import DEVNULL + + +# HTMLParseError has been deprecated in Python 3.3 and removed in +# Python 3.5. Introducing dummy exception for Python >3.5 for compatible +# and uniform cross-version exception handling +class compat_HTMLParseError(Exception): + pass + + +# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE +# will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines +def compat_ctypes_WINFUNCTYPE(*args, **kwargs): + return ctypes.WINFUNCTYPE(*args, **kwargs) + + +class _TreeBuilder(etree.TreeBuilder): + def doctype(self, name, pubid, system): + pass + + +def compat_etree_fromstring(text): + return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) + + +compat_os_name = os._name if os.name == 'java' else os.name + + +if compat_os_name == 'nt': + def compat_shlex_quote(s): + return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') +else: + from shlex import quote as compat_shlex_quote + + +def compat_ord(c): + if type(c) is int: + return c + else: + return ord(c) + + +def compat_setenv(key, value, env=os.environ): + env[key] = value + + +if compat_os_name == 'nt' and sys.version_info < (3, 8): + # os.path.realpath on Windows does not follow symbolic links + # prior to Python 3.8 (see https://bugs.python.org/issue9949) + def compat_realpath(path): + while os.path.islink(path): + path = os.path.abspath(os.readlink(path)) + return path +else: + compat_realpath = os.path.realpath + + +def compat_print(s): + assert isinstance(s, compat_str) + print(s) + + +# Fix https://github.com/ytdl-org/youtube-dl/issues/4223 +# See http://bugs.python.org/issue9161 for what is broken +def workaround_optparse_bug9161(): + op = optparse.OptionParser() + og = optparse.OptionGroup(op, 'foo') + try: + og.add_option('-t') + except TypeError: + real_add_option = optparse.OptionGroup.add_option + + def _compat_add_option(self, *args, **kwargs): + enc = lambda v: ( + v.encode('ascii', 'replace') if isinstance(v, compat_str) + else v) + bargs = [enc(a) for a in args] + bkwargs = dict( + (k, enc(v)) for k, v in kwargs.items()) + return real_add_option(self, *bargs, **bkwargs) + optparse.OptionGroup.add_option = _compat_add_option + + +try: + compat_Pattern = re.Pattern +except AttributeError: + compat_Pattern = type(re.compile('')) + + +try: + compat_Match = re.Match +except AttributeError: + compat_Match = type(re.compile('').match('')) + + 
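+# asyncio.run() only exists on Python 3.7+; the fallback below emulates it on 3.6 by
+# obtaining (or creating) an event loop and driving the coroutine to completion.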
+try: + compat_asyncio_run = asyncio.run # >= 3.7 +except AttributeError: + def compat_asyncio_run(coro): + try: + loop = asyncio.get_event_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + loop.run_until_complete(coro) + + asyncio.run = compat_asyncio_run + + +# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl +# See https://github.com/yt-dlp/yt-dlp/issues/792 +# https://docs.python.org/3/library/os.path.html#os.path.expanduser +if compat_os_name in ('nt', 'ce') and 'HOME' in os.environ: + _userhome = os.environ['HOME'] + + def compat_expanduser(path): + if not path.startswith('~'): + return path + i = path.replace('\\', '/', 1).find('/') # ~user + if i < 0: + i = len(path) + userhome = os.path.join(os.path.dirname(_userhome), path[1:i]) if i > 1 else _userhome + return userhome + path[i:] +else: + compat_expanduser = os.path.expanduser + + +try: + from Cryptodome.Cipher import AES as compat_pycrypto_AES +except ImportError: + try: + from Crypto.Cipher import AES as compat_pycrypto_AES + except ImportError: + compat_pycrypto_AES = None + + +WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None + + +def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075 + if compat_os_name != 'nt': + return + global WINDOWS_VT_MODE + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + try: + subprocess.Popen('', shell=True, startupinfo=startupinfo) + WINDOWS_VT_MODE = True + except Exception: + pass + + +# Deprecated + +compat_basestring = str +compat_chr = chr +compat_filter = filter +compat_input = input +compat_integer_types = (int, ) +compat_kwargs = lambda kwargs: kwargs +compat_map = map +compat_numeric_types = (int, float, complex) +compat_str = str +compat_xpath = lambda xpath: xpath +compat_zip = zip + +compat_collections_abc = collections.abc +compat_HTMLParser = html.parser.HTMLParser +compat_HTTPError = urllib.error.HTTPError +compat_Struct = struct.Struct +compat_b64decode = base64.b64decode +compat_cookiejar = http.cookiejar +compat_cookiejar_Cookie = compat_cookiejar.Cookie +compat_cookies = http.cookies +compat_cookies_SimpleCookie = compat_cookies.SimpleCookie +compat_etree_Element = etree.Element +compat_etree_register_namespace = etree.register_namespace +compat_get_terminal_size = shutil.get_terminal_size +compat_getenv = os.getenv +compat_getpass = getpass.getpass +compat_html_entities = html.entities +compat_html_entities_html5 = compat_html_entities.html5 +compat_http_client = http.client +compat_http_server = http.server +compat_itertools_count = itertools.count +compat_parse_qs = urllib.parse.parse_qs +compat_shlex_split = shlex.split +compat_socket_create_connection = socket.create_connection +compat_struct_pack = struct.pack +compat_struct_unpack = struct.unpack +compat_subprocess_get_DEVNULL = lambda: DEVNULL +compat_tokenize_tokenize = tokenize.tokenize +compat_urllib_error = urllib.error +compat_urllib_parse = urllib.parse +compat_urllib_parse_quote = urllib.parse.quote +compat_urllib_parse_quote_plus = urllib.parse.quote_plus +compat_urllib_parse_unquote = urllib.parse.unquote +compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus +compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes +compat_urllib_parse_urlencode = urllib.parse.urlencode +compat_urllib_parse_urlparse = urllib.parse.urlparse +compat_urllib_parse_urlunparse = urllib.parse.urlunparse +compat_urllib_request = 
urllib.request +compat_urllib_request_DataHandler = urllib.request.DataHandler +compat_urllib_response = urllib.response +compat_urlparse = urllib.parse +compat_urlretrieve = urllib.request.urlretrieve +compat_xml_parse_error = etree.ParseError + + +# Set public objects + +__all__ = [ + 'WINDOWS_VT_MODE', + 'compat_HTMLParseError', + 'compat_HTMLParser', + 'compat_HTTPError', + 'compat_Match', + 'compat_Pattern', + 'compat_Struct', + 'compat_asyncio_run', + 'compat_b64decode', + 'compat_basestring', + 'compat_chr', + 'compat_collections_abc', + 'compat_cookiejar', + 'compat_cookiejar_Cookie', + 'compat_cookies', + 'compat_cookies_SimpleCookie', + 'compat_ctypes_WINFUNCTYPE', + 'compat_etree_Element', + 'compat_etree_fromstring', + 'compat_etree_register_namespace', + 'compat_expanduser', + 'compat_filter', + 'compat_get_terminal_size', + 'compat_getenv', + 'compat_getpass', + 'compat_html_entities', + 'compat_html_entities_html5', + 'compat_http_client', + 'compat_http_server', + 'compat_input', + 'compat_integer_types', + 'compat_itertools_count', + 'compat_kwargs', + 'compat_map', + 'compat_numeric_types', + 'compat_ord', + 'compat_os_name', + 'compat_parse_qs', + 'compat_print', + 'compat_pycrypto_AES', + 'compat_realpath', + 'compat_setenv', + 'compat_shlex_quote', + 'compat_shlex_split', + 'compat_socket_create_connection', + 'compat_str', + 'compat_struct_pack', + 'compat_struct_unpack', + 'compat_subprocess_get_DEVNULL', + 'compat_tokenize_tokenize', + 'compat_urllib_error', + 'compat_urllib_parse', + 'compat_urllib_parse_quote', + 'compat_urllib_parse_quote_plus', + 'compat_urllib_parse_unquote', + 'compat_urllib_parse_unquote_plus', + 'compat_urllib_parse_unquote_to_bytes', + 'compat_urllib_parse_urlencode', + 'compat_urllib_parse_urlparse', + 'compat_urllib_parse_urlunparse', + 'compat_urllib_request', + 'compat_urllib_request_DataHandler', + 'compat_urllib_response', + 'compat_urlparse', + 'compat_urlretrieve', + 'compat_xml_parse_error', + 'compat_xpath', + 'compat_zip', + 'windows_enable_vt_mode', + 'workaround_optparse_bug9161', +] diff --git a/plugins/youtube_download/yt_dlp/cookies.py b/plugins/youtube_download/yt_dlp/cookies.py new file mode 100644 index 0000000..fc033a8 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/cookies.py @@ -0,0 +1,955 @@ +import contextlib +import ctypes +import json +import os +import shutil +import struct +import subprocess +import sys +import tempfile +from datetime import datetime, timedelta, timezone +from enum import Enum, auto +from hashlib import pbkdf2_hmac + +from .aes import ( + aes_cbc_decrypt_bytes, + aes_gcm_decrypt_and_verify_bytes, + unpad_pkcs7, +) +from .compat import ( + compat_b64decode, + compat_cookiejar_Cookie, +) +from .utils import ( + expand_path, + Popen, + YoutubeDLCookieJar, +) + +try: + import sqlite3 + SQLITE_AVAILABLE = True +except ImportError: + # although sqlite3 is part of the standard library, it is possible to compile python without + # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544 + SQLITE_AVAILABLE = False + + +try: + import secretstorage + SECRETSTORAGE_AVAILABLE = True +except ImportError: + SECRETSTORAGE_AVAILABLE = False + SECRETSTORAGE_UNAVAILABLE_REASON = ( + 'as the `secretstorage` module is not installed. ' + 'Please install by running `python3 -m pip install secretstorage`.') +except Exception as _err: + SECRETSTORAGE_AVAILABLE = False + SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. 
{_err}' + + +CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'} +SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'} + + +class YDLLogger: + def __init__(self, ydl=None): + self._ydl = ydl + + def debug(self, message): + if self._ydl: + self._ydl.write_debug(message) + + def info(self, message): + if self._ydl: + self._ydl.to_screen(f'[Cookies] {message}') + + def warning(self, message, only_once=False): + if self._ydl: + self._ydl.report_warning(message, only_once) + + def error(self, message): + if self._ydl: + self._ydl.report_error(message) + + +def load_cookies(cookie_file, browser_specification, ydl): + cookie_jars = [] + if browser_specification is not None: + browser_name, profile, keyring = _parse_browser_specification(*browser_specification) + cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring)) + + if cookie_file is not None: + cookie_file = expand_path(cookie_file) + jar = YoutubeDLCookieJar(cookie_file) + if os.access(cookie_file, os.R_OK): + jar.load(ignore_discard=True, ignore_expires=True) + cookie_jars.append(jar) + + return _merge_cookie_jars(cookie_jars) + + +def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None): + if browser_name == 'firefox': + return _extract_firefox_cookies(profile, logger) + elif browser_name == 'safari': + return _extract_safari_cookies(profile, logger) + elif browser_name in CHROMIUM_BASED_BROWSERS: + return _extract_chrome_cookies(browser_name, profile, keyring, logger) + else: + raise ValueError('unknown browser: {}'.format(browser_name)) + + +def _extract_firefox_cookies(profile, logger): + logger.info('Extracting cookies from firefox') + if not SQLITE_AVAILABLE: + logger.warning('Cannot extract cookies from firefox without sqlite3 support. 
' + 'Please use a python interpreter compiled with sqlite3 support') + return YoutubeDLCookieJar() + + if profile is None: + search_root = _firefox_browser_dir() + elif _is_path(profile): + search_root = profile + else: + search_root = os.path.join(_firefox_browser_dir(), profile) + + cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite') + if cookie_database_path is None: + raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root)) + logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path)) + + with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: + cursor = None + try: + cursor = _open_database_copy(cookie_database_path, tmpdir) + cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies') + jar = YoutubeDLCookieJar() + for host, name, value, path, expiry, is_secure in cursor.fetchall(): + cookie = compat_cookiejar_Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False, + comment=None, comment_url=None, rest={}) + jar.set_cookie(cookie) + logger.info('Extracted {} cookies from firefox'.format(len(jar))) + return jar + finally: + if cursor is not None: + cursor.connection.close() + + +def _firefox_browser_dir(): + if sys.platform in ('linux', 'linux2'): + return os.path.expanduser('~/.mozilla/firefox') + elif sys.platform == 'win32': + return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles') + elif sys.platform == 'darwin': + return os.path.expanduser('~/Library/Application Support/Firefox') + else: + raise ValueError('unsupported platform: {}'.format(sys.platform)) + + +def _get_chromium_based_browser_settings(browser_name): + # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md + if sys.platform in ('linux', 'linux2'): + config = _config_home() + browser_dir = { + 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'), + 'chrome': os.path.join(config, 'google-chrome'), + 'chromium': os.path.join(config, 'chromium'), + 'edge': os.path.join(config, 'microsoft-edge'), + 'opera': os.path.join(config, 'opera'), + 'vivaldi': os.path.join(config, 'vivaldi'), + }[browser_name] + + elif sys.platform == 'win32': + appdata_local = os.path.expandvars('%LOCALAPPDATA%') + appdata_roaming = os.path.expandvars('%APPDATA%') + browser_dir = { + 'brave': os.path.join(appdata_local, r'BraveSoftware\Brave-Browser\User Data'), + 'chrome': os.path.join(appdata_local, r'Google\Chrome\User Data'), + 'chromium': os.path.join(appdata_local, r'Chromium\User Data'), + 'edge': os.path.join(appdata_local, r'Microsoft\Edge\User Data'), + 'opera': os.path.join(appdata_roaming, r'Opera Software\Opera Stable'), + 'vivaldi': os.path.join(appdata_local, r'Vivaldi\User Data'), + }[browser_name] + + elif sys.platform == 'darwin': + appdata = os.path.expanduser('~/Library/Application Support') + browser_dir = { + 'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'), + 'chrome': os.path.join(appdata, 'Google/Chrome'), + 'chromium': os.path.join(appdata, 'Chromium'), + 'edge': os.path.join(appdata, 'Microsoft Edge'), + 'opera': os.path.join(appdata, 'com.operasoftware.Opera'), + 'vivaldi': os.path.join(appdata, 'Vivaldi'), + }[browser_name] + + else: + raise ValueError('unsupported platform: {}'.format(sys.platform)) + + # Linux keyring names can be determined by snooping on dbus 
while opening the browser in KDE: + # dbus-monitor "interface='org.kde.KWallet'" "type=method_return" + keyring_name = { + 'brave': 'Brave', + 'chrome': 'Chrome', + 'chromium': 'Chromium', + 'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium', + 'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium', + 'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome', + }[browser_name] + + browsers_without_profiles = {'opera'} + + return { + 'browser_dir': browser_dir, + 'keyring_name': keyring_name, + 'supports_profiles': browser_name not in browsers_without_profiles + } + + +def _extract_chrome_cookies(browser_name, profile, keyring, logger): + logger.info('Extracting cookies from {}'.format(browser_name)) + + if not SQLITE_AVAILABLE: + logger.warning(('Cannot extract cookies from {} without sqlite3 support. ' + 'Please use a python interpreter compiled with sqlite3 support').format(browser_name)) + return YoutubeDLCookieJar() + + config = _get_chromium_based_browser_settings(browser_name) + + if profile is None: + search_root = config['browser_dir'] + elif _is_path(profile): + search_root = profile + config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile + else: + if config['supports_profiles']: + search_root = os.path.join(config['browser_dir'], profile) + else: + logger.error('{} does not support profiles'.format(browser_name)) + search_root = config['browser_dir'] + + cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies') + if cookie_database_path is None: + raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root)) + logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path)) + + decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring) + + with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: + cursor = None + try: + cursor = _open_database_copy(cookie_database_path, tmpdir) + cursor.connection.text_factory = bytes + column_names = _get_column_names(cursor, 'cookies') + secure_column = 'is_secure' if 'is_secure' in column_names else 'secure' + cursor.execute('SELECT host_key, name, value, encrypted_value, path, ' + 'expires_utc, {} FROM cookies'.format(secure_column)) + jar = YoutubeDLCookieJar() + failed_cookies = 0 + unencrypted_cookies = 0 + for host_key, name, value, encrypted_value, path, expires_utc, is_secure in cursor.fetchall(): + host_key = host_key.decode('utf-8') + name = name.decode('utf-8') + value = value.decode('utf-8') + path = path.decode('utf-8') + + if not value and encrypted_value: + value = decryptor.decrypt(encrypted_value) + if value is None: + failed_cookies += 1 + continue + else: + unencrypted_cookies += 1 + + cookie = compat_cookiejar_Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False, + comment=None, comment_url=None, rest={}) + jar.set_cookie(cookie) + if failed_cookies > 0: + failed_message = ' ({} could not be decrypted)'.format(failed_cookies) + else: + failed_message = '' + logger.info('Extracted {} cookies from {}{}'.format(len(jar), browser_name, failed_message)) + counts = decryptor.cookie_counts.copy() + counts['unencrypted'] = unencrypted_cookies + logger.debug('cookie version breakdown: {}'.format(counts)) + return jar + finally: + if 
cursor is not None: + cursor.connection.close() + + +class ChromeCookieDecryptor: + """ + Overview: + + Linux: + - cookies are either v10 or v11 + - v10: AES-CBC encrypted with a fixed key + - v11: AES-CBC encrypted with an OS protected key (keyring) + - v11 keys can be stored in various places depending on the activate desktop environment [2] + + Mac: + - cookies are either v10 or not v10 + - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux + - not v10: 'old data' stored as plaintext + + Windows: + - cookies are either v10 or not v10 + - v10: AES-GCM encrypted with a key which is encrypted with DPAPI + - not v10: encrypted with DPAPI + + Sources: + - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/ + - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc + - KeyStorageLinux::CreateService + """ + + def decrypt(self, encrypted_value): + raise NotImplementedError + + @property + def cookie_counts(self): + raise NotImplementedError + + +def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None): + if sys.platform in ('linux', 'linux2'): + return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring) + elif sys.platform == 'darwin': + return MacChromeCookieDecryptor(browser_keyring_name, logger) + elif sys.platform == 'win32': + return WindowsChromeCookieDecryptor(browser_root, logger) + else: + raise NotImplementedError('Chrome cookie decryption is not supported ' + 'on this platform: {}'.format(sys.platform)) + + +class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): + def __init__(self, browser_keyring_name, logger, *, keyring=None): + self._logger = logger + self._v10_key = self.derive_key(b'peanuts') + password = _get_linux_keyring_password(browser_keyring_name, keyring, logger) + self._v11_key = None if password is None else self.derive_key(password) + self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0} + + @staticmethod + def derive_key(password): + # values from + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc + return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16) + + @property + def cookie_counts(self): + return self._cookie_counts + + def decrypt(self, encrypted_value): + version = encrypted_value[:3] + ciphertext = encrypted_value[3:] + + if version == b'v10': + self._cookie_counts['v10'] += 1 + return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger) + + elif version == b'v11': + self._cookie_counts['v11'] += 1 + if self._v11_key is None: + self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True) + return None + return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger) + + else: + self._cookie_counts['other'] += 1 + return None + + +class MacChromeCookieDecryptor(ChromeCookieDecryptor): + def __init__(self, browser_keyring_name, logger): + self._logger = logger + password = _get_mac_keyring_password(browser_keyring_name, logger) + self._v10_key = None if password is None else self.derive_key(password) + self._cookie_counts = {'v10': 0, 'other': 0} + + @staticmethod + def derive_key(password): + # values from + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm + return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16) + + @property + def cookie_counts(self): + return self._cookie_counts 
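+    # NOTE: derive_key above is a thin wrapper around hashlib.pbkdf2_hmac (via
+    # pbkdf2_sha1 below). As an illustration only, the Linux v10 key -- which
+    # uses the fixed password b'peanuts' and a single iteration, as seen in
+    # LinuxChromeCookieDecryptor -- can be reproduced standalone:
+    #
+    #     from hashlib import pbkdf2_hmac
+    #     key = pbkdf2_hmac('sha1', b'peanuts', b'saltysalt', 1, 16)
+    #     # -> 16-byte AES-128 key, the same constants Chromium uses on Linux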
+
+    def decrypt(self, encrypted_value):
+        version = encrypted_value[:3]
+        ciphertext = encrypted_value[3:]
+
+        if version == b'v10':
+            self._cookie_counts['v10'] += 1
+            if self._v10_key is None:
+                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
+                return None
+
+            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
+
+        else:
+            self._cookie_counts['other'] += 1
+            # other prefixes are considered 'old data' which were stored as plaintext
+            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
+            return encrypted_value
+
+
+class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
+    def __init__(self, browser_root, logger):
+        self._logger = logger
+        self._v10_key = _get_windows_v10_key(browser_root, logger)
+        self._cookie_counts = {'v10': 0, 'other': 0}
+
+    @property
+    def cookie_counts(self):
+        return self._cookie_counts
+
+    def decrypt(self, encrypted_value):
+        version = encrypted_value[:3]
+        ciphertext = encrypted_value[3:]
+
+        if version == b'v10':
+            self._cookie_counts['v10'] += 1
+            if self._v10_key is None:
+                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
+                return None
+
+            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
+            # kNonceLength
+            nonce_length = 96 // 8
+            # boringssl
+            # EVP_AEAD_AES_GCM_TAG_LEN
+            authentication_tag_length = 16
+
+            raw_ciphertext = ciphertext
+            nonce = raw_ciphertext[:nonce_length]
+            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
+            authentication_tag = raw_ciphertext[-authentication_tag_length:]
+
+            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
+
+        else:
+            self._cookie_counts['other'] += 1
+            # any other prefix means the data is DPAPI encrypted
+            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
+            return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8')
+
+
+def _extract_safari_cookies(profile, logger):
+    if profile is not None:
+        logger.error('safari does not support profiles')
+    if sys.platform != 'darwin':
+        raise ValueError('unsupported platform: {}'.format(sys.platform))
+
+    cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
+
+    if not os.path.isfile(cookies_path):
+        raise FileNotFoundError('could not find safari cookies database')
+
+    with open(cookies_path, 'rb') as f:
+        cookies_data = f.read()
+
+    jar = parse_safari_cookies(cookies_data, logger=logger)
+    logger.info('Extracted {} cookies from safari'.format(len(jar)))
+    return jar
+
+
+class ParserError(Exception):
+    pass
+
+
+class DataParser:
+    def __init__(self, data, logger):
+        self._data = data
+        self.cursor = 0
+        self._logger = logger
+
+    def read_bytes(self, num_bytes):
+        if num_bytes < 0:
+            raise ParserError('invalid read of {} bytes'.format(num_bytes))
+        end = self.cursor + num_bytes
+        if end > len(self._data):
+            raise ParserError('reached end of input')
+        data = self._data[self.cursor:end]
+        self.cursor = end
+        return data
+
+    def expect_bytes(self, expected_value, message):
+        value = self.read_bytes(len(expected_value))
+        if value != expected_value:
+            raise ParserError('unexpected value: {} != {} ({})'.format(value, expected_value, message))
+
+    def read_uint(self, big_endian=False):
+        data_format = '>I' if big_endian else '<I'
+        return struct.unpack(data_format, self.read_bytes(4))[0]
+
+    def read_double(self, big_endian=False):
+        data_format = '>d' if big_endian else '<d'
+        return struct.unpack(data_format, self.read_bytes(8))[0]
+
+    def read_cstring(self):
+        buffer = []
+        while True:
+            c = self.read_bytes(1)
+            if c == b'\x00':
+                return b''.join(buffer).decode('utf-8')
+            else:
+                buffer.append(c)
+
+    def skip(self, num_bytes, description='unknown'):
+        if num_bytes > 0:
+            self._logger.debug('skipping {} bytes ({}): {}'.format(
+                num_bytes, description, self.read_bytes(num_bytes)))
+        elif
num_bytes < 0: + raise ParserError('invalid skip of {} bytes'.format(num_bytes)) + + def skip_to(self, offset, description='unknown'): + self.skip(offset - self.cursor, description) + + def skip_to_end(self, description='unknown'): + self.skip_to(len(self._data), description) + + +def _mac_absolute_time_to_posix(timestamp): + return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp()) + + +def _parse_safari_cookies_header(data, logger): + p = DataParser(data, logger) + p.expect_bytes(b'cook', 'database signature') + number_of_pages = p.read_uint(big_endian=True) + page_sizes = [p.read_uint(big_endian=True) for _ in range(number_of_pages)] + return page_sizes, p.cursor + + +def _parse_safari_cookies_page(data, jar, logger): + p = DataParser(data, logger) + p.expect_bytes(b'\x00\x00\x01\x00', 'page signature') + number_of_cookies = p.read_uint() + record_offsets = [p.read_uint() for _ in range(number_of_cookies)] + if number_of_cookies == 0: + logger.debug('a cookies page of size {} has no cookies'.format(len(data))) + return + + p.skip_to(record_offsets[0], 'unknown page header field') + + for record_offset in record_offsets: + p.skip_to(record_offset, 'space between records') + record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger) + p.read_bytes(record_length) + p.skip_to_end('space in between pages') + + +def _parse_safari_cookies_record(data, jar, logger): + p = DataParser(data, logger) + record_size = p.read_uint() + p.skip(4, 'unknown record field 1') + flags = p.read_uint() + is_secure = bool(flags & 0x0001) + p.skip(4, 'unknown record field 2') + domain_offset = p.read_uint() + name_offset = p.read_uint() + path_offset = p.read_uint() + value_offset = p.read_uint() + p.skip(8, 'unknown record field 3') + expiration_date = _mac_absolute_time_to_posix(p.read_double()) + _creation_date = _mac_absolute_time_to_posix(p.read_double()) # noqa: F841 + + try: + p.skip_to(domain_offset) + domain = p.read_cstring() + + p.skip_to(name_offset) + name = p.read_cstring() + + p.skip_to(path_offset) + path = p.read_cstring() + + p.skip_to(value_offset) + value = p.read_cstring() + except UnicodeDecodeError: + logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True) + return record_size + + p.skip_to(record_size, 'space at the end of the record') + + cookie = compat_cookiejar_Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False, + comment=None, comment_url=None, rest={}) + jar.set_cookie(cookie) + return record_size + + +def parse_safari_cookies(data, jar=None, logger=YDLLogger()): + """ + References: + - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc + - this data appears to be out of date but the important parts of the database structure is the same + - there are a few bytes here and there which are skipped during parsing + """ + if jar is None: + jar = YoutubeDLCookieJar() + page_sizes, body_start = _parse_safari_cookies_header(data, logger) + p = DataParser(data[body_start:], logger) + for page_size in page_sizes: + _parse_safari_cookies_page(p.read_bytes(page_size), jar, logger) + p.skip_to_end('footer') + return jar + + +class _LinuxDesktopEnvironment(Enum): + """ + 
https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h + DesktopEnvironment + """ + OTHER = auto() + CINNAMON = auto() + GNOME = auto() + KDE = auto() + PANTHEON = auto() + UNITY = auto() + XFCE = auto() + + +class _LinuxKeyring(Enum): + """ + https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h + SelectedLinuxBackend + """ + KWALLET = auto() + GNOMEKEYRING = auto() + BASICTEXT = auto() + + +SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys() + + +def _get_linux_desktop_environment(env): + """ + https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc + GetDesktopEnvironment + """ + xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None) + desktop_session = env.get('DESKTOP_SESSION', None) + if xdg_current_desktop is not None: + xdg_current_desktop = xdg_current_desktop.split(':')[0].strip() + + if xdg_current_desktop == 'Unity': + if desktop_session is not None and 'gnome-fallback' in desktop_session: + return _LinuxDesktopEnvironment.GNOME + else: + return _LinuxDesktopEnvironment.UNITY + elif xdg_current_desktop == 'GNOME': + return _LinuxDesktopEnvironment.GNOME + elif xdg_current_desktop == 'X-Cinnamon': + return _LinuxDesktopEnvironment.CINNAMON + elif xdg_current_desktop == 'KDE': + return _LinuxDesktopEnvironment.KDE + elif xdg_current_desktop == 'Pantheon': + return _LinuxDesktopEnvironment.PANTHEON + elif xdg_current_desktop == 'XFCE': + return _LinuxDesktopEnvironment.XFCE + elif desktop_session is not None: + if desktop_session in ('mate', 'gnome'): + return _LinuxDesktopEnvironment.GNOME + elif 'kde' in desktop_session: + return _LinuxDesktopEnvironment.KDE + elif 'xfce' in desktop_session: + return _LinuxDesktopEnvironment.XFCE + else: + if 'GNOME_DESKTOP_SESSION_ID' in env: + return _LinuxDesktopEnvironment.GNOME + elif 'KDE_FULL_SESSION' in env: + return _LinuxDesktopEnvironment.KDE + return _LinuxDesktopEnvironment.OTHER + + +def _choose_linux_keyring(logger): + """ + https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc + SelectBackend + """ + desktop_environment = _get_linux_desktop_environment(os.environ) + logger.debug('detected desktop environment: {}'.format(desktop_environment.name)) + if desktop_environment == _LinuxDesktopEnvironment.KDE: + linux_keyring = _LinuxKeyring.KWALLET + elif desktop_environment == _LinuxDesktopEnvironment.OTHER: + linux_keyring = _LinuxKeyring.BASICTEXT + else: + linux_keyring = _LinuxKeyring.GNOMEKEYRING + return linux_keyring + + +def _get_kwallet_network_wallet(logger): + """ The name of the wallet used to store network passwords. 
+ + https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc + KWalletDBus::NetworkWallet + which does a dbus call to the following function: + https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html + Wallet::NetworkWallet + """ + default_wallet = 'kdewallet' + try: + proc = Popen([ + 'dbus-send', '--session', '--print-reply=literal', + '--dest=org.kde.kwalletd5', + '/modules/kwalletd5', + 'org.kde.KWallet.networkWallet' + ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) + + stdout, stderr = proc.communicate_or_kill() + if proc.returncode != 0: + logger.warning('failed to read NetworkWallet') + return default_wallet + else: + network_wallet = stdout.decode('utf-8').strip() + logger.debug('NetworkWallet = "{}"'.format(network_wallet)) + return network_wallet + except BaseException as e: + logger.warning('exception while obtaining NetworkWallet: {}'.format(e)) + return default_wallet + + +def _get_kwallet_password(browser_keyring_name, logger): + logger.debug('using kwallet-query to obtain password from kwallet') + + if shutil.which('kwallet-query') is None: + logger.error('kwallet-query command not found. KWallet and kwallet-query ' + 'must be installed to read from KWallet. kwallet-query should be' + 'included in the kwallet package for your distribution') + return b'' + + network_wallet = _get_kwallet_network_wallet(logger) + + try: + proc = Popen([ + 'kwallet-query', + '--read-password', '{} Safe Storage'.format(browser_keyring_name), + '--folder', '{} Keys'.format(browser_keyring_name), + network_wallet + ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) + + stdout, stderr = proc.communicate_or_kill() + if proc.returncode != 0: + logger.error('kwallet-query failed with return code {}. Please consult ' + 'the kwallet-query man page for details'.format(proc.returncode)) + return b'' + else: + if stdout.lower().startswith(b'failed to read'): + logger.debug('failed to read password from kwallet. Using empty string instead') + # this sometimes occurs in KDE because chrome does not check hasEntry and instead + # just tries to read the value (which kwallet returns "") whereas kwallet-query + # checks hasEntry. To verify this: + # dbus-monitor "interface='org.kde.KWallet'" "type=method_return" + # while starting chrome. + # this may be a bug as the intended behaviour is to generate a random password and store + # it, but that doesn't matter here. + return b'' + else: + logger.debug('password found') + if stdout[-1:] == b'\n': + stdout = stdout[:-1] + return stdout + except BaseException as e: + logger.warning(f'exception running kwallet-query: {type(e).__name__}({e})') + return b'' + + +def _get_gnome_keyring_password(browser_keyring_name, logger): + if not SECRETSTORAGE_AVAILABLE: + logger.error('secretstorage not available {}'.format(SECRETSTORAGE_UNAVAILABLE_REASON)) + return b'' + # the Gnome keyring does not seem to organise keys in the same way as KWallet, + # using `dbus-monitor` during startup, it can be observed that chromium lists all keys + # and presumably searches for its key in the list. It appears that we must do the same. 
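+    # (For Chrome, for instance, the item we are after carries the label
+    # 'Chrome Safe Storage'; every other item in the default collection is an
+    # unrelated desktop secret, hence the full scan below.)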
+ # https://github.com/jaraco/keyring/issues/556 + with contextlib.closing(secretstorage.dbus_init()) as con: + col = secretstorage.get_default_collection(con) + for item in col.get_all_items(): + if item.get_label() == '{} Safe Storage'.format(browser_keyring_name): + return item.get_secret() + else: + logger.error('failed to read from keyring') + return b'' + + +def _get_linux_keyring_password(browser_keyring_name, keyring, logger): + # note: chrome/chromium can be run with the following flags to determine which keyring backend + # it has chosen to use + # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_ + # Chromium supports a flag: --password-store= so the automatic detection + # will not be sufficient in all cases. + + keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger) + logger.debug(f'Chosen keyring: {keyring.name}') + + if keyring == _LinuxKeyring.KWALLET: + return _get_kwallet_password(browser_keyring_name, logger) + elif keyring == _LinuxKeyring.GNOMEKEYRING: + return _get_gnome_keyring_password(browser_keyring_name, logger) + elif keyring == _LinuxKeyring.BASICTEXT: + # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required) + return None + assert False, f'Unknown keyring {keyring}' + + +def _get_mac_keyring_password(browser_keyring_name, logger): + logger.debug('using find-generic-password to obtain password from OSX keychain') + try: + proc = Popen( + ['security', 'find-generic-password', + '-w', # write password to stdout + '-a', browser_keyring_name, # match 'account' + '-s', '{} Safe Storage'.format(browser_keyring_name)], # match 'service' + stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) + + stdout, stderr = proc.communicate_or_kill() + if stdout[-1:] == b'\n': + stdout = stdout[:-1] + return stdout + except BaseException as e: + logger.warning(f'exception running find-generic-password: {type(e).__name__}({e})') + return None + + +def _get_windows_v10_key(browser_root, logger): + path = _find_most_recently_used_file(browser_root, 'Local State') + if path is None: + logger.error('could not find local state file') + return None + with open(path, 'r', encoding='utf8') as f: + data = json.load(f) + try: + base64_key = data['os_crypt']['encrypted_key'] + except KeyError: + logger.error('no encrypted key in Local State') + return None + encrypted_key = compat_b64decode(base64_key) + prefix = b'DPAPI' + if not encrypted_key.startswith(prefix): + logger.error('invalid key') + return None + return _decrypt_windows_dpapi(encrypted_key[len(prefix):], logger) + + +def pbkdf2_sha1(password, salt, iterations, key_length): + return pbkdf2_hmac('sha1', password, salt, iterations, key_length) + + +def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16): + plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)) + try: + return plaintext.decode('utf-8') + except UnicodeDecodeError: + logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) + return None + + +def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): + try: + plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce) + except ValueError: + logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. 
Possibly the key is wrong?', only_once=True) + return None + + try: + return plaintext.decode('utf-8') + except UnicodeDecodeError: + logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) + return None + + +def _decrypt_windows_dpapi(ciphertext, logger): + """ + References: + - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata + """ + from ctypes.wintypes import DWORD + + class DATA_BLOB(ctypes.Structure): + _fields_ = [('cbData', DWORD), + ('pbData', ctypes.POINTER(ctypes.c_char))] + + buffer = ctypes.create_string_buffer(ciphertext) + blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer) + blob_out = DATA_BLOB() + ret = ctypes.windll.crypt32.CryptUnprotectData( + ctypes.byref(blob_in), # pDataIn + None, # ppszDataDescr: human readable description of pDataIn + None, # pOptionalEntropy: salt? + None, # pvReserved: must be NULL + None, # pPromptStruct: information about prompts to display + 0, # dwFlags + ctypes.byref(blob_out) # pDataOut + ) + if not ret: + logger.warning('failed to decrypt with DPAPI', only_once=True) + return None + + result = ctypes.string_at(blob_out.pbData, blob_out.cbData) + ctypes.windll.kernel32.LocalFree(blob_out.pbData) + return result + + +def _config_home(): + return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config')) + + +def _open_database_copy(database_path, tmpdir): + # cannot open sqlite databases if they are already in use (e.g. by the browser) + database_copy_path = os.path.join(tmpdir, 'temporary.sqlite') + shutil.copy(database_path, database_copy_path) + conn = sqlite3.connect(database_copy_path) + return conn.cursor() + + +def _get_column_names(cursor, table_name): + table_info = cursor.execute('PRAGMA table_info({})'.format(table_name)).fetchall() + return [row[1].decode('utf-8') for row in table_info] + + +def _find_most_recently_used_file(root, filename): + # if there are multiple browser profiles, take the most recently used one + paths = [] + for root, dirs, files in os.walk(root): + for file in files: + if file == filename: + paths.append(os.path.join(root, file)) + return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime) + + +def _merge_cookie_jars(jars): + output_jar = YoutubeDLCookieJar() + for jar in jars: + for cookie in jar: + output_jar.set_cookie(cookie) + if jar.filename is not None: + output_jar.filename = jar.filename + return output_jar + + +def _is_path(value): + return os.path.sep in value + + +def _parse_browser_specification(browser_name, profile=None, keyring=None): + if browser_name not in SUPPORTED_BROWSERS: + raise ValueError(f'unsupported browser: "{browser_name}"') + if keyring not in (None, *SUPPORTED_KEYRINGS): + raise ValueError(f'unsupported keyring: "{keyring}"') + if profile is not None and _is_path(profile): + profile = os.path.expanduser(profile) + return browser_name, profile, keyring diff --git a/plugins/youtube_download/yt_dlp/downloader/__init__.py b/plugins/youtube_download/yt_dlp/downloader/__init__.py new file mode 100644 index 0000000..acc19f4 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/downloader/__init__.py @@ -0,0 +1,135 @@ +from __future__ import unicode_literals + +from ..compat import compat_str +from ..utils import ( + determine_protocol, + NO_DEFAULT +) + + +def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False): + info_dict['protocol'] = determine_protocol(info_dict) + info_copy = info_dict.copy() + 
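    # NOTE: determine_protocol() may return a '+'-joined pair such as
+    # 'm3u8_native+https' when video and audio travel over different protocols;
+    # it is split below and a downloader is resolved per component. Unless all
+    # components land on FFmpegFD (which can merge on the fly) or the DASH
+    # generator case applies, a mixed result returns None and the caller
+    # downloads each requested format with its own downloader.
+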
info_copy['to_stdout'] = to_stdout + + protocols = (protocol or info_copy['protocol']).split('+') + downloaders = [_get_suitable_downloader(info_copy, proto, params, default) for proto in protocols] + + if set(downloaders) == {FFmpegFD} and FFmpegFD.can_merge_formats(info_copy, params): + return FFmpegFD + elif (set(downloaders) == {DashSegmentsFD} + and not (to_stdout and len(protocols) > 1) + and set(protocols) == {'http_dash_segments_generator'}): + return DashSegmentsFD + elif len(downloaders) == 1: + return downloaders[0] + return None + + +# Some of these require get_suitable_downloader +from .common import FileDownloader +from .dash import DashSegmentsFD +from .f4m import F4mFD +from .hls import HlsFD +from .http import HttpFD +from .rtmp import RtmpFD +from .rtsp import RtspFD +from .ism import IsmFD +from .mhtml import MhtmlFD +from .niconico import NiconicoDmcFD +from .websocket import WebSocketFragmentFD +from .youtube_live_chat import YoutubeLiveChatFD +from .external import ( + get_external_downloader, + FFmpegFD, +) + +PROTOCOL_MAP = { + 'rtmp': RtmpFD, + 'rtmpe': RtmpFD, + 'rtmp_ffmpeg': FFmpegFD, + 'm3u8_native': HlsFD, + 'm3u8': FFmpegFD, + 'mms': RtspFD, + 'rtsp': RtspFD, + 'f4m': F4mFD, + 'http_dash_segments': DashSegmentsFD, + 'http_dash_segments_generator': DashSegmentsFD, + 'ism': IsmFD, + 'mhtml': MhtmlFD, + 'niconico_dmc': NiconicoDmcFD, + 'websocket_frag': WebSocketFragmentFD, + 'youtube_live_chat': YoutubeLiveChatFD, + 'youtube_live_chat_replay': YoutubeLiveChatFD, +} + + +def shorten_protocol_name(proto, simplify=False): + short_protocol_names = { + 'm3u8_native': 'm3u8_n', + 'rtmp_ffmpeg': 'rtmp_f', + 'http_dash_segments': 'dash', + 'http_dash_segments_generator': 'dash_g', + 'niconico_dmc': 'dmc', + 'websocket_frag': 'WSfrag', + } + if simplify: + short_protocol_names.update({ + 'https': 'http', + 'ftps': 'ftp', + 'm3u8_native': 'm3u8', + 'http_dash_segments_generator': 'dash', + 'rtmp_ffmpeg': 'rtmp', + 'm3u8_frag_urls': 'm3u8', + 'dash_frag_urls': 'dash', + }) + return short_protocol_names.get(proto, proto) + + +def _get_suitable_downloader(info_dict, protocol, params, default): + """Get the downloader class that can handle the info dict.""" + if default is NO_DEFAULT: + default = HttpFD + + # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict): + # return FFmpegFD + + info_dict['protocol'] = protocol + downloaders = params.get('external_downloader') + external_downloader = ( + downloaders if isinstance(downloaders, compat_str) or downloaders is None + else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default'))) + + if external_downloader is None: + if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params): + return FFmpegFD + elif external_downloader.lower() != 'native': + ed = get_external_downloader(external_downloader) + if ed.can_download(info_dict, external_downloader): + return ed + + if protocol == 'http_dash_segments': + if info_dict.get('is_live') and (external_downloader or '').lower() != 'native': + return FFmpegFD + + if protocol in ('m3u8', 'm3u8_native'): + if info_dict.get('is_live'): + return FFmpegFD + elif (external_downloader or '').lower() == 'native': + return HlsFD + elif get_suitable_downloader( + info_dict, params, None, protocol='m3u8_frag_urls', to_stdout=info_dict['to_stdout']): + return HlsFD + elif params.get('hls_prefer_native') is True: + return HlsFD + elif params.get('hls_prefer_native') is 
False: + return FFmpegFD + + return PROTOCOL_MAP.get(protocol, default) + + +__all__ = [ + 'FileDownloader', + 'get_suitable_downloader', + 'shorten_protocol_name', +] diff --git a/plugins/youtube_download/yt_dlp/downloader/common.py b/plugins/youtube_download/yt_dlp/downloader/common.py new file mode 100644 index 0000000..37321e3 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/downloader/common.py @@ -0,0 +1,472 @@ +from __future__ import division, unicode_literals + +import os +import re +import time +import random +import errno + +from ..utils import ( + decodeArgument, + encodeFilename, + error_to_compat_str, + format_bytes, + sanitize_open, + shell_quote, + timeconvert, + timetuple_from_msec, +) +from ..minicurses import ( + MultilineLogger, + MultilinePrinter, + QuietMultilinePrinter, + BreaklineStatusPrinter +) + + +class FileDownloader(object): + """File Downloader class. + + File downloader objects are the ones responsible of downloading the + actual video file and writing it to disk. + + File downloaders accept a lot of parameters. In order not to saturate + the object constructor with arguments, it receives a dictionary of + options instead. + + Available options: + + verbose: Print additional info to stdout. + quiet: Do not print messages to stdout. + ratelimit: Download speed limit, in bytes/sec. + throttledratelimit: Assume the download is being throttled below this speed (bytes/sec) + retries: Number of times to retry for HTTP error 5xx + file_access_retries: Number of times to retry on file access error + buffersize: Size of download buffer in bytes. + noresizebuffer: Do not automatically resize the download buffer. + continuedl: Try to continue downloads if possible. + noprogress: Do not print the progress bar. + nopart: Do not use temporary .part files. + updatetime: Use the Last-modified header to set output file timestamps. + test: Download only first bytes to test the downloader. + min_filesize: Skip files smaller than this size + max_filesize: Skip files larger than this size + xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. + external_downloader_args: A dictionary of downloader keys (in lower case) + and a list of additional command-line arguments for the + executable. Use 'default' as the name for arguments to be + passed to all downloaders. For compatibility with youtube-dl, + a single list of args can also be used + hls_use_mpegts: Use the mpegts container for HLS videos. + http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be + useful for bypassing bandwidth throttling imposed by + a webserver (experimental) + progress_template: See YoutubeDL.py + + Subclasses of this one must re-define the real_download method. 
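+
+    In practice a subclass only needs to implement real_download(filename,
+    info_dict); temporary ".part" naming, progress reporting and the
+    pre-download sleep handling are provided by this base class (see
+    download() below).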
+ """ + + _TEST_FILE_SIZE = 10241 + params = None + + def __init__(self, ydl, params): + """Create a FileDownloader object with the given options.""" + self.ydl = ydl + self._progress_hooks = [] + self.params = params + self._prepare_multiline_status() + self.add_progress_hook(self.report_progress) + + @staticmethod + def format_seconds(seconds): + time = timetuple_from_msec(seconds * 1000) + if time.hours > 99: + return '--:--:--' + if not time.hours: + return '%02d:%02d' % time[1:-1] + return '%02d:%02d:%02d' % time[:-1] + + @staticmethod + def calc_percent(byte_counter, data_len): + if data_len is None: + return None + return float(byte_counter) / float(data_len) * 100.0 + + @staticmethod + def format_percent(percent): + if percent is None: + return '---.-%' + elif percent == 100: + return '100%' + return '%6s' % ('%3.1f%%' % percent) + + @staticmethod + def calc_eta(start, now, total, current): + if total is None: + return None + if now is None: + now = time.time() + dif = now - start + if current == 0 or dif < 0.001: # One millisecond + return None + rate = float(current) / dif + return int((float(total) - float(current)) / rate) + + @staticmethod + def format_eta(eta): + if eta is None: + return '--:--' + return FileDownloader.format_seconds(eta) + + @staticmethod + def calc_speed(start, now, bytes): + dif = now - start + if bytes == 0 or dif < 0.001: # One millisecond + return None + return float(bytes) / dif + + @staticmethod + def format_speed(speed): + if speed is None: + return '%10s' % '---b/s' + return '%10s' % ('%s/s' % format_bytes(speed)) + + @staticmethod + def format_retries(retries): + return 'inf' if retries == float('inf') else '%.0f' % retries + + @staticmethod + def best_block_size(elapsed_time, bytes): + new_min = max(bytes / 2.0, 1.0) + new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB + if elapsed_time < 0.001: + return int(new_max) + rate = bytes / elapsed_time + if rate > new_max: + return int(new_max) + if rate < new_min: + return int(new_min) + return int(rate) + + @staticmethod + def parse_bytes(bytestr): + """Parse a string indicating a byte quantity into an integer.""" + matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) + if matchobj is None: + return None + number = float(matchobj.group(1)) + multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) + return int(round(number * multiplier)) + + def to_screen(self, *args, **kargs): + self.ydl.to_stdout(*args, quiet=self.params.get('quiet'), **kargs) + + def to_stderr(self, message): + self.ydl.to_stderr(message) + + def to_console_title(self, message): + self.ydl.to_console_title(message) + + def trouble(self, *args, **kargs): + self.ydl.trouble(*args, **kargs) + + def report_warning(self, *args, **kargs): + self.ydl.report_warning(*args, **kargs) + + def report_error(self, *args, **kargs): + self.ydl.report_error(*args, **kargs) + + def write_debug(self, *args, **kargs): + self.ydl.write_debug(*args, **kargs) + + def slow_down(self, start_time, now, byte_counter): + """Sleep if the download speed is over the rate limit.""" + rate_limit = self.params.get('ratelimit') + if rate_limit is None or byte_counter == 0: + return + if now is None: + now = time.time() + elapsed = now - start_time + if elapsed <= 0.0: + return + speed = float(byte_counter) / elapsed + if speed > rate_limit: + sleep_time = float(byte_counter) / rate_limit - elapsed + if sleep_time > 0: + time.sleep(sleep_time) + + def temp_name(self, filename): + """Returns a temporary filename for the given 
filename.""" + if self.params.get('nopart', False) or filename == '-' or \ + (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): + return filename + return filename + '.part' + + def undo_temp_name(self, filename): + if filename.endswith('.part'): + return filename[:-len('.part')] + return filename + + def ytdl_filename(self, filename): + return filename + '.ytdl' + + def sanitize_open(self, filename, open_mode): + file_access_retries = self.params.get('file_access_retries', 10) + retry = 0 + while True: + try: + return sanitize_open(filename, open_mode) + except (IOError, OSError) as err: + retry = retry + 1 + if retry > file_access_retries or err.errno not in (errno.EACCES,): + raise + self.to_screen( + '[download] Got file access error. Retrying (attempt %d of %s) ...' + % (retry, self.format_retries(file_access_retries))) + time.sleep(0.01) + + def try_rename(self, old_filename, new_filename): + if old_filename == new_filename: + return + try: + os.replace(old_filename, new_filename) + except (IOError, OSError) as err: + self.report_error(f'unable to rename file: {err}') + + def try_utime(self, filename, last_modified_hdr): + """Try to set the last-modified time of the given file.""" + if last_modified_hdr is None: + return + if not os.path.isfile(encodeFilename(filename)): + return + timestr = last_modified_hdr + if timestr is None: + return + filetime = timeconvert(timestr) + if filetime is None: + return filetime + # Ignore obviously invalid dates + if filetime == 0: + return + try: + os.utime(filename, (time.time(), filetime)) + except Exception: + pass + return filetime + + def report_destination(self, filename): + """Report destination filename.""" + self.to_screen('[download] Destination: ' + filename) + + def _prepare_multiline_status(self, lines=1): + if self.params.get('noprogress'): + self._multiline = QuietMultilinePrinter() + elif self.ydl.params.get('logger'): + self._multiline = MultilineLogger(self.ydl.params['logger'], lines) + elif self.params.get('progress_with_newline'): + self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines) + else: + self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet')) + self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color') + + def _finish_multiline_status(self): + self._multiline.end() + + _progress_styles = { + 'downloaded_bytes': 'light blue', + 'percent': 'light blue', + 'eta': 'yellow', + 'speed': 'green', + 'elapsed': 'bold white', + 'total_bytes': '', + 'total_bytes_estimate': '', + } + + def _report_progress_status(self, s, default_template): + for name, style in self._progress_styles.items(): + name = f'_{name}_str' + if name not in s: + continue + s[name] = self._format_progress(s[name], style) + s['_default_template'] = default_template % s + + progress_dict = s.copy() + progress_dict.pop('info_dict') + progress_dict = {'info': s['info_dict'], 'progress': progress_dict} + + progress_template = self.params.get('progress_template', {}) + self._multiline.print_at_line(self.ydl.evaluate_outtmpl( + progress_template.get('download') or '[download] %(progress._default_template)s', + progress_dict), s.get('progress_idx') or 0) + self.to_console_title(self.ydl.evaluate_outtmpl( + progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s', + progress_dict)) + + def _format_progress(self, *args, **kwargs): + return self.ydl._format_text( + self._multiline.stream, 
self._multiline.allow_colors, *args, **kwargs) + + def report_progress(self, s): + if s['status'] == 'finished': + if self.params.get('noprogress'): + self.to_screen('[download] Download completed') + msg_template = '100%%' + if s.get('total_bytes') is not None: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + msg_template += ' of %(_total_bytes_str)s' + if s.get('elapsed') is not None: + s['_elapsed_str'] = self.format_seconds(s['elapsed']) + msg_template += ' in %(_elapsed_str)s' + s['_percent_str'] = self.format_percent(100) + self._report_progress_status(s, msg_template) + return + + if s['status'] != 'downloading': + return + + if s.get('eta') is not None: + s['_eta_str'] = self.format_eta(s['eta']) + else: + s['_eta_str'] = 'Unknown' + + if s.get('total_bytes') and s.get('downloaded_bytes') is not None: + s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes']) + elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None: + s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate']) + else: + if s.get('downloaded_bytes') == 0: + s['_percent_str'] = self.format_percent(0) + else: + s['_percent_str'] = 'Unknown %' + + if s.get('speed') is not None: + s['_speed_str'] = self.format_speed(s['speed']) + else: + s['_speed_str'] = 'Unknown speed' + + if s.get('total_bytes') is not None: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s' + elif s.get('total_bytes_estimate') is not None: + s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate']) + msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s' + else: + if s.get('downloaded_bytes') is not None: + s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes']) + if s.get('elapsed'): + s['_elapsed_str'] = self.format_seconds(s['elapsed']) + msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)' + else: + msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' + else: + msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s' + if s.get('fragment_index') and s.get('fragment_count'): + msg_template += ' (frag %(fragment_index)s/%(fragment_count)s)' + elif s.get('fragment_index'): + msg_template += ' (frag %(fragment_index)s)' + self._report_progress_status(s, msg_template) + + def report_resuming_byte(self, resume_len): + """Report attempt to resume at given byte.""" + self.to_screen('[download] Resuming download at byte %s' % resume_len) + + def report_retry(self, err, count, retries): + """Report retry in case of HTTP error 5xx""" + self.to_screen( + '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...' + % (error_to_compat_str(err), count, self.format_retries(retries))) + + def report_file_already_downloaded(self, *args, **kwargs): + """Report file has already been fully downloaded.""" + return self.ydl.report_file_already_downloaded(*args, **kwargs) + + def report_unable_to_resume(self): + """Report it was impossible to resume download.""" + self.to_screen('[download] Unable to resume') + + @staticmethod + def supports_manifest(manifest): + """ Whether the downloader can download the fragments from the manifest. + Redefine in subclasses if needed. 
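+        The base implementation returns None, i.e. it makes no claim either
+        way; a subclass may inspect the manifest text and return a truthy or
+        falsy answer for the manifest it is given.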
""" + pass + + def download(self, filename, info_dict, subtitle=False): + """Download to a filename using the info from info_dict + Return True on success and False otherwise + """ + + nooverwrites_and_exists = ( + not self.params.get('overwrites', True) + and os.path.exists(encodeFilename(filename)) + ) + + if not hasattr(filename, 'write'): + continuedl_and_exists = ( + self.params.get('continuedl', True) + and os.path.isfile(encodeFilename(filename)) + and not self.params.get('nopart', False) + ) + + # Check file already present + if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists): + self.report_file_already_downloaded(filename) + self._hook_progress({ + 'filename': filename, + 'status': 'finished', + 'total_bytes': os.path.getsize(encodeFilename(filename)), + }, info_dict) + self._finish_multiline_status() + return True, False + + if subtitle is False: + min_sleep_interval = self.params.get('sleep_interval') + if min_sleep_interval: + max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) + sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) + self.to_screen( + '[download] Sleeping %s seconds ...' % ( + int(sleep_interval) if sleep_interval.is_integer() + else '%.2f' % sleep_interval)) + time.sleep(sleep_interval) + else: + sleep_interval_sub = 0 + if type(self.params.get('sleep_interval_subtitles')) is int: + sleep_interval_sub = self.params.get('sleep_interval_subtitles') + if sleep_interval_sub > 0: + self.to_screen( + '[download] Sleeping %s seconds ...' % ( + sleep_interval_sub)) + time.sleep(sleep_interval_sub) + ret = self.real_download(filename, info_dict) + self._finish_multiline_status() + return ret, True + + def real_download(self, filename, info_dict): + """Real download process. Redefine in subclasses.""" + raise NotImplementedError('This method must be implemented by subclasses') + + def _hook_progress(self, status, info_dict): + if not self._progress_hooks: + return + status['info_dict'] = info_dict + # youtube-dl passes the same status object to all the hooks. + # Some third party scripts seems to be relying on this. + # So keep this behavior if possible + for ph in self._progress_hooks: + ph(status) + + def add_progress_hook(self, ph): + # See YoutubeDl.py (search for progress_hooks) for a description of + # this interface + self._progress_hooks.append(ph) + + def _debug_cmd(self, args, exe=None): + if not self.params.get('verbose', False): + return + + str_args = [decodeArgument(a) for a in args] + + if exe is None: + exe = os.path.basename(str_args[0]) + + self.write_debug('%s command line: %s' % (exe, shell_quote(str_args))) diff --git a/plugins/youtube_download/yt_dlp/downloader/dash.py b/plugins/youtube_download/yt_dlp/downloader/dash.py new file mode 100644 index 0000000..a845ee7 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/downloader/dash.py @@ -0,0 +1,80 @@ +from __future__ import unicode_literals +import time + +from ..downloader import get_suitable_downloader +from .fragment import FragmentFD + +from ..utils import urljoin + + +class DashSegmentsFD(FragmentFD): + """ + Download segments in a DASH manifest. 
External downloaders can take over + the fragment downloads by supporting the 'dash_frag_urls' protocol + """ + + FD_NAME = 'dashsegments' + + def real_download(self, filename, info_dict): + if info_dict.get('is_live') and set(info_dict['protocol'].split('+')) != {'http_dash_segments_generator'}: + self.report_error('Live DASH videos are not supported') + + real_start = time.time() + real_downloader = get_suitable_downloader( + info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-')) + + requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])] + args = [] + for fmt in requested_formats or [info_dict]: + try: + fragment_count = 1 if self.params.get('test') else len(fmt['fragments']) + except TypeError: + fragment_count = None + ctx = { + 'filename': fmt.get('filepath') or filename, + 'live': 'is_from_start' if fmt.get('is_from_start') else fmt.get('is_live'), + 'total_frags': fragment_count, + } + + if real_downloader: + self._prepare_external_frag_download(ctx) + else: + self._prepare_and_start_frag_download(ctx, fmt) + ctx['start'] = real_start + + fragments_to_download = self._get_fragments(fmt, ctx) + + if real_downloader: + self.to_screen( + '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) + info_dict['fragments'] = list(fragments_to_download) + fd = real_downloader(self.ydl, self.params) + return fd.real_download(filename, info_dict) + + args.append([ctx, fragments_to_download, fmt]) + + return self.download_and_append_fragments_multiple(*args) + + def _resolve_fragments(self, fragments, ctx): + fragments = fragments(ctx) if callable(fragments) else fragments + return [next(iter(fragments))] if self.params.get('test') else fragments + + def _get_fragments(self, fmt, ctx): + fragment_base_url = fmt.get('fragment_base_url') + fragments = self._resolve_fragments(fmt['fragments'], ctx) + + frag_index = 0 + for i, fragment in enumerate(fragments): + frag_index += 1 + if frag_index <= ctx['fragment_index']: + continue + fragment_url = fragment.get('url') + if not fragment_url: + assert fragment_base_url + fragment_url = urljoin(fragment_base_url, fragment['path']) + + yield { + 'frag_index': frag_index, + 'index': i, + 'url': fragment_url, + } diff --git a/plugins/youtube_download/yt_dlp/downloader/external.py b/plugins/youtube_download/yt_dlp/downloader/external.py new file mode 100644 index 0000000..f4fdcf1 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/downloader/external.py @@ -0,0 +1,526 @@ +from __future__ import unicode_literals + +import os.path +import re +import subprocess +import sys +import time + +from .fragment import FragmentFD +from ..compat import ( + compat_setenv, + compat_str, +) +from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS +from ..utils import ( + cli_option, + cli_valueless_option, + cli_bool_option, + _configuration_args, + determine_ext, + encodeFilename, + encodeArgument, + handle_youtubedl_headers, + check_executable, + Popen, + remove_end, +) + + +class ExternalFD(FragmentFD): + SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps') + can_download_to_stdout = False + + def real_download(self, filename, info_dict): + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + + try: + started = time.time() + retval = self._call_downloader(tmpfilename, info_dict) + except KeyboardInterrupt: + if not info_dict.get('is_live'): + raise + # Live stream downloading cancellation should be considered as + # 
correct and expected termination thus all postprocessing + # should take place + retval = 0 + self.to_screen('[%s] Interrupted by user' % self.get_basename()) + + if retval == 0: + status = { + 'filename': filename, + 'status': 'finished', + 'elapsed': time.time() - started, + } + if filename != '-': + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize)) + self.try_rename(tmpfilename, filename) + status.update({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + }) + self._hook_progress(status, info_dict) + return True + else: + self.to_stderr('\n') + self.report_error('%s exited with code %d' % ( + self.get_basename(), retval)) + return False + + @classmethod + def get_basename(cls): + return cls.__name__[:-2].lower() + + @property + def exe(self): + return self.get_basename() + + @classmethod + def available(cls, path=None): + path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT]) + if path: + cls.exe = path + return path + return False + + @classmethod + def supports(cls, info_dict): + return ( + (cls.can_download_to_stdout or not info_dict.get('to_stdout')) + and info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS) + + @classmethod + def can_download(cls, info_dict, path=None): + return cls.available(path) and cls.supports(info_dict) + + def _option(self, command_option, param): + return cli_option(self.params, command_option, param) + + def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None): + return cli_bool_option(self.params, command_option, param, true_value, false_value, separator) + + def _valueless_option(self, command_option, param, expected_value=True): + return cli_valueless_option(self.params, command_option, param, expected_value) + + def _configuration_args(self, keys=None, *args, **kwargs): + return _configuration_args( + self.get_basename(), self.params.get('external_downloader_args'), self.get_basename(), + keys, *args, **kwargs) + + def _call_downloader(self, tmpfilename, info_dict): + """ Either overwrite this or implement _make_cmd """ + cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] + + self._debug_cmd(cmd) + + if 'fragments' not in info_dict: + p = Popen(cmd, stderr=subprocess.PIPE) + _, stderr = p.communicate_or_kill() + if p.returncode != 0: + self.to_stderr(stderr.decode('utf-8', 'replace')) + return p.returncode + + fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + + count = 0 + while count <= fragment_retries: + p = Popen(cmd, stderr=subprocess.PIPE) + _, stderr = p.communicate_or_kill() + if p.returncode == 0: + break + # TODO: Decide whether to retry based on error code + # https://aria2.github.io/manual/en/html/aria2c.html#exit-status + self.to_stderr(stderr.decode('utf-8', 'replace')) + count += 1 + if count <= fragment_retries: + self.to_screen( + '[%s] Got error. Retrying fragments (attempt %d of %s)...' 
+ % (self.get_basename(), count, self.format_retries(fragment_retries))) + if count > fragment_retries: + if not skip_unavailable_fragments: + self.report_error('Giving up after %s fragment retries' % fragment_retries) + return -1 + + decrypt_fragment = self.decrypter(info_dict) + dest, _ = self.sanitize_open(tmpfilename, 'wb') + for frag_index, fragment in enumerate(info_dict['fragments']): + fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) + try: + src, _ = self.sanitize_open(fragment_filename, 'rb') + except IOError as err: + if skip_unavailable_fragments and frag_index > 1: + self.report_skip_fragment(frag_index, err) + continue + self.report_error(f'Unable to open fragment {frag_index}; {err}') + return -1 + dest.write(decrypt_fragment(fragment, src.read())) + src.close() + if not self.params.get('keep_fragments', False): + os.remove(encodeFilename(fragment_filename)) + dest.close() + os.remove(encodeFilename('%s.frag.urls' % tmpfilename)) + return 0 + + +class CurlFD(ExternalFD): + AVAILABLE_OPT = '-V' + + def _make_cmd(self, tmpfilename, info_dict): + cmd = [self.exe, '--location', '-o', tmpfilename] + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['--header', '%s: %s' % (key, val)] + + cmd += self._bool_option('--continue-at', 'continuedl', '-', '0') + cmd += self._valueless_option('--silent', 'noprogress') + cmd += self._valueless_option('--verbose', 'verbose') + cmd += self._option('--limit-rate', 'ratelimit') + retry = self._option('--retry', 'retries') + if len(retry) == 2: + if retry[1] in ('inf', 'infinite'): + retry[1] = '2147483647' + cmd += retry + cmd += self._option('--max-filesize', 'max_filesize') + cmd += self._option('--interface', 'source_address') + cmd += self._option('--proxy', 'proxy') + cmd += self._valueless_option('--insecure', 'nocheckcertificate') + cmd += self._configuration_args() + cmd += ['--', info_dict['url']] + return cmd + + def _call_downloader(self, tmpfilename, info_dict): + cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] + + self._debug_cmd(cmd) + + # curl writes the progress to stderr so don't capture it. 
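+        # Inheriting our stderr (no PIPE) lets curl render its progress bar
+        # itself; only the exit code is needed here to judge success.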
+ p = Popen(cmd) + p.communicate_or_kill() + return p.returncode + + +class AxelFD(ExternalFD): + AVAILABLE_OPT = '-V' + + def _make_cmd(self, tmpfilename, info_dict): + cmd = [self.exe, '-o', tmpfilename] + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['-H', '%s: %s' % (key, val)] + cmd += self._configuration_args() + cmd += ['--', info_dict['url']] + return cmd + + +class WgetFD(ExternalFD): + AVAILABLE_OPT = '--version' + + def _make_cmd(self, tmpfilename, info_dict): + cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['--header', '%s: %s' % (key, val)] + cmd += self._option('--limit-rate', 'ratelimit') + retry = self._option('--tries', 'retries') + if len(retry) == 2: + if retry[1] in ('inf', 'infinite'): + retry[1] = '0' + cmd += retry + cmd += self._option('--bind-address', 'source_address') + cmd += self._option('--proxy', 'proxy') + cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate') + cmd += self._configuration_args() + cmd += ['--', info_dict['url']] + return cmd + + +class Aria2cFD(ExternalFD): + AVAILABLE_OPT = '-v' + SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls') + + @staticmethod + def supports_manifest(manifest): + UNSUPPORTED_FEATURES = [ + r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [1] + # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 + ] + check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) + return all(check_results) + + def _make_cmd(self, tmpfilename, info_dict): + cmd = [self.exe, '-c', + '--console-log-level=warn', '--summary-interval=0', '--download-result=hide', + '--file-allocation=none', '-x16', '-j16', '-s16'] + if 'fragments' in info_dict: + cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true'] + else: + cmd += ['--min-split-size', '1M'] + + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['--header', '%s: %s' % (key, val)] + cmd += self._option('--max-overall-download-limit', 'ratelimit') + cmd += self._option('--interface', 'source_address') + cmd += self._option('--all-proxy', 'proxy') + cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') + cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=') + cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=') + cmd += self._configuration_args() + + # aria2c strips out spaces from the beginning/end of filenames and paths. + # We work around this issue by adding a "./" to the beginning of the + # filename and relative path, and adding a "/" at the end of the path. 
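+        # e.g. a bare filename " movie.mp4" (leading space) is passed as
+        # "--out ./ movie.mp4", so the space is no longer at the start of
+        # the path and survives aria2c's stripping.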
+ # See: https://github.com/yt-dlp/yt-dlp/issues/276 + # https://github.com/ytdl-org/youtube-dl/issues/20312 + # https://github.com/aria2/aria2/issues/1373 + dn = os.path.dirname(tmpfilename) + if dn: + if not os.path.isabs(dn): + dn = '.%s%s' % (os.path.sep, dn) + cmd += ['--dir', dn + os.path.sep] + if 'fragments' not in info_dict: + cmd += ['--out', '.%s%s' % (os.path.sep, os.path.basename(tmpfilename))] + cmd += ['--auto-file-renaming=false'] + + if 'fragments' in info_dict: + cmd += ['--file-allocation=none', '--uri-selector=inorder'] + url_list_file = '%s.frag.urls' % tmpfilename + url_list = [] + for frag_index, fragment in enumerate(info_dict['fragments']): + fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) + url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename)) + stream, _ = self.sanitize_open(url_list_file, 'wb') + stream.write('\n'.join(url_list).encode('utf-8')) + stream.close() + cmd += ['-i', url_list_file] + else: + cmd += ['--', info_dict['url']] + return cmd + + +class HttpieFD(ExternalFD): + AVAILABLE_OPT = '--version' + + @classmethod + def available(cls, path=None): + return super().available(path or 'http') + + def _make_cmd(self, tmpfilename, info_dict): + cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] + + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['%s:%s' % (key, val)] + return cmd + + +class FFmpegFD(ExternalFD): + SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments') + can_download_to_stdout = True + + @classmethod + def available(cls, path=None): + # TODO: Fix path for ffmpeg + # Fixme: This may be wrong when --ffmpeg-location is used + return FFmpegPostProcessor().available + + @classmethod + def supports(cls, info_dict): + return all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')) + + def on_process_started(self, proc, stdin): + """ Override this in subclasses """ + pass + + @classmethod + def can_merge_formats(cls, info_dict, params): + return ( + info_dict.get('requested_formats') + and info_dict.get('protocol') + and not params.get('allow_unplayable_formats') + and 'no-direct-merge' not in params.get('compat_opts', []) + and cls.can_download(info_dict)) + + def _call_downloader(self, tmpfilename, info_dict): + urls = [f['url'] for f in info_dict.get('requested_formats', [])] or [info_dict['url']] + ffpp = FFmpegPostProcessor(downloader=self) + if not ffpp.available: + self.report_error('m3u8 download detected but ffmpeg could not be found. Please install') + return False + ffpp.check_version() + + args = [ffpp.executable, '-y'] + + for log_level in ('quiet', 'verbose'): + if self.params.get(log_level, False): + args += ['-loglevel', log_level] + break + if not self.params.get('verbose'): + args += ['-hide_banner'] + + args += info_dict.get('_ffmpeg_args', []) + + # This option exists only for compatibility. 
Extractors should use `_ffmpeg_args` instead + seekable = info_dict.get('_seekable') + if seekable is not None: + # setting -seekable prevents ffmpeg from guessing if the server + # supports seeking(by adding the header `Range: bytes=0-`), which + # can cause problems in some cases + # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127 + # http://trac.ffmpeg.org/ticket/6125#comment:10 + args += ['-seekable', '1' if seekable else '0'] + + # start_time = info_dict.get('start_time') or 0 + # if start_time: + # args += ['-ss', compat_str(start_time)] + # end_time = info_dict.get('end_time') + # if end_time: + # args += ['-t', compat_str(end_time - start_time)] + + if info_dict.get('http_headers') is not None and re.match(r'^https?://', urls[0]): + # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: + # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. + headers = handle_youtubedl_headers(info_dict['http_headers']) + args += [ + '-headers', + ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())] + + env = None + proxy = self.params.get('proxy') + if proxy: + if not re.match(r'^[\da-zA-Z]+://', proxy): + proxy = 'http://%s' % proxy + + if proxy.startswith('socks'): + self.report_warning( + '%s does not support SOCKS proxies. Downloading is likely to fail. ' + 'Consider adding --hls-prefer-native to your command.' % self.get_basename()) + + # Since December 2015 ffmpeg supports -http_proxy option (see + # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) + # We could switch to the following code if we are able to detect version properly + # args += ['-http_proxy', proxy] + env = os.environ.copy() + compat_setenv('HTTP_PROXY', proxy, env=env) + compat_setenv('http_proxy', proxy, env=env) + + protocol = info_dict.get('protocol') + + if protocol == 'rtmp': + player_url = info_dict.get('player_url') + page_url = info_dict.get('page_url') + app = info_dict.get('app') + play_path = info_dict.get('play_path') + tc_url = info_dict.get('tc_url') + flash_version = info_dict.get('flash_version') + live = info_dict.get('rtmp_live', False) + conn = info_dict.get('rtmp_conn') + if player_url is not None: + args += ['-rtmp_swfverify', player_url] + if page_url is not None: + args += ['-rtmp_pageurl', page_url] + if app is not None: + args += ['-rtmp_app', app] + if play_path is not None: + args += ['-rtmp_playpath', play_path] + if tc_url is not None: + args += ['-rtmp_tcurl', tc_url] + if flash_version is not None: + args += ['-rtmp_flashver', flash_version] + if live: + args += ['-rtmp_live', 'live'] + if isinstance(conn, list): + for entry in conn: + args += ['-rtmp_conn', entry] + elif isinstance(conn, compat_str): + args += ['-rtmp_conn', conn] + + for i, url in enumerate(urls): + args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url] + + args += ['-c', 'copy'] + if info_dict.get('requested_formats') or protocol == 'http_dash_segments': + for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]): + stream_number = fmt.get('manifest_stream_number', 0) + args.extend(['-map', f'{i}:{stream_number}']) + + if self.params.get('test', False): + args += ['-fs', compat_str(self._TEST_FILE_SIZE)] + + ext = info_dict['ext'] + if protocol in ('m3u8', 'm3u8_native'): + use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts') + if use_mpegts is None: + use_mpegts = info_dict.get('is_live') + if use_mpegts: + args += ['-f', 'mpegts'] + else: + 
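# HLS VOD output is remuxed into an mp4 container; ADTS AAC carried in
+                # MPEG-TS must be repacked with the aac_adtstoasc bitstream
+                # filter, which is added just below when ffmpeg requires it.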
+                args += ['-f', 'mp4']
+                if (ffpp.basename == 'ffmpeg' and ffpp._features.get('needs_adtstoasc')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
+                    args += ['-bsf:a', 'aac_adtstoasc']
+        elif protocol == 'rtmp':
+            args += ['-f', 'flv']
+        elif ext == 'mp4' and tmpfilename == '-':
+            args += ['-f', 'mpegts']
+        elif ext == 'unknown_video':
+            ext = determine_ext(remove_end(tmpfilename, '.part'))
+            if ext == 'unknown_video':
+                self.report_warning(
+                    'The video format is unknown and cannot be downloaded by ffmpeg. '
+                    'Explicitly set the extension in the filename to attempt download in that format')
+            else:
+                self.report_warning(f'The video format is unknown. Trying to download as {ext} according to the filename')
+            args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
+        else:
+            args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
+
+        args += self._configuration_args(('_o1', '_o', ''))
+
+        args = [encodeArgument(opt) for opt in args]
+        args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
+        self._debug_cmd(args)
+
+        proc = Popen(args, stdin=subprocess.PIPE, env=env)
+        if url in ('-', 'pipe:'):
+            self.on_process_started(proc, proc.stdin)
+        try:
+            retval = proc.wait()
+        except BaseException as e:
+            # subprocess.run would send the SIGKILL signal to ffmpeg and the
+            # mp4 file couldn't be played, but if we ask ffmpeg to quit it
+            # produces a file that is playable (this is mostly useful for live
+            # streams). Note that Windows is not affected and produces playable
+            # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
+            if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
+                proc.communicate_or_kill(b'q')
+            else:
+                proc.kill()
+                proc.wait()
+            raise
+        return retval
+
+
+class AVconvFD(FFmpegFD):
+    pass
+
+
+_BY_NAME = dict(
+    (klass.get_basename(), klass)
+    for name, klass in globals().items()
+    if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
+)
+
+
+def list_external_downloaders():
+    return sorted(_BY_NAME.keys())
+
+
+def get_external_downloader(external_downloader):
+    """ Given the name of the executable, see whether we support the given
+        downloader.
""" + # Drop .exe extension on Windows + bn = os.path.splitext(os.path.basename(external_downloader))[0] + return _BY_NAME.get(bn) diff --git a/plugins/youtube_download/yt_dlp/downloader/f4m.py b/plugins/youtube_download/yt_dlp/downloader/f4m.py new file mode 100644 index 0000000..0008b7c --- /dev/null +++ b/plugins/youtube_download/yt_dlp/downloader/f4m.py @@ -0,0 +1,439 @@ +from __future__ import division, unicode_literals + +import io +import itertools +import time + +from .fragment import FragmentFD +from ..compat import ( + compat_b64decode, + compat_etree_fromstring, + compat_urlparse, + compat_urllib_error, + compat_urllib_parse_urlparse, + compat_struct_pack, + compat_struct_unpack, +) +from ..utils import ( + fix_xml_ampersands, + xpath_text, +) + + +class DataTruncatedError(Exception): + pass + + +class FlvReader(io.BytesIO): + """ + Reader for Flv files + The file format is documented in https://www.adobe.com/devnet/f4v.html + """ + + def read_bytes(self, n): + data = self.read(n) + if len(data) < n: + raise DataTruncatedError( + 'FlvReader error: need %d bytes while only %d bytes got' % ( + n, len(data))) + return data + + # Utility functions for reading numbers and strings + def read_unsigned_long_long(self): + return compat_struct_unpack('!Q', self.read_bytes(8))[0] + + def read_unsigned_int(self): + return compat_struct_unpack('!I', self.read_bytes(4))[0] + + def read_unsigned_char(self): + return compat_struct_unpack('!B', self.read_bytes(1))[0] + + def read_string(self): + res = b'' + while True: + char = self.read_bytes(1) + if char == b'\x00': + break + res += char + return res + + def read_box_info(self): + """ + Read a box and return the info as a tuple: (box_size, box_type, box_data) + """ + real_size = size = self.read_unsigned_int() + box_type = self.read_bytes(4) + header_end = 8 + if size == 1: + real_size = self.read_unsigned_long_long() + header_end = 16 + return real_size, box_type, self.read_bytes(real_size - header_end) + + def read_asrt(self): + # version + self.read_unsigned_char() + # flags + self.read_bytes(3) + quality_entry_count = self.read_unsigned_char() + # QualityEntryCount + for i in range(quality_entry_count): + self.read_string() + + segment_run_count = self.read_unsigned_int() + segments = [] + for i in range(segment_run_count): + first_segment = self.read_unsigned_int() + fragments_per_segment = self.read_unsigned_int() + segments.append((first_segment, fragments_per_segment)) + + return { + 'segment_run': segments, + } + + def read_afrt(self): + # version + self.read_unsigned_char() + # flags + self.read_bytes(3) + # time scale + self.read_unsigned_int() + + quality_entry_count = self.read_unsigned_char() + # QualitySegmentUrlModifiers + for i in range(quality_entry_count): + self.read_string() + + fragments_count = self.read_unsigned_int() + fragments = [] + for i in range(fragments_count): + first = self.read_unsigned_int() + first_ts = self.read_unsigned_long_long() + duration = self.read_unsigned_int() + if duration == 0: + discontinuity_indicator = self.read_unsigned_char() + else: + discontinuity_indicator = None + fragments.append({ + 'first': first, + 'ts': first_ts, + 'duration': duration, + 'discontinuity_indicator': discontinuity_indicator, + }) + + return { + 'fragments': fragments, + } + + def read_abst(self): + # version + self.read_unsigned_char() + # flags + self.read_bytes(3) + + self.read_unsigned_int() # BootstrapinfoVersion + # Profile,Live,Update,Reserved + flags = self.read_unsigned_char() + live = flags & 0x20 != 0 + # 
time scale + self.read_unsigned_int() + # CurrentMediaTime + self.read_unsigned_long_long() + # SmpteTimeCodeOffset + self.read_unsigned_long_long() + + self.read_string() # MovieIdentifier + server_count = self.read_unsigned_char() + # ServerEntryTable + for i in range(server_count): + self.read_string() + quality_count = self.read_unsigned_char() + # QualityEntryTable + for i in range(quality_count): + self.read_string() + # DrmData + self.read_string() + # MetaData + self.read_string() + + segments_count = self.read_unsigned_char() + segments = [] + for i in range(segments_count): + box_size, box_type, box_data = self.read_box_info() + assert box_type == b'asrt' + segment = FlvReader(box_data).read_asrt() + segments.append(segment) + fragments_run_count = self.read_unsigned_char() + fragments = [] + for i in range(fragments_run_count): + box_size, box_type, box_data = self.read_box_info() + assert box_type == b'afrt' + fragments.append(FlvReader(box_data).read_afrt()) + + return { + 'segments': segments, + 'fragments': fragments, + 'live': live, + } + + def read_bootstrap_info(self): + total_size, box_type, box_data = self.read_box_info() + assert box_type == b'abst' + return FlvReader(box_data).read_abst() + + +def read_bootstrap_info(bootstrap_bytes): + return FlvReader(bootstrap_bytes).read_bootstrap_info() + + +def build_fragments_list(boot_info): + """ Return a list of (segment, fragment) for each fragment in the video """ + res = [] + segment_run_table = boot_info['segments'][0] + fragment_run_entry_table = boot_info['fragments'][0]['fragments'] + first_frag_number = fragment_run_entry_table[0]['first'] + fragments_counter = itertools.count(first_frag_number) + for segment, fragments_count in segment_run_table['segment_run']: + # In some live HDS streams (for example Rai), `fragments_count` is + # abnormal and causing out-of-memory errors. It's OK to change the + # number of fragments for live streams as they are updated periodically + if fragments_count == 4294967295 and boot_info['live']: + fragments_count = 2 + for _ in range(fragments_count): + res.append((segment, next(fragments_counter))) + + if boot_info['live']: + res = res[-2:] + + return res + + +def write_unsigned_int(stream, val): + stream.write(compat_struct_pack('!I', val)) + + +def write_unsigned_int_24(stream, val): + stream.write(compat_struct_pack('!I', val)[1:]) + + +def write_flv_header(stream): + """Writes the FLV header to stream""" + # FLV header + stream.write(b'FLV\x01') + stream.write(b'\x05') + stream.write(b'\x00\x00\x00\x09') + stream.write(b'\x00\x00\x00\x00') + + +def write_metadata_tag(stream, metadata): + """Writes optional metadata tag to stream""" + SCRIPT_TAG = b'\x12' + FLV_TAG_HEADER_LEN = 11 + + if metadata: + stream.write(SCRIPT_TAG) + write_unsigned_int_24(stream, len(metadata)) + stream.write(b'\x00\x00\x00\x00\x00\x00\x00') + stream.write(metadata) + write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata)) + + +def remove_encrypted_media(media): + return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib + and 'drmAdditionalHeaderSetId' not in e.attrib, + media)) + + +def _add_ns(prop, ver=1): + return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop) + + +def get_base_url(manifest): + base_url = xpath_text( + manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)], + 'base URL', default=None) + if base_url: + base_url = base_url.strip() + return base_url + + +class F4mFD(FragmentFD): + """ + A downloader for f4m manifests or AdobeHDS. 
+ """ + + FD_NAME = 'f4m' + + def _get_unencrypted_media(self, doc): + media = doc.findall(_add_ns('media')) + if not media: + self.report_error('No media found') + if not self.params.get('allow_unplayable_formats'): + for e in (doc.findall(_add_ns('drmAdditionalHeader')) + + doc.findall(_add_ns('drmAdditionalHeaderSet'))): + # If id attribute is missing it's valid for all media nodes + # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute + if 'id' not in e.attrib: + self.report_error('Missing ID in f4m DRM') + media = remove_encrypted_media(media) + if not media: + self.report_error('Unsupported DRM') + return media + + def _get_bootstrap_from_url(self, bootstrap_url): + bootstrap = self.ydl.urlopen(bootstrap_url).read() + return read_bootstrap_info(bootstrap) + + def _update_live_fragments(self, bootstrap_url, latest_fragment): + fragments_list = [] + retries = 30 + while (not fragments_list) and (retries > 0): + boot_info = self._get_bootstrap_from_url(bootstrap_url) + fragments_list = build_fragments_list(boot_info) + fragments_list = [f for f in fragments_list if f[1] > latest_fragment] + if not fragments_list: + # Retry after a while + time.sleep(5.0) + retries -= 1 + + if not fragments_list: + self.report_error('Failed to update fragments') + + return fragments_list + + def _parse_bootstrap_node(self, node, base_url): + # Sometimes non empty inline bootstrap info can be specified along + # with bootstrap url attribute (e.g. dummy inline bootstrap info + # contains whitespace characters in [1]). We will prefer bootstrap + # url over inline bootstrap info when present. + # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m + bootstrap_url = node.get('url') + if bootstrap_url: + bootstrap_url = compat_urlparse.urljoin( + base_url, bootstrap_url) + boot_info = self._get_bootstrap_from_url(bootstrap_url) + else: + bootstrap_url = None + bootstrap = compat_b64decode(node.text) + boot_info = read_bootstrap_info(bootstrap) + return boot_info, bootstrap_url + + def real_download(self, filename, info_dict): + man_url = info_dict['url'] + requested_bitrate = info_dict.get('tbr') + self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) + + urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) + man_url = urlh.geturl() + # Some manifests may be malformed, e.g. prosiebensat1 generated manifests + # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244 + # and https://github.com/ytdl-org/youtube-dl/issues/7823) + manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip() + + doc = compat_etree_fromstring(manifest) + formats = [(int(f.attrib.get('bitrate', -1)), f) + for f in self._get_unencrypted_media(doc)] + if requested_bitrate is None or len(formats) == 1: + # get the best format + formats = sorted(formats, key=lambda f: f[0]) + rate, media = formats[-1] + else: + rate, media = list(filter( + lambda f: int(f[0]) == requested_bitrate, formats))[0] + + # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. 
+ man_base_url = get_base_url(doc) or man_url + + base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url']) + bootstrap_node = doc.find(_add_ns('bootstrapInfo')) + boot_info, bootstrap_url = self._parse_bootstrap_node( + bootstrap_node, man_base_url) + live = boot_info['live'] + metadata_node = media.find(_add_ns('metadata')) + if metadata_node is not None: + metadata = compat_b64decode(metadata_node.text) + else: + metadata = None + + fragments_list = build_fragments_list(boot_info) + test = self.params.get('test', False) + if test: + # We only download the first fragment + fragments_list = fragments_list[:1] + total_frags = len(fragments_list) + # For some akamai manifests we'll need to add a query to the fragment url + akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) + + ctx = { + 'filename': filename, + 'total_frags': total_frags, + 'live': bool(live), + } + + self._prepare_frag_download(ctx) + + dest_stream = ctx['dest_stream'] + + if ctx['complete_frags_downloaded_bytes'] == 0: + write_flv_header(dest_stream) + if not live: + write_metadata_tag(dest_stream, metadata) + + base_url_parsed = compat_urllib_parse_urlparse(base_url) + + self._start_frag_download(ctx, info_dict) + + frag_index = 0 + while fragments_list: + seg_i, frag_i = fragments_list.pop(0) + frag_index += 1 + if frag_index <= ctx['fragment_index']: + continue + name = 'Seg%d-Frag%d' % (seg_i, frag_i) + query = [] + if base_url_parsed.query: + query.append(base_url_parsed.query) + if akamai_pv: + query.append(akamai_pv.strip(';')) + if info_dict.get('extra_param_to_segment_url'): + query.append(info_dict['extra_param_to_segment_url']) + url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query)) + try: + success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict) + if not success: + return False + reader = FlvReader(down_data) + while True: + try: + _, box_type, box_data = reader.read_box_info() + except DataTruncatedError: + if test: + # In tests, segments may be truncated, and thus + # FlvReader may not be able to parse the whole + # chunk. If so, write the segment as is + # See https://github.com/ytdl-org/youtube-dl/issues/9214 + dest_stream.write(down_data) + break + raise + if box_type == b'mdat': + self._append_fragment(ctx, box_data) + break + except (compat_urllib_error.HTTPError, ) as err: + if live and (err.code == 404 or err.code == 410): + # We didn't keep up with the live window. Continue + # with the next available fragment. 
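+                    # Emptying fragments_list ends this pass over the loop; the
+                    # live-refresh branch below re-reads the bootstrap info to
+                    # obtain a fresh fragment window.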
+ msg = 'Fragment %d unavailable' % frag_i + self.report_warning(msg) + fragments_list = [] + else: + raise + + if not fragments_list and not test and live and bootstrap_url: + fragments_list = self._update_live_fragments(bootstrap_url, frag_i) + total_frags += len(fragments_list) + if fragments_list and (fragments_list[0][1] > frag_i + 1): + msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1)) + self.report_warning(msg) + + self._finish_frag_download(ctx, info_dict) + + return True diff --git a/plugins/youtube_download/yt_dlp/downloader/fragment.py b/plugins/youtube_download/yt_dlp/downloader/fragment.py new file mode 100644 index 0000000..19c0990 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/downloader/fragment.py @@ -0,0 +1,523 @@ +from __future__ import division, unicode_literals + +import http.client +import json +import math +import os +import time + +try: + import concurrent.futures + can_threaded_download = True +except ImportError: + can_threaded_download = False + +from .common import FileDownloader +from .http import HttpFD +from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 +from ..compat import ( + compat_os_name, + compat_urllib_error, + compat_struct_pack, +) +from ..utils import ( + DownloadError, + error_to_compat_str, + encodeFilename, + sanitized_Request, +) + + +class HttpQuietDownloader(HttpFD): + def to_screen(self, *args, **kargs): + pass + + def report_retry(self, err, count, retries): + super().to_screen( + f'[download] Got server HTTP error: {err}. Retrying (attempt {count} of {self.format_retries(retries)}) ...') + + +class FragmentFD(FileDownloader): + """ + A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests). + + Available options: + + fragment_retries: Number of times to retry a fragment for HTTP error (DASH + and hlsnative only) + skip_unavailable_fragments: + Skip unavailable fragments (DASH and hlsnative only) + keep_fragments: Keep downloaded fragments on disk after downloading is + finished + concurrent_fragment_downloads: The number of threads to use for native hls and dash downloads + _no_ytdl_file: Don't use .ytdl file + + For each incomplete fragment download yt-dlp keeps on disk a special + bookkeeping file with download state and metadata (in future such files will + be used for any incomplete download handled by yt-dlp). This file is + used to properly handle resuming, check download file consistency and detect + potential errors. The file has a .ytdl extension and represents a standard + JSON file of the following format: + + extractor: + Dictionary of extractor related data. TBD. + + downloader: + Dictionary of downloader related data. May contain following data: + current_fragment: + Dictionary with current (being downloaded) fragment data: + index: 0-based index of current fragment among all fragments + fragment_count: + Total count of fragments + + This feature is experimental and file format may change in future. + """ + + def report_retry_fragment(self, err, frag_index, count, retries): + self.to_screen( + '\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...' 
+ % (error_to_compat_str(err), frag_index, count, self.format_retries(retries))) + + def report_skip_fragment(self, frag_index, err=None): + err = f' {err};' if err else '' + self.to_screen(f'[download]{err} Skipping fragment {frag_index:d} ...') + + def _prepare_url(self, info_dict, url): + headers = info_dict.get('http_headers') + return sanitized_Request(url, None, headers) if headers else url + + def _prepare_and_start_frag_download(self, ctx, info_dict): + self._prepare_frag_download(ctx) + self._start_frag_download(ctx, info_dict) + + def __do_ytdl_file(self, ctx): + return ctx['live'] is not True and ctx['tmpfilename'] != '-' and not self.params.get('_no_ytdl_file') + + def _read_ytdl_file(self, ctx): + assert 'ytdl_corrupt' not in ctx + stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'r') + try: + ytdl_data = json.loads(stream.read()) + ctx['fragment_index'] = ytdl_data['downloader']['current_fragment']['index'] + if 'extra_state' in ytdl_data['downloader']: + ctx['extra_state'] = ytdl_data['downloader']['extra_state'] + except Exception: + ctx['ytdl_corrupt'] = True + finally: + stream.close() + + def _write_ytdl_file(self, ctx): + frag_index_stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'w') + try: + downloader = { + 'current_fragment': { + 'index': ctx['fragment_index'], + }, + } + if 'extra_state' in ctx: + downloader['extra_state'] = ctx['extra_state'] + if ctx.get('fragment_count') is not None: + downloader['fragment_count'] = ctx['fragment_count'] + frag_index_stream.write(json.dumps({'downloader': downloader})) + finally: + frag_index_stream.close() + + def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None): + fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) + fragment_info_dict = { + 'url': frag_url, + 'http_headers': headers or info_dict.get('http_headers'), + 'request_data': request_data, + 'ctx_id': ctx.get('ctx_id'), + } + success = ctx['dl'].download(fragment_filename, fragment_info_dict) + if not success: + return False, None + if fragment_info_dict.get('filetime'): + ctx['fragment_filetime'] = fragment_info_dict.get('filetime') + ctx['fragment_filename_sanitized'] = fragment_filename + return True, self._read_fragment(ctx) + + def _read_fragment(self, ctx): + down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') + ctx['fragment_filename_sanitized'] = frag_sanitized + frag_content = down.read() + down.close() + return frag_content + + def _append_fragment(self, ctx, frag_content): + try: + ctx['dest_stream'].write(frag_content) + ctx['dest_stream'].flush() + finally: + if self.__do_ytdl_file(ctx): + self._write_ytdl_file(ctx) + if not self.params.get('keep_fragments', False): + os.remove(encodeFilename(ctx['fragment_filename_sanitized'])) + del ctx['fragment_filename_sanitized'] + + def _prepare_frag_download(self, ctx): + if 'live' not in ctx: + ctx['live'] = False + if not ctx['live']: + total_frags_str = '%d' % ctx['total_frags'] + ad_frags = ctx.get('ad_frags', 0) + if ad_frags: + total_frags_str += ' (not including %d ad)' % ad_frags + else: + total_frags_str = 'unknown (live)' + self.to_screen( + '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str)) + self.report_destination(ctx['filename']) + dl = HttpQuietDownloader( + self.ydl, + { + 'continuedl': True, + 'quiet': self.params.get('quiet'), + 'noprogress': True, + 'ratelimit': self.params.get('ratelimit'), + 'retries': self.params.get('retries', 0), + 'nopart': 
self.params.get('nopart', False), + 'test': self.params.get('test', False), + } + ) + tmpfilename = self.temp_name(ctx['filename']) + open_mode = 'wb' + resume_len = 0 + + # Establish possible resume length + if os.path.isfile(encodeFilename(tmpfilename)): + open_mode = 'ab' + resume_len = os.path.getsize(encodeFilename(tmpfilename)) + + # Should be initialized before ytdl file check + ctx.update({ + 'tmpfilename': tmpfilename, + 'fragment_index': 0, + }) + + if self.__do_ytdl_file(ctx): + if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))): + self._read_ytdl_file(ctx) + is_corrupt = ctx.get('ytdl_corrupt') is True + is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0 + if is_corrupt or is_inconsistent: + message = ( + '.ytdl file is corrupt' if is_corrupt else + 'Inconsistent state of incomplete fragment download') + self.report_warning( + '%s. Restarting from the beginning ...' % message) + ctx['fragment_index'] = resume_len = 0 + if 'ytdl_corrupt' in ctx: + del ctx['ytdl_corrupt'] + self._write_ytdl_file(ctx) + else: + self._write_ytdl_file(ctx) + assert ctx['fragment_index'] == 0 + + dest_stream, tmpfilename = self.sanitize_open(tmpfilename, open_mode) + + ctx.update({ + 'dl': dl, + 'dest_stream': dest_stream, + 'tmpfilename': tmpfilename, + # Total complete fragments downloaded so far in bytes + 'complete_frags_downloaded_bytes': resume_len, + }) + + def _start_frag_download(self, ctx, info_dict): + resume_len = ctx['complete_frags_downloaded_bytes'] + total_frags = ctx['total_frags'] + ctx_id = ctx.get('ctx_id') + # This dict stores the download progress, it's updated by the progress + # hook + state = { + 'status': 'downloading', + 'downloaded_bytes': resume_len, + 'fragment_index': ctx['fragment_index'], + 'fragment_count': total_frags, + 'filename': ctx['filename'], + 'tmpfilename': ctx['tmpfilename'], + } + + start = time.time() + ctx.update({ + 'started': start, + 'fragment_started': start, + # Amount of fragment's bytes downloaded by the time of the previous + # frag progress hook invocation + 'prev_frag_downloaded_bytes': 0, + }) + + def frag_progress_hook(s): + if s['status'] not in ('downloading', 'finished'): + return + + if ctx_id is not None and s.get('ctx_id') != ctx_id: + return + + state['max_progress'] = ctx.get('max_progress') + state['progress_idx'] = ctx.get('progress_idx') + + time_now = time.time() + state['elapsed'] = time_now - start + frag_total_bytes = s.get('total_bytes') or 0 + s['fragment_info_dict'] = s.pop('info_dict', {}) + if not ctx['live']: + estimated_size = ( + (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) + / (state['fragment_index'] + 1) * total_frags) + state['total_bytes_estimate'] = estimated_size + + if s['status'] == 'finished': + state['fragment_index'] += 1 + ctx['fragment_index'] = state['fragment_index'] + state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes'] + ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes'] + ctx['speed'] = state['speed'] = self.calc_speed( + ctx['fragment_started'], time_now, frag_total_bytes) + ctx['fragment_started'] = time.time() + ctx['prev_frag_downloaded_bytes'] = 0 + else: + frag_downloaded_bytes = s['downloaded_bytes'] + state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes'] + if not ctx['live']: + state['eta'] = self.calc_eta( + start, time_now, estimated_size - resume_len, + state['downloaded_bytes'] - resume_len) + ctx['speed'] = state['speed'] = self.calc_speed( + 
ctx['fragment_started'], time_now, frag_downloaded_bytes) + ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes + self._hook_progress(state, info_dict) + + ctx['dl'].add_progress_hook(frag_progress_hook) + + return start + + def _finish_frag_download(self, ctx, info_dict): + ctx['dest_stream'].close() + if self.__do_ytdl_file(ctx): + ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename'])) + if os.path.isfile(ytdl_filename): + os.remove(ytdl_filename) + elapsed = time.time() - ctx['started'] + + if ctx['tmpfilename'] == '-': + downloaded_bytes = ctx['complete_frags_downloaded_bytes'] + else: + self.try_rename(ctx['tmpfilename'], ctx['filename']) + if self.params.get('updatetime', True): + filetime = ctx.get('fragment_filetime') + if filetime: + try: + os.utime(ctx['filename'], (time.time(), filetime)) + except Exception: + pass + downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename'])) + + self._hook_progress({ + 'downloaded_bytes': downloaded_bytes, + 'total_bytes': downloaded_bytes, + 'filename': ctx['filename'], + 'status': 'finished', + 'elapsed': elapsed, + 'ctx_id': ctx.get('ctx_id'), + 'max_progress': ctx.get('max_progress'), + 'progress_idx': ctx.get('progress_idx'), + }, info_dict) + + def _prepare_external_frag_download(self, ctx): + if 'live' not in ctx: + ctx['live'] = False + if not ctx['live']: + total_frags_str = '%d' % ctx['total_frags'] + ad_frags = ctx.get('ad_frags', 0) + if ad_frags: + total_frags_str += ' (not including %d ad)' % ad_frags + else: + total_frags_str = 'unknown (live)' + self.to_screen( + '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str)) + + tmpfilename = self.temp_name(ctx['filename']) + + # Should be initialized before ytdl file check + ctx.update({ + 'tmpfilename': tmpfilename, + 'fragment_index': 0, + }) + + def decrypter(self, info_dict): + _key_cache = {} + + def _get_key(url): + if url not in _key_cache: + _key_cache[url] = self.ydl.urlopen(self._prepare_url(info_dict, url)).read() + return _key_cache[url] + + def decrypt_fragment(fragment, frag_content): + decrypt_info = fragment.get('decrypt_info') + if not decrypt_info or decrypt_info['METHOD'] != 'AES-128': + return frag_content + iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence']) + decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI']) + # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block + # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded, + # not what it decrypts to. + if self.params.get('test', False): + return frag_content + return unpad_pkcs7(aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv)) + + return decrypt_fragment + + def download_and_append_fragments_multiple(self, *args, pack_func=None, finish_func=None): + ''' + @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... 
+ all args must be either tuple or list + ''' + interrupt_trigger = [True] + max_progress = len(args) + if max_progress == 1: + return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func) + max_workers = self.params.get('concurrent_fragment_downloads', 1) + if max_progress > 1: + self._prepare_multiline_status(max_progress) + + def thread_func(idx, ctx, fragments, info_dict, tpe): + ctx['max_progress'] = max_progress + ctx['progress_idx'] = idx + return self.download_and_append_fragments( + ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func, + tpe=tpe, interrupt_trigger=interrupt_trigger) + + class FTPE(concurrent.futures.ThreadPoolExecutor): + # has to stop this or it's going to wait on the worker thread itself + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + spins = [] + if compat_os_name == 'nt': + self.report_warning('Ctrl+C does not work on Windows when used with parallel threads. ' + 'This is a known issue and patches are welcome') + for idx, (ctx, fragments, info_dict) in enumerate(args): + tpe = FTPE(math.ceil(max_workers / max_progress)) + job = tpe.submit(thread_func, idx, ctx, fragments, info_dict, tpe) + spins.append((tpe, job)) + + result = True + for tpe, job in spins: + try: + result = result and job.result() + except KeyboardInterrupt: + interrupt_trigger[0] = False + finally: + tpe.shutdown(wait=True) + if not interrupt_trigger[0]: + raise KeyboardInterrupt() + return result + + def download_and_append_fragments( + self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None, + tpe=None, interrupt_trigger=None): + if not interrupt_trigger: + interrupt_trigger = (True, ) + + fragment_retries = self.params.get('fragment_retries', 0) + is_fatal = ( + ((lambda _: False) if info_dict.get('is_live') else (lambda idx: idx == 0)) + if self.params.get('skip_unavailable_fragments', True) else (lambda _: True)) + + if not pack_func: + pack_func = lambda frag_content, _: frag_content + + def download_fragment(fragment, ctx): + frag_index = ctx['fragment_index'] = fragment['frag_index'] + ctx['last_error'] = None + if not interrupt_trigger[0]: + return False, frag_index + headers = info_dict.get('http_headers', {}).copy() + byte_range = fragment.get('byte_range') + if byte_range: + headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) + + # Never skip the first fragment + fatal = is_fatal(fragment.get('index') or (frag_index - 1)) + count, frag_content = 0, None + while count <= fragment_retries: + try: + success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers) + if not success: + return False, frag_index + break + except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err: + # Unavailable (possibly temporary) fragments may be served. + # First we try to retry then either skip or abort. + # See https://github.com/ytdl-org/youtube-dl/issues/10165, + # https://github.com/ytdl-org/youtube-dl/issues/10448). 
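+                # Record the most recent error on the shared ctx and count this
+                # attempt; the retry notice is only printed while retries remain.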
+ count += 1 + ctx['last_error'] = err + if count <= fragment_retries: + self.report_retry_fragment(err, frag_index, count, fragment_retries) + except DownloadError: + # Don't retry fragment if error occurred during HTTP downloading + # itself since it has own retry settings + if not fatal: + break + raise + + if count > fragment_retries: + if not fatal: + return False, frag_index + ctx['dest_stream'].close() + self.report_error('Giving up after %s fragment retries' % fragment_retries) + return False, frag_index + return frag_content, frag_index + + def append_fragment(frag_content, frag_index, ctx): + if not frag_content: + if not is_fatal(frag_index - 1): + self.report_skip_fragment(frag_index, 'fragment not found') + return True + else: + ctx['dest_stream'].close() + self.report_error( + 'fragment %s not found, unable to continue' % frag_index) + return False + self._append_fragment(ctx, pack_func(frag_content, frag_index)) + return True + + decrypt_fragment = self.decrypter(info_dict) + + max_workers = math.ceil( + self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1)) + if can_threaded_download and max_workers > 1: + + def _download_fragment(fragment): + ctx_copy = ctx.copy() + frag_content, frag_index = download_fragment(fragment, ctx_copy) + return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized') + + self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome') + with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: + for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments): + if not interrupt_trigger[0]: + break + ctx['fragment_filename_sanitized'] = frag_filename + ctx['fragment_index'] = frag_index + result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) + if not result: + return False + else: + for fragment in fragments: + if not interrupt_trigger[0]: + break + frag_content, frag_index = download_fragment(fragment, ctx) + result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) + if not result: + return False + + if finish_func is not None: + ctx['dest_stream'].write(finish_func()) + ctx['dest_stream'].flush() + self._finish_frag_download(ctx, info_dict) + return True diff --git a/plugins/youtube_download/yt_dlp/downloader/hls.py b/plugins/youtube_download/yt_dlp/downloader/hls.py new file mode 100644 index 0000000..e932fd6 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/downloader/hls.py @@ -0,0 +1,358 @@ +from __future__ import unicode_literals + +import re +import io +import binascii + +from ..downloader import get_suitable_downloader +from .fragment import FragmentFD +from .external import FFmpegFD + +from ..compat import ( + compat_pycrypto_AES, + compat_urlparse, +) +from ..utils import ( + parse_m3u8_attributes, + update_url_query, + bug_reports_message, +) +from .. import webvtt + + +class HlsFD(FragmentFD): + """ + Download segments in a m3u8 manifest. External downloaders can take over + the fragment downloads by supporting the 'm3u8_frag_urls' protocol and + re-defining 'supports_manifest' function + """ + + FD_NAME = 'hlsnative' + + @staticmethod + def can_download(manifest, info_dict, allow_unplayable_formats=False): + UNSUPPORTED_FEATURES = [ + # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] + + # Live streams heuristic does not always work (e.g. 
geo restricted to Germany + # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) + # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] + + # This heuristic also is not correct since segments may not be appended as well. + # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite + # no segments will definitely be appended to the end of the playlist. + # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of + # # event media playlists [4] + # r'#EXT-X-MAP:', # media initialization [5] + # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 + # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 + # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 + # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 + # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5 + ] + if not allow_unplayable_formats: + UNSUPPORTED_FEATURES += [ + r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] + ] + + def check_results(): + yield not info_dict.get('is_live') + for feature in UNSUPPORTED_FEATURES: + yield not re.search(feature, manifest) + return all(check_results()) + + def real_download(self, filename, info_dict): + man_url = info_dict['url'] + self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) + + urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) + man_url = urlh.geturl() + s = urlh.read().decode('utf-8', 'ignore') + + can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None + if can_download and not compat_pycrypto_AES and '#EXT-X-KEY:METHOD=AES-128' in s: + if FFmpegFD.available(): + can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available' + else: + message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; ' + 'Decryption will be performed natively, but will be extremely slow') + if not can_download: + has_drm = re.search('|'.join([ + r'#EXT-X-FAXS-CM:', # Adobe Flash Access + r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay + ]), s) + if has_drm and not self.params.get('allow_unplayable_formats'): + self.report_error( + 'This video is DRM protected; Try selecting another format with --format or ' + 'add --check-formats to automatically fallback to the next best format') + return False + message = message or 'Unsupported features have been detected' + fd = FFmpegFD(self.ydl, self.params) + self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}') + return fd.real_download(filename, info_dict) + elif message: + self.report_warning(message) + + is_webvtt = info_dict['ext'] == 'vtt' + if is_webvtt: + real_downloader = None # Packing the fragments is not currently supported for external downloader + else: + real_downloader = get_suitable_downloader( + info_dict, self.params, None, protocol='m3u8_frag_urls', to_stdout=(filename == '-')) + if real_downloader and not real_downloader.supports_manifest(s): + real_downloader = None + if real_downloader: + self.to_screen( + '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) + + def is_ad_fragment_start(s): + return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s + or s.startswith('#UPLYNK-SEGMENT') and 
s.endswith(',ad')) + + def is_ad_fragment_end(s): + return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s + or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) + + fragments = [] + + media_frags = 0 + ad_frags = 0 + ad_frag_next = False + for line in s.splitlines(): + line = line.strip() + if not line: + continue + if line.startswith('#'): + if is_ad_fragment_start(line): + ad_frag_next = True + elif is_ad_fragment_end(line): + ad_frag_next = False + continue + if ad_frag_next: + ad_frags += 1 + continue + media_frags += 1 + + ctx = { + 'filename': filename, + 'total_frags': media_frags, + 'ad_frags': ad_frags, + } + + if real_downloader: + self._prepare_external_frag_download(ctx) + else: + self._prepare_and_start_frag_download(ctx, info_dict) + + extra_state = ctx.setdefault('extra_state', {}) + + format_index = info_dict.get('format_index') + extra_query = None + extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') + if extra_param_to_segment_url: + extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url) + i = 0 + media_sequence = 0 + decrypt_info = {'METHOD': 'NONE'} + byte_range = {} + discontinuity_count = 0 + frag_index = 0 + ad_frag_next = False + for line in s.splitlines(): + line = line.strip() + if line: + if not line.startswith('#'): + if format_index and discontinuity_count != format_index: + continue + if ad_frag_next: + continue + frag_index += 1 + if frag_index <= ctx['fragment_index']: + continue + frag_url = ( + line + if re.match(r'^https?://', line) + else compat_urlparse.urljoin(man_url, line)) + if extra_query: + frag_url = update_url_query(frag_url, extra_query) + + fragments.append({ + 'frag_index': frag_index, + 'url': frag_url, + 'decrypt_info': decrypt_info, + 'byte_range': byte_range, + 'media_sequence': media_sequence, + }) + media_sequence += 1 + + elif line.startswith('#EXT-X-MAP'): + if format_index and discontinuity_count != format_index: + continue + if frag_index > 0: + self.report_error( + 'Initialization fragment found after media fragments, unable to download') + return False + frag_index += 1 + map_info = parse_m3u8_attributes(line[11:]) + frag_url = ( + map_info.get('URI') + if re.match(r'^https?://', map_info.get('URI')) + else compat_urlparse.urljoin(man_url, map_info.get('URI'))) + if extra_query: + frag_url = update_url_query(frag_url, extra_query) + + fragments.append({ + 'frag_index': frag_index, + 'url': frag_url, + 'decrypt_info': decrypt_info, + 'byte_range': byte_range, + 'media_sequence': media_sequence + }) + media_sequence += 1 + + if map_info.get('BYTERANGE'): + splitted_byte_range = map_info.get('BYTERANGE').split('@') + sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] + byte_range = { + 'start': sub_range_start, + 'end': sub_range_start + int(splitted_byte_range[0]), + } + + elif line.startswith('#EXT-X-KEY'): + decrypt_url = decrypt_info.get('URI') + decrypt_info = parse_m3u8_attributes(line[11:]) + if decrypt_info['METHOD'] == 'AES-128': + if 'IV' in decrypt_info: + decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32)) + if not re.match(r'^https?://', decrypt_info['URI']): + decrypt_info['URI'] = compat_urlparse.urljoin( + man_url, decrypt_info['URI']) + if extra_query: + decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) + if decrypt_url != decrypt_info['URI']: + decrypt_info['KEY'] = None + + elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): + media_sequence = int(line[22:]) + elif 
line.startswith('#EXT-X-BYTERANGE'): + splitted_byte_range = line[17:].split('@') + sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] + byte_range = { + 'start': sub_range_start, + 'end': sub_range_start + int(splitted_byte_range[0]), + } + elif is_ad_fragment_start(line): + ad_frag_next = True + elif is_ad_fragment_end(line): + ad_frag_next = False + elif line.startswith('#EXT-X-DISCONTINUITY'): + discontinuity_count += 1 + i += 1 + + # We only download the first fragment during the test + if self.params.get('test', False): + fragments = [fragments[0] if fragments else None] + + if real_downloader: + info_dict['fragments'] = fragments + fd = real_downloader(self.ydl, self.params) + # TODO: Make progress updates work without hooking twice + # for ph in self._progress_hooks: + # fd.add_progress_hook(ph) + return fd.real_download(filename, info_dict) + + if is_webvtt: + def pack_fragment(frag_content, frag_index): + output = io.StringIO() + adjust = 0 + overflow = False + mpegts_last = None + for block in webvtt.parse_fragment(frag_content): + if isinstance(block, webvtt.CueBlock): + extra_state['webvtt_mpegts_last'] = mpegts_last + if overflow: + extra_state['webvtt_mpegts_adjust'] += 1 + overflow = False + block.start += adjust + block.end += adjust + + dedup_window = extra_state.setdefault('webvtt_dedup_window', []) + + ready = [] + + i = 0 + is_new = True + while i < len(dedup_window): + wcue = dedup_window[i] + wblock = webvtt.CueBlock.from_json(wcue) + i += 1 + if wblock.hinges(block): + wcue['end'] = block.end + is_new = False + continue + if wblock == block: + is_new = False + continue + if wblock.end > block.start: + continue + ready.append(wblock) + i -= 1 + del dedup_window[i] + + if is_new: + dedup_window.append(block.as_json) + for block in ready: + block.write_into(output) + + # we only emit cues once they fall out of the duplicate window + continue + elif isinstance(block, webvtt.Magic): + # take care of MPEG PES timestamp overflow + if block.mpegts is None: + block.mpegts = 0 + extra_state.setdefault('webvtt_mpegts_adjust', 0) + block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33 + if block.mpegts < extra_state.get('webvtt_mpegts_last', 0): + overflow = True + block.mpegts += 1 << 33 + mpegts_last = block.mpegts + + if frag_index == 1: + extra_state['webvtt_mpegts'] = block.mpegts or 0 + extra_state['webvtt_local'] = block.local or 0 + # XXX: block.local = block.mpegts = None ? 
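+                            # NOTE (editorial sketch): MPEG-TS PES timestamps are 33-bit
+                            # counters in 90 kHz units, so they wrap roughly every
+                            # 2**33 / 90000 s, i.e. about 26.5 hours. A worked example of
+                            # the overflow handling above: if the previous fragment ended
+                            # at mpegts = 8589934000 and this block reports 1200, then
+                            # 1200 < 8589934000 trips the overflow flag and
+                            # 1200 + (1 << 33) == 8589935792 restores a monotonic clock.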
+ else: + if block.mpegts is not None and block.local is not None: + adjust = ( + (block.mpegts - extra_state.get('webvtt_mpegts', 0)) + - (block.local - extra_state.get('webvtt_local', 0)) + ) + continue + elif isinstance(block, webvtt.HeaderBlock): + if frag_index != 1: + # XXX: this should probably be silent as well + # or verify that all segments contain the same data + self.report_warning(bug_reports_message( + 'Discarding a %s block found in the middle of the stream; ' + 'if the subtitles display incorrectly,' + % (type(block).__name__))) + continue + block.write_into(output) + + return output.getvalue().encode('utf-8') + + def fin_fragments(): + dedup_window = extra_state.get('webvtt_dedup_window') + if not dedup_window: + return b'' + + output = io.StringIO() + for cue in dedup_window: + webvtt.CueBlock.from_json(cue).write_into(output) + + return output.getvalue().encode('utf-8') + + self.download_and_append_fragments( + ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments) + else: + return self.download_and_append_fragments(ctx, fragments, info_dict) diff --git a/plugins/youtube_download/yt_dlp/downloader/http.py b/plugins/youtube_download/yt_dlp/downloader/http.py new file mode 100644 index 0000000..34a1eb5 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/downloader/http.py @@ -0,0 +1,386 @@ +from __future__ import unicode_literals + +import errno +import os +import socket +import time +import random +import re + +from .common import FileDownloader +from ..compat import ( + compat_str, + compat_urllib_error, +) +from ..utils import ( + ContentTooShortError, + encodeFilename, + int_or_none, + sanitized_Request, + ThrottledDownload, + write_xattr, + XAttrMetadataError, + XAttrUnavailableError, +) + + +class HttpFD(FileDownloader): + def real_download(self, filename, info_dict): + url = info_dict['url'] + request_data = info_dict.get('request_data', None) + + class DownloadContext(dict): + __getattr__ = dict.get + __setattr__ = dict.__setitem__ + __delattr__ = dict.__delitem__ + + ctx = DownloadContext() + ctx.filename = filename + ctx.tmpfilename = self.temp_name(filename) + ctx.stream = None + + # Do not include the Accept-Encoding header + headers = {'Youtubedl-no-compression': 'True'} + add_headers = info_dict.get('http_headers') + if add_headers: + headers.update(add_headers) + + is_test = self.params.get('test', False) + chunk_size = self._TEST_FILE_SIZE if is_test else ( + self.params.get('http_chunk_size') + or info_dict.get('downloader_options', {}).get('http_chunk_size') + or 0) + + ctx.open_mode = 'wb' + ctx.resume_len = 0 + ctx.data_len = None + ctx.block_size = self.params.get('buffersize', 1024) + ctx.start_time = time.time() + ctx.chunk_size = None + throttle_start = None + + if self.params.get('continuedl', True): + # Establish possible resume length + if os.path.isfile(encodeFilename(ctx.tmpfilename)): + ctx.resume_len = os.path.getsize( + encodeFilename(ctx.tmpfilename)) + + ctx.is_resume = ctx.resume_len > 0 + + count = 0 + retries = self.params.get('retries', 0) + + class SucceedDownload(Exception): + pass + + class RetryDownload(Exception): + def __init__(self, source_error): + self.source_error = source_error + + class NextFragment(Exception): + pass + + def set_range(req, start, end): + range_header = 'bytes=%d-' % start + if end: + range_header += compat_str(end) + req.add_header('Range', range_header) + + def establish_connection(): + ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size) + if not is_test and 
chunk_size else chunk_size) + if ctx.resume_len > 0: + range_start = ctx.resume_len + if ctx.is_resume: + self.report_resuming_byte(ctx.resume_len) + ctx.open_mode = 'ab' + elif ctx.chunk_size > 0: + range_start = 0 + else: + range_start = None + ctx.is_resume = False + range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None + if range_end and ctx.data_len is not None and range_end >= ctx.data_len: + range_end = ctx.data_len - 1 + has_range = range_start is not None + ctx.has_range = has_range + request = sanitized_Request(url, request_data, headers) + if has_range: + set_range(request, range_start, range_end) + # Establish connection + try: + try: + ctx.data = self.ydl.urlopen(request) + except (compat_urllib_error.URLError, ) as err: + # reason may not be available, e.g. for urllib2.HTTPError on python 2.6 + reason = getattr(err, 'reason', None) + if isinstance(reason, socket.timeout): + raise RetryDownload(err) + raise err + # When trying to resume, Content-Range HTTP header of response has to be checked + # to match the value of requested Range HTTP header. This is due to a webservers + # that don't support resuming and serve a whole file with no Content-Range + # set in response despite of requested Range (see + # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799) + if has_range: + content_range = ctx.data.headers.get('Content-Range') + if content_range: + content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range) + # Content-Range is present and matches requested Range, resume is possible + if content_range_m: + if range_start == int(content_range_m.group(1)): + content_range_end = int_or_none(content_range_m.group(2)) + content_len = int_or_none(content_range_m.group(3)) + accept_content_len = ( + # Non-chunked download + not ctx.chunk_size + # Chunked download and requested piece or + # its part is promised to be served + or content_range_end == range_end + or content_len < range_end) + if accept_content_len: + ctx.data_len = content_len + return + # Content-Range is either not present or invalid. Assuming remote webserver is + # trying to send the whole file, resume is not possible, so wiping the local file + # and performing entire redownload + self.report_unable_to_resume() + ctx.resume_len = 0 + ctx.open_mode = 'wb' + ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None)) + return + except (compat_urllib_error.HTTPError, ) as err: + if err.code == 416: + # Unable to resume (requested range not satisfiable) + try: + # Open the connection again without the range header + ctx.data = self.ydl.urlopen( + sanitized_Request(url, request_data, headers)) + content_length = ctx.data.info()['Content-Length'] + except (compat_urllib_error.HTTPError, ) as err: + if err.code < 500 or err.code >= 600: + raise + else: + # Examine the reported length + if (content_length is not None + and (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)): + # The file had already been fully downloaded. + # Explanation to the above condition: in issue #175 it was revealed that + # YouTube sometimes adds or removes a few bytes from the end of the file, + # changing the file size slightly and causing problems for some users. So + # I decided to implement a suggested change and consider the file + # completely downloaded if the file size differs less than 100 bytes from + # the one in the hard drive. 
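+                            # NOTE (editorial): a worked example of the tolerance above --
+                            # with a partial file of exactly 10_000_000 bytes, any reported
+                            # Content-Length strictly between 9_999_900 and 10_000_100 is
+                            # treated as "already downloaded"; anything outside that open
+                            # interval falls through to the full redownload branch below.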
+ self.report_file_already_downloaded(ctx.filename) + self.try_rename(ctx.tmpfilename, ctx.filename) + self._hook_progress({ + 'filename': ctx.filename, + 'status': 'finished', + 'downloaded_bytes': ctx.resume_len, + 'total_bytes': ctx.resume_len, + }, info_dict) + raise SucceedDownload() + else: + # The length does not match, we start the download over + self.report_unable_to_resume() + ctx.resume_len = 0 + ctx.open_mode = 'wb' + return + elif err.code < 500 or err.code >= 600: + # Unexpected HTTP error + raise + raise RetryDownload(err) + except socket.timeout as err: + raise RetryDownload(err) + except socket.error as err: + if err.errno in (errno.ECONNRESET, errno.ETIMEDOUT): + # Connection reset is no problem, just retry + raise RetryDownload(err) + raise + + def download(): + nonlocal throttle_start + data_len = ctx.data.info().get('Content-length', None) + + # Range HTTP header may be ignored/unsupported by a webserver + # (e.g. extractor/scivee.py, extractor/bambuser.py). + # However, for a test we still would like to download just a piece of a file. + # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control + # block size when downloading a file. + if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE): + data_len = self._TEST_FILE_SIZE + + if data_len is not None: + data_len = int(data_len) + ctx.resume_len + min_data_len = self.params.get('min_filesize') + max_data_len = self.params.get('max_filesize') + if min_data_len is not None and data_len < min_data_len: + self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) + return False + if max_data_len is not None and data_len > max_data_len: + self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' 
% (data_len, max_data_len)) + return False + + byte_counter = 0 + ctx.resume_len + block_size = ctx.block_size + start = time.time() + + # measure time over whole while-loop, so slow_down() and best_block_size() work together properly + now = None # needed for slow_down() in the first loop run + before = start # start measuring + + def retry(e): + to_stdout = ctx.tmpfilename == '-' + if ctx.stream is not None: + if not to_stdout: + ctx.stream.close() + ctx.stream = None + ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename)) + raise RetryDownload(e) + + while True: + try: + # Download and write + data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) + # socket.timeout is a subclass of socket.error but may not have + # errno set + except socket.timeout as e: + retry(e) + except socket.error as e: + # SSLError on python 2 (inherits socket.error) may have + # no errno set but this error message + if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out': + retry(e) + raise + + byte_counter += len(data_block) + + # exit loop when download is finished + if len(data_block) == 0: + break + + # Open destination file just in time + if ctx.stream is None: + try: + ctx.stream, ctx.tmpfilename = self.sanitize_open( + ctx.tmpfilename, ctx.open_mode) + assert ctx.stream is not None + ctx.filename = self.undo_temp_name(ctx.tmpfilename) + self.report_destination(ctx.filename) + except (OSError, IOError) as err: + self.report_error('unable to open for writing: %s' % str(err)) + return False + + if self.params.get('xattr_set_filesize', False) and data_len is not None: + try: + write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8')) + except (XAttrUnavailableError, XAttrMetadataError) as err: + self.report_error('unable to set filesize xattr: %s' % str(err)) + + try: + ctx.stream.write(data_block) + except (IOError, OSError) as err: + self.to_stderr('\n') + self.report_error('unable to write data: %s' % str(err)) + return False + + # Apply rate limit + self.slow_down(start, now, byte_counter - ctx.resume_len) + + # end measuring of one loop run + now = time.time() + after = now + + # Adjust block size + if not self.params.get('noresizebuffer', False): + block_size = self.best_block_size(after - before, len(data_block)) + + before = after + + # Progress message + speed = self.calc_speed(start, now, byte_counter - ctx.resume_len) + if ctx.data_len is None: + eta = None + else: + eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len) + + self._hook_progress({ + 'status': 'downloading', + 'downloaded_bytes': byte_counter, + 'total_bytes': ctx.data_len, + 'tmpfilename': ctx.tmpfilename, + 'filename': ctx.filename, + 'eta': eta, + 'speed': speed, + 'elapsed': now - ctx.start_time, + 'ctx_id': info_dict.get('ctx_id'), + }, info_dict) + + if data_len is not None and byte_counter == data_len: + break + + if speed and speed < (self.params.get('throttledratelimit') or 0): + # The speed must stay below the limit for 3 seconds + # This prevents raising error when the speed temporarily goes down + if throttle_start is None: + throttle_start = now + elif now - throttle_start > 3: + if ctx.stream is not None and ctx.tmpfilename != '-': + ctx.stream.close() + raise ThrottledDownload() + elif speed: + throttle_start = None + + if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < 
ctx.data_len: + ctx.resume_len = byte_counter + # ctx.block_size = block_size + raise NextFragment() + + if ctx.stream is None: + self.to_stderr('\n') + self.report_error('Did not get any data blocks') + return False + if ctx.tmpfilename != '-': + ctx.stream.close() + + if data_len is not None and byte_counter != data_len: + err = ContentTooShortError(byte_counter, int(data_len)) + if count <= retries: + retry(err) + raise err + + self.try_rename(ctx.tmpfilename, ctx.filename) + + # Update file modification time + if self.params.get('updatetime', True): + info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None)) + + self._hook_progress({ + 'downloaded_bytes': byte_counter, + 'total_bytes': byte_counter, + 'filename': ctx.filename, + 'status': 'finished', + 'elapsed': time.time() - ctx.start_time, + 'ctx_id': info_dict.get('ctx_id'), + }, info_dict) + + return True + + while count <= retries: + try: + establish_connection() + return download() + except RetryDownload as e: + count += 1 + if count <= retries: + self.report_retry(e.source_error, count, retries) + else: + self.to_screen(f'[download] Got server HTTP error: {e.source_error}') + continue + except NextFragment: + continue + except SucceedDownload: + return True + + self.report_error('giving up after %s retries' % retries) + return False diff --git a/plugins/youtube_download/yt_dlp/downloader/ism.py b/plugins/youtube_download/yt_dlp/downloader/ism.py new file mode 100644 index 0000000..09516ab --- /dev/null +++ b/plugins/youtube_download/yt_dlp/downloader/ism.py @@ -0,0 +1,289 @@ +from __future__ import unicode_literals + +import time +import binascii +import io + +from .fragment import FragmentFD +from ..compat import ( + compat_Struct, + compat_urllib_error, +) + + +u8 = compat_Struct('>B') +u88 = compat_Struct('>Bx') +u16 = compat_Struct('>H') +u1616 = compat_Struct('>Hxx') +u32 = compat_Struct('>I') +u64 = compat_Struct('>Q') + +s88 = compat_Struct('>bx') +s16 = compat_Struct('>h') +s1616 = compat_Struct('>hxx') +s32 = compat_Struct('>i') + +unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000) + +TRACK_ENABLED = 0x1 +TRACK_IN_MOVIE = 0x2 +TRACK_IN_PREVIEW = 0x4 + +SELF_CONTAINED = 0x1 + + +def box(box_type, payload): + return u32.pack(8 + len(payload)) + box_type + payload + + +def full_box(box_type, version, flags, payload): + return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload) + + +def write_piff_header(stream, params): + track_id = params['track_id'] + fourcc = params['fourcc'] + duration = params['duration'] + timescale = params.get('timescale', 10000000) + language = params.get('language', 'und') + height = params.get('height', 0) + width = params.get('width', 0) + stream_type = params['stream_type'] + creation_time = modification_time = int(time.time()) + + ftyp_payload = b'isml' # major brand + ftyp_payload += u32.pack(1) # minor version + ftyp_payload += b'piff' + b'iso2' # compatible brands + stream.write(box(b'ftyp', ftyp_payload)) # File Type Box + + mvhd_payload = u64.pack(creation_time) + mvhd_payload += u64.pack(modification_time) + mvhd_payload += u32.pack(timescale) + mvhd_payload += u64.pack(duration) + mvhd_payload += s1616.pack(1) # rate + mvhd_payload += s88.pack(1) # volume + mvhd_payload += u16.pack(0) # reserved + mvhd_payload += u32.pack(0) * 2 # reserved + mvhd_payload += unity_matrix + mvhd_payload += u32.pack(0) * 6 # pre defined + mvhd_payload += u32.pack(0xffffffff) # next track id + moov_payload = full_box(b'mvhd', 
1, 0, mvhd_payload) # Movie Header Box + + tkhd_payload = u64.pack(creation_time) + tkhd_payload += u64.pack(modification_time) + tkhd_payload += u32.pack(track_id) # track id + tkhd_payload += u32.pack(0) # reserved + tkhd_payload += u64.pack(duration) + tkhd_payload += u32.pack(0) * 2 # reserved + tkhd_payload += s16.pack(0) # layer + tkhd_payload += s16.pack(0) # alternate group + tkhd_payload += s88.pack(1 if stream_type == 'audio' else 0) # volume + tkhd_payload += u16.pack(0) # reserved + tkhd_payload += unity_matrix + tkhd_payload += u1616.pack(width) + tkhd_payload += u1616.pack(height) + trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box + + mdhd_payload = u64.pack(creation_time) + mdhd_payload += u64.pack(modification_time) + mdhd_payload += u32.pack(timescale) + mdhd_payload += u64.pack(duration) + mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60)) + mdhd_payload += u16.pack(0) # pre defined + mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box + + hdlr_payload = u32.pack(0) # pre defined + if stream_type == 'audio': # handler type + hdlr_payload += b'soun' + hdlr_payload += u32.pack(0) * 3 # reserved + hdlr_payload += b'SoundHandler\0' # name + elif stream_type == 'video': + hdlr_payload += b'vide' + hdlr_payload += u32.pack(0) * 3 # reserved + hdlr_payload += b'VideoHandler\0' # name + elif stream_type == 'text': + hdlr_payload += b'subt' + hdlr_payload += u32.pack(0) * 3 # reserved + hdlr_payload += b'SubtitleHandler\0' # name + else: + assert False + mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box + + if stream_type == 'audio': + smhd_payload = s88.pack(0) # balance + smhd_payload += u16.pack(0) # reserved + media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header + elif stream_type == 'video': + vmhd_payload = u16.pack(0) # graphics mode + vmhd_payload += u16.pack(0) * 3 # opcolor + media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header + elif stream_type == 'text': + media_header_box = full_box(b'sthd', 0, 0, b'') # Subtitle Media Header + else: + assert False + minf_payload = media_header_box + + dref_payload = u32.pack(1) # entry count + dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box + dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box + minf_payload += box(b'dinf', dinf_payload) # Data Information Box + + stsd_payload = u32.pack(1) # entry count + + sample_entry_payload = u8.pack(0) * 6 # reserved + sample_entry_payload += u16.pack(1) # data reference index + if stream_type == 'audio': + sample_entry_payload += u32.pack(0) * 2 # reserved + sample_entry_payload += u16.pack(params.get('channels', 2)) + sample_entry_payload += u16.pack(params.get('bits_per_sample', 16)) + sample_entry_payload += u16.pack(0) # pre defined + sample_entry_payload += u16.pack(0) # reserved + sample_entry_payload += u1616.pack(params['sampling_rate']) + + if fourcc == 'AACL': + sample_entry_box = box(b'mp4a', sample_entry_payload) + elif stream_type == 'video': + sample_entry_payload += u16.pack(0) # pre defined + sample_entry_payload += u16.pack(0) # reserved + sample_entry_payload += u32.pack(0) * 3 # pre defined + sample_entry_payload += u16.pack(width) + sample_entry_payload += u16.pack(height) + sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi + sample_entry_payload += u1616.pack(0x48) # 
vert resolution 72 dpi + sample_entry_payload += u32.pack(0) # reserved + sample_entry_payload += u16.pack(1) # frame count + sample_entry_payload += u8.pack(0) * 32 # compressor name + sample_entry_payload += u16.pack(0x18) # depth + sample_entry_payload += s16.pack(-1) # pre defined + + codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8')) + if fourcc in ('H264', 'AVC1'): + sps, pps = codec_private_data.split(u32.pack(1))[1:] + avcc_payload = u8.pack(1) # configuration version + avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication + avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete representation (1) + reserved (11111) + length size minus one + avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001) + avcc_payload += u16.pack(len(sps)) + avcc_payload += sps + avcc_payload += u8.pack(1) # number of pps + avcc_payload += u16.pack(len(pps)) + avcc_payload += pps + sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record + sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry + else: + assert False + elif stream_type == 'text': + if fourcc == 'TTML': + sample_entry_payload += b'http://www.w3.org/ns/ttml\0' # namespace + sample_entry_payload += b'\0' # schema location + sample_entry_payload += b'\0' # auxilary mime types(??) + sample_entry_box = box(b'stpp', sample_entry_payload) + else: + assert False + else: + assert False + stsd_payload += sample_entry_box + + stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box + + stts_payload = u32.pack(0) # entry count + stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box + + stsc_payload = u32.pack(0) # entry count + stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box + + stco_payload = u32.pack(0) # entry count + stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box + + minf_payload += box(b'stbl', stbl_payload) # Sample Table Box + + mdia_payload += box(b'minf', minf_payload) # Media Information Box + + trak_payload += box(b'mdia', mdia_payload) # Media Box + + moov_payload += box(b'trak', trak_payload) # Track Box + + mehd_payload = u64.pack(duration) + mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box + + trex_payload = u32.pack(track_id) # track id + trex_payload += u32.pack(1) # default sample description index + trex_payload += u32.pack(0) # default sample duration + trex_payload += u32.pack(0) # default sample size + trex_payload += u32.pack(0) # default sample flags + mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box + + moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box + stream.write(box(b'moov', moov_payload)) # Movie Box + + +def extract_box_data(data, box_sequence): + data_reader = io.BytesIO(data) + while True: + box_size = u32.unpack(data_reader.read(4))[0] + box_type = data_reader.read(4) + if box_type == box_sequence[0]: + box_data = data_reader.read(box_size - 8) + if len(box_sequence) == 1: + return box_data + return extract_box_data(box_data, box_sequence[1:]) + data_reader.seek(box_size - 8, 1) + + +class IsmFD(FragmentFD): + """ + Download segments in a ISM manifest + """ + + FD_NAME = 'ism' + + def real_download(self, filename, info_dict): + segments = info_dict['fragments'][:1] if self.params.get( + 'test', False) else info_dict['fragments'] + + ctx = { + 'filename': filename, + 'total_frags': 
len(segments),
+        }
+
+        self._prepare_and_start_frag_download(ctx, info_dict)
+
+        extra_state = ctx.setdefault('extra_state', {
+            'ism_track_written': False,
+        })
+
+        fragment_retries = self.params.get('fragment_retries', 0)
+        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+
+        frag_index = 0
+        for i, segment in enumerate(segments):
+            frag_index += 1
+            if frag_index <= ctx['fragment_index']:
+                continue
+            count = 0
+            while count <= fragment_retries:
+                try:
+                    success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
+                    if not success:
+                        return False
+                    if not extra_state['ism_track_written']:
+                        tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
+                        info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
+                        write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
+                        extra_state['ism_track_written'] = True
+                    self._append_fragment(ctx, frag_content)
+                    break
+                except compat_urllib_error.HTTPError as err:
+                    count += 1
+                    if count <= fragment_retries:
+                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
+            if count > fragment_retries:
+                if skip_unavailable_fragments:
+                    self.report_skip_fragment(frag_index)
+                    continue
+                self.report_error('giving up after %s fragment retries' % fragment_retries)
+                return False
+
+        self._finish_frag_download(ctx, info_dict)
+
+        return True
diff --git a/plugins/youtube_download/yt_dlp/downloader/mhtml.py b/plugins/youtube_download/yt_dlp/downloader/mhtml.py
new file mode 100644
index 0000000..1477f65
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/downloader/mhtml.py
@@ -0,0 +1,202 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import io
+import quopri
+import re
+import uuid
+
+from .fragment import FragmentFD
+from ..utils import (
+    escapeHTML,
+    formatSeconds,
+    srt_subtitles_timecode,
+    urljoin,
+)
+from ..version import __version__ as YT_DLP_VERSION
+
+
+class MhtmlFD(FragmentFD):
+    FD_NAME = 'mhtml'
+
+    _STYLESHEET = """\
+html, body {
+    margin: 0;
+    padding: 0;
+    height: 100vh;
+}
+
+html {
+    overflow-y: scroll;
+    scroll-snap-type: y mandatory;
+}
+
+body {
+    scroll-snap-type: y mandatory;
+    display: flex;
+    flex-flow: column;
+}
+
+body > figure {
+    max-width: 100vw;
+    max-height: 100vh;
+    scroll-snap-align: center;
+}
+
+body > figure > figcaption {
+    text-align: center;
+    height: 2.5em;
+}
+
+body > figure > img {
+    display: block;
+    margin: auto;
+    max-width: 100%;
+    max-height: calc(100vh - 5em);
+}
+"""
+    _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET)
+    _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET)
+
+    @staticmethod
+    def _escape_mime(s):
+        return '=?utf-8?Q?' + (b''.join(
+            bytes((b,)) if b >= 0x20 else b'=%02X' % b
+            for b in quopri.encodestring(s.encode('utf-8'), header=True)
+        )).decode('us-ascii') + '?='
+
+    def _gen_cid(self, i, fragment, frag_boundary):
+        return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary)
+
+    def _gen_stub(self, *, fragments, frag_boundary, title):
+        output = io.StringIO()
+
+        output.write((
+            '<!DOCTYPE html>'
+            '<html>'
+            '<head>'
+            '' '<meta name="generator" content="yt-dlp {version}">'
+            '' '<title>{title}</title>'
+            '' '<style>{styles}</style>'
+            '<body>'
+        ).format(
+            version=escapeHTML(YT_DLP_VERSION),
+            styles=self._STYLESHEET,
+            title=escapeHTML(title)
+        ))
+
+        t0 = 0
+        for i, frag in enumerate(fragments):
+            output.write('<figure>')
+            try:
+                t1 = t0 + frag['duration']
+                output.write((
+                    '<figcaption>Slide #{num}: {t0} – {t1} (duration: {duration})</figcaption>'
+                ).format(
+                    num=i + 1,
+                    t0=srt_subtitles_timecode(t0),
+                    t1=srt_subtitles_timecode(t1),
+                    duration=formatSeconds(frag['duration'], msec=True)
+                ))
+            except (KeyError, ValueError, TypeError):
+                t1 = None
+                output.write((
+                    '<figcaption>Slide #{num}</figcaption>'
+                ).format(num=i + 1))
+            output.write('<img src="cid:{cid}">'.format(
+                cid=self._gen_cid(i, frag, frag_boundary)))
+            output.write('</figure>')
+            t0 = t1
+
+        return output.getvalue()
+
+    def real_download(self, filename, info_dict):
+        fragment_base_url = info_dict.get('fragment_base_url')
+        fragments = info_dict['fragments'][:1] if self.params.get(
+            'test', False) else info_dict['fragments']
+        title = info_dict.get('title', info_dict['format_id'])
+        origin = info_dict.get('webpage_url', info_dict['url'])
+
+        ctx = {
+            'filename': filename,
+            'total_frags': len(fragments),
+        }
+
+        self._prepare_and_start_frag_download(ctx, info_dict)
+
+        extra_state = ctx.setdefault('extra_state', {
+            'header_written': False,
+            'mime_boundary': str(uuid.uuid4()).replace('-', ''),
+        })
+
+        frag_boundary = extra_state['mime_boundary']
+
+        if not extra_state['header_written']:
+            stub = self._gen_stub(
+                fragments=fragments,
+                frag_boundary=frag_boundary,
+                title=title
+            )
+
+            ctx['dest_stream'].write((
+                'MIME-Version: 1.0\r\n'
+                'From: <nowhere@yt-dlp.github.io.invalid>\r\n'
+                'To: <nowhere@yt-dlp.github.io.invalid>\r\n'
+                'Subject: {title}\r\n'
+                'Content-type: multipart/related; '
+                '' 'boundary="{boundary}"; '
+                '' 'type="text/html"\r\n'
+                'X.yt-dlp.Origin: {origin}\r\n'
+                '\r\n'
+                '--{boundary}\r\n'
+                'Content-Type: text/html; charset=utf-8\r\n'
+                'Content-Length: {length}\r\n'
+                '\r\n'
+                '{stub}\r\n'
+            ).format(
+                origin=origin,
+                boundary=frag_boundary,
+                length=len(stub),
+                title=self._escape_mime(title),
+                stub=stub
+            ).encode('utf-8'))
+            extra_state['header_written'] = True
+
+        for i, fragment in enumerate(fragments):
+            if (i + 1) <= ctx['fragment_index']:
+                continue
+
+            fragment_url = urljoin(fragment_base_url, fragment['path'])
+            success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
+            if not success:
+                continue
+
+            mime_type = b'image/jpeg'
+            if frag_content.startswith(b'\x89PNG\r\n\x1a\n'):
+                mime_type = b'image/png'
+            if frag_content.startswith((b'GIF87a', b'GIF89a')):
+                mime_type = b'image/gif'
+            # NOTE: must compare bytes against bytes; the former str literal 'WEBP'
+            # could never equal a bytes slice, so WebP fragments were mislabeled as JPEG
+            if frag_content.startswith(b'RIFF') and frag_content[8:12] == b'WEBP':
+                mime_type = b'image/webp'
+
+            frag_header = io.BytesIO()
+            frag_header.write(
+                b'--%b\r\n' % frag_boundary.encode('us-ascii'))
+            frag_header.write(
+                b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii'))
+            frag_header.write(
+                b'Content-type: %b\r\n' % mime_type)
+            frag_header.write(
+                b'Content-length: %u\r\n' % len(frag_content))
+            frag_header.write(
+                b'Content-location: %b\r\n' % fragment_url.encode('us-ascii'))
+            frag_header.write(
+                b'X.yt-dlp.Duration: %f\r\n' % fragment['duration'])
+            frag_header.write(b'\r\n')
+            self._append_fragment(
+                ctx, frag_header.getvalue() + frag_content + b'\r\n')
+
+        ctx['dest_stream'].write(
+            b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii'))
+        self._finish_frag_download(ctx, info_dict)
+        return True
diff --git a/plugins/youtube_download/yt_dlp/downloader/niconico.py b/plugins/youtube_download/yt_dlp/downloader/niconico.py
new file mode 100644
index 0000000..521dfec
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/downloader/niconico.py
@@ -0,0 +1,57 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import threading
+
+from .common import FileDownloader
+from ..downloader import get_suitable_downloader
+from ..extractor.niconico import NiconicoIE
+from ..utils import sanitized_Request
+
+
+class NiconicoDmcFD(FileDownloader):
+    """ Downloading niconico douga from DMC with heartbeat """
+
+    FD_NAME = 'niconico_dmc'
+
+    def real_download(self, filename, info_dict):
+        self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
+
+        ie = NiconicoIE(self.ydl)
+        info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
+
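+        # NOTE (editorial): the DMC session appears to stay valid only while it
+        # is pinged, so the transfer itself is handed to a regular downloader
+        # while heartbeat() below re-POSTs heartbeat_data and re-arms a
+        # threading.Timer every heartbeat_interval seconds (default 30) until
+        # the finally block sets download_complete under heartbeat_lock.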
+ fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params) + + success = download_complete = False + timer = [None] + heartbeat_lock = threading.Lock() + heartbeat_url = heartbeat_info_dict['url'] + heartbeat_data = heartbeat_info_dict['data'].encode() + heartbeat_interval = heartbeat_info_dict.get('interval', 30) + + request = sanitized_Request(heartbeat_url, heartbeat_data) + + def heartbeat(): + try: + self.ydl.urlopen(request).read() + except Exception: + self.to_screen('[%s] Heartbeat failed' % self.FD_NAME) + + with heartbeat_lock: + if not download_complete: + timer[0] = threading.Timer(heartbeat_interval, heartbeat) + timer[0].start() + + heartbeat_info_dict['ping']() + self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval)) + try: + heartbeat() + if type(fd).__name__ == 'HlsFD': + info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0]) + success = fd.real_download(filename, info_dict) + finally: + if heartbeat_lock: + with heartbeat_lock: + timer[0].cancel() + download_complete = True + return success diff --git a/plugins/youtube_download/yt_dlp/downloader/rtmp.py b/plugins/youtube_download/yt_dlp/downloader/rtmp.py new file mode 100644 index 0000000..90f1acf --- /dev/null +++ b/plugins/youtube_download/yt_dlp/downloader/rtmp.py @@ -0,0 +1,217 @@ +from __future__ import unicode_literals + +import os +import re +import subprocess +import time + +from .common import FileDownloader +from ..compat import compat_str +from ..utils import ( + check_executable, + encodeFilename, + encodeArgument, + get_exe_version, + Popen, +) + + +def rtmpdump_version(): + return get_exe_version( + 'rtmpdump', ['--help'], r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)') + + +class RtmpFD(FileDownloader): + def real_download(self, filename, info_dict): + def run_rtmpdump(args): + start = time.time() + resume_percent = None + resume_downloaded_data_len = None + proc = Popen(args, stderr=subprocess.PIPE) + cursor_in_new_line = True + proc_stderr_closed = False + try: + while not proc_stderr_closed: + # read line from stderr + line = '' + while True: + char = proc.stderr.read(1) + if not char: + proc_stderr_closed = True + break + if char in [b'\r', b'\n']: + break + line += char.decode('ascii', 'replace') + if not line: + # proc_stderr_closed is True + continue + mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line) + if mobj: + downloaded_data_len = int(float(mobj.group(1)) * 1024) + percent = float(mobj.group(2)) + if not resume_percent: + resume_percent = percent + resume_downloaded_data_len = downloaded_data_len + time_now = time.time() + eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent) + speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len) + data_len = None + if percent > 0: + data_len = int(downloaded_data_len * 100 / percent) + self._hook_progress({ + 'status': 'downloading', + 'downloaded_bytes': downloaded_data_len, + 'total_bytes_estimate': data_len, + 'tmpfilename': tmpfilename, + 'filename': filename, + 'eta': eta, + 'elapsed': time_now - start, + 'speed': speed, + }, info_dict) + cursor_in_new_line = False + else: + # no percent for live streams + mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) + if mobj: + downloaded_data_len = int(float(mobj.group(1)) * 1024) + time_now = time.time() + speed = self.calc_speed(start, time_now, downloaded_data_len) + self._hook_progress({ + 
'downloaded_bytes': downloaded_data_len, + 'tmpfilename': tmpfilename, + 'filename': filename, + 'status': 'downloading', + 'elapsed': time_now - start, + 'speed': speed, + }, info_dict) + cursor_in_new_line = False + elif self.params.get('verbose', False): + if not cursor_in_new_line: + self.to_screen('') + cursor_in_new_line = True + self.to_screen('[rtmpdump] ' + line) + if not cursor_in_new_line: + self.to_screen('') + return proc.wait() + except BaseException: # Including KeyboardInterrupt + proc.kill() + proc.wait() + raise + + url = info_dict['url'] + player_url = info_dict.get('player_url') + page_url = info_dict.get('page_url') + app = info_dict.get('app') + play_path = info_dict.get('play_path') + tc_url = info_dict.get('tc_url') + flash_version = info_dict.get('flash_version') + live = info_dict.get('rtmp_live', False) + conn = info_dict.get('rtmp_conn') + protocol = info_dict.get('rtmp_protocol') + real_time = info_dict.get('rtmp_real_time', False) + no_resume = info_dict.get('no_resume', False) + continue_dl = self.params.get('continuedl', True) + + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + test = self.params.get('test', False) + + # Check for rtmpdump first + if not check_executable('rtmpdump', ['-h']): + self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install') + return False + + # Download using rtmpdump. rtmpdump returns exit code 2 when + # the connection was interrupted and resuming appears to be + # possible. This is part of rtmpdump's normal usage, AFAIK. + basic_args = [ + 'rtmpdump', '--verbose', '-r', url, + '-o', tmpfilename] + if player_url is not None: + basic_args += ['--swfVfy', player_url] + if page_url is not None: + basic_args += ['--pageUrl', page_url] + if app is not None: + basic_args += ['--app', app] + if play_path is not None: + basic_args += ['--playpath', play_path] + if tc_url is not None: + basic_args += ['--tcUrl', tc_url] + if test: + basic_args += ['--stop', '1'] + if flash_version is not None: + basic_args += ['--flashVer', flash_version] + if live: + basic_args += ['--live'] + if isinstance(conn, list): + for entry in conn: + basic_args += ['--conn', entry] + elif isinstance(conn, compat_str): + basic_args += ['--conn', conn] + if protocol is not None: + basic_args += ['--protocol', protocol] + if real_time: + basic_args += ['--realtime'] + + args = basic_args + if not no_resume and continue_dl and not live: + args += ['--resume'] + if not live and continue_dl: + args += ['--skip', '1'] + + args = [encodeArgument(a) for a in args] + + self._debug_cmd(args, exe='rtmpdump') + + RD_SUCCESS = 0 + RD_FAILED = 1 + RD_INCOMPLETE = 2 + RD_NO_CONNECT = 3 + + started = time.time() + + try: + retval = run_rtmpdump(args) + except KeyboardInterrupt: + if not info_dict.get('is_live'): + raise + retval = RD_SUCCESS + self.to_screen('\n[rtmpdump] Interrupted by user') + + if retval == RD_NO_CONNECT: + self.report_error('[rtmpdump] Could not connect to RTMP server.') + return False + + while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: + prevsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize) + time.sleep(5.0) # This seems to be needed + args = basic_args + ['--resume'] + if retval == RD_FAILED: + args += ['--skip', '1'] + args = [encodeArgument(a) for a in args] + retval = run_rtmpdump(args) + cursize = os.path.getsize(encodeFilename(tmpfilename)) + if prevsize == cursize and retval == RD_FAILED: + break + # 
Some rtmp streams seem to abort after ~99.8%. Don't complain for those
+            if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
+                self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
+                retval = RD_SUCCESS
+                break
+        if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
+            fsize = os.path.getsize(encodeFilename(tmpfilename))
+            self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
+            self.try_rename(tmpfilename, filename)
+            self._hook_progress({
+                'downloaded_bytes': fsize,
+                'total_bytes': fsize,
+                'filename': filename,
+                'status': 'finished',
+                'elapsed': time.time() - started,
+            }, info_dict)
+            return True
+        else:
+            self.to_stderr('\n')
+            self.report_error('rtmpdump exited with code %d' % retval)
+            return False
diff --git a/plugins/youtube_download/yt_dlp/downloader/rtsp.py b/plugins/youtube_download/yt_dlp/downloader/rtsp.py
new file mode 100644
index 0000000..7815d59
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/downloader/rtsp.py
@@ -0,0 +1,47 @@
+from __future__ import unicode_literals
+
+import os
+import subprocess
+
+from .common import FileDownloader
+from ..utils import (
+    check_executable,
+    encodeFilename,
+)
+
+
+class RtspFD(FileDownloader):
+    def real_download(self, filename, info_dict):
+        url = info_dict['url']
+        self.report_destination(filename)
+        tmpfilename = self.temp_name(filename)
+
+        if check_executable('mplayer', ['-h']):
+            args = [
+                'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
+                '-dumpstream', '-dumpfile', tmpfilename, url]
+        elif check_executable('mpv', ['-h']):
+            args = [
+                'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
+        else:
+            self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run.
Please install one') + return False + + self._debug_cmd(args) + + retval = subprocess.call(args) + if retval == 0: + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen('\r[%s] %s bytes' % (args[0], fsize)) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }, info_dict) + return True + else: + self.to_stderr('\n') + self.report_error('%s exited with code %d' % (args[0], retval)) + return False diff --git a/plugins/youtube_download/yt_dlp/downloader/websocket.py b/plugins/youtube_download/yt_dlp/downloader/websocket.py new file mode 100644 index 0000000..daac348 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/downloader/websocket.py @@ -0,0 +1,62 @@ +import os +import signal +import asyncio +import threading + +try: + import websockets +except (ImportError, SyntaxError): + # websockets 3.10 on python 3.6 causes SyntaxError + # See https://github.com/yt-dlp/yt-dlp/issues/2633 + has_websockets = False +else: + has_websockets = True + +from .common import FileDownloader +from .external import FFmpegFD + + +class FFmpegSinkFD(FileDownloader): + """ A sink to ffmpeg for downloading fragments in any form """ + + def real_download(self, filename, info_dict): + info_copy = info_dict.copy() + info_copy['url'] = '-' + + async def call_conn(proc, stdin): + try: + await self.real_connection(stdin, info_dict) + except (BrokenPipeError, OSError): + pass + finally: + try: + stdin.flush() + stdin.close() + except OSError: + pass + os.kill(os.getpid(), signal.SIGINT) + + class FFmpegStdinFD(FFmpegFD): + @classmethod + def get_basename(cls): + return FFmpegFD.get_basename() + + def on_process_started(self, proc, stdin): + thread = threading.Thread(target=asyncio.run, daemon=True, args=(call_conn(proc, stdin), )) + thread.start() + + return FFmpegStdinFD(self.ydl, self.params or {}).download(filename, info_copy) + + async def real_connection(self, sink, info_dict): + """ Override this in subclasses """ + raise NotImplementedError('This method must be implemented by subclasses') + + +class WebSocketFragmentFD(FFmpegSinkFD): + async def real_connection(self, sink, info_dict): + async with websockets.connect(info_dict['url'], extra_headers=info_dict.get('http_headers', {})) as ws: + while True: + recv = await ws.recv() + if isinstance(recv, str): + recv = recv.encode('utf8') + sink.write(recv) diff --git a/plugins/youtube_download/yt_dlp/downloader/youtube_live_chat.py b/plugins/youtube_download/yt_dlp/downloader/youtube_live_chat.py new file mode 100644 index 0000000..ef4205e --- /dev/null +++ b/plugins/youtube_download/yt_dlp/downloader/youtube_live_chat.py @@ -0,0 +1,236 @@ +from __future__ import division, unicode_literals + +import json +import time + +from .fragment import FragmentFD +from ..compat import compat_urllib_error +from ..utils import ( + try_get, + dict_get, + int_or_none, + RegexNotFoundError, +) +from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE + + +class YoutubeLiveChatFD(FragmentFD): + """ Downloads YouTube live chats fragment by fragment """ + + FD_NAME = 'youtube_live_chat' + + def real_download(self, filename, info_dict): + video_id = info_dict['video_id'] + self.to_screen('[%s] Downloading live chat' % self.FD_NAME) + + fragment_retries = self.params.get('fragment_retries', 0) + test = self.params.get('test', False) + + ctx = { + 'filename': filename, + 'live': True, + 'total_frags': None, + } + + ie = YT_BaseIE(self.ydl) + + 
start_time = int(time.time() * 1000) + + def dl_fragment(url, data=None, headers=None): + http_headers = info_dict.get('http_headers', {}) + if headers: + http_headers = http_headers.copy() + http_headers.update(headers) + return self._download_fragment(ctx, url, info_dict, http_headers, data) + + def parse_actions_replay(live_chat_continuation): + offset = continuation_id = click_tracking_params = None + processed_fragment = bytearray() + for action in live_chat_continuation.get('actions', []): + if 'replayChatItemAction' in action: + replay_chat_item_action = action['replayChatItemAction'] + offset = int(replay_chat_item_action['videoOffsetTimeMsec']) + processed_fragment.extend( + json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n') + if offset is not None: + continuation = try_get( + live_chat_continuation, + lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict) + if continuation: + continuation_id = continuation.get('continuation') + click_tracking_params = continuation.get('clickTrackingParams') + self._append_fragment(ctx, processed_fragment) + return continuation_id, offset, click_tracking_params + + def try_refresh_replay_beginning(live_chat_continuation): + # choose the second option that contains the unfiltered live chat replay + refresh_continuation = try_get( + live_chat_continuation, + lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict) + if refresh_continuation: + # no data yet but required to call _append_fragment + self._append_fragment(ctx, b'') + refresh_continuation_id = refresh_continuation.get('continuation') + offset = 0 + click_tracking_params = refresh_continuation.get('trackingParams') + return refresh_continuation_id, offset, click_tracking_params + return parse_actions_replay(live_chat_continuation) + + live_offset = 0 + + def parse_actions_live(live_chat_continuation): + nonlocal live_offset + continuation_id = click_tracking_params = None + processed_fragment = bytearray() + for action in live_chat_continuation.get('actions', []): + timestamp = self.parse_live_timestamp(action) + if timestamp is not None: + live_offset = timestamp - start_time + # compatibility with replay format + pseudo_action = { + 'replayChatItemAction': {'actions': [action]}, + 'videoOffsetTimeMsec': str(live_offset), + 'isLive': True, + } + processed_fragment.extend( + json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n') + continuation_data_getters = [ + lambda x: x['continuations'][0]['invalidationContinuationData'], + lambda x: x['continuations'][0]['timedContinuationData'], + ] + continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict) + if continuation_data: + continuation_id = continuation_data.get('continuation') + click_tracking_params = continuation_data.get('clickTrackingParams') + timeout_ms = int_or_none(continuation_data.get('timeoutMs')) + if timeout_ms is not None: + time.sleep(timeout_ms / 1000) + self._append_fragment(ctx, processed_fragment) + return continuation_id, live_offset, click_tracking_params + + def download_and_parse_fragment(url, frag_index, request_data=None, headers=None): + count = 0 + while count <= fragment_retries: + try: + success, raw_fragment = dl_fragment(url, request_data, headers) + if not success: + return False, None, None, None + try: + data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) + except RegexNotFoundError: + data = None 
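+                    # NOTE (editorial): the first fragment is the regular live-chat
+                    # page, where the payload is embedded as ytInitialData; later
+                    # fragments come from the youtubei/v1/live_chat endpoints and
+                    # carry bare JSON bodies, hence the json.loads() fallback below.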
+ if not data: + data = json.loads(raw_fragment) + live_chat_continuation = try_get( + data, + lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} + if info_dict['protocol'] == 'youtube_live_chat_replay': + if frag_index == 1: + continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation) + else: + continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation) + elif info_dict['protocol'] == 'youtube_live_chat': + continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation) + return True, continuation_id, offset, click_tracking_params + except compat_urllib_error.HTTPError as err: + count += 1 + if count <= fragment_retries: + self.report_retry_fragment(err, frag_index, count, fragment_retries) + if count > fragment_retries: + self.report_error('giving up after %s fragment retries' % fragment_retries) + return False, None, None, None + + self._prepare_and_start_frag_download(ctx, info_dict) + + success, raw_fragment = dl_fragment(info_dict['url']) + if not success: + return False + try: + data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) + except RegexNotFoundError: + return False + continuation_id = try_get( + data, + lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']) + # no data yet but required to call _append_fragment + self._append_fragment(ctx, b'') + + ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace')) + + if not ytcfg: + return False + api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY']) + innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT']) + if not api_key or not innertube_context: + return False + visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str) + if info_dict['protocol'] == 'youtube_live_chat_replay': + url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key + chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id + elif info_dict['protocol'] == 'youtube_live_chat': + url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key + chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id + + frag_index = offset = 0 + click_tracking_params = None + while continuation_id is not None: + frag_index += 1 + request_data = { + 'context': innertube_context, + 'continuation': continuation_id, + } + if frag_index > 1: + request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))} + if click_tracking_params: + request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params} + headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data) + headers.update({'content-type': 'application/json'}) + fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n' + success, continuation_id, offset, click_tracking_params = download_and_parse_fragment( + url, frag_index, fragment_request_data, headers) + else: + success, continuation_id, offset, click_tracking_params = download_and_parse_fragment( + chat_page_url, frag_index) + if not success: + return False + if test: + break + + self._finish_frag_download(ctx, info_dict) + return True + + @staticmethod + def parse_live_timestamp(action): + action_content = dict_get( + action, + ['addChatItemAction', 
'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand']) + if not isinstance(action_content, dict): + return None + item = dict_get(action_content, ['item', 'bannerRenderer']) + if not isinstance(item, dict): + return None + renderer = dict_get(item, [ + # text + 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer', + 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer', + # ticker + 'liveChatTickerPaidMessageItemRenderer', + 'liveChatTickerSponsorItemRenderer', + # banner + 'liveChatBannerRenderer', + ]) + if not isinstance(renderer, dict): + return None + parent_item_getters = [ + lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'], + lambda x: x['contents'], + ] + parent_item = try_get(renderer, parent_item_getters, dict) + if parent_item: + renderer = dict_get(parent_item, [ + 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer', + 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer', + ]) + if not isinstance(renderer, dict): + return None + return int_or_none(renderer.get('timestampUsec'), 1000) diff --git a/plugins/youtube_download/yt_dlp/extractor/__init__.py b/plugins/youtube_download/yt_dlp/extractor/__init__.py new file mode 100644 index 0000000..b354842 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/__init__.py @@ -0,0 +1,54 @@ +import os + +from ..utils import load_plugins + +_LAZY_LOADER = False +if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): + try: + from .lazy_extractors import * + from .lazy_extractors import _ALL_CLASSES + _LAZY_LOADER = True + except ImportError: + pass + +if not _LAZY_LOADER: + from .extractors import * + _ALL_CLASSES = [ + klass + for name, klass in globals().items() + if name.endswith('IE') and name != 'GenericIE' + ] + _ALL_CLASSES.append(GenericIE) + +_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals()) +_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES + + +def gen_extractor_classes(): + """ Return a list of supported extractors. + The order does matter; the first extractor matched is the one handling the URL. + """ + return _ALL_CLASSES + + +def gen_extractors(): + """ Return a list of an instance of every supported extractor. + The order does matter; the first extractor matched is the one handling the URL. + """ + return [klass() for klass in gen_extractor_classes()] + + +def list_extractors(age_limit): + """ + Return a list of extractors that are suitable for the given age, + sorted by extractor ID. 
+ """ + + return sorted( + filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()), + key=lambda ie: ie.IE_NAME.lower()) + + +def get_info_extractor(ie_name): + """Returns the info extractor class with the given ie_name""" + return globals()[ie_name + 'IE'] diff --git a/plugins/youtube_download/yt_dlp/extractor/abc.py b/plugins/youtube_download/yt_dlp/extractor/abc.py new file mode 100644 index 0000000..9d6f5a4 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/abc.py @@ -0,0 +1,318 @@ +from __future__ import unicode_literals + +import hashlib +import hmac +import re +import time + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + dict_get, + ExtractorError, + js_to_json, + int_or_none, + parse_iso8601, + str_or_none, + try_get, + unescapeHTML, + update_url_query, +) + + +class ABCIE(InfoExtractor): + IE_NAME = 'abc.net.au' + _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/(?:news|btn)/(?:[^/]+/){1,4}(?P\d{5,})' + + _TESTS = [{ + 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', + 'md5': 'cb3dd03b18455a661071ee1e28344d9f', + 'info_dict': { + 'id': '5868334', + 'ext': 'mp4', + 'title': 'Australia to help staff Ebola treatment centre in Sierra Leone', + 'description': 'md5:809ad29c67a05f54eb41f2a105693a67', + }, + 'skip': 'this video has expired', + }, { + 'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326', + 'md5': '4ebd61bdc82d9a8b722f64f1f4b4d121', + 'info_dict': { + 'id': 'NvqvPeNZsHU', + 'ext': 'mp4', + 'upload_date': '20150816', + 'uploader': 'ABC News (Australia)', + 'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". 
Read more here: http://ab.co/1Mwc6ef', + 'uploader_id': 'NewsOnABC', + 'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill', + }, + 'add_ie': ['Youtube'], + 'skip': 'Not accessible from Travis CI server', + }, { + 'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080', + 'md5': 'b96eee7c9edf4fc5a358a0252881cc1f', + 'info_dict': { + 'id': '6880080', + 'ext': 'mp3', + 'title': 'NAB lifts interest rates, following Westpac and CBA', + 'description': 'md5:f13d8edc81e462fce4a0437c7dc04728', + }, + }, { + 'url': 'http://www.abc.net.au/news/2015-10-19/6866214', + 'only_matching': True, + }, { + 'url': 'https://www.abc.net.au/btn/classroom/wwi-centenary/10527914', + 'info_dict': { + 'id': '10527914', + 'ext': 'mp4', + 'title': 'WWI Centenary', + 'description': 'md5:c2379ec0ca84072e86b446e536954546', + } + }, { + 'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074', + 'info_dict': { + 'id': '12342074', + 'ext': 'mp4', + 'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia', + 'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f', + } + }, { + 'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476', + 'info_dict': { + 'id': 'tDL8Ld4dK_8', + 'ext': 'mp4', + 'title': 'Fortnite Banned From Apple and Google App Stores', + 'description': 'md5:a6df3f36ce8f816b74af4bd6462f5651', + 'upload_date': '20200813', + 'uploader': 'Behind the News', + 'uploader_id': 'behindthenews', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + mobj = re.search(r'[^"]+)"\s+data-duration="\d+"\s+title="Download audio directly">', webpage) + if mobj: + urls_info = mobj.groupdict() + youtube = False + video = False + else: + mobj = re.search(r'External Link:', + webpage) + if mobj is None: + mobj = re.search(r'', + webpage, 'embed url')) + if VKIE.suitable(embed_url): + return self.url_result(embed_url, VKIE.ie_key(), video_id) + + embed_page = self._download_webpage( + embed_url, video_id, headers={'Referer': url}) + video_ext = self._get_cookies(embed_url).get('video_ext') + if video_ext: + video_ext = compat_urllib_parse_unquote(video_ext.value) + if not video_ext: + video_ext = compat_b64decode(self._search_regex( + r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)', + embed_page, 'video_ext')).decode() + video_id, sig, _, access_token = video_ext.split(':') + item = self._download_json( + 'https://api.vk.com/method/video.get', video_id, + headers={'User-Agent': 'okhttp/3.4.1'}, query={ + 'access_token': access_token, + 'sig': sig, + 'v': 5.44, + 'videos': video_id, + })['response']['items'][0] + title = item['title'] + + formats = [] + for f_id, f_url in item.get('files', {}).items(): + if f_id == 'external': + return self.url_result(f_url) + ext, height = f_id.split('_') + formats.append({ + 'format_id': height + 'p', + 'url': f_url, + 'height': int_or_none(height), + 'ext': ext, + }) + self._sort_formats(formats) + + thumbnails = [] + for k, v in item.items(): + if k.startswith('photo_') and v: + width = k.replace('photo_', '') + thumbnails.append({ + 'id': width, + 'url': v, + 'width': int_or_none(width), + }) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'comment_count': int_or_none(item.get('comments')), + 'description': item.get('description'), + 'duration': int_or_none(item.get('duration')), + 'thumbnails': thumbnails, + 'timestamp': 
int_or_none(item.get('date')), + 'uploader': item.get('owner_id'), + 'view_count': int_or_none(item.get('views')), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/bitchute.py b/plugins/youtube_download/yt_dlp/extractor/bitchute.py new file mode 100644 index 0000000..dcae6f4 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/bitchute.py @@ -0,0 +1,158 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + GeoRestrictedError, + orderedSet, + unified_strdate, + urlencode_postdata, +) + + +class BitChuteIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/', + 'md5': '7e427d7ed7af5a75b5855705ec750e2b', + 'info_dict': { + 'id': 'szoMrox2JEI', + 'ext': 'mp4', + 'title': 'This is the first video on #BitChute !', + 'description': 'md5:a0337e7b1fe39e32336974af8173a034', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'BitChute', + 'upload_date': '20170103', + }, + }, { + 'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/', + 'only_matching': True, + }, { + 'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') + for mobj in re.finditer( + r'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>%s)' % BitChuteIE._VALID_URL, + webpage)] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'https://www.bitchute.com/video/%s' % video_id, video_id, headers={ + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36', + }) + + title = self._html_search_regex( + (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'), + webpage, 'title', default=None) or self._html_search_meta( + 'description', webpage, 'title', + default=None) or self._og_search_description(webpage) + + format_urls = [] + for mobj in re.finditer( + r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage): + format_urls.append(mobj.group('url')) + format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage)) + + formats = [ + {'url': format_url} + for format_url in orderedSet(format_urls)] + + if not formats: + entries = self._parse_html5_media_entries( + url, webpage, video_id) + if not entries: + error = self._html_search_regex(r'<h1 class="page-title">([^<]+)</h1>', webpage, 'error', default='Cannot find video') + if error == 'Video Unavailable': + raise GeoRestrictedError(error) + raise ExtractorError(error) + formats = entries[0]['formats'] + + self._check_formats(formats, video_id) + self._sort_formats(formats) + + description = self._html_search_regex( + r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>', + webpage, 'description', fatal=False) + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._html_search_meta( + 'twitter:image:src', webpage, 'thumbnail') + uploader = self._html_search_regex( + (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>', + r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'), + webpage, 'uploader', fatal=False) + + upload_date = unified_strdate(self._search_regex( + r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.', + webpage, 'upload date', fatal=False)) + + return { + 'id': video_id, +
'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'upload_date': upload_date, + 'formats': formats, + } + + +class BitChuteChannelIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'https://www.bitchute.com/channel/victoriaxrave/', + 'playlist_mincount': 185, + 'info_dict': { + 'id': 'victoriaxrave', + }, + } + + _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7' + + def _entries(self, channel_id): + channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id + offset = 0 + for page_num in itertools.count(1): + data = self._download_json( + '%sextend/' % channel_url, channel_id, + 'Downloading channel page %d' % page_num, + data=urlencode_postdata({ + 'csrfmiddlewaretoken': self._TOKEN, + 'name': '', + 'offset': offset, + }), headers={ + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + 'Referer': channel_url, + 'X-Requested-With': 'XMLHttpRequest', + 'Cookie': 'csrftoken=%s' % self._TOKEN, + }) + if data.get('success') is False: + break + html = data.get('html') + if not html: + break + video_ids = re.findall( + r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)', + html) + if not video_ids: + break + offset += len(video_ids) + for video_id in video_ids: + yield self.url_result( + 'https://www.bitchute.com/video/%s' % video_id, + ie=BitChuteIE.ie_key(), video_id=video_id) + + def _real_extract(self, url): + channel_id = self._match_id(url) + return self.playlist_result( + self._entries(channel_id), playlist_id=channel_id) diff --git a/plugins/youtube_download/yt_dlp/extractor/bitwave.py b/plugins/youtube_download/yt_dlp/extractor/bitwave.py new file mode 100644 index 0000000..e6e093f --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/bitwave.py @@ -0,0 +1,61 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class BitwaveReplayIE(InfoExtractor): + IE_NAME = 'bitwave:replay' + _VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<user>\w+)/replay/(?P<id>\w+)/?$' + _TEST = { + 'url': 'https://bitwave.tv/RhythmicCarnage/replay/z4P6eq5L7WDrM85UCrVr', + 'only_matching': True + } + + def _real_extract(self, url): + replay_id = self._match_id(url) + replay = self._download_json( + 'https://api.bitwave.tv/v1/replays/' + replay_id, + replay_id + ) + + return { + 'id': replay_id, + 'title': replay['data']['title'], + 'uploader': replay['data']['name'], + 'uploader_id': replay['data']['name'], + 'url': replay['data']['url'], + 'thumbnails': [ + {'url': x} for x in replay['data']['thumbnails'] + ], + } + + +class BitwaveStreamIE(InfoExtractor): + IE_NAME = 'bitwave:stream' + _VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<id>\w+)/?$' + _TEST = { + 'url': 'https://bitwave.tv/doomtube', + 'only_matching': True + } + + def _real_extract(self, url): + username = self._match_id(url) + channel = self._download_json( + 'https://api.bitwave.tv/v1/channels/' + username, + username) + + formats = self._extract_m3u8_formats( + channel['data']['url'], username, + 'mp4') + self._sort_formats(formats) + + return { + 'id': username, + 'title': channel['data']['title'], + 'uploader': username, + 'uploader_id': username, + 'formats': formats, + 'thumbnail': channel['data']['thumbnail'], + 'is_live': True, + 'view_count': channel['data']['viewCount'] + } diff --git a/plugins/youtube_download/yt_dlp/extractor/blackboardcollaborate.py 
b/plugins/youtube_download/yt_dlp/extractor/blackboardcollaborate.py new file mode 100644 index 0000000..8ae2941 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/blackboardcollaborate.py @@ -0,0 +1,67 @@ +# coding: utf-8 +from __future__ import unicode_literals + + +from .common import InfoExtractor +from ..utils import parse_iso8601 + + +class BlackboardCollaborateIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?P<region>[a-z-]+)\.bbcollab\.com/ + (?: + collab/ui/session/playback/load| + recording + )/ + (?P<id>[^/]+)''' + _TESTS = [ + { + 'url': 'https://us-lti.bbcollab.com/collab/ui/session/playback/load/0a633b6a88824deb8c918f470b22b256', + 'md5': 'bb7a055682ee4f25fdb5838cdf014541', + 'info_dict': { + 'id': '0a633b6a88824deb8c918f470b22b256', + 'title': 'HESI A2 Information Session - Thursday, May 6, 2021 - recording_1', + 'ext': 'mp4', + 'duration': 1896000, + 'timestamp': 1620331399, + 'upload_date': '20210506', + }, + }, + { + 'url': 'https://us.bbcollab.com/collab/ui/session/playback/load/76761522adfe4345a0dee6794bbcabda', + 'only_matching': True, + }, + { + 'url': 'https://ca.bbcollab.com/collab/ui/session/playback/load/b6399dcb44df4f21b29ebe581e22479d', + 'only_matching': True, + }, + { + 'url': 'https://eu.bbcollab.com/recording/51ed7b50810c4444a106e48cefb3e6b5', + 'only_matching': True, + }, + { + 'url': 'https://au.bbcollab.com/collab/ui/session/playback/load/2bccf7165d7c419ab87afc1ec3f3bb15', + 'only_matching': True, + }, + ] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + region = mobj.group('region') + video_id = mobj.group('id') + info = self._download_json( + 'https://{}.bbcollab.com/collab/api/csa/recordings/{}/data'.format(region, video_id), video_id) + duration = info.get('duration') + title = info['name'] + upload_date = info.get('created') + streams = info['streams'] + formats = [{'format_id': k, 'url': url} for k, url in streams.items()] + + return { + 'duration': duration, + 'formats': formats, + 'id': video_id, + 'timestamp': parse_iso8601(upload_date), + 'title': title, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/bleacherreport.py b/plugins/youtube_download/yt_dlp/extractor/bleacherreport.py new file mode 100644 index 0000000..d1bf8e8 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/bleacherreport.py @@ -0,0 +1,112 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .amp import AMPIE +from ..utils import ( + ExtractorError, + int_or_none, + parse_iso8601, +) + + +class BleacherReportIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football', + 'md5': 'a3ffc3dc73afdbc2010f02d98f990f20', + 'info_dict': { + 'id': '2496438', + 'ext': 'mp4', + 'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?', + 'uploader_id': 3992341, + 'description': 'CFB, ACC, Florida State', + 'timestamp': 1434380212, + 'upload_date': '20150615', + 'uploader': 'Team Stream Now ', + }, + 'add_ie': ['Ooyala'], + }, { + 'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo', + 'md5': '6a5cd403418c7b01719248ca97fb0692', + 'info_dict': { + 'id': '2586817', + 'ext': 'webm', + 'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo', + 'timestamp': 
1446839961, + 'uploader': 'Sean Fay', + 'description': 'md5:b1601e2314c4d8eec23b6eafe086a757', + 'uploader_id': 6466954, + 'upload_date': '20151011', + }, + 'add_ie': ['Youtube'], + }] + + def _real_extract(self, url): + article_id = self._match_id(url) + + article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article'] + + thumbnails = [] + primary_photo = article_data.get('primaryPhoto') + if primary_photo: + thumbnails = [{ + 'url': primary_photo['url'], + 'width': primary_photo.get('width'), + 'height': primary_photo.get('height'), + }] + + info = { + '_type': 'url_transparent', + 'id': article_id, + 'title': article_data['title'], + 'uploader': article_data.get('author', {}).get('name'), + 'uploader_id': article_data.get('authorId'), + 'timestamp': parse_iso8601(article_data.get('createdAt')), + 'thumbnails': thumbnails, + 'comment_count': int_or_none(article_data.get('commentsCount')), + 'view_count': int_or_none(article_data.get('hitCount')), + } + + video = article_data.get('video') + if video: + video_type = video['type'] + if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'): + info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id'] + elif video_type == 'ooyala.com': + info['url'] = 'ooyala:%s' % video['id'] + elif video_type == 'youtube.com': + info['url'] = video['id'] + elif video_type == 'vine.co': + info['url'] = 'https://vine.co/v/%s' % video['id'] + else: + info['url'] = video_type + video['id'] + return info + else: + raise ExtractorError('no video in the article', expected=True) + + +class BleacherReportCMSIE(AMPIE): + _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})' + _TESTS = [{ + 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms', + 'md5': '670b2d73f48549da032861130488c681', + 'info_dict': { + 'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1', + 'ext': 'mp4', + 'title': 'Cena vs. 
Rollins Would Expose the Heavyweight Division', + 'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e', + 'upload_date': '20150723', + 'timestamp': 1437679032, + + }, + 'expected_warnings': [ + 'Unable to download f4m manifest' + ] + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id) + info['id'] = video_id + return info diff --git a/plugins/youtube_download/yt_dlp/extractor/blinkx.py b/plugins/youtube_download/yt_dlp/extractor/blinkx.py new file mode 100644 index 0000000..d70a3b3 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/blinkx.py @@ -0,0 +1,86 @@ +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..utils import ( + remove_start, + int_or_none, +) + + +class BlinkxIE(InfoExtractor): + _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' + IE_NAME = 'blinkx' + + _TEST = { + 'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ', + 'md5': '337cf7a344663ec79bf93a526a2e06c7', + 'info_dict': { + 'id': 'Da0Gw3xc', + 'ext': 'mp4', + 'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News', + 'uploader': 'IGN News', + 'upload_date': '20150217', + 'timestamp': 1424215740, + 'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.', + 'duration': 47.743333, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + display_id = video_id[:8] + + api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' + + 'video=%s' % video_id) + data_json = self._download_webpage(api_url, display_id) + data = json.loads(data_json)['api']['results'][0] + duration = None + thumbnails = [] + formats = [] + for m in data['media']: + if m['type'] == 'jpg': + thumbnails.append({ + 'url': m['link'], + 'width': int(m['w']), + 'height': int(m['h']), + }) + elif m['type'] == 'original': + duration = float(m['d']) + elif m['type'] == 'youtube': + yt_id = m['link'] + self.to_screen('Youtube video detected: %s' % yt_id) + return self.url_result(yt_id, 'Youtube', video_id=yt_id) + elif m['type'] in ('flv', 'mp4'): + vcodec = remove_start(m['vcodec'], 'ff') + acodec = remove_start(m['acodec'], 'ff') + vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000) + abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000) + tbr = vbr + abr if vbr and abr else None + format_id = '%s-%sk-%s' % (vcodec, tbr, m['w']) + formats.append({ + 'format_id': format_id, + 'url': m['link'], + 'vcodec': vcodec, + 'acodec': acodec, + 'abr': abr, + 'vbr': vbr, + 'tbr': tbr, + 'width': int_or_none(m.get('w')), + 'height': int_or_none(m.get('h')), + }) + + self._sort_formats(formats) + + return { + 'id': display_id, + 'fullid': video_id, + 'title': data['title'], + 'formats': formats, + 'uploader': data.get('channel_name'), + 'timestamp': data.get('pubdate_epoch'), + 'description': data.get('description'), + 'thumbnails': thumbnails, + 'duration': duration, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/blogger.py b/plugins/youtube_download/yt_dlp/extractor/blogger.py new file mode 100644 index 0000000..dba131c --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/blogger.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from ..utils import ( + mimetype2ext, + 
parse_duration, + parse_qs, + str_or_none, + traverse_obj, +) +from .common import InfoExtractor + + +class BloggerIE(InfoExtractor): + IE_NAME = 'blogger.com' + _VALID_URL = r'https?://(?:www\.)?blogger\.com/video\.g\?token=(?P<id>.+)' + _VALID_EMBED = r'''<iframe[^>]+src=["']((?:https?:)?//(?:www\.)?blogger\.com/video\.g\?token=[^"']+)["']''' + _TESTS = [{ + 'url': 'https://www.blogger.com/video.g?token=AD6v5dzEe9hfcARr5Hlq1WTkYy6t-fXH3BBahVhGvVHe5szdEUBEloSEDSTA8-b111089KbfWuBvTN7fnbxMtymsHhXAXwVvyzHH4Qch2cfLQdGxKQrrEuFpC1amSl_9GuLWODjPgw', + 'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac', + 'info_dict': { + 'id': 'BLOGGER-video-3c740e3a49197e16-796', + 'title': 'BLOGGER-video-3c740e3a49197e16-796', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.*', + 'duration': 76.068, + } + }] + + @staticmethod + def _extract_urls(webpage): + return re.findall(BloggerIE._VALID_EMBED, webpage) + + def _real_extract(self, url): + token_id = self._match_id(url) + webpage = self._download_webpage(url, token_id) + data_json = self._search_regex(r'var\s+VIDEO_CONFIG\s*=\s*(\{.*)', webpage, 'JSON data') + data = self._parse_json(data_json.encode('utf-8').decode('unicode_escape'), token_id) + streams = data['streams'] + formats = [{ + 'ext': mimetype2ext(traverse_obj(parse_qs(stream['play_url']), ('mime', 0))), + 'url': stream['play_url'], + 'format_id': str_or_none(stream.get('format_id')), + } for stream in streams] + + return { + 'id': data.get('iframe_id', token_id), + 'title': data.get('iframe_id', token_id), + 'formats': formats, + 'thumbnail': data.get('thumbnail'), + 'duration': parse_duration(traverse_obj(parse_qs(streams[0]['play_url']), ('dur', 0))), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/bloomberg.py b/plugins/youtube_download/yt_dlp/extractor/bloomberg.py new file mode 100644 index 0000000..2fbfad1 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/bloomberg.py @@ -0,0 +1,83 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class BloombergIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)' + + _TESTS = [{ + 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2', + # The md5 checksum changes + 'info_dict': { + 'id': 'qurhIVlJSB6hzkVi229d8g', + 'ext': 'flv', + 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies', + 'description': 'md5:a8ba0302912d03d246979735c17d2761', + }, + 'params': { + 'format': 'best[format_id^=hds]', + }, + }, { + # video ID in BPlayer(...) 
+ 'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/', + 'info_dict': { + 'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74', + 'ext': 'flv', + 'title': 'Meet the Real-Life Tech Wizards of Middle Earth', + 'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.', + }, + 'params': { + 'format': 'best[format_id^=hds]', + }, + }, { + # data-bmmrid= + 'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money', + 'only_matching': True, + }, { + 'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets', + 'only_matching': True, + }, { + 'url': 'http://www.bloomberg.com/politics/videos/2015-11-25/karl-rove-on-jeb-bush-s-struggles-stopping-trump', + 'only_matching': True, + }] + + def _real_extract(self, url): + name = self._match_id(url) + webpage = self._download_webpage(url, name) + video_id = self._search_regex( + (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', + r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', + r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'), + webpage, 'id', group='id', default=None) + if not video_id: + bplayer_data = self._parse_json(self._search_regex( + r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name) + video_id = bplayer_data['id'] + title = re.sub(': Video$', '', self._og_search_title(webpage)) + + embed_info = self._download_json( + 'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id) + formats = [] + for stream in embed_info['streams']: + stream_url = stream.get('url') + if not stream_url: + continue + if stream['muxing_format'] == 'TS': + formats.extend(self._extract_m3u8_formats( + stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + else: + formats.extend(self._extract_f4m_formats( + stream_url, video_id, f4m_id='hds', fatal=False)) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/bokecc.py b/plugins/youtube_download/yt_dlp/extractor/bokecc.py new file mode 100644 index 0000000..6a89d36 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/bokecc.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + + +from .common import InfoExtractor +from ..compat import compat_parse_qs +from ..utils import ExtractorError + + +class BokeCCBaseIE(InfoExtractor): + def _extract_bokecc_formats(self, webpage, video_id, format_id=None): + player_params_str = self._html_search_regex( + r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)', + webpage, 'player params', group='query') + + player_params = compat_parse_qs(player_params_str) + + info_xml = self._download_xml( + 'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % ( + player_params['siteid'][0], player_params['vid'][0]), video_id) + + formats = [{ + 'format_id': format_id, + 'url': quality.find('./copy').attrib['playurl'], + 'quality': int(quality.attrib['value']), + } for quality in info_xml.findall('./video/quality')] + + self._sort_formats(formats) + + return formats + + +class BokeCCIE(BokeCCBaseIE): + _IE_DESC = 'CC视频' + _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' + + _TESTS = [{ + 'url': 
'http://union.bokecc.com/playvideo.bo?vid=E0ABAE9D4F509B189C33DC5901307461&uid=FE644790DE9D154A', + 'info_dict': { + 'id': 'FE644790DE9D154A_E0ABAE9D4F509B189C33DC5901307461', + 'ext': 'flv', + 'title': 'BokeCC Video', + }, + }] + + def _real_extract(self, url): + qs = compat_parse_qs(self._match_valid_url(url).group('query')) + if not qs.get('vid') or not qs.get('uid'): + raise ExtractorError('Invalid URL', expected=True) + + video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0]) + + webpage = self._download_webpage(url, video_id) + + return { + 'id': video_id, + 'title': 'BokeCC Video', # no title provided in the webpage + 'formats': self._extract_bokecc_formats(webpage, video_id), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/bongacams.py b/plugins/youtube_download/yt_dlp/extractor/bongacams.py new file mode 100644 index 0000000..4e346e7 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/bongacams.py @@ -0,0 +1,59 @@ +from __future__ import unicode_literals + + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + try_get, + urlencode_postdata, +) + + +class BongaCamsIE(InfoExtractor): + _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)' + _TESTS = [{ + 'url': 'https://de.bongacams.com/azumi-8', + 'only_matching': True, + }, { + 'url': 'https://cn.bongacams.com/azumi-8', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + host = mobj.group('host') + channel_id = mobj.group('id') + + amf = self._download_json( + 'https://%s/tools/amf.php' % host, channel_id, + data=urlencode_postdata(( + ('method', 'getRoomData'), + ('args[]', channel_id), + ('args[]', 'false'), + )), headers={'X-Requested-With': 'XMLHttpRequest'}) + + server_url = amf['localData']['videoServerUrl'] + + uploader_id = try_get( + amf, lambda x: x['performerData']['username'], compat_str) or channel_id + uploader = try_get( + amf, lambda x: x['performerData']['displayName'], compat_str) + like_count = int_or_none(try_get( + amf, lambda x: x['performerData']['loversCount'])) + + formats = self._extract_m3u8_formats( + '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id), + channel_id, 'mp4', m3u8_id='hls', live=True) + self._sort_formats(formats) + + return { + 'id': channel_id, + 'title': uploader or uploader_id, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'like_count': like_count, + 'age_limit': 18, + 'is_live': True, + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/bostonglobe.py b/plugins/youtube_download/yt_dlp/extractor/bostonglobe.py new file mode 100644 index 0000000..57882fb --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/bostonglobe.py @@ -0,0 +1,72 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +from ..utils import ( + extract_attributes, +) + + +class BostonGlobeIE(InfoExtractor): + _VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?' 
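# A minimal standalone sketch of the AMF-to-HLS handshake that BongaCamsIE
# above performs: POST to the amf.php endpoint, read the video server and
# performer name from the JSON reply, then build the HLS master playlist URL
# that _extract_m3u8_formats() would expand into formats. This is an
# illustration outside the extractor framework; it assumes the third-party
# `requests` package, and the host/channel values are hypothetical.
import requests

def bongacams_hls_url(host, channel):
    amf = requests.post(
        'https://%s/tools/amf.php' % host,
        # Same form fields the extractor sends via urlencode_postdata()
        data=[('method', 'getRoomData'), ('args[]', channel), ('args[]', 'false')],
        headers={'X-Requested-With': 'XMLHttpRequest'}).json()
    server_url = amf['localData']['videoServerUrl']
    username = amf['performerData']['username']
    return '%s/hls/stream_%s/playlist.m3u8' % (server_url, username)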
+ _TESTS = [ + { + 'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html', + 'md5': '0a62181079c85c2d2b618c9a738aedaf', + 'info_dict': { + 'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood', + 'id': '5320421710001', + 'ext': 'mp4', + 'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.', + 'timestamp': 1486877593, + 'upload_date': '20170212', + 'uploader_id': '245991542', + }, + }, + { + # Embedded youtube video; we hand it off to the Generic extractor. + 'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html', + 'md5': '582b40327089d5c0c949b3c54b13c24b', + 'info_dict': { + 'title': "Who Is Matt Damon's Favorite Batman?", + 'id': 'ZW1QCnlA6Qc', + 'ext': 'mp4', + 'upload_date': '20170217', + 'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb', + 'uploader': 'The Late Late Show with James Corden', + 'uploader_id': 'TheLateLateShow', + }, + 'expected_warnings': ['404'], + }, + ] + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + + page_title = self._og_search_title(webpage, default=None) + + # <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject"> + entries = [] + for video in re.findall(r'(?i)(<video[^>]+>)', webpage): + attrs = extract_attributes(video) + + video_id = attrs.get('data-brightcove-video-id') + account_id = attrs.get('data-account') + player_id = attrs.get('data-player') + embed = attrs.get('data-embed') + + if video_id and account_id and player_id and embed: + entries.append( + 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' + % (account_id, player_id, embed, video_id)) + + if len(entries) == 0: + return self.url_result(url, 'Generic') + elif len(entries) == 1: + return self.url_result(entries[0], 'BrightcoveNew') + else: + return self.playlist_from_matches(entries, page_id, page_title, ie='BrightcoveNew') diff --git a/plugins/youtube_download/yt_dlp/extractor/box.py b/plugins/youtube_download/yt_dlp/extractor/box.py new file mode 100644 index 0000000..8214086 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/box.py @@ -0,0 +1,97 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + parse_iso8601, + # try_get, + update_url_query, +) + + +class BoxIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)' + _TEST = { + 'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538', + 'md5': '1f81b2fd3960f38a40a3b8823e5fcd43', + 'info_dict': { + 'id': '510727257538', + 'ext': 'mp4', + 'title': 'Garber St. 
Louis will be 28th MLS team +scarving.mp4', + 'uploader': 'MLS Video', + 'timestamp': 1566320259, + 'upload_date': '20190820', + 'uploader_id': '235196876', + } + } + + def _real_extract(self, url): + shared_name, file_id = self._match_valid_url(url).groups() + webpage = self._download_webpage(url, file_id) + request_token = self._parse_json(self._search_regex( + r'Box\.config\s*=\s*({.+?});', webpage, + 'Box config'), file_id)['requestToken'] + access_token = self._download_json( + 'https://app.box.com/app-api/enduserapp/elements/tokens', file_id, + 'Downloading token JSON metadata', + data=json.dumps({'fileIDs': [file_id]}).encode(), headers={ + 'Content-Type': 'application/json', + 'X-Request-Token': request_token, + 'X-Box-EndUser-API': 'sharedName=' + shared_name, + })[file_id]['read'] + shared_link = 'https://app.box.com/s/' + shared_name + f = self._download_json( + 'https://api.box.com/2.0/files/' + file_id, file_id, + 'Downloading file JSON metadata', headers={ + 'Authorization': 'Bearer ' + access_token, + 'BoxApi': 'shared_link=' + shared_link, + 'X-Rep-Hints': '[dash]', # TODO: extract `hls` formats + }, query={ + 'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size' + }) + title = f['name'] + + query = { + 'access_token': access_token, + 'shared_link': shared_link + } + + formats = [] + + # for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []): + # entry_url_template = try_get( + # entry, lambda x: x['content']['url_template']) + # if not entry_url_template: + # continue + # representation = entry.get('representation') + # if representation == 'dash': + # TODO: append query to every fragment URL + # formats.extend(self._extract_mpd_formats( + # entry_url_template.replace('{+asset_path}', 'manifest.mpd'), + # file_id, query=query)) + + authenticated_download_url = f.get('authenticated_download_url') + if authenticated_download_url and f.get('is_download_available'): + formats.append({ + 'ext': f.get('extension') or determine_ext(title), + 'filesize': f.get('size'), + 'format_id': 'download', + 'url': update_url_query(authenticated_download_url, query), + }) + + self._sort_formats(formats) + + creator = f.get('created_by') or {} + + return { + 'id': file_id, + 'title': title, + 'formats': formats, + 'description': f.get('description') or None, + 'uploader': creator.get('name'), + 'timestamp': parse_iso8601(f.get('created_at')), + 'uploader_id': creator.get('id'), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/bpb.py b/plugins/youtube_download/yt_dlp/extractor/bpb.py new file mode 100644 index 0000000..8f6ef3c --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/bpb.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + js_to_json, + determine_ext, +) + + +class BpbIE(InfoExtractor): + IE_DESC = 'Bundeszentrale für politische Bildung' + _VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/' + + _TEST = { + 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr', + # md5 fails in Python 2.6 due to buggy server response and wrong handling of urllib2 + 'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f', + 'info_dict': { + 'id': '297', + 'ext': 'mp4', + 'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR', + 'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem 
Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex( + r'<h2 class="white">(.*?)</h2>', webpage, 'title') + video_info_dicts = re.findall( + r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage) + + formats = [] + for video_info in video_info_dicts: + video_info = self._parse_json( + video_info, video_id, transform_source=js_to_json, fatal=False) + if not video_info: + continue + video_url = video_info.get('src') + if not video_url: + continue + quality = 'high' if '_high' in video_url else 'low' + formats.append({ + 'url': video_url, + 'quality': 10 if quality == 'high' else 0, + 'format_note': quality, + 'format_id': '%s-%s' % (quality, determine_ext(video_url)), + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'title': title, + 'description': self._og_search_description(webpage), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/br.py b/plugins/youtube_download/yt_dlp/extractor/br.py new file mode 100644 index 0000000..7169ece --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/br.py @@ -0,0 +1,310 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, + parse_duration, + parse_iso8601, + xpath_element, + xpath_text, +) + + +class BRIE(InfoExtractor): + IE_DESC = 'Bayerischer Rundfunk' + _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html' + + _TESTS = [ + { + 'url': 'http://www.br.de/mediathek/video/sendungen/abendschau/betriebliche-altersvorsorge-104.html', + 'md5': '83a0477cf0b8451027eb566d88b51106', + 'info_dict': { + 'id': '48f656ef-287e-486f-be86-459122db22cc', + 'ext': 'mp4', + 'title': 'Die böse Überraschung', + 'description': 'md5:ce9ac81b466ce775b8018f6801b48ac9', + 'duration': 180, + 'uploader': 'Reinhard Weber', + 'upload_date': '20150422', + }, + 'skip': '404 not found', + }, + { + 'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html', + 'md5': 'af3a3a4aa43ff0ce6a89504c67f427ef', + 'info_dict': { + 'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05', + 'ext': 'flv', + 'title': 'Manfred Schreiber ist tot', + 'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97', + 'duration': 26, + }, + 'skip': '404 not found', + }, + { + 'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html', + 'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d', + 'info_dict': { + 'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b', + 'ext': 'aac', + 'title': 'Kurzweilig und sehr bewegend', + 'description': 'md5:0351996e3283d64adeb38ede91fac54e', + 'duration': 296, + }, + 'skip': '404 not found', + }, + { + 'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html', + 'md5': 'dbab0aef2e047060ea7a21fc1ce1078a', + 'info_dict': { + 'id': '6ba73750-d405-45d3-861d-1ce8c524e059', + 'ext': 'mp4', + 'title': 'Umweltbewusster Häuslebauer', + 'description': 'md5:d52dae9792d00226348c1dbb13c9bae2', + 'duration': 116, + } + }, + { + 'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html', + 'md5': '23bca295f1650d698f94fc570977dae3', + 'info_dict': { + 
'id': 'd982c9ce-8648-4753-b358-98abb8aec43d', + 'ext': 'mp4', + 'title': 'Folge 1 - Metaphysik', + 'description': 'md5:bb659990e9e59905c3d41e369db1fbe3', + 'duration': 893, + 'uploader': 'Eva Maria Steimle', + 'upload_date': '20170208', + } + }, + ] + + def _real_extract(self, url): + base_url, display_id = self._match_valid_url(url).groups() + page = self._download_webpage(url, display_id) + xml_url = self._search_regex( + r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL') + xml = self._download_xml(base_url + xml_url, display_id) + + medias = [] + + for xml_media in xml.findall('video') + xml.findall('audio'): + media_id = xml_media.get('externalId') + media = { + 'id': media_id, + 'title': xpath_text(xml_media, 'title', 'title', True), + 'duration': parse_duration(xpath_text(xml_media, 'duration')), + 'formats': self._extract_formats(xpath_element( + xml_media, 'assets'), media_id), + 'thumbnails': self._extract_thumbnails(xpath_element( + xml_media, 'teaserImage/variants'), base_url), + 'description': xpath_text(xml_media, 'desc'), + 'webpage_url': xpath_text(xml_media, 'permalink'), + 'uploader': xpath_text(xml_media, 'author'), + } + broadcast_date = xpath_text(xml_media, 'broadcastDate') + if broadcast_date: + media['upload_date'] = ''.join(reversed(broadcast_date.split('.'))) + medias.append(media) + + if len(medias) > 1: + self.report_warning( + 'found multiple medias; please ' + 'report this with the video URL to http://yt-dl.org/bug') + if not medias: + raise ExtractorError('No media entries found') + return medias[0] + + def _extract_formats(self, assets, media_id): + formats = [] + for asset in assets.findall('asset'): + format_url = xpath_text(asset, ['downloadUrl', 'url']) + asset_type = asset.get('type') + if asset_type.startswith('HDS'): + formats.extend(self._extract_f4m_formats( + format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False)) + elif asset_type.startswith('HLS'): + formats.extend(self._extract_m3u8_formats( + format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False)) + else: + format_info = { + 'ext': xpath_text(asset, 'mediaType'), + 'width': int_or_none(xpath_text(asset, 'frameWidth')), + 'height': int_or_none(xpath_text(asset, 'frameHeight')), + 'tbr': int_or_none(xpath_text(asset, 'bitrateVideo')), + 'abr': int_or_none(xpath_text(asset, 'bitrateAudio')), + 'vcodec': xpath_text(asset, 'codecVideo'), + 'acodec': xpath_text(asset, 'codecAudio'), + 'container': xpath_text(asset, 'mediaType'), + 'filesize': int_or_none(xpath_text(asset, 'size')), + } + format_url = self._proto_relative_url(format_url) + if format_url: + http_format_info = format_info.copy() + http_format_info.update({ + 'url': format_url, + 'format_id': 'http-%s' % asset_type, + }) + formats.append(http_format_info) + server_prefix = xpath_text(asset, 'serverPrefix') + if server_prefix: + rtmp_format_info = format_info.copy() + rtmp_format_info.update({ + 'url': server_prefix, + 'play_path': xpath_text(asset, 'fileName'), + 'format_id': 'rtmp-%s' % asset_type, + }) + formats.append(rtmp_format_info) + self._sort_formats(formats) + return formats + + def _extract_thumbnails(self, variants, base_url): + thumbnails = [{ + 'url': base_url + xpath_text(variant, 'url'), + 'width': int_or_none(xpath_text(variant, 'width')), + 'height': int_or_none(xpath_text(variant, 'height')), + } for variant in variants.findall('variant') if xpath_text(variant, 'url')] + 
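# A minimal standalone sketch of the date idiom used in BRIE._real_extract
# above: the XML's broadcastDate arrives in German 'DD.MM.YYYY' form, and
# reversing the dot-separated parts yields the YYYYMMDD string yt-dlp expects
# for upload_date. The sample value below is hypothetical.
def broadcast_date_to_upload_date(broadcast_date):
    # '22.04.2015' -> ['22', '04', '2015'] -> '20150422'
    return ''.join(reversed(broadcast_date.split('.')))

assert broadcast_date_to_upload_date('22.04.2015') == '20150422'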
thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True) + return thumbnails + + +class BRMediathekIE(InfoExtractor): + IE_DESC = 'Bayerischer Rundfunk Mediathek' + _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})' + + _TESTS = [{ + 'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e', + 'md5': 'fdc3d485835966d1622587d08ba632ec', + 'info_dict': { + 'id': 'av:5a1e6a6e8fce6d001871cc8e', + 'ext': 'mp4', + 'title': 'Die Sendung vom 28.11.2017', + 'description': 'md5:6000cdca5912ab2277e5b7339f201ccc', + 'timestamp': 1511942766, + 'upload_date': '20171129', + } + }] + + def _real_extract(self, url): + clip_id = self._match_id(url) + + clip = self._download_json( + 'https://proxy-base.master.mango.express/graphql', + clip_id, data=json.dumps({ + "query": """{ + viewer { + clip(id: "%s") { + title + description + duration + createdAt + ageRestriction + videoFiles { + edges { + node { + publicLocation + fileSize + videoProfile { + width + height + bitrate + encoding + } + } + } + } + captionFiles { + edges { + node { + publicLocation + } + } + } + teaserImages { + edges { + node { + imageFiles { + edges { + node { + publicLocation + width + height + } + } + } + } + } + } + } + } +}""" % clip_id}).encode(), headers={ + 'Content-Type': 'application/json', + })['data']['viewer']['clip'] + title = clip['title'] + + formats = [] + for edge in clip.get('videoFiles', {}).get('edges', []): + node = edge.get('node', {}) + n_url = node.get('publicLocation') + if not n_url: + continue + ext = determine_ext(n_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + n_url, clip_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + else: + video_profile = node.get('videoProfile', {}) + tbr = int_or_none(video_profile.get('bitrate')) + format_id = 'http' + if tbr: + format_id += '-%d' % tbr + formats.append({ + 'format_id': format_id, + 'url': n_url, + 'width': int_or_none(video_profile.get('width')), + 'height': int_or_none(video_profile.get('height')), + 'tbr': tbr, + 'filesize': int_or_none(node.get('fileSize')), + }) + self._sort_formats(formats) + + subtitles = {} + for edge in clip.get('captionFiles', {}).get('edges', []): + node = edge.get('node', {}) + n_url = node.get('publicLocation') + if not n_url: + continue + subtitles.setdefault('de', []).append({ + 'url': n_url, + }) + + thumbnails = [] + for edge in clip.get('teaserImages', {}).get('edges', []): + for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []): + node = image_edge.get('node', {}) + n_url = node.get('publicLocation') + if not n_url: + continue + thumbnails.append({ + 'url': n_url, + 'width': int_or_none(node.get('width')), + 'height': int_or_none(node.get('height')), + }) + + return { + 'id': clip_id, + 'title': title, + 'description': clip.get('description'), + 'duration': int_or_none(clip.get('duration')), + 'timestamp': parse_iso8601(clip.get('createdAt')), + 'age_limit': int_or_none(clip.get('ageRestriction')), + 'formats': formats, + 'subtitles': subtitles, + 'thumbnails': thumbnails, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/bravotv.py b/plugins/youtube_download/yt_dlp/extractor/bravotv.py new file mode 100644 index 0000000..139d51c --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/bravotv.py @@ -0,0 +1,120 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .adobepass import AdobePassIE +from ..utils import ( + smuggle_url, + 
update_url_query, + int_or_none, + float_or_none, + try_get, + dict_get, +) + + +class BravoTVIE(AdobePassIE): + _VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is', + 'md5': 'e34684cfea2a96cd2ee1ef3a60909de9', + 'info_dict': { + 'id': 'epL0pmK1kQlT', + 'ext': 'mp4', + 'title': 'The Top Chef Season 16 Winner Is...', + 'description': 'Find out who takes the title of Top Chef!', + 'uploader': 'NBCU-BRAV', + 'upload_date': '20190314', + 'timestamp': 1552591860, + 'season_number': 16, + 'episode_number': 15, + 'series': 'Top Chef', + 'episode': 'The Top Chef Season 16 Winner Is...', + 'duration': 190.0, + } + }, { + 'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1', + 'only_matching': True, + }, { + 'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2', + 'only_matching': True, + }] + + def _real_extract(self, url): + site, display_id = self._match_valid_url(url).groups() + webpage = self._download_webpage(url, display_id) + settings = self._parse_json(self._search_regex( + r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'), + display_id) + info = {} + query = { + 'mbr': 'true', + } + account_pid, release_pid = [None] * 2 + tve = settings.get('ls_tve') + if tve: + query['manifest'] = 'm3u' + mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage) + if mobj: + account_pid, tp_path = mobj.groups() + release_pid = tp_path.strip('/').split('/')[-1] + else: + account_pid = 'HNK2IC' + tp_path = release_pid = tve['release_pid'] + if tve.get('entitlement') == 'auth': + adobe_pass = settings.get('tve_adobe_auth', {}) + if site == 'bravotv': + site = 'bravo' + resource = self._get_mvpd_resource( + adobe_pass.get('adobePassResourceId') or site, + tve['title'], release_pid, tve.get('rating')) + query['auth'] = self._extract_mvpd_auth( + url, release_pid, + adobe_pass.get('adobePassRequestorId') or site, resource) + else: + shared_playlist = settings['ls_playlist'] + account_pid = shared_playlist['account_pid'] + metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']] + tp_path = release_pid = metadata.get('release_pid') + if not release_pid: + release_pid = metadata['guid'] + tp_path = 'media/guid/2140479951/' + release_pid + info.update({ + 'title': metadata['title'], + 'description': metadata.get('description'), + 'season_number': int_or_none(metadata.get('season_num')), + 'episode_number': int_or_none(metadata.get('episode_num')), + }) + query['switch'] = 'progressive' + + tp_url = 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path) + + tp_metadata = self._download_json( + update_url_query(tp_url, {'format': 'preview'}), + display_id, fatal=False) + if tp_metadata: + info.update({ + 'title': tp_metadata.get('title'), + 'description': tp_metadata.get('description'), + 'duration': float_or_none(tp_metadata.get('duration'), 1000), + 'season_number': int_or_none( + dict_get(tp_metadata, ('pl1$seasonNumber', 'nbcu$seasonNumber'))), + 'episode_number': int_or_none( + dict_get(tp_metadata, ('pl1$episodeNumber', 'nbcu$episodeNumber'))), + # For some reason the series is sometimes wrapped into a single element array. 
+ 'series': try_get( + dict_get(tp_metadata, ('pl1$show', 'nbcu$show')), + lambda x: x[0] if isinstance(x, list) else x, + expected_type=str), + 'episode': dict_get( + tp_metadata, ('pl1$episodeName', 'nbcu$episodeName', 'title')), + }) + + info.update({ + '_type': 'url_transparent', + 'id': release_pid, + 'url': smuggle_url(update_url_query(tp_url, query), {'force_smil_url': True}), + 'ie_key': 'ThePlatform', + }) + return info diff --git a/plugins/youtube_download/yt_dlp/extractor/breakcom.py b/plugins/youtube_download/yt_dlp/extractor/breakcom.py new file mode 100644 index 0000000..f38789f --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/breakcom.py @@ -0,0 +1,90 @@ +from __future__ import unicode_literals + + +from .common import InfoExtractor +from .youtube import YoutubeIE +from ..utils import ( + int_or_none, + url_or_none, +) + + +class BreakIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' + _TESTS = [{ + 'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056', + 'info_dict': { + 'id': '2468056', + 'ext': 'mp4', + 'title': 'When Girls Act Like D-Bags', + 'age_limit': 13, + }, + }, { + # youtube embed + 'url': 'http://www.break.com/video/someone-forgot-boat-brakes-work', + 'info_dict': { + 'id': 'RrrDLdeL2HQ', + 'ext': 'mp4', + 'title': 'Whale Watching Boat Crashing Into San Diego Dock', + 'description': 'md5:afc1b2772f0a8468be51dd80eb021069', + 'upload_date': '20160331', + 'uploader': 'Steve Holden', + 'uploader_id': 'sdholden07', + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'http://www.break.com/video/ugc/baby-flex-2773063', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id, video_id = self._match_valid_url(url).groups() + + webpage = self._download_webpage(url, display_id) + + youtube_url = YoutubeIE._extract_url(webpage) + if youtube_url: + return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) + + content = self._parse_json( + self._search_regex( + r'(?s)content["\']\s*:\s*(\[.+?\])\s*[,\n]', webpage, + 'content'), + display_id) + + formats = [] + for video in content: + video_url = url_or_none(video.get('url')) + if not video_url: + continue + bitrate = int_or_none(self._search_regex( + r'(\d+)_kbps', video_url, 'tbr', default=None)) + formats.append({ + 'url': video_url, + 'format_id': 'http-%d' % bitrate if bitrate else 'http', + 'tbr': bitrate, + }) + self._sort_formats(formats) + + title = self._search_regex( + (r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', + r'<h1[^>]*>(?P<value>[^<]+)'), webpage, 'title', group='value') + + def get(key, name): + return int_or_none(self._search_regex( + r'%s["\']\s*:\s*["\'](\d+)' % key, webpage, name, + default=None)) + + age_limit = get('ratings', 'age limit') + video_id = video_id or get('pid', 'video id') or display_id + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'thumbnail': self._og_search_thumbnail(webpage), + 'age_limit': age_limit, + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/breitbart.py b/plugins/youtube_download/yt_dlp/extractor/breitbart.py new file mode 100644 index 0000000..f50f719 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/breitbart.py @@ -0,0 +1,39 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class BreitBartIE(InfoExtractor): + _VALID_URL = r'https?:\/\/(?:www\.)breitbart.com/videos/v/(?P<id>[^/]+)' + _TESTS = [{ + 'url': 
'https://www.breitbart.com/videos/v/5cOz1yup/?pl=Ij6NDOji', + 'md5': '0aa6d1d6e183ac5ca09207fe49f17ade', + 'info_dict': { + 'id': '5cOz1yup', + 'ext': 'mp4', + 'title': 'Watch \u2013 Clyburn: Statues in Congress Have to Go Because they Are Honoring Slavery', + 'description': 'md5:bac35eb0256d1cb17f517f54c79404d5', + 'thumbnail': 'https://cdn.jwplayer.com/thumbs/5cOz1yup-1920.jpg', + 'age_limit': 0, + } + }, { + 'url': 'https://www.breitbart.com/videos/v/eaiZjVOn/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + formats = self._extract_m3u8_formats(f'https://cdn.jwplayer.com/manifests/{video_id}.m3u8', video_id, ext='mp4') + self._sort_formats(formats) + return { + 'id': video_id, + 'title': self._og_search_title( + webpage, default=None) or self._html_search_regex( + r'(?s)<title>(.*?)</title>', webpage, 'video title'), + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'age_limit': self._rta_search(webpage), + 'formats': formats + } diff --git a/plugins/youtube_download/yt_dlp/extractor/brightcove.py b/plugins/youtube_download/yt_dlp/extractor/brightcove.py new file mode 100644 index 0000000..dcd332b --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/brightcove.py @@ -0,0 +1,688 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import re +import struct + +from .adobepass import AdobePassIE +from .common import InfoExtractor +from ..compat import ( + compat_etree_fromstring, + compat_HTTPError, + compat_parse_qs, + compat_urlparse, + compat_xml_parse_error, +) +from ..utils import ( + clean_html, + dict_get, + extract_attributes, + ExtractorError, + find_xpath_attr, + fix_xml_ampersands, + float_or_none, + int_or_none, + js_to_json, + mimetype2ext, + parse_iso8601, + parse_qs, + smuggle_url, + str_or_none, + try_get, + unescapeHTML, + unsmuggle_url, + UnsupportedError, + update_url_query, + url_or_none, +) + + +class BrightcoveLegacyIE(InfoExtractor): + IE_NAME = 'brightcove:legacy' + _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)' + + _TESTS = [ + { + # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/ + 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001', + 'md5': '5423e113865d26e40624dce2e4b45d95', + 'note': 'Test Brightcove downloads and detection in GenericIE', + 'info_dict': { + 'id': '2371591881001', + 'ext': 'mp4', + 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', + 'uploader': '8TV', + 'description': 'md5:a950cc4285c43e44d763d036710cd9cd', + 'timestamp': 1368213670, + 'upload_date': '20130510', + 'uploader_id': '1589608506001', + }, + 'skip': 'The player has been deactivated by the content owner', + }, + { + # From http://medianetwork.oracle.com/video/player/1785452137001 + 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001', + 'info_dict': { + 'id': '1785452137001', + 'ext': 'flv', + 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges', + 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.', + 'uploader': 'Oracle', + 'timestamp': 1344975024, + 'upload_date': '20120814', + 'uploader_id': '1460825906', + }, + 'skip': 'video not playable', + }, + { + # From
http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/ + 'url': 'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001', + 'info_dict': { + 'id': '2750934548001', + 'ext': 'mp4', + 'title': 'This Bracelet Acts as a Personal Thermostat', + 'description': 'md5:547b78c64f4112766ccf4e151c20b6a0', + # 'uploader': 'Mashable', + 'timestamp': 1382041798, + 'upload_date': '20131017', + 'uploader_id': '1130468786001', + }, + }, + { + # test that the default referer works + # from http://national.ballet.ca/interact/video/Lost_in_Motion_II/ + 'url': 'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001', + 'info_dict': { + 'id': '2878862109001', + 'ext': 'mp4', + 'title': 'Lost in Motion II', + 'description': 'md5:363109c02998fee92ec02211bd8000df', + 'uploader': 'National Ballet of Canada', + }, + 'skip': 'Video gone', + }, + { + # test flv videos served by akamaihd.net + # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william + 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3Aevent-stream-356&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D', + # The md5 checksum changes on each download + 'info_dict': { + 'id': '3750436379001', + 'ext': 'flv', + 'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', + 'uploader': 'RBTV Old (do not use)', + 'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', + 'timestamp': 1409122195, + 'upload_date': '20140827', + 'uploader_id': '710858724001', + }, + 'skip': 'Video gone', + }, + { + # playlist with 'videoList' + # from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players + 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL', + 'info_dict': { + 'title': 'Sealife', + 'id': '3550319591001', + }, + 'playlist_mincount': 7, + 'skip': 'Unsupported URL', + }, + { + # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965) + 'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg', + 'info_dict': { + 'id': '1522758701001', + 'title': 'Lesson 08', + }, + 'playlist_mincount': 10, + 'skip': 'Unsupported URL', + }, + { + # playerID inferred from bcpid + # from http://www.un.org/chinese/News/story.asp?NewsID=27724 + 'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350', + 'only_matching': True, # Tested in GenericIE + } + ] + + @classmethod + def _build_brightcove_url(cls, object_str): + """ + Build a Brightcove url from a xml string containing + {params} + """ + + # Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553 + object_str = re.sub(r'(', + lambda m: m.group(1) + '/>', object_str) + # Fix up 
some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608 + object_str = object_str.replace('<--', ' {1}'.format(start, stop) + else: + yield line + + return '\r\n'.join(_fix_subtitle(subtitles)) diff --git a/plugins/youtube_download/yt_dlp/extractor/cgtn.py b/plugins/youtube_download/yt_dlp/extractor/cgtn.py new file mode 100644 index 0000000..89f1738 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/cgtn.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + try_get, + unified_timestamp, +) + + +class CGTNIE(InfoExtractor): + _VALID_URL = r'https?://news\.cgtn\.com/news/[0-9]{4}-[0-9]{2}-[0-9]{2}/[a-zA-Z0-9-]+-(?P[a-zA-Z0-9-]+)/index\.html' + _TESTS = [ + { + 'url': 'https://news.cgtn.com/news/2021-03-09/Up-and-Out-of-Poverty-Ep-1-A-solemn-promise-YuOUaOzGQU/index.html', + 'info_dict': { + 'id': 'YuOUaOzGQU', + 'ext': 'mp4', + 'title': 'Up and Out of Poverty Ep. 1: A solemn promise', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1615295940, + 'upload_date': '20210309', + }, + 'params': { + 'skip_download': True + } + }, { + 'url': 'https://news.cgtn.com/news/2021-06-06/China-Indonesia-vow-to-further-deepen-maritime-cooperation-10REvJCewCY/index.html', + 'info_dict': { + 'id': '10REvJCewCY', + 'ext': 'mp4', + 'title': 'China, Indonesia vow to further deepen maritime cooperation', + 'thumbnail': r're:^https?://.*\.png$', + 'description': 'China and Indonesia vowed to upgrade their cooperation into the maritime sector and also for political security, economy, and cultural and people-to-people exchanges.', + 'author': 'CGTN', + 'category': 'China', + 'timestamp': 1622950200, + 'upload_date': '20210606', + }, + 'params': { + 'skip_download': False + } + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + download_url = self._html_search_regex(r'data-video ="(?P.+m3u8)"', webpage, 'download_url') + datetime_str = self._html_search_regex(r'\s*(.+?)\s*', webpage, 'datetime_str', fatal=False) + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage, default=None), + 'thumbnail': self._og_search_thumbnail(webpage), + 'formats': self._extract_m3u8_formats(download_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'), + 'category': self._html_search_regex(r'\s*(.+?)\s*', + webpage, 'category', fatal=False), + 'author': self._html_search_regex(r'
<div class="news-author-name">\s*(.+?)\s*</div>
', + webpage, 'author', default=None, fatal=False), + 'timestamp': try_get(unified_timestamp(datetime_str), lambda x: x - 8 * 3600), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/channel9.py b/plugins/youtube_download/yt_dlp/extractor/channel9.py new file mode 100644 index 0000000..90024db --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/channel9.py @@ -0,0 +1,260 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + int_or_none, + parse_iso8601, + qualities, + unescapeHTML, +) + + +class Channel9IE(InfoExtractor): + IE_DESC = 'Channel 9' + IE_NAME = 'channel9' + _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P.+?)(?P/RSS)?/?(?:[?#&]|$)' + + _TESTS = [{ + 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', + 'md5': '32083d4eaf1946db6d454313f44510ca', + 'info_dict': { + 'id': '6c413323-383a-49dc-88f9-a22800cab024', + 'ext': 'wmv', + 'title': 'Developer Kick-Off Session: Stuff We Love', + 'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731', + 'duration': 4576, + 'thumbnail': r're:https?://.*\.jpg', + 'timestamp': 1377717420, + 'upload_date': '20130828', + 'session_code': 'KOS002', + 'session_room': 'Arena 1A', + 'session_speakers': 'count:5', + }, + }, { + 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', + 'md5': 'dcf983ee6acd2088e7188c3cf79b46bc', + 'info_dict': { + 'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024', + 'ext': 'wmv', + 'title': 'Self-service BI with Power BI - nuclear testing', + 'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54', + 'duration': 1540, + 'thumbnail': r're:https?://.*\.jpg', + 'timestamp': 1386381991, + 'upload_date': '20131207', + 'authors': ['Mike Wilmot'], + }, + }, { + # low quality mp4 is best + 'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', + 'info_dict': { + 'id': '33ad69d2-6a4e-4172-83a1-a523013dec76', + 'ext': 'mp4', + 'title': 'Ranges for the Standard Library', + 'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372', + 'duration': 5646, + 'thumbnail': r're:https?://.*\.jpg', + 'upload_date': '20150930', + 'timestamp': 1443640735, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS', + 'info_dict': { + 'id': 'Events/DEVintersection/DEVintersection-2016', + 'title': 'DEVintersection 2016 Orlando Sessions', + }, + 'playlist_mincount': 14, + }, { + 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS', + 'only_matching': True, + }, { + 'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman', + 'only_matching': True, + }] + + _RSS_URL = 'http://channel9.msdn.com/%s/RSS' + + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b', + webpage) + + def _extract_list(self, video_id, rss_url=None): + if not rss_url: + rss_url = self._RSS_URL % video_id + rss = self._download_xml(rss_url, video_id, 'Downloading RSS') + entries = [self.url_result(session_url.text, 'Channel9') + for session_url in rss.findall('./channel/item/link')] + title_text = rss.find('./channel/title').text + return self.playlist_result(entries, video_id, title_text) + + def _real_extract(self, url): + content_path, rss = self._match_valid_url(url).groups() + + if rss: + return self._extract_list(content_path, url) + + 
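The `_extract_list` helper above turns a Channel 9 RSS feed into a playlist by resolving every `./channel/item/link` element through `url_result`. A minimal standalone sketch of the same pattern, using plain `urllib` and `xml.etree` in place of yt-dlp's `_download_xml`, with a hypothetical feed URL:

import urllib.request
import xml.etree.ElementTree as ET

FEED_URL = 'https://channel9.msdn.com/Events/Example/RSS'  # hypothetical feed URL

def rss_entry_urls(feed_url=FEED_URL):
    # Fetch and parse the RSS document, then yield one URL per
    # <item><link>, mirroring rss.findall('./channel/item/link') above.
    with urllib.request.urlopen(feed_url) as resp:
        rss = ET.fromstring(resp.read())
    for link in rss.findall('./channel/item/link'):
        if link.text:
            yield link.text

Each yielded URL would then be wrapped in a url_result entry and handed to playlist_result, which is what _extract_list does, using the feed's channel title as the playlist title.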
webpage = self._download_webpage( + url, content_path, 'Downloading web page') + + episode_data = self._search_regex( + r"data-episode='([^']+)'", webpage, 'episode data', default=None) + if episode_data: + episode_data = self._parse_json(unescapeHTML( + episode_data), content_path) + content_id = episode_data['contentId'] + is_session = '/Sessions(' in episode_data['api'] + content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + '?$select=Captions,CommentCount,MediaLengthInSeconds,PublishedDate,Rating,RatingCount,Title,VideoMP4High,VideoMP4Low,VideoMP4Medium,VideoPlayerPreviewImage,VideoWMV,VideoWMVHQ,Views,' + if is_session: + content_url += 'Code,Description,Room,Slides,Speakers,ZipFile&$expand=Speakers' + else: + content_url += 'Authors,Body&$expand=Authors' + content_data = self._download_json(content_url, content_id) + title = content_data['Title'] + + QUALITIES = ( + 'mp3', + 'wmv', 'mp4', + 'wmv-low', 'mp4-low', + 'wmv-mid', 'mp4-mid', + 'wmv-high', 'mp4-high', + ) + + quality_key = qualities(QUALITIES) + + def quality(quality_id, format_url): + return (len(QUALITIES) if '_Source.' in format_url + else quality_key(quality_id)) + + formats = [] + urls = set() + + SITE_QUALITIES = { + 'MP3': 'mp3', + 'MP4': 'mp4', + 'Low Quality WMV': 'wmv-low', + 'Low Quality MP4': 'mp4-low', + 'Mid Quality WMV': 'wmv-mid', + 'Mid Quality MP4': 'mp4-mid', + 'High Quality WMV': 'wmv-high', + 'High Quality MP4': 'mp4-high', + } + + formats_select = self._search_regex( + r'(?s)]+name=["\']format[^>]+>(.+?)]+\bvalue=(["\'])(?P(?:(?!\1).)+)\1[^>]*>\s*(?P[^<]+?)\s*<', + formats_select): + format_url = mobj.group('url') + if format_url in urls: + continue + urls.add(format_url) + format_id = mobj.group('format') + quality_id = SITE_QUALITIES.get(format_id, format_id) + formats.append({ + 'url': format_url, + 'format_id': quality_id, + 'quality': quality(quality_id, format_url), + 'vcodec': 'none' if quality_id == 'mp3' else None, + }) + + API_QUALITIES = { + 'VideoMP4Low': 'mp4-low', + 'VideoWMV': 'wmv-mid', + 'VideoMP4Medium': 'mp4-mid', + 'VideoMP4High': 'mp4-high', + 'VideoWMVHQ': 'wmv-hq', + } + + for format_id, q in API_QUALITIES.items(): + q_url = content_data.get(format_id) + if not q_url or q_url in urls: + continue + urls.add(q_url) + formats.append({ + 'url': q_url, + 'format_id': q, + 'quality': quality(q, q_url), + }) + + slides = content_data.get('Slides') + zip_file = content_data.get('ZipFile') + + if not formats and not slides and not zip_file: + self.raise_no_formats( + 'None of recording, slides or zip are available for %s' % content_path) + self._sort_formats(formats) + + subtitles = {} + for caption in content_data.get('Captions', []): + caption_url = caption.get('Url') + if not caption_url: + continue + subtitles.setdefault(caption.get('Language', 'en'), []).append({ + 'url': caption_url, + 'ext': 'vtt', + }) + + common = { + 'id': content_id, + 'title': title, + 'description': clean_html(content_data.get('Description') or content_data.get('Body')), + 'thumbnail': content_data.get('VideoPlayerPreviewImage'), + 'duration': int_or_none(content_data.get('MediaLengthInSeconds')), + 'timestamp': parse_iso8601(content_data.get('PublishedDate')), + 'avg_rating': int_or_none(content_data.get('Rating')), + 'rating_count': int_or_none(content_data.get('RatingCount')), + 'view_count': int_or_none(content_data.get('Views')), + 'comment_count': int_or_none(content_data.get('CommentCount')), + 'subtitles': subtitles, + } + if is_session: + speakers = [] + for s in 
content_data.get('Speakers', []): + speaker_name = s.get('FullName') + if not speaker_name: + continue + speakers.append(speaker_name) + + common.update({ + 'session_code': content_data.get('Code'), + 'session_room': content_data.get('Room'), + 'session_speakers': speakers, + }) + else: + authors = [] + for a in content_data.get('Authors', []): + author_name = a.get('DisplayName') + if not author_name: + continue + authors.append(author_name) + common['authors'] = authors + + contents = [] + + if slides: + d = common.copy() + d.update({'title': title + '-Slides', 'url': slides}) + contents.append(d) + + if zip_file: + d = common.copy() + d.update({'title': title + '-Zip', 'url': zip_file}) + contents.append(d) + + if formats: + d = common.copy() + d.update({'title': title, 'formats': formats}) + contents.append(d) + return self.playlist_result(contents) + else: + return self._extract_list(content_path) diff --git a/plugins/youtube_download/yt_dlp/extractor/charlierose.py b/plugins/youtube_download/yt_dlp/extractor/charlierose.py new file mode 100644 index 0000000..42c9af2 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/charlierose.py @@ -0,0 +1,54 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import remove_end + + +class CharlieRoseIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P\d+)' + _TESTS = [{ + 'url': 'https://charlierose.com/videos/27996', + 'md5': 'fda41d49e67d4ce7c2411fd2c4702e09', + 'info_dict': { + 'id': '27996', + 'ext': 'mp4', + 'title': 'Remembering Zaha Hadid', + 'thumbnail': r're:^https?://.*\.jpg\?\d+', + 'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.', + 'subtitles': { + 'en': [{ + 'ext': 'vtt', + }], + }, + }, + }, { + 'url': 'https://charlierose.com/videos/27996', + 'only_matching': True, + }, { + 'url': 'https://charlierose.com/episodes/30887?autoplay=true', + 'only_matching': True, + }] + + _PLAYER_BASE = 'https://charlierose.com/video/player/%s' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(self._PLAYER_BASE % video_id, video_id) + + title = remove_end(self._og_search_title(webpage), ' - Charlie Rose') + + info_dict = self._parse_html5_media_entries( + self._PLAYER_BASE % video_id, webpage, video_id, + m3u8_entry_protocol='m3u8_native')[0] + + self._sort_formats(info_dict['formats']) + self._remove_duplicate_formats(info_dict['formats']) + + info_dict.update({ + 'id': video_id, + 'title': title, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': self._og_search_description(webpage), + }) + + return info_dict diff --git a/plugins/youtube_download/yt_dlp/extractor/chaturbate.py b/plugins/youtube_download/yt_dlp/extractor/chaturbate.py new file mode 100644 index 0000000..8da51f9 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/chaturbate.py @@ -0,0 +1,109 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + lowercase_escape, + url_or_none, +) + + +class ChaturbateIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P[^/?&#]+)' + _TESTS = [{ + 'url': 'https://www.chaturbate.com/siswet19/', + 'info_dict': { + 'id': 'siswet19', + 'ext': 'mp4', + 'title': 're:^siswet19 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'age_limit': 18, + 'is_live': True, + }, + 'params': { + 
'skip_download': True, + }, + 'skip': 'Room is offline', + }, { + 'url': 'https://chaturbate.com/fullvideo/?b=caylin', + 'only_matching': True, + }, { + 'url': 'https://en.chaturbate.com/siswet19/', + 'only_matching': True, + }] + + _ROOM_OFFLINE = 'Room is currently offline' + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'https://chaturbate.com/%s/' % video_id, video_id, + headers=self.geo_verification_headers()) + + found_m3u8_urls = [] + + data = self._parse_json( + self._search_regex( + r'initialRoomDossier\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', + webpage, 'data', default='{}', group='value'), + video_id, transform_source=lowercase_escape, fatal=False) + if data: + m3u8_url = url_or_none(data.get('hls_source')) + if m3u8_url: + found_m3u8_urls.append(m3u8_url) + + if not found_m3u8_urls: + for m in re.finditer( + r'(\\u002[27])(?P<url>http.+?\.m3u8.*?)\1', webpage): + found_m3u8_urls.append(lowercase_escape(m.group('url'))) + + if not found_m3u8_urls: + for m in re.finditer( + r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage): + found_m3u8_urls.append(m.group('url')) + + m3u8_urls = [] + for found_m3u8_url in found_m3u8_urls: + m3u8_fast_url, m3u8_no_fast_url = found_m3u8_url, found_m3u8_url.replace('_fast', '') + for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url): + if m3u8_url not in m3u8_urls: + m3u8_urls.append(m3u8_url) + + if not m3u8_urls: + error = self._search_regex( + [r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>', + r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'], + webpage, 'error', group='error', default=None) + if not error: + if any(p in webpage for p in ( + self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')): + error = self._ROOM_OFFLINE + if error: + raise ExtractorError(error, expected=True) + raise ExtractorError('Unable to find stream URL') + + formats = [] + for m3u8_url in m3u8_urls: + for known_id in ('fast', 'slow'): + if '_%s' % known_id in m3u8_url: + m3u8_id = known_id + break + else: + m3u8_id = None + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', + # ffmpeg skips segments for fast m3u8 + preference=-10 if m3u8_id == 'fast' else None, + m3u8_id=m3u8_id, fatal=False, live=True)) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_id, + 'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id, + 'age_limit': self._rta_search(webpage), + 'is_live': True, + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/chilloutzone.py b/plugins/youtube_download/yt_dlp/extractor/chilloutzone.py new file mode 100644 index 0000000..fd5202b --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/chilloutzone.py @@ -0,0 +1,95 @@ +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from .youtube import YoutubeIE +from ..compat import compat_b64decode +from ..utils import ( + clean_html, + ExtractorError +) + + +class ChilloutzoneIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html' + _TESTS = [{ + 'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html', + 'md5': 'a76f3457e813ea0037e5244f509e66d1', + 'info_dict': { + 'id': 'enemene-meck-alle-katzen-weg', + 'ext': 'mp4', + 'title': 'Enemene Meck - Alle Katzen weg', + 'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?', + }, + }, { + 'note': 'Video hosted at YouTube', + 'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html', + 'info_dict': { + 'id': '1YVQaAgHyRU', + 'ext': 'mp4', + 'title': '16 Photos Taken 1 Second Before Disaster', + 'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814', + 'uploader': 'BuzzFeedVideo', + 'uploader_id': 'BuzzFeedVideo', + 'upload_date': '20131105', + }, + }, { + 'note': 'Video hosted at Vimeo', + 'url': 'http://www.chilloutzone.net/video/icon-blending.html', + 'md5': '2645c678b8dc4fefcc0e1b60db18dac1', + 'info_dict': { + 'id': '85523671', + 'ext': 'mp4', + 'title': 'The Sunday Times - Icons', + 'description': 're:(?s)^Watch the making of - makingoficons.com.{300,}', + 'uploader': 'Us', + 'uploader_id': 'usfilms', + 'upload_date': '20140131' + }, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + base64_video_info = self._html_search_regex( + r'var cozVidData = "(.+?)";', webpage, 'video data') + decoded_video_info = compat_b64decode(base64_video_info).decode('utf-8') + video_info_dict = json.loads(decoded_video_info) + + # get video information from dict + video_url = video_info_dict['mediaUrl'] + description = clean_html(video_info_dict.get('description')) + title = video_info_dict['title'] + native_platform = video_info_dict['nativePlatform'] + native_video_id = video_info_dict['nativeVideoId'] + source_priority = video_info_dict['sourcePriority'] + + # If nativePlatform is None a fallback mechanism is used (i.e. 
youtube embed) + if native_platform is None: + youtube_url = YoutubeIE._extract_url(webpage) + if youtube_url: + return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) + + # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or + # the own CDN + if source_priority == 'native': + if native_platform == 'youtube': + return self.url_result(native_video_id, ie='Youtube') + if native_platform == 'vimeo': + return self.url_result( + 'http://vimeo.com/' + native_video_id, ie='Vimeo') + + if not video_url: + raise ExtractorError('No video found') + + return { + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': title, + 'description': description, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/chingari.py b/plugins/youtube_download/yt_dlp/extractor/chingari.py new file mode 100644 index 0000000..e6841fb --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/chingari.py @@ -0,0 +1,209 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools +import json + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote_plus +from ..utils import ( + clean_html, + ExtractorError, + int_or_none, + str_to_int, + url_or_none, +) + + +class ChingariBaseIE(InfoExtractor): + def _get_post(self, id, post_data): + media_data = post_data['mediaLocation'] + base_url = media_data['base'] + author_data = post_data.get('authorData', {}) + song_data = post_data.get('song', {}) # revist this in future for differentiating b/w 'art' and 'author' + + formats = [{ + 'format_id': frmt, + 'width': str_to_int(frmt[1:]), + 'url': base_url + frmt_path, + } for frmt, frmt_path in media_data.get('transcoded', {}).items()] + + if media_data.get('path'): + formats.append({ + 'format_id': 'original', + 'format_note': 'Direct video.', + 'url': base_url + '/apipublic' + media_data['path'], + 'quality': 10, + }) + self._sort_formats(formats) + timestamp = str_to_int(post_data.get('created_at')) + if timestamp: + timestamp = int_or_none(timestamp, 1000) + + thumbnail, uploader_url = None, None + if media_data.get('thumbnail'): + thumbnail = base_url + media_data.get('thumbnail') + if author_data.get('username'): + uploader_url = 'https://chingari.io/' + author_data.get('username') + + return { + 'id': id, + 'title': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))), + 'description': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))), + 'duration': media_data.get('duration'), + 'thumbnail': url_or_none(thumbnail), + 'like_count': post_data.get('likeCount'), + 'view_count': post_data.get('viewsCount'), + 'comment_count': post_data.get('commentCount'), + 'repost_count': post_data.get('shareCount'), + 'timestamp': timestamp, + 'uploader_id': post_data.get('userId') or author_data.get('_id'), + 'uploader': author_data.get('name'), + 'uploader_url': url_or_none(uploader_url), + 'track': song_data.get('title'), + 'artist': song_data.get('author'), + 'formats': formats, + } + + +class ChingariIE(ChingariBaseIE): + _VALID_URL = r'https?://(?:www\.)?chingari\.io/share/post\?id=(?P[^&/#?]+)' + _TESTS = [{ + 'url': 'https://chingari.io/share/post?id=612f8f4ce1dc57090e8a7beb', + 'info_dict': { + 'id': '612f8f4ce1dc57090e8a7beb', + 'ext': 'mp4', + 'title': 'Happy birthday Srila Prabhupada', + 'description': 'md5:c7080ebfdfeb06016e638c286d6bc3fa', + 'duration': 0, + 'thumbnail': 
'https://media.chingari.io/uploads/c41d30e2-06b6-4e3b-9b4b-edbb929cec06-1630506826911/thumbnail/198f993f-ce87-4623-82c6-cd071bd6d4f4-1630506828016.jpg', + 'like_count': int, + 'view_count': int, + 'comment_count': int, + 'repost_count': int, + 'timestamp': 1630506828, + 'upload_date': '20210901', + 'uploader_id': '5f0403982c8bd344f4813f8c', + 'uploader': 'ISKCON,Inc.', + 'uploader_url': 'https://chingari.io/iskcon,inc', + 'track': None, + 'artist': None, + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + post_json = self._download_json(f'https://api.chingari.io/post/post_details/{id}', id) + if post_json['code'] != 200: + raise ExtractorError(post_json['message'], expected=True) + post_data = post_json['data'] + return self._get_post(id, post_data) + + +class ChingariUserIE(ChingariBaseIE): + _VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P[^/?]+)' + _TESTS = [{ + 'url': 'https://chingari.io/dada1023', + 'playlist_mincount': 3, + 'info_dict': { + 'id': 'dada1023', + }, + 'entries': [{ + 'url': 'https://chingari.io/share/post?id=614781f3ade60b3a0bfff42a', + 'info_dict': { + 'id': '614781f3ade60b3a0bfff42a', + 'ext': 'mp4', + 'title': '#chingaribappa ', + 'description': 'md5:d1df21d84088770468fa63afe3b17857', + 'duration': 7, + 'thumbnail': 'https://media.chingari.io/uploads/346d86d4-abb2-474e-a164-ffccf2bbcb72-1632076273717/thumbnail/b0b3aac2-2b86-4dd1-909d-9ed6e57cf77c-1632076275552.jpg', + 'like_count': int, + 'view_count': int, + 'comment_count': int, + 'repost_count': int, + 'timestamp': 1632076275, + 'upload_date': '20210919', + 'uploader_id': '5efc4b12cca35c3d1794c2d3', + 'uploader': 'dada (girish) dhawale', + 'uploader_url': 'https://chingari.io/dada1023', + 'track': None, + 'artist': None + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://chingari.io/share/post?id=6146b132bcbf860959e12cba', + 'info_dict': { + 'id': '6146b132bcbf860959e12cba', + 'ext': 'mp4', + 'title': 'Tactor harvesting', + 'description': 'md5:8403f12dce68828b77ecee7eb7e887b7', + 'duration': 59.3, + 'thumbnail': 'https://media.chingari.io/uploads/b353ca70-7a87-400d-93a6-fa561afaec86-1632022814584/thumbnail/c09302e3-2043-41b1-a2fe-77d97e5bd676-1632022834260.jpg', + 'like_count': int, + 'view_count': int, + 'comment_count': int, + 'repost_count': int, + 'timestamp': 1632022834, + 'upload_date': '20210919', + 'uploader_id': '5efc4b12cca35c3d1794c2d3', + 'uploader': 'dada (girish) dhawale', + 'uploader_url': 'https://chingari.io/dada1023', + 'track': None, + 'artist': None + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://chingari.io/share/post?id=6145651b74cb030a64c40b82', + 'info_dict': { + 'id': '6145651b74cb030a64c40b82', + 'ext': 'mp4', + 'title': '#odiabhajan ', + 'description': 'md5:687ea36835b9276cf2af90f25e7654cb', + 'duration': 56.67, + 'thumbnail': 'https://media.chingari.io/uploads/6cbf216b-babc-4cce-87fe-ceaac8d706ac-1631937782708/thumbnail/8855754f-6669-48ce-b269-8cc0699ed6da-1631937819522.jpg', + 'like_count': int, + 'view_count': int, + 'comment_count': int, + 'repost_count': int, + 'timestamp': 1631937819, + 'upload_date': '20210918', + 'uploader_id': '5efc4b12cca35c3d1794c2d3', + 'uploader': 'dada (girish) dhawale', + 'uploader_url': 'https://chingari.io/dada1023', + 'track': None, + 'artist': None + }, + 'params': {'skip_download': True} + }], + }, { + 'url': 'https://chingari.io/iskcon%2Cinc', + 'playlist_mincount': 1025, + 'info_dict': { + 'id': 'iskcon%2Cinc', + }, + }] + + def _entries(self, 
id): + skip = 0 + has_more = True + for page in itertools.count(): + posts = self._download_json('https://api.chingari.io/users/getPosts', id, + data=json.dumps({'userId': id, 'ownerId': id, 'skip': skip, 'limit': 20}).encode(), + headers={'content-type': 'application/json;charset=UTF-8'}, + note='Downloading page %s' % page) + for post in posts.get('data', []): + post_data = post['post'] + yield self._get_post(post_data['_id'], post_data) + skip += 20 + has_more = posts['hasMoreData'] + if not has_more: + break + + def _real_extract(self, url): + alt_id = self._match_id(url) + post_json = self._download_json(f'https://api.chingari.io/user/{alt_id}', alt_id) + if post_json['code'] != 200: + raise ExtractorError(post_json['message'], expected=True) + id = post_json['data']['_id'] + return self.playlist_result(self._entries(id), playlist_id=alt_id) diff --git a/plugins/youtube_download/yt_dlp/extractor/chirbit.py b/plugins/youtube_download/yt_dlp/extractor/chirbit.py new file mode 100644 index 0000000..8d75cdf --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/chirbit.py @@ -0,0 +1,91 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_b64decode +from ..utils import parse_duration + + +class ChirbitIE(InfoExtractor): + IE_NAME = 'chirbit' + _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P[\da-zA-Z]+)' + _TESTS = [{ + 'url': 'http://chirb.it/be2abG', + 'info_dict': { + 'id': 'be2abG', + 'ext': 'mp3', + 'title': 'md5:f542ea253f5255240be4da375c6a5d7e', + 'description': 'md5:f24a4e22a71763e32da5fed59e47c770', + 'duration': 306, + 'uploader': 'Gerryaudio', + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5', + 'only_matching': True, + }, { + 'url': 'https://chirb.it/wp/MN58c2', + 'only_matching': True, + }] + + def _real_extract(self, url): + audio_id = self._match_id(url) + + webpage = self._download_webpage( + 'http://chirb.it/%s' % audio_id, audio_id) + + data_fd = self._search_regex( + r'data-fd=(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'data fd', group='url') + + # Reverse engineered from https://chirb.it/js/chirbit.player.js (look + # for soundURL) + audio_url = compat_b64decode(data_fd[::-1]).decode('utf-8') + + title = self._search_regex( + r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title') + description = self._search_regex( + r'
<h3>Description</h3>\s*<pre[^>]*>([^<]+)', + webpage, 'description', default=None) + duration = parse_duration(self._search_regex( + r'class=["\']c-length["\'][^>]*>([^<]+)', + webpage, 'duration', fatal=False)) + uploader = self._search_regex( + r'id=["\']chirbit-username["\'][^>]*>([^<]+)', + webpage, 'uploader', fatal=False) + + return { + 'id': audio_id, + 'url': audio_url, + 'title': title, + 'description': description, + 'duration': duration, + 'uploader': uploader, + } + + +class ChirbitProfileIE(InfoExtractor): + IE_NAME = 'chirbit:profile' + _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/]+)' + _TEST = { + 'url': 'http://chirbit.com/ScarletBeauty', + 'info_dict': { + 'id': 'ScarletBeauty', + }, + 'playlist_mincount': 3, + } + + def _real_extract(self, url): + profile_id = self._match_id(url) + + webpage = self._download_webpage(url, profile_id) + + entries = [ + self.url_result(self._proto_relative_url('//chirb.it/' + video_id)) + for _, video_id in re.findall(r'<input[^>]+id=([\'"])copy-btn-(?P<id>[0-9a-zA-Z]+)\1', webpage)] + + return self.playlist_result(entries, profile_id) diff --git a/plugins/youtube_download/yt_dlp/extractor/cinchcast.py b/plugins/youtube_download/yt_dlp/extractor/cinchcast.py new file mode 100644 index 0000000..b861d54 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/cinchcast.py @@ -0,0 +1,58 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + unified_strdate, + xpath_text, +) + + +class CinchcastIE(InfoExtractor): + _VALID_URL = r'https?://player\.cinchcast\.com/.*?(?:assetId|show_id)=(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://player.cinchcast.com/?show_id=5258197&platformId=1&assetType=single', + 'info_dict': { + 'id': '5258197', + 'ext': 'mp3', + 'title': 'Train Your Brain to Up Your Game with Coach Mandy', + 'upload_date': '20130816', + }, + }, { + # Actual test is run in generic, look for undergroundwellness + 'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + doc = self._download_xml( + 'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id, + video_id) + + item = doc.find('.//item') + title = xpath_text(item, './title', fatal=True) + date_str = xpath_text( + item, './{http://developer.longtailvideo.com/trac/}date') + upload_date = unified_strdate(date_str, day_first=False) + # duration is present but wrong + formats = [{ + 'format_id': 'main', + 'url': item.find('./{http://search.yahoo.com/mrss/}content').attrib['url'], + }] + backup_url = xpath_text( + item, './{http://developer.longtailvideo.com/trac/}backupContent') + if backup_url: + formats.append({ + 'preference': 2, # seems to be more reliable + 'format_id': 'backup', + 'url': backup_url, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'upload_date': upload_date, + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/cinemax.py b/plugins/youtube_download/yt_dlp/extractor/cinemax.py new file mode 100644 index 0000000..2c3ff8d --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/cinemax.py @@ -0,0 +1,28 @@ +# coding: utf-8 +from __future__ import unicode_literals + + +from .hbo import HBOBaseIE + + +class CinemaxIE(HBOBaseIE): + _VALID_URL = r'https?://(?:www\.)?cinemax\.com/(?P<path>[^/]+/video/[0-9a-z-]+-(?P<id>\d+))' + _TESTS = [{ + 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903', + 
'md5': '82e0734bba8aa7ef526c9dd00cf35a05', + 'info_dict': { + 'id': '20126903', + 'ext': 'mp4', + 'title': 'S1 Ep 1: Recap', + }, + 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'], + }, { + 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903.embed', + 'only_matching': True, + }] + + def _real_extract(self, url): + path, video_id = self._match_valid_url(url).groups() + info = self._extract_info('https://www.cinemax.com/%s.xml' % path, video_id) + info['id'] = video_id + return info diff --git a/plugins/youtube_download/yt_dlp/extractor/ciscolive.py b/plugins/youtube_download/yt_dlp/extractor/ciscolive.py new file mode 100644 index 0000000..349c5eb --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/ciscolive.py @@ -0,0 +1,148 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools + +from .common import InfoExtractor +from ..utils import ( + clean_html, + float_or_none, + int_or_none, + parse_qs, + try_get, + urlencode_postdata, +) + + +class CiscoLiveBaseIE(InfoExtractor): + # These appear to be constant across all Cisco Live presentations + # and are not tied to any user session or event + RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s' + RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz' + RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye' + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s' + + HEADERS = { + 'Origin': 'https://ciscolive.cisco.com', + 'rfApiProfileId': RAINFOCUS_API_PROFILE_ID, + 'rfWidgetId': RAINFOCUS_WIDGET_ID, + } + + def _call_api(self, ep, rf_id, query, referrer, note=None): + headers = self.HEADERS.copy() + headers['Referer'] = referrer + return self._download_json( + self.RAINFOCUS_API_URL % ep, rf_id, note=note, + data=urlencode_postdata(query), headers=headers) + + def _parse_rf_item(self, rf_item): + event_name = rf_item.get('eventName') + title = rf_item['title'] + description = clean_html(rf_item.get('abstract')) + presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName']) + bc_id = rf_item['videos'][0]['url'] + bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id + duration = float_or_none(try_get(rf_item, lambda x: x['times'][0]['length'])) + location = try_get(rf_item, lambda x: x['times'][0]['room']) + + if duration: + duration = duration * 60 + + return { + '_type': 'url_transparent', + 'url': bc_url, + 'ie_key': 'BrightcoveNew', + 'title': title, + 'description': description, + 'duration': duration, + 'creator': presenter_name, + 'location': location, + 'series': event_name, + } + + +class CiscoLiveSessionIE(CiscoLiveBaseIE): + _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/[^#]*#/session/(?P[^/?&]+)' + _TESTS = [{ + 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', + 'md5': 'c98acf395ed9c9f766941c70f5352e22', + 'info_dict': { + 'id': '5803694304001', + 'ext': 'mp4', + 'title': '13 Smart Automations to Monitor Your Cisco IOS Network', + 'description': 'md5:ec4a436019e09a918dec17714803f7cc', + 'timestamp': 1530305395, + 'upload_date': '20180629', + 'uploader_id': '5647924234001', + 'location': '16B Mezz.', + }, + }, { + 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.event=ciscoliveemea2019#/session/15361595531500013WOU', + 'only_matching': True, + }, { + 'url': 'https://www.ciscolive.com/global/on-demand-library.html?#/session/1490051371645001kNaS', + 'only_matching': True, + }] + + def 
_real_extract(self, url): + rf_id = self._match_id(url) + rf_result = self._call_api('session', rf_id, {'id': rf_id}, url) + return self._parse_rf_item(rf_result['items'][0]) + + +class CiscoLiveSearchIE(CiscoLiveBaseIE): + _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/(?:global/)?on-demand-library(?:\.html|/)' + _TESTS = [{ + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/', + 'info_dict': { + 'title': 'Search query', + }, + 'playlist_count': 5, + }, { + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/', + 'only_matching': True, + }, { + 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.technicallevel=scpsSkillLevel_aintroductory&search.event=ciscoliveemea2019&search.technology=scpsTechnology_dataCenter&search.focus=scpsSessionFocus_bestPractices#/', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url) + + @staticmethod + def _check_bc_id_exists(rf_item): + return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None + + def _entries(self, query, url): + query['size'] = 50 + query['from'] = 0 + for page_num in itertools.count(1): + results = self._call_api( + 'search', None, query, url, + 'Downloading search JSON page %d' % page_num) + sl = try_get(results, lambda x: x['sectionList'][0], dict) + if sl: + results = sl + items = results.get('items') + if not items or not isinstance(items, list): + break + for item in items: + if not isinstance(item, dict): + continue + if not self._check_bc_id_exists(item): + continue + yield self._parse_rf_item(item) + size = int_or_none(results.get('size')) + if size is not None: + query['size'] = size + total = int_or_none(results.get('total')) + if total is not None and query['from'] + query['size'] > total: + break + query['from'] += query['size'] + + def _real_extract(self, url): + query = parse_qs(url) + query['type'] = 'session' + return self.playlist_result( + self._entries(query, url), playlist_title='Search query') diff --git a/plugins/youtube_download/yt_dlp/extractor/ciscowebex.py b/plugins/youtube_download/yt_dlp/extractor/ciscowebex.py new file mode 100644 index 0000000..882dae9 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/ciscowebex.py @@ -0,0 +1,90 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + try_get, + unified_timestamp, +) + + +class CiscoWebexIE(InfoExtractor): + IE_NAME = 'ciscowebex' + IE_DESC = 'Cisco Webex' + _VALID_URL = r'''(?x) + (?Phttps?://(?P[^/#?]*)\.webex\.com/(?: + (?P[^/#?]*)/(?:ldr|lsr).php\?(?:[^#]*&)*RCID=(?P[0-9a-f]{32})| + (?:recordingservice|webappng)/sites/(?P[^/#?]*)/recording/(?:playback/|play/)?(?P[0-9a-f]{32}) + ))''' + + _TESTS = [{ + 'url': 'https://demosubdomain.webex.com/demositeurl/ldr.php?RCID=e58e803bc0f766bb5f6376d2e86adb5b', + 'only_matching': True, + }, { + 'url': 'http://demosubdomain.webex.com/demositeurl/lsr.php?RCID=bc04b4a7b5ea2cc3a493d5ae6aaff5d7', + 'only_matching': True, + }, { + 'url': 'https://demosubdomain.webex.com/recordingservice/sites/demositeurl/recording/88e7a42f7b19f5b423c54754aecc2ce9/playback', + 
'only_matching': True, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + rcid = mobj.group('rcid') + if rcid: + webpage = self._download_webpage(url, None, note='Getting video ID') + url = self._search_regex(self._VALID_URL, webpage, 'redirection url', group='url') + url = self._request_webpage(url, None, note='Resolving final URL').geturl() + mobj = self._match_valid_url(url) + subdomain = mobj.group('subdomain') + siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2') + video_id = mobj.group('id') + + stream = self._download_json( + 'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id), + video_id, fatal=False, query={'siteurl': siteurl}) + if not stream: + self.raise_login_required(method='cookies') + + video_id = stream.get('recordUUID') or video_id + + formats = [{ + 'format_id': 'video', + 'url': stream['fallbackPlaySrc'], + 'ext': 'mp4', + 'vcodec': 'avc1.640028', + 'acodec': 'mp4a.40.2', + }] + if stream.get('preventDownload') is False: + mp4url = try_get(stream, lambda x: x['downloadRecordingInfo']['downloadInfo']['mp4URL']) + if mp4url: + formats.append({ + 'format_id': 'video', + 'url': mp4url, + 'ext': 'mp4', + 'vcodec': 'avc1.640028', + 'acodec': 'mp4a.40.2', + }) + audiourl = try_get(stream, lambda x: x['downloadRecordingInfo']['downloadInfo']['audioURL']) + if audiourl: + formats.append({ + 'format_id': 'audio', + 'url': audiourl, + 'ext': 'mp3', + 'vcodec': 'none', + 'acodec': 'mp3', + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': stream['recordName'], + 'description': stream.get('description'), + 'uploader': stream.get('ownerDisplayName'), + 'uploader_id': stream.get('ownerUserName') or stream.get('ownerId'), # mail or id + 'timestamp': unified_timestamp(stream.get('createTime')), + 'duration': int_or_none(stream.get('duration'), 1000), + 'webpage_url': 'https://%s.webex.com/recordingservice/sites/%s/recording/playback/%s' % (subdomain, siteurl, video_id), + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/cjsw.py b/plugins/youtube_download/yt_dlp/extractor/cjsw.py new file mode 100644 index 0000000..1dea0d7 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/cjsw.py @@ -0,0 +1,71 @@ +# coding: utf-8 +from __future__ import unicode_literals + + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + unescapeHTML, +) + + +class CJSWIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/(?P[^/]+)/episode/(?P\d+)' + _TESTS = [{ + 'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620', + 'md5': 'cee14d40f1e9433632c56e3d14977120', + 'info_dict': { + 'id': '91d9f016-a2e7-46c5-8dcb-7cbcd7437c41', + 'ext': 'mp3', + 'title': 'Freshly Squeezed – Episode June 20, 2017', + 'description': 'md5:c967d63366c3898a80d0c7b0ff337202', + 'series': 'Freshly Squeezed', + 'episode_id': '20170620', + }, + }, { + # no description + 'url': 'http://cjsw.com/program/road-pops/episode/20170707/', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + program, episode_id = mobj.group('program', 'id') + audio_id = '%s/%s' % (program, episode_id) + + webpage = self._download_webpage(url, episode_id) + + title = unescapeHTML(self._search_regex( + (r']+class=["\']episode-header__title["\'][^>]*>(?P[^<]+)', + r'data-audio-title=(["\'])(?P<title>(?:(?!\1).)+)\1'), + webpage, 'title', group='title')) + + audio_url = self._search_regex( + 
r'<button[^>]+data-audio-src=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'audio url', group='url') + + audio_id = self._search_regex( + r'/([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})\.mp3', + audio_url, 'audio id', default=audio_id) + + formats = [{ + 'url': audio_url, + 'ext': determine_ext(audio_url, 'mp3'), + 'vcodec': 'none', + }] + + description = self._html_search_regex( + r'<p>(?P<description>.+?)</p>', webpage, 'description', + default=None) + series = self._search_regex( + r'data-showname=(["\'])(?P<name>(?:(?!\1).)+)\1', webpage, + 'series', default=program, group='name') + + return { + 'id': audio_id, + 'title': title, + 'description': description, + 'formats': formats, + 'series': series, + 'episode_id': episode_id, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/cliphunter.py b/plugins/youtube_download/yt_dlp/extractor/cliphunter.py new file mode 100644 index 0000000..f2ca7a3 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/cliphunter.py @@ -0,0 +1,79 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + url_or_none, +) + + +class CliphunterIE(InfoExtractor): + IE_NAME = 'cliphunter' + + _VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/ + (?P<id>[0-9]+)/ + (?P<seo>.+?)(?:$|[#\?]) + ''' + _TESTS = [{ + 'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo', + 'md5': 'b7c9bbd4eb3a226ab91093714dcaa480', + 'info_dict': { + 'id': '1012420', + 'ext': 'flv', + 'title': 'Fun Jynx Maze solo', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 18, + }, + 'skip': 'Video gone', + }, { + 'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz', + 'md5': '55a723c67bfc6da6b0cfa00d55da8a27', + 'info_dict': { + 'id': '2019449', + 'ext': 'mp4', + 'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 18, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_title = self._search_regex( + r'mediaTitle = "([^"]+)"', webpage, 'title') + + gexo_files = self._parse_json( + self._search_regex( + r'var\s+gexoFiles\s*=\s*({.+?});', webpage, 'gexo files'), + video_id) + + formats = [] + for format_id, f in gexo_files.items(): + video_url = url_or_none(f.get('url')) + if not video_url: + continue + fmt = f.get('fmt') + height = f.get('h') + format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'width': int_or_none(f.get('w')), + 'height': int_or_none(height), + 'tbr': int_or_none(f.get('br')), + }) + self._sort_formats(formats) + + thumbnail = self._search_regex( + r"var\s+mov_thumb\s*=\s*'([^']+)';", + webpage, 'thumbnail', fatal=False) + + return { + 'id': video_id, + 'title': video_title, + 'formats': formats, + 'age_limit': self._rta_search(webpage), + 'thumbnail': thumbnail, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/clippit.py b/plugins/youtube_download/yt_dlp/extractor/clippit.py new file mode 100644 index 0000000..a1a7a77 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/clippit.py @@ -0,0 +1,74 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + parse_iso8601, + qualities, +) + +import re + + +class ClippitIE(InfoExtractor): + + _VALID_URL = 
r'https?://(?:www\.)?clippituser\.tv/c/(?P<id>[a-z]+)' + _TEST = { + 'url': 'https://www.clippituser.tv/c/evmgm', + 'md5': '963ae7a59a2ec4572ab8bf2f2d2c5f09', + 'info_dict': { + 'id': 'evmgm', + 'ext': 'mp4', + 'title': 'Bye bye Brutus. #BattleBots - Clippit', + 'uploader': 'lizllove', + 'uploader_url': 'https://www.clippituser.tv/p/lizllove', + 'timestamp': 1472183818, + 'upload_date': '20160826', + 'description': 'BattleBots | ABC', + 'thumbnail': r're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'<title.*>(.+?)', webpage, 'title') + + FORMATS = ('sd', 'hd') + quality = qualities(FORMATS) + formats = [] + for format_id in FORMATS: + url = self._html_search_regex(r'data-%s-file="(.+?)"' % format_id, + webpage, 'url', fatal=False) + if not url: + continue + match = re.search(r'/(?P\d+)\.mp4', url) + formats.append({ + 'url': url, + 'format_id': format_id, + 'quality': quality(format_id), + 'height': int(match.group('height')) if match else None, + }) + + uploader = self._html_search_regex(r'class="username".*>\s+(.+?)\n', + webpage, 'uploader', fatal=False) + uploader_url = ('https://www.clippituser.tv/p/' + uploader + if uploader else None) + + timestamp = self._html_search_regex(r'datetime="(.+?)"', + webpage, 'date', fatal=False) + thumbnail = self._html_search_regex(r'data-image="(.+?)"', + webpage, 'thumbnail', fatal=False) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'uploader': uploader, + 'uploader_url': uploader_url, + 'timestamp': parse_iso8601(timestamp), + 'description': self._og_search_description(webpage), + 'thumbnail': thumbnail, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/cliprs.py b/plugins/youtube_download/yt_dlp/extractor/cliprs.py new file mode 100644 index 0000000..d55b26d --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/cliprs.py @@ -0,0 +1,33 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .onet import OnetBaseIE + + +class ClipRsIE(OnetBaseIE): + _VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P[^/]+)/\d+' + _TEST = { + 'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732', + 'md5': 'c412d57815ba07b56f9edc7b5d6a14e5', + 'info_dict': { + 'id': '1488842.1399140381', + 'ext': 'mp4', + 'title': 'PREMIJERA Frajle predstavljaju novi spot za pesmu Moli me, moli', + 'description': 'md5:56ce2c3b4ab31c5a2e0b17cb9a453026', + 'duration': 229, + 'timestamp': 1459850243, + 'upload_date': '20160405', + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + mvp_id = self._search_mvp_id(webpage) + + info_dict = self._extract_from_id(mvp_id, webpage) + info_dict['display_id'] = display_id + + return info_dict diff --git a/plugins/youtube_download/yt_dlp/extractor/clipsyndicate.py b/plugins/youtube_download/yt_dlp/extractor/clipsyndicate.py new file mode 100644 index 0000000..6cdb42f --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/clipsyndicate.py @@ -0,0 +1,54 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + find_xpath_attr, + fix_xml_ampersands +) + + +class ClipsyndicateIE(InfoExtractor): + _VALID_URL = r'https?://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P\d+)' + + _TESTS = [{ + 'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe', + 'md5': 
'4d7d549451bad625e0ff3d7bd56d776c', + 'info_dict': { + 'id': '4629301', + 'ext': 'mp4', + 'title': 'Brick Briscoe', + 'duration': 612, + 'thumbnail': r're:^https?://.+\.jpg', + }, + }, { + 'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + js_player = self._download_webpage( + 'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id, + video_id, 'Downlaoding player') + # it includes a required token + flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars') + + pdoc = self._download_xml( + 'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars, + video_id, 'Downloading video info', + transform_source=fix_xml_ampersands) + + track_doc = pdoc.find('trackList/track') + + def find_param(name): + node = find_xpath_attr(track_doc, './/param', 'name', name) + if node is not None: + return node.attrib['value'] + + return { + 'id': video_id, + 'title': find_param('title'), + 'url': track_doc.find('location').text, + 'thumbnail': find_param('thumbnail'), + 'duration': int(find_param('duration')), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/closertotruth.py b/plugins/youtube_download/yt_dlp/extractor/closertotruth.py new file mode 100644 index 0000000..26243d5 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/closertotruth.py @@ -0,0 +1,92 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class CloserToTruthIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688', + 'info_dict': { + 'id': '0_zof1ktre', + 'display_id': 'solutions-the-mind-body-problem', + 'ext': 'mov', + 'title': 'Solutions to the Mind-Body Problem?', + 'upload_date': '20140221', + 'timestamp': 1392956007, + 'uploader_id': 'CTTXML' + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://closertotruth.com/episodes/how-do-brains-work', + 'info_dict': { + 'id': '0_iuxai6g6', + 'display_id': 'how-do-brains-work', + 'ext': 'mov', + 'title': 'How do Brains Work?', + 'upload_date': '20140221', + 'timestamp': 1392956024, + 'uploader_id': 'CTTXML' + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://closertotruth.com/interviews/1725', + 'info_dict': { + 'id': '1725', + 'title': 'AyaFr-002', + }, + 'playlist_mincount': 2, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + partner_id = self._search_regex( + r']+src=["\'].*?\b(?:partner_id|p)/(\d+)', + webpage, 'kaltura partner_id') + + title = self._search_regex( + r'(.+?)\s*\|\s*.+?', webpage, 'video title') + + select = self._search_regex( + r'(?s)]+id="select-version"[^>]*>(.+?)', + webpage, 'select version', default=None) + if select: + entry_ids = set() + entries = [] + for mobj in re.finditer( + r']+value=(["\'])(?P[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P[^<]+)', + webpage): + entry_id = mobj.group('id') + if entry_id in entry_ids: + continue + entry_ids.add(entry_id) + entries.append({ + '_type': 'url_transparent', + 'url': 'kaltura:%s:%s' % (partner_id, entry_id), + 'ie_key': 'Kaltura', + 'title': mobj.group('title'), + }) + if entries: + return self.playlist_result(entries, display_id, title) + + entry_id = self._search_regex( + 
r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2', + webpage, 'kaltura entry_id', group='id') + + return { + '_type': 'url_transparent', + 'display_id': display_id, + 'url': 'kaltura:%s:%s' % (partner_id, entry_id), + 'ie_key': 'Kaltura', + 'title': title + } diff --git a/plugins/youtube_download/yt_dlp/extractor/cloudflarestream.py b/plugins/youtube_download/yt_dlp/extractor/cloudflarestream.py new file mode 100644 index 0000000..2fdcfbb --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/cloudflarestream.py @@ -0,0 +1,72 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import re + +from .common import InfoExtractor + + +class CloudflareStreamIE(InfoExtractor): + _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' + _EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE + _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:watch\.)?%s/| + %s + ) + (?P<id>%s) + ''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE) + _TESTS = [{ + 'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717', + 'info_dict': { + 'id': '31c9291ab41fac05471db4e73aa11717', + 'ext': 'mp4', + 'title': '31c9291ab41fac05471db4e73aa11717', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', + 'only_matching': True, + }, { + 'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd', + 'only_matching': True, + }, { + 'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') + for mobj in re.finditer( + r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s(?:%s).*?)\1' % (CloudflareStreamIE._EMBED_RE, CloudflareStreamIE._ID_RE), + webpage)] + + def _real_extract(self, url): + video_id = self._match_id(url) + domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net' + base_url = 'https://%s/%s/' % (domain, video_id) + if '.' in video_id: + video_id = self._parse_json(base64.urlsafe_b64decode( + video_id.split('.')[1]), video_id)['sub'] + manifest_base_url = base_url + 'manifest/video.' 
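In CloudflareStreamIE above, a dotted video ID is treated as a signed, JWT-style token rather than a bare 32-hex ID: the payload segment (video_id.split('.')[1]) is URL-safe base64 around a JSON object whose 'sub' field carries the real video ID. A minimal sketch of that decoding step, assuming the token shape described; the padding fix-up is an extra safeguard not present in the patch itself:

import base64
import json

def id_from_token(token):
    # Tokens look like header.payload.signature; the payload segment is
    # URL-safe base64 (possibly with padding stripped) holding a JSON
    # object whose 'sub' field is the actual video ID.
    payload = token.split('.')[1]
    payload += '=' * (-len(payload) % 4)  # restore stripped padding
    return json.loads(base64.urlsafe_b64decode(payload))['sub']

# Self-contained check with a fake token built on the fly:
fake_payload = base64.urlsafe_b64encode(
    json.dumps({'sub': 'deadbeef'}).encode()).decode().rstrip('=')
assert id_from_token('x.%s.y' % fake_payload) == 'deadbeef'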
+ + formats = self._extract_m3u8_formats( + manifest_base_url + 'm3u8', video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False) + formats.extend(self._extract_mpd_formats( + manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False)) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_id, + 'thumbnail': base_url + 'thumbnails/thumbnail.jpg', + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/cloudy.py b/plugins/youtube_download/yt_dlp/extractor/cloudy.py new file mode 100644 index 0000000..85ca20e --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/cloudy.py @@ -0,0 +1,60 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + str_to_int, + unified_strdate, +) + + +class CloudyIE(InfoExtractor): + _IE_DESC = 'cloudy.ec' + _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)' + _TESTS = [{ + 'url': 'https://www.cloudy.ec/v/af511e2527aac', + 'md5': '29832b05028ead1b58be86bf319397ca', + 'info_dict': { + 'id': 'af511e2527aac', + 'ext': 'mp4', + 'title': 'Funny Cats and Animals Compilation june 2013', + 'upload_date': '20130913', + 'view_count': int, + } + }, { + 'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'https://www.cloudy.ec/embed.php', video_id, query={ + 'id': video_id, + 'playerPage': 1, + 'autoplay': 1, + }) + + info = self._parse_html5_media_entries(url, webpage, video_id)[0] + + webpage = self._download_webpage( + 'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False) + + if webpage: + info.update({ + 'title': self._search_regex( + r'<h\d[^>]*>([^<]+)<', webpage, 'title'), + 'upload_date': unified_strdate(self._search_regex( + r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage, + 'upload date', fatal=False)), + 'view_count': str_to_int(self._search_regex( + r'([\d,.]+) views<', webpage, 'view count', fatal=False)), + }) + + if not info.get('title'): + info['title'] = video_id + + info['id'] = video_id + + return info diff --git a/plugins/youtube_download/yt_dlp/extractor/clubic.py b/plugins/youtube_download/yt_dlp/extractor/clubic.py new file mode 100644 index 0000000..98f9cb5 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/clubic.py @@ -0,0 +1,56 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + clean_html, + qualities, +) + + +class ClubicIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html' + + _TESTS = [{ + 'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html', + 'md5': '1592b694ba586036efac1776b0b43cd3', + 'info_dict': { + 'id': '448474', + 'ext': 'mp4', + 'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité', + 'description': 're:Gueule de bois chez Nokia. 
Le constructeur a indiqué cette.*', + 'thumbnail': r're:^http://img\.clubic\.com/.*\.jpg$', + } + }, { + 'url': 'http://www.clubic.com/video/video-clubic-week-2-0-apple-iphone-6s-et-plus-mais-surtout-le-pencil-469792.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id + player_page = self._download_webpage(player_url, video_id) + + config = self._parse_json(self._search_regex( + r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page, + 'configuration'), video_id) + + video_info = config['videoInfo'] + sources = config['sources'] + quality_order = qualities(['sd', 'hq']) + + formats = [{ + 'format_id': src['streamQuality'], + 'url': src['src'], + 'quality': quality_order(src['streamQuality']), + } for src in sources] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_info['title'], + 'formats': formats, + 'description': clean_html(video_info.get('description')), + 'thumbnail': config.get('poster'), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/clyp.py b/plugins/youtube_download/yt_dlp/extractor/clyp.py new file mode 100644 index 0000000..e6b2ac4 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/clyp.py @@ -0,0 +1,79 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + float_or_none, + parse_qs, + unified_timestamp, +) + + +class ClypIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)' + _TESTS = [{ + 'url': 'https://clyp.it/ojz2wfah', + 'md5': '1d4961036c41247ecfdcc439c0cddcbb', + 'info_dict': { + 'id': 'ojz2wfah', + 'ext': 'mp3', + 'title': 'Krisson80 - bits wip wip', + 'description': '#Krisson80BitsWipWip #chiptune\n#wip', + 'duration': 263.21, + 'timestamp': 1443515251, + 'upload_date': '20150929', + }, + }, { + 'url': 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d', + 'info_dict': { + 'id': 'b04p1odi', + 'ext': 'mp3', + 'title': 'GJ! 
(Reward Edit)', + 'description': 'Metal Resistance (THE ONE edition)', + 'duration': 177.789, + 'timestamp': 1528241278, + 'upload_date': '20180605', + }, + 'params': { + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + audio_id = self._match_id(url) + + qs = parse_qs(url) + token = qs.get('token', [None])[0] + + query = {} + if token: + query['token'] = token + + metadata = self._download_json( + 'https://api.clyp.it/%s' % audio_id, audio_id, query=query) + + formats = [] + for secure in ('', 'Secure'): + for ext in ('Ogg', 'Mp3'): + format_id = '%s%s' % (secure, ext) + format_url = metadata.get('%sUrl' % format_id) + if format_url: + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'vcodec': 'none', + }) + self._sort_formats(formats) + + title = metadata['Title'] + description = metadata.get('Description') + duration = float_or_none(metadata.get('Duration')) + timestamp = unified_timestamp(metadata.get('DateCreated')) + + return { + 'id': audio_id, + 'title': title, + 'description': description, + 'duration': duration, + 'timestamp': timestamp, + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/cmt.py b/plugins/youtube_download/yt_dlp/extractor/cmt.py new file mode 100644 index 0000000..a4ddb91 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/cmt.py @@ -0,0 +1,56 @@ +from __future__ import unicode_literals + +from .mtv import MTVIE + +# TODO Remove - Reason: Outdated Site + + +class CMTIE(MTVIE): + IE_NAME = 'cmt.com' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|(?:full-)?episodes|video-clips)/(?P<id>[^/]+)' + + _TESTS = [{ + 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', + 'md5': 'e6b7ef3c4c45bbfae88061799bbba6c2', + 'info_dict': { + 'id': '989124', + 'ext': 'mp4', + 'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"', + 'description': 'Blame It All On My Roots', + }, + 'skip': 'Video not available', + }, { + 'url': 'http://www.cmt.com/videos/misc/1504699/still-the-king-ep-109-in-3-minutes.jhtml#id=1739908', + 'md5': 'e61a801ca4a183a466c08bd98dccbb1c', + 'info_dict': { + 'id': '1504699', + 'ext': 'mp4', + 'title': 'Still The King Ep. 
109 in 3 Minutes', + 'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9.', + 'timestamp': 1469421000.0, + 'upload_date': '20160725', + }, + }, { + 'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172', + 'only_matching': True, + }, { + 'url': 'http://www.cmt.com/full-episodes/537qb3/nashville-the-wayfaring-stranger-season-5-ep-501', + 'only_matching': True, + }, { + 'url': 'http://www.cmt.com/video-clips/t9e4ci/nashville-juliette-in-2-minutes', + 'only_matching': True, + }] + + def _extract_mgid(self, webpage, url): + mgid = self._search_regex( + r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1', + webpage, 'mgid', group='mgid', default=None) + if not mgid: + mgid = self._extract_triforce_mgid(webpage) + return mgid + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + mgid = self._extract_mgid(webpage, url) + return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) diff --git a/plugins/youtube_download/yt_dlp/extractor/cnbc.py b/plugins/youtube_download/yt_dlp/extractor/cnbc.py new file mode 100644 index 0000000..da3730c --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/cnbc.py @@ -0,0 +1,70 @@ +# coding: utf-8 +from __future__ import unicode_literals + + +from .common import InfoExtractor +from ..utils import smuggle_url + + +class CNBCIE(InfoExtractor): + _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://video.cnbc.com/gallery/?video=3000503714', + 'info_dict': { + 'id': '3000503714', + 'ext': 'mp4', + 'title': 'Fighting zombies is big business', + 'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e', + 'timestamp': 1459332000, + 'upload_date': '20160330', + 'uploader': 'NBCU-CNBC', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': smuggle_url( + 'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id, + {'force_smil_url': True}), + 'id': video_id, + } + + +class CNBCVideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)' + _TEST = { + 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', + 'info_dict': { + 'id': '7000031301', + 'ext': 'mp4', + 'title': "Trump: I don't necessarily agree with raising rates", + 'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3', + 'timestamp': 1531958400, + 'upload_date': '20180719', + 'uploader': 'NBCU-CNBC', + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + path, display_id = self._match_valid_url(url).groups() + video_id = self._download_json( + 'https://webql-redesign.cnbcfm.com/graphql', display_id, query={ + 'query': '''{ + page(path: "%s") { + vcpsId + } +}''' % path, + })['data']['page']['vcpsId'] + return self.url_result( + 'http://video.cnbc.com/gallery/?video=%d' % video_id, + CNBCIE.ie_key()) diff --git a/plugins/youtube_download/yt_dlp/extractor/cnn.py b/plugins/youtube_download/yt_dlp/extractor/cnn.py new file mode 100644 index 0000000..af11d95 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/cnn.py @@ -0,0 +1,146 @@ +from __future__ import unicode_literals + + +from .common import InfoExtractor +from .turner import 
TurnerBaseIE +from ..utils import url_basename + + +class CNNIE(TurnerBaseIE): + _VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/ + (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))''' + + _TESTS = [{ + 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', + 'md5': '3e6121ea48df7e2259fe73a0628605c4', + 'info_dict': { + 'id': 'sports/2013/06/09/nadal-1-on-1.cnn', + 'ext': 'mp4', + 'title': 'Nadal wins 8th French Open title', + 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', + 'duration': 135, + 'upload_date': '20130609', + }, + 'expected_warnings': ['Failed to download m3u8 information'], + }, { + 'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29', + 'md5': 'b5cc60c60a3477d185af8f19a2a26f4e', + 'info_dict': { + 'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology', + 'ext': 'mp4', + 'title': "Student's epic speech stuns new freshmen", + 'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", + 'upload_date': '20130821', + }, + 'expected_warnings': ['Failed to download m3u8 information'], + }, { + 'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html', + 'md5': 'f14d02ebd264df951feb2400e2c25a1b', + 'info_dict': { + 'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln', + 'ext': 'mp4', + 'title': 'Nashville Ep. 1: Hand crafted skateboards', + 'description': 'md5:e7223a503315c9f150acac52e76de086', + 'upload_date': '20141222', + }, + 'expected_warnings': ['Failed to download m3u8 information'], + }, { + 'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html', + 'md5': '52a515dc1b0f001cd82e4ceda32be9d1', + 'info_dict': { + 'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney', + 'ext': 'mp4', + 'title': '5 stunning stats about Netflix', + 'description': 'Did you know that Netflix has more than 80 million members? 
Here are five facts about the online video distributor that you probably didn\'t know.', + 'upload_date': '20160819', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk', + 'only_matching': True, + }, { + 'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg', + 'only_matching': True, + }, { + 'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn', + 'only_matching': True, + }] + + _CONFIG = { + # http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml + 'edition': { + 'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml', + 'media_src': 'http://pmd.cdn.turner.com/cnn/big', + }, + # http://money.cnn.com/.element/apps/cvp2/cfg/config.xml + 'money': { + 'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml', + 'media_src': 'http://ht3.cdn.turner.com/money/big', + }, + } + + def _extract_timestamp(self, video_data): + # TODO: fix timestamp extraction + return None + + def _real_extract(self, url): + sub_domain, path, page_title = self._match_valid_url(url).groups() + if sub_domain not in ('money', 'edition'): + sub_domain = 'edition' + config = self._CONFIG[sub_domain] + return self._extract_cvp_info( + config['data_src'] % path, page_title, { + 'default': { + 'media_src': config['media_src'], + }, + 'f4m': { + 'host': 'cnn-vh.akamaihd.net', + }, + }) + + +class CNNBlogsIE(InfoExtractor): + _VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+' + _TEST = { + 'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/', + 'md5': '3e56f97b0b6ffb4b79f4ea0749551084', + 'info_dict': { + 'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn', + 'ext': 'mp4', + 'title': 'Criminalizing journalism?', + 'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.', + 'upload_date': '20140209', + }, + 'expected_warnings': ['Failed to download m3u8 information'], + 'add_ie': ['CNN'], + } + + def _real_extract(self, url): + webpage = self._download_webpage(url, url_basename(url)) + cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url') + return self.url_result(cnn_url, CNNIE.ie_key()) + + +class CNNArticleIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)' + _TEST = { + 'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/', + 'md5': '689034c2a3d9c6dc4aa72d65a81efd01', + 'info_dict': { + 'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn', + 'ext': 'mp4', + 'title': 'Obama: Cyberattack not an act of war', + 'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b', + 'upload_date': '20141221', + }, + 'expected_warnings': ['Failed to download m3u8 information'], + 'add_ie': ['CNN'], + } + + def _real_extract(self, url): + webpage = self._download_webpage(url, url_basename(url)) + cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url') + return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key()) diff --git a/plugins/youtube_download/yt_dlp/extractor/comedycentral.py b/plugins/youtube_download/yt_dlp/extractor/comedycentral.py new file mode 100644 index 0000000..5a12ab5 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/comedycentral.py @@ -0,0 +1,54 @@ +from __future__ import unicode_literals + +from .mtv 
import MTVServicesInfoExtractor + + +class ComedyCentralIE(MTVServicesInfoExtractor): + _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?|collection-playlist)/(?P<id>[0-9a-z]{6})' + _FEED_URL = 'http://comedycentral.com/feeds/mrss/' + + _TESTS = [{ + 'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike', + 'md5': 'b8acb347177c680ff18a292aa2166f80', + 'info_dict': { + 'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025', + 'ext': 'mp4', + 'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike', + 'description': 'md5:5334307c433892b85f4f5e5ac9ef7498', + 'timestamp': 1598670000, + 'upload_date': '20200829', + }, + }, { + 'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314', + 'only_matching': True, + }, { + 'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate', + 'only_matching': True, + }, { + 'url': 'https://www.cc.com/collection-playlist/cosnej/stand-up-specials/t6vtjb', + 'only_matching': True, + }] + + +class ComedyCentralTVIE(MTVServicesInfoExtractor): + _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})' + _TESTS = [{ + 'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1', + 'info_dict': { + 'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285', + 'ext': 'mp4', + 'title': 'Josh Investigates', + 'description': 'Steht uns das Ende der Welt bevor?', + }, + }] + _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' + _GEO_COUNTRIES = ['DE'] + + def _get_feed_query(self, uri): + return { + 'accountOverride': 'intl.mtvi.com', + 'arcEp': 'web.cc.tv', + 'ep': 'b9032c3a', + 'imageEp': 'web.cc.tv', + 'mgid': uri, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/common.py b/plugins/youtube_download/yt_dlp/extractor/common.py new file mode 100644 index 0000000..ac9e285 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/common.py @@ -0,0 +1,3774 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import collections +import hashlib +import itertools +import json +import netrc +import os +import random +import re +import sys +import time +import math + +from ..compat import ( + compat_cookiejar_Cookie, + compat_cookies_SimpleCookie, + compat_etree_Element, + compat_etree_fromstring, + compat_expanduser, + compat_getpass, + compat_http_client, + compat_os_name, + compat_str, + compat_urllib_error, + compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, + compat_urllib_request, + compat_urlparse, + compat_xml_parse_error, +) +from ..downloader import FileDownloader +from ..downloader.f4m import ( + get_base_url, + remove_encrypted_media, +) +from ..utils import ( + age_restricted, + base_url, + bug_reports_message, + clean_html, + compiled_regex_type, + determine_ext, + determine_protocol, + dict_get, + encode_data_uri, + error_to_compat_str, + extract_attributes, + ExtractorError, + fix_xml_ampersands, + float_or_none, + format_field, + GeoRestrictedError, + GeoUtils, + int_or_none, + join_nonempty, + js_to_json, + JSON_LD_RE, + mimetype2ext, + network_exceptions, + NO_DEFAULT, + orderedSet, + parse_bitrate, + parse_codecs, + parse_duration, + parse_iso8601, + parse_m3u8_attributes, + parse_resolution, + RegexNotFoundError, + sanitize_filename, + sanitized_Request, + str_or_none, + str_to_int, + strip_or_none, + 
traverse_obj,
+    unescapeHTML,
+    UnsupportedError,
+    unified_strdate,
+    unified_timestamp,
+    update_Request,
+    update_url_query,
+    url_basename,
+    url_or_none,
+    urljoin,
+    variadic,
+    xpath_element,
+    xpath_text,
+    xpath_with_ns,
+)
+
+
+class InfoExtractor(object):
+    """Information Extractor class.
+
+    Information extractors are the classes that, given a URL, extract
+    information about the video (or videos) the URL refers to. This
+    information includes the real video URL, the video title, author and
+    others. The information is stored in a dictionary which is then
+    passed to the YoutubeDL. The YoutubeDL processes this
+    information possibly downloading the video to the file system, among
+    other possible outcomes.
+
+    The type field determines the type of the result.
+    By far the most common value (and the default if _type is missing) is
+    "video", which indicates a single video.
+
+    For a video, the dictionaries must include the following fields:
+
+    id:             Video identifier.
+    title:          Video title, unescaped.
+
+    Additionally, it must contain either a formats entry or a url one:
+
+    formats:        A list of dictionaries for each format available, ordered
+                    from worst to best quality.
+
+                    Potential fields:
+                    * url        The mandatory URL representing the media:
+                                   for plain file media - HTTP URL of this file,
+                                   for RTMP - RTMP URL,
+                                   for HLS - URL of the M3U8 media playlist,
+                                   for HDS - URL of the F4M manifest,
+                                   for DASH
+                                     - HTTP URL to plain file media (in case of
+                                       unfragmented media)
+                                     - URL of the MPD manifest or base URL
+                                       representing the media if MPD manifest
+                                       is parsed from a string (in case of
+                                       fragmented media)
+                                   for MSS - URL of the ISM manifest.
+                    * manifest_url
+                                 The URL of the manifest file in case of
+                                 fragmented media:
+                                   for HLS - URL of the M3U8 master playlist,
+                                   for HDS - URL of the F4M manifest,
+                                   for DASH - URL of the MPD manifest,
+                                   for MSS - URL of the ISM manifest.
+                    * ext        Will be calculated from URL if missing
+                    * format     A human-readable description of the format
+                                 ("mp4 container with h264/opus").
+                                 Calculated from the format_id, width, height,
+                                 and format_note fields if missing.
+                    * format_id  A short description of the format
+                                 ("mp4_h264_opus" or "19").
+                                 Technically optional, but strongly recommended.
+                    * format_note Additional info about the format
+                                 ("3D" or "DASH video")
+                    * width      Width of the video, if known
+                    * height     Height of the video, if known
+                    * resolution Textual description of width and height
+                    * dynamic_range The dynamic range of the video. One of:
+                                 "SDR" (None), "HDR10", "HDR10+", "HDR12", "HLG", "DV"
+                    * tbr        Average bitrate of audio and video in KBit/s
+                    * abr        Average audio bitrate in KBit/s
+                    * acodec     Name of the audio codec in use
+                    * asr        Audio sampling rate in Hertz
+                    * vbr        Average video bitrate in KBit/s
+                    * fps        Frame rate
+                    * vcodec     Name of the video codec in use
+                    * container  Name of the container format
+                    * filesize   The number of bytes, if known in advance
+                    * filesize_approx  An estimate for the number of bytes
+                    * player_url SWF Player URL (used for rtmpdump).
+                    * protocol   The protocol that will be used for the actual
+                                 download, lower-case. One of "http", "https" or
+                                 one of the protocols defined in downloader.PROTOCOL_MAP
+                    * fragment_base_url
+                                 Base URL for fragments. Each fragment's path
+                                 value (if present) will be relative to
+                                 this URL.
+                    * fragments  A list of fragments of a fragmented media.
+                                 Each fragment entry must contain either a URL
+                                 or a path. If a URL is present it should be
+                                 considered by a client.
Otherwise both path and + fragment_base_url must be present. Here is + the list of all potential fields: + * "url" - fragment's URL + * "path" - fragment's path relative to + fragment_base_url + * "duration" (optional, int or float) + * "filesize" (optional, int) + * is_from_start Is a live format that can be downloaded + from the start. Boolean + * preference Order number of this format. If this field is + present and not None, the formats get sorted + by this field, regardless of all other values. + -1 for default (order by other properties), + -2 or smaller for less than default. + < -1000 to hide the format (if there is + another one which is strictly better) + * language Language code, e.g. "de" or "en-US". + * language_preference Is this in the language mentioned in + the URL? + 10 if it's what the URL is about, + -1 for default (don't know), + -10 otherwise, other values reserved for now. + * quality Order number of the video quality of this + format, irrespective of the file format. + -1 for default (order by other properties), + -2 or smaller for less than default. + * source_preference Order number for this video source + (quality takes higher priority) + -1 for default (order by other properties), + -2 or smaller for less than default. + * http_headers A dictionary of additional HTTP headers + to add to the request. + * stretched_ratio If given and not 1, indicates that the + video's pixels are not square. + width : height ratio as float. + * no_resume The server does not support resuming the + (HTTP or RTMP) download. Boolean. + * has_drm The format has DRM and cannot be downloaded. Boolean + * downloader_options A dictionary of downloader options as + described in FileDownloader + RTMP formats can also have the additional fields: page_url, + app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn, + rtmp_protocol, rtmp_real_time + + url: Final video URL. + ext: Video filename extension. + format: The video format, defaults to ext (used for --get-format) + player_url: SWF Player URL (used for rtmpdump). + + The following fields are optional: + + alt_title: A secondary title of the video. + display_id An alternative identifier for the video, not necessarily + unique, but available before title. Typically, id is + something like "4234987", title "Dancing naked mole rats", + and display_id "dancing-naked-mole-rats" + thumbnails: A list of dictionaries, with the following entries: + * "id" (optional, string) - Thumbnail format ID + * "url" + * "preference" (optional, int) - quality of the image + * "width" (optional, int) + * "height" (optional, int) + * "resolution" (optional, string "{width}x{height}", + deprecated) + * "filesize" (optional, int) + thumbnail: Full URL to a video thumbnail image. + description: Full video description. + uploader: Full name of the video uploader. + license: License name the video is licensed under. + creator: The creator of the video. + timestamp: UNIX timestamp of the moment the video was uploaded + upload_date: Video upload date (YYYYMMDD). + If not explicitly set, calculated from timestamp + release_timestamp: UNIX timestamp of the moment the video was released. + If it is not clear whether to use timestamp or this, use the former + release_date: The date (YYYYMMDD) when the video was released. + If not explicitly set, calculated from release_timestamp + modified_timestamp: UNIX timestamp of the moment the video was last modified. + modified_date: The date (YYYYMMDD) when the video was last modified. 
+                    If not explicitly set, calculated from modified_timestamp
+    uploader_id:    Nickname or id of the video uploader.
+    uploader_url:   Full URL to a personal webpage of the video uploader.
+    channel:        Full name of the channel the video is uploaded on.
+                    Note that channel fields may or may not repeat uploader
+                    fields. This depends on a particular extractor.
+    channel_id:     Id of the channel.
+    channel_url:    Full URL to a channel webpage.
+    channel_follower_count: Number of followers of the channel.
+    location:       Physical location where the video was filmed.
+    subtitles:      The available subtitles as a dictionary in the format
+                    {tag: subformats}. "tag" is usually a language code, and
+                    "subformats" is a list sorted from lower to higher
+                    preference, each element is a dictionary with the "ext"
+                    entry and one of:
+                        * "data": The subtitles file contents
+                        * "url": A URL pointing to the subtitles file
+                    It can optionally also have:
+                        * "name": Name or description of the subtitles
+                    "ext" will be calculated from URL if missing
+    automatic_captions: Like 'subtitles'; contains automatically generated
+                    captions instead of normal subtitles
+    duration:       Length of the video in seconds, as an integer or float.
+    view_count:     How many users have watched the video on the platform.
+    like_count:     Number of positive ratings of the video
+    dislike_count:  Number of negative ratings of the video
+    repost_count:   Number of reposts of the video
+    average_rating: Average rating given by users, the scale used depends on the webpage
+    comment_count:  Number of comments on the video
+    comments:       A list of comments, each with one or more of the following
+                    properties (all but one of text or html optional):
+                        * "author" - human-readable name of the comment author
+                        * "author_id" - user ID of the comment author
+                        * "author_thumbnail" - The thumbnail of the comment author
+                        * "id" - Comment ID
+                        * "html" - Comment as HTML
+                        * "text" - Plain text of the comment
+                        * "timestamp" - UNIX timestamp of comment
+                        * "parent" - ID of the comment this one is replying to.
+                                     Set to "root" to indicate that this is a
+                                     comment to the original video.
+                        * "like_count" - Number of positive ratings of the comment
+                        * "dislike_count" - Number of negative ratings of the comment
+                        * "is_favorited" - Whether the comment is marked as
+                                           favorite by the video uploader
+                        * "author_is_uploader" - Whether the comment is made by
+                                                 the video uploader
+    age_limit:      Age restriction for the video, as an integer (years)
+    webpage_url:    The URL to the video webpage, if given to yt-dlp it
+                    should allow getting the same result again. (It will be set
+                    by YoutubeDL if it's missing)
+    categories:     A list of categories that the video falls in, for example
+                    ["Sports", "Berlin"]
+    tags:           A list of tags assigned to the video, e.g. ["sweden", "pop music"]
+    cast:           A list of the video cast
+    is_live:        True, False, or None (=unknown). Whether this video is a
+                    live stream that goes on instead of a fixed-length video.
+    was_live:       True, False, or None (=unknown). Whether this video was
+                    originally a live stream.
+    live_status:    'is_live', 'is_upcoming', 'was_live', 'not_live' or None (=unknown)
+                    If absent, automatically set from is_live, was_live
+    start_time:     Time in seconds where the reproduction should start, as
+                    specified in the URL.
+    end_time:       Time in seconds where the reproduction should end, as
+                    specified in the URL.
+    chapters:       A list of dictionaries, with the following entries:
+                        * "start_time" - The start time of the chapter in seconds
+                        * "end_time" - The end time of the chapter in seconds
+                        * "title" (optional, string)
+    playable_in_embed: Whether this video is allowed to play in embedded
+                    players on other sites. Can be True (=always allowed),
+                    False (=never allowed), None (=unknown), or a string
+                    specifying the criteria for embedability (e.g. 'whitelist')
+    availability:   Under what condition the video is available. One of
+                    'private', 'premium_only', 'subscriber_only', 'needs_auth',
+                    'unlisted' or 'public'. Use 'InfoExtractor._availability'
+                    to set it
+    __post_extractor: A function to be called just before the metadata is
+                    written to either disk, logger or console. The function
+                    must return a dict which will be added to the info_dict.
+                    This is useful for additional information that is
+                    time-consuming to extract. Note that the fields thus
+                    extracted will not be available to output template and
+                    match_filter. So, only "comments" and "comment_count" are
+                    currently allowed to be extracted via this method.
+
+    The following fields should only be used when the video belongs to some logical
+    chapter or section:
+
+    chapter:        Name or title of the chapter the video belongs to.
+    chapter_number: Number of the chapter the video belongs to, as an integer.
+    chapter_id:     Id of the chapter the video belongs to, as a unicode string.
+
+    The following fields should only be used when the video is an episode of some
+    series, programme or podcast:
+
+    series:         Title of the series or programme the video episode belongs to.
+    series_id:      Id of the series or programme the video episode belongs to, as a unicode string.
+    season:         Title of the season the video episode belongs to.
+    season_number:  Number of the season the video episode belongs to, as an integer.
+    season_id:      Id of the season the video episode belongs to, as a unicode string.
+    episode:        Title of the video episode. Unlike mandatory video title field,
+                    this field should denote the exact title of the video episode
+                    without any kind of decoration.
+    episode_number: Number of the video episode within a season, as an integer.
+    episode_id:     Id of the video episode, as a unicode string.
+
+    The following fields should only be used when the media is a track or a part of
+    a music album:
+
+    track:          Title of the track.
+    track_number:   Number of the track within an album or a disc, as an integer.
+    track_id:       Id of the track (useful in case of custom indexing, e.g. 6.iii),
+                    as a unicode string.
+    artist:         Artist(s) of the track.
+    genre:          Genre(s) of the track.
+    album:          Title of the album the track belongs to.
+    album_type:     Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
+    album_artist:   List of all artists who appeared on the album (e.g.
+                    "Ash Borer / Fell Voices" or "Various Artists", useful for splits
+                    and compilations).
+    disc_number:    Number of the disc or other physical medium the track belongs to,
+                    as an integer.
+    release_year:   Year (YYYY) when the album was released.
+    composer:       Composer of the piece
+
+    Unless mentioned otherwise, the fields should be Unicode strings.
+
+    Unless mentioned otherwise, None is equivalent to absence of information.
+
+
+    _type "playlist" indicates multiple videos.
+    There must be a key "entries", which is a list, an iterable, or a PagedList
+    object, each element of which is a valid dictionary by this specification.
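+
+    As an illustration only (this sketch is not part of the formal
+    specification above), a minimal playlist result could look like:
+
+        {
+            '_type': 'playlist',
+            'id': 'PL12345',
+            'title': 'Some playlist',
+            'entries': [
+                {'id': '123', 'title': 'First video', 'url': 'https://example.com/1.mp4'},
+                {'id': '456', 'title': 'Second video', 'url': 'https://example.com/2.mp4'},
+            ],
+        }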
+
+    Additionally, playlists can have "id", "title", and any other relevant
+    attributes with the same semantics as videos (see above).
+
+    It can also have the following optional fields:
+
+    playlist_count: The total number of videos in a playlist. If not given,
+                    YoutubeDL tries to calculate it from "entries"
+
+
+    _type "multi_video" indicates that there are multiple videos that
+    form a single show, for example multiple acts of an opera or TV episode.
+    It must have an entries key like a playlist and contain all the keys
+    required for a video at the same time.
+
+
+    _type "url" indicates that the video must be extracted from another
+    location, possibly by a different extractor. Its only required key is:
+    "url" - the next URL to extract.
+    The key "ie_key" can be set to the class name (minus the trailing "IE",
+    e.g. "Youtube") if the extractor class is known in advance.
+    Additionally, the dictionary may have any properties of the resolved entity
+    known in advance, for example "title" if the title of the referred video is
+    known ahead of time.
+
+
+    _type "url_transparent" entities have the same specification as "url", but
+    indicate that the given additional information is more precise than the one
+    associated with the resolved URL.
+    This is useful when a site employs a video service that hosts the video and
+    its technical metadata, but that video service does not embed a useful
+    title, description etc.
+
+
+    Subclasses of this one should re-define the _real_initialize() and
+    _real_extract() methods and define a _VALID_URL regexp.
+    Probably, they should also be added to the list of extractors.
+
+    Subclasses may also override suitable() if necessary, but ensure the function
+    signature is preserved and that this function imports everything it needs
+    (except other extractors), so that lazy_extractors works correctly
+
+    _GEO_BYPASS attribute may be set to False in order to disable
+    geo restriction bypass mechanisms for a particular extractor.
+    Though it won't disable explicit geo restriction bypass based on
+    country code provided with geo_bypass_country.
+
+    _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
+    countries for this extractor. One of these countries will be used by
+    geo restriction bypass mechanism right away in order to bypass
+    geo restriction, of course, if the mechanism is not disabled.
+
+    _GEO_IP_BLOCKS attribute may contain a list of presumably geo unrestricted
+    IP blocks in CIDR notation for this extractor. One of these IP blocks
+    will be used by geo restriction bypass mechanism similarly
+    to _GEO_COUNTRIES.
+
+    The _WORKING attribute should be set to False for broken IEs
+    in order to warn the users and skip the tests.
+    """
+
+    _ready = False
+    _downloader = None
+    _x_forwarded_for_ip = None
+    _GEO_BYPASS = True
+    _GEO_COUNTRIES = None
+    _GEO_IP_BLOCKS = None
+    _WORKING = True
+
+    _LOGIN_HINTS = {
+        'any': 'Use --cookies, --username and --password, or --netrc to provide account credentials',
+        'cookies': (
+            'Use --cookies-from-browser or --cookies for the authentication. '
+            'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'),
+        'password': 'Use --username and --password, or --netrc to provide account credentials',
+    }
+
+    def __init__(self, downloader=None):
+        """Constructor. Receives an optional downloader (a YoutubeDL instance).
+ If a downloader is not passed during initialization, + it must be set using "set_downloader()" before "extract()" is called""" + self._ready = False + self._x_forwarded_for_ip = None + self._printed_messages = set() + self.set_downloader(downloader) + + @classmethod + def _match_valid_url(cls, url): + # This does not use has/getattr intentionally - we want to know whether + # we have cached the regexp for *this* class, whereas getattr would also + # match the superclass + if '_VALID_URL_RE' not in cls.__dict__: + if '_VALID_URL' not in cls.__dict__: + cls._VALID_URL = cls._make_valid_url() + cls._VALID_URL_RE = re.compile(cls._VALID_URL) + return cls._VALID_URL_RE.match(url) + + @classmethod + def suitable(cls, url): + """Receives a URL and returns True if suitable for this IE.""" + # This function must import everything it needs (except other extractors), + # so that lazy_extractors works correctly + return cls._match_valid_url(url) is not None + + @classmethod + def _match_id(cls, url): + return cls._match_valid_url(url).group('id') + + @classmethod + def get_temp_id(cls, url): + try: + return cls._match_id(url) + except (IndexError, AttributeError): + return None + + @classmethod + def working(cls): + """Getter method for _WORKING.""" + return cls._WORKING + + def initialize(self): + """Initializes an instance (authentication, etc).""" + self._printed_messages = set() + self._initialize_geo_bypass({ + 'countries': self._GEO_COUNTRIES, + 'ip_blocks': self._GEO_IP_BLOCKS, + }) + if not self._ready: + self._real_initialize() + self._ready = True + + def _initialize_geo_bypass(self, geo_bypass_context): + """ + Initialize geo restriction bypass mechanism. + + This method is used to initialize geo bypass mechanism based on faking + X-Forwarded-For HTTP header. A random country from provided country list + is selected and a random IP belonging to this country is generated. This + IP will be passed as X-Forwarded-For HTTP header in all subsequent + HTTP requests. + + This method will be used for initial geo bypass mechanism initialization + during the instance initialization with _GEO_COUNTRIES and + _GEO_IP_BLOCKS. + + You may also manually call it from extractor's code if geo bypass + information is not available beforehand (e.g. obtained during + extraction) or due to some other reason. In this case you should pass + this information in geo bypass context passed as first argument. It may + contain following fields: + + countries: List of geo unrestricted countries (similar + to _GEO_COUNTRIES) + ip_blocks: List of geo unrestricted IP blocks in CIDR notation + (similar to _GEO_IP_BLOCKS) + + """ + if not self._x_forwarded_for_ip: + + # Geo bypass mechanism is explicitly disabled by user + if not self.get_param('geo_bypass', True): + return + + if not geo_bypass_context: + geo_bypass_context = {} + + # Backward compatibility: previously _initialize_geo_bypass + # expected a list of countries, some 3rd party code may still use + # it this way + if isinstance(geo_bypass_context, (list, tuple)): + geo_bypass_context = { + 'countries': geo_bypass_context, + } + + # The whole point of geo bypass mechanism is to fake IP + # as X-Forwarded-For HTTP header based on some IP block or + # country code. 
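+
+            # NOTE (illustrative, not part of upstream yt-dlp): given a context
+            # such as {'countries': ['US'], 'ip_blocks': ['192.0.2.0/24']},
+            # Path 1 below would pick a random address out of 192.0.2.0/24
+            # (e.g. 192.0.2.17) and send it as X-Forwarded-For on every HTTP
+            # request; Path 2 falls back to a random IP for a listed country.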
+ + # Path 1: bypassing based on IP block in CIDR notation + + # Explicit IP block specified by user, use it right away + # regardless of whether extractor is geo bypassable or not + ip_block = self.get_param('geo_bypass_ip_block', None) + + # Otherwise use random IP block from geo bypass context but only + # if extractor is known as geo bypassable + if not ip_block: + ip_blocks = geo_bypass_context.get('ip_blocks') + if self._GEO_BYPASS and ip_blocks: + ip_block = random.choice(ip_blocks) + + if ip_block: + self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block) + self._downloader.write_debug( + '[debug] Using fake IP %s as X-Forwarded-For' % self._x_forwarded_for_ip) + return + + # Path 2: bypassing based on country code + + # Explicit country code specified by user, use it right away + # regardless of whether extractor is geo bypassable or not + country = self.get_param('geo_bypass_country', None) + + # Otherwise use random country code from geo bypass context but + # only if extractor is known as geo bypassable + if not country: + countries = geo_bypass_context.get('countries') + if self._GEO_BYPASS and countries: + country = random.choice(countries) + + if country: + self._x_forwarded_for_ip = GeoUtils.random_ipv4(country) + self._downloader.write_debug( + 'Using fake IP %s (%s) as X-Forwarded-For' % (self._x_forwarded_for_ip, country.upper())) + + def extract(self, url): + """Extracts URL information and returns it in list of dicts.""" + try: + for _ in range(2): + try: + self.initialize() + self.write_debug('Extracting URL: %s' % url) + ie_result = self._real_extract(url) + if ie_result is None: + return None + if self._x_forwarded_for_ip: + ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip + subtitles = ie_result.get('subtitles') + if (subtitles and 'live_chat' in subtitles + and 'no-live-chat' in self.get_param('compat_opts', [])): + del subtitles['live_chat'] + return ie_result + except GeoRestrictedError as e: + if self.__maybe_fake_ip_and_retry(e.countries): + continue + raise + except UnsupportedError: + raise + except ExtractorError as e: + kwargs = { + 'video_id': e.video_id or self.get_temp_id(url), + 'ie': self.IE_NAME, + 'tb': e.traceback or sys.exc_info()[2], + 'expected': e.expected, + 'cause': e.cause + } + if hasattr(e, 'countries'): + kwargs['countries'] = e.countries + raise type(e)(e.msg, **kwargs) + except compat_http_client.IncompleteRead as e: + raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url)) + except (KeyError, StopIteration) as e: + raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url)) + + def __maybe_fake_ip_and_retry(self, countries): + if (not self.get_param('geo_bypass_country', None) + and self._GEO_BYPASS + and self.get_param('geo_bypass', True) + and not self._x_forwarded_for_ip + and countries): + country_code = random.choice(countries) + self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) + if self._x_forwarded_for_ip: + self.report_warning( + 'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.' + % (self._x_forwarded_for_ip, country_code.upper())) + return True + return False + + def set_downloader(self, downloader): + """Sets the downloader for this IE.""" + self._downloader = downloader + + def _real_initialize(self): + """Real initialization process. Redefine in subclasses.""" + pass + + def _real_extract(self, url): + """Real extraction process. 
Redefine in subclasses.""" + pass + + @classmethod + def ie_key(cls): + """A string for getting the InfoExtractor with get_info_extractor""" + return cls.__name__[:-2] + + @property + def IE_NAME(self): + return compat_str(type(self).__name__[:-2]) + + @staticmethod + def __can_accept_status_code(err, expected_status): + assert isinstance(err, compat_urllib_error.HTTPError) + if expected_status is None: + return False + elif callable(expected_status): + return expected_status(err.code) is True + else: + return err.code in variadic(expected_status) + + def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None): + """ + Return the response handle. + + See _download_webpage docstring for arguments specification. + """ + if not self._downloader._first_webpage_request: + sleep_interval = self.get_param('sleep_interval_requests') or 0 + if sleep_interval > 0: + self.to_screen('Sleeping %s seconds ...' % sleep_interval) + time.sleep(sleep_interval) + else: + self._downloader._first_webpage_request = False + + if note is None: + self.report_download_webpage(video_id) + elif note is not False: + if video_id is None: + self.to_screen('%s' % (note,)) + else: + self.to_screen('%s: %s' % (video_id, note)) + + # Some sites check X-Forwarded-For HTTP header in order to figure out + # the origin of the client behind proxy. This allows bypassing geo + # restriction by faking this header's value to IP that belongs to some + # geo unrestricted country. We will do so once we encounter any + # geo restriction error. + if self._x_forwarded_for_ip: + if 'X-Forwarded-For' not in headers: + headers['X-Forwarded-For'] = self._x_forwarded_for_ip + + if isinstance(url_or_request, compat_urllib_request.Request): + url_or_request = update_Request( + url_or_request, data=data, headers=headers, query=query) + else: + if query: + url_or_request = update_url_query(url_or_request, query) + if data is not None or headers: + url_or_request = sanitized_Request(url_or_request, data, headers) + try: + return self._downloader.urlopen(url_or_request) + except network_exceptions as err: + if isinstance(err, compat_urllib_error.HTTPError): + if self.__can_accept_status_code(err, expected_status): + # Retain reference to error to prevent file object from + # being closed before it can be read. Works around the + # effects of <https://bugs.python.org/issue15002> + # introduced in Python 3.4.1. + err.fp._error = err + return err.fp + + if errnote is False: + return False + if errnote is None: + errnote = 'Unable to download webpage' + + errmsg = '%s: %s' % (errnote, error_to_compat_str(err)) + if fatal: + raise ExtractorError(errmsg, sys.exc_info()[2], cause=err) + else: + self.report_warning(errmsg) + return False + + def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): + """ + Return a tuple (page content as string, URL handle). + + See _download_webpage docstring for arguments specification. 
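+
+        A minimal illustrative call (a sketch, not a prescribed pattern):
+
+            webpage, urlh = self._download_webpage_handle(url, video_id)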
+ """ + # Strip hashes from the URL (#1038) + if isinstance(url_or_request, (compat_str, str)): + url_or_request = url_or_request.partition('#')[0] + + urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status) + if urlh is False: + assert not fatal + return False + content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding) + return (content, urlh) + + @staticmethod + def _guess_encoding_from_content(content_type, webpage_bytes): + m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) + if m: + encoding = m.group(1) + else: + m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]', + webpage_bytes[:1024]) + if m: + encoding = m.group(1).decode('ascii') + elif webpage_bytes.startswith(b'\xff\xfe'): + encoding = 'utf-16' + else: + encoding = 'utf-8' + + return encoding + + def __check_blocked(self, content): + first_block = content[:512] + if ('<title>Access to this site is blocked' in content + and 'Websense' in first_block): + msg = 'Access to this webpage has been blocked by Websense filtering software in your network.' + blocked_iframe = self._html_search_regex( + r'' + + xml_root = self._html_search_regex( + PLAYER_REGEX, start_page, 'xml root', default=None) + if xml_root is None: + # Probably need to authenticate + login_res = self._login(webpage_url, display_id) + if login_res is None: + self.report_warning('Could not login.') + else: + start_page = login_res + # Grab the url from the authenticated page + xml_root = self._html_search_regex( + PLAYER_REGEX, start_page, 'xml root') + + xml_name = self._html_search_regex( + r'', webpage): + url = self._search_regex( + r'src=(["\'])(?P.+?partnerplayer.+?)\1', iframe, + 'player URL', default=None, group='url') + if url: + break + + if not url: + url = self._og_search_url(webpage) + + mobj = re.match( + self._VALID_URL, self._proto_relative_url(url.strip())) + + player_id = mobj.group('player_id') + if not display_id: + display_id = player_id + if player_id: + player_page = self._download_webpage( + url, display_id, note='Downloading player page', + errnote='Could not download player page') + video_id = self._search_regex( + r'\d+)' + _TEST = { + 'url': 'http://www.pearvideo.com/video_1076290', + 'info_dict': { + 'id': '1076290', + 'ext': 'mp4', + 'title': '小浣熊在主人家玻璃上滚石头:没砸', + 'description': 'md5:01d576b747de71be0ee85eb7cac25f9d', + 'timestamp': 1494275280, + 'upload_date': '20170508', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + quality = qualities( + ('ldflv', 'ld', 'sdflv', 'sd', 'hdflv', 'hd', 'src')) + + formats = [{ + 'url': mobj.group('url'), + 'format_id': mobj.group('id'), + 'quality': quality(mobj.group('id')), + } for mobj in re.finditer( + r'(?P[a-zA-Z]+)Url\s*=\s*(["\'])(?P(?:https?:)?//.+?)\2', + webpage)] + self._sort_formats(formats) + + title = self._search_regex( + (r']+\bclass=(["\'])video-tt\1[^>]*>(?P[^<]+)', + r'<[^>]+\bdata-title=(["\'])(?P(?:(?!\1).)+)\1'), + webpage, 'title', group='value') + description = self._search_regex( + (r']+\bclass=(["\'])summary\1[^>]*>(?P[^<]+)', + r'<[^>]+\bdata-summary=(["\'])(?P(?:(?!\1).)+)\1'), + webpage, 'description', default=None, + group='value') or self._html_search_meta('Description', webpage) + timestamp = unified_timestamp(self._search_regex( + r']+\bclass=["\']date["\'][^>]*>([^<]+)', + webpage, 'timestamp', 
fatal=False)) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/peertube.py b/plugins/youtube_download/yt_dlp/extractor/peertube.py new file mode 100644 index 0000000..e0b2ab9 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/peertube.py @@ -0,0 +1,1402 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import functools +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + format_field, + int_or_none, + parse_resolution, + str_or_none, + try_get, + unified_timestamp, + url_or_none, + urljoin, + OnDemandPagedList, +) + + +class PeerTubeIE(InfoExtractor): + _INSTANCES_RE = r'''(?: + # Taken from https://instances.joinpeertube.org/instances + 40two\.tube| + a\.metube\.ch| + advtv\.ml| + algorithmic\.tv| + alimulama\.com| + arcana\.fun| + archive\.vidicon\.org| + artefac-paris\.tv| + auf1\.eu| + battlepenguin\.video| + beertube\.epgn\.ch| + befree\.nohost\.me| + bideoak\.argia\.eus| + birkeundnymphe\.de| + bitcointv\.com| + cattube\.org| + clap\.nerv-project\.eu| + climatejustice\.video| + comf\.tube| + conspiracydistillery\.com| + darkvapor\.nohost\.me| + daschauher\.aksel\.rocks| + digitalcourage\.video| + dreiecksnebel\.alex-detsch\.de| + eduvid\.org| + evangelisch\.video| + exo\.tube| + fair\.tube| + fediverse\.tv| + film\.k-prod\.fr| + flim\.txmn\.tk| + fotogramas\.politicaconciencia\.org| + ftsi\.ru| + gary\.vger\.cloud| + graeber\.video| + greatview\.video| + grypstube\.uni-greifswald\.de| + highvoltage\.tv| + hpstube\.fr| + htp\.live| + hyperreal\.tube| + juggling\.digital| + kino\.kompot\.si| + kino\.schuerz\.at| + kinowolnosc\.pl| + kirche\.peertube-host\.de| + kodcast\.com| + kolektiva\.media| + kraut\.zone| + kumi\.tube| + lastbreach\.tv| + lepetitmayennais\.fr\.nf| + lexx\.impa\.me| + libertynode\.tv| + libra\.syntazia\.org| + libremedia\.video| + live\.libratoi\.org| + live\.nanao\.moe| + live\.toobnix\.org| + livegram\.net| + lolitube\.freedomchan\.moe| + lucarne\.balsamine\.be| + maindreieck-tv\.de| + mani\.tube| + manicphase\.me| + media\.gzevd\.de| + media\.inno3\.cricket| + media\.kaitaia\.life| + media\.krashboyz\.org| + media\.over-world\.org| + media\.skewed\.de| + media\.undeadnetwork\.de| + medias\.pingbase\.net| + melsungen\.peertube-host\.de| + mirametube\.fr| + mojotube\.net| + monplaisirtube\.ddns\.net| + mountaintown\.video| + my\.bunny\.cafe| + myfreetube\.de| + mytube\.kn-cloud\.de| + mytube\.madzel\.de| + myworkoutarenapeertube\.cf| + nanawel-peertube\.dyndns\.org| + nastub\.cz| + offenes\.tv| + orgdup\.media| + ovaltube\.codinglab\.ch| + p2ptv\.ru| + p\.eertu\.be| + p\.lu| + peer\.azurs\.fr| + peertube1\.zeteo\.me| + peertube\.020\.pl| + peertube\.0x5e\.eu| + peertube\.alpharius\.io| + peertube\.am-networks\.fr| + peertube\.anduin\.net| + peertube\.anzui\.dev| + peertube\.arbleizez\.bzh| + peertube\.art3mis\.de| + peertube\.atilla\.org| + peertube\.atsuchan\.page| + peertube\.aukfood\.net| + peertube\.aventer\.biz| + peertube\.b38\.rural-it\.org| + peertube\.beeldengeluid\.nl| + peertube\.be| + peertube\.bgzashtita\.es| + peertube\.bitsandlinux\.com| + peertube\.biz| + peertube\.boba\.best| + peertube\.br0\.fr| + peertube\.bridaahost\.ynh\.fr| + peertube\.bubbletea\.dev| + peertube\.bubuit\.net| + peertube\.cabaal\.net| + peertube\.cats-home\.net| + peertube\.chemnitz\.freifunk\.net| + peertube\.chevro\.fr| + peertube\.chrisspiegl\.com| + 
peertube\.chtisurel\.net| + peertube\.cipherbliss\.com| + peertube\.cloud\.sans\.pub| + peertube\.cpge-brizeux\.fr| + peertube\.ctseuro\.com| + peertube\.cuatrolibertades\.org| + peertube\.cybercirujas\.club| + peertube\.cythin\.com| + peertube\.davigge\.com| + peertube\.dc\.pini\.fr| + peertube\.debian\.social| + peertube\.demonix\.fr| + peertube\.designersethiques\.org| + peertube\.desmu\.fr| + peertube\.devloprog\.org| + peertube\.devol\.it| + peertube\.dtmf\.ca| + peertube\.ecologie\.bzh| + peertube\.eu\.org| + peertube\.european-pirates\.eu| + peertube\.euskarabildua\.eus| + peertube\.fenarinarsa\.com| + peertube\.fomin\.site| + peertube\.forsud\.be| + peertube\.francoispelletier\.org| + peertube\.freenet\.ru| + peertube\.freetalklive\.com| + peertube\.functional\.cafe| + peertube\.gardeludwig\.fr| + peertube\.gargantia\.fr| + peertube\.gcfamily\.fr| + peertube\.genma\.fr| + peertube\.get-racing\.de| + peertube\.gidikroon\.eu| + peertube\.gruezishop\.ch| + peertube\.habets\.house| + peertube\.hackerfraternity\.org| + peertube\.ichigo\.everydayimshuflin\.com| + peertube\.ignifi\.me| + peertube\.inapurna\.org| + peertube\.informaction\.info| + peertube\.interhop\.org| + peertube\.iselfhost\.com| + peertube\.it| + peertube\.jensdiemer\.de| + peertube\.joffreyverd\.fr| + peertube\.kalua\.im| + peertube\.kathryl\.fr| + peertube\.keazilla\.net| + peertube\.klaewyss\.fr| + peertube\.kodcast\.com| + peertube\.kx\.studio| + peertube\.lagvoid\.com| + peertube\.lavallee\.tech| + peertube\.le5emeaxe\.fr| + peertube\.lestutosdeprocessus\.fr| + peertube\.librenet\.co\.za| + peertube\.logilab\.fr| + peertube\.louisematic\.site| + peertube\.luckow\.org| + peertube\.luga\.at| + peertube\.lyceeconnecte\.fr| + peertube\.manalejandro\.com| + peertube\.marud\.fr| + peertube\.mattone\.net| + peertube\.maxweiss\.io| + peertube\.monlycee\.net| + peertube\.mxinfo\.fr| + peertube\.myrasp\.eu| + peertube\.nebelcloud\.de| + peertube\.netzbegruenung\.de| + peertube\.newsocial\.tech| + peertube\.nicolastissot\.fr| + peertube\.nz| + peertube\.offerman\.com| + peertube\.opencloud\.lu| + peertube\.orthus\.link| + peertube\.patapouf\.xyz| + peertube\.pi2\.dev| + peertube\.plataformess\.org| + peertube\.pl| + peertube\.portaesgnos\.org| + peertube\.r2\.enst\.fr| + peertube\.r5c3\.fr| + peertube\.radres\.xyz| + peertube\.red| + peertube\.robonomics\.network| + peertube\.rtnkv\.cloud| + peertube\.runfox\.tk| + peertube\.satoshishop\.de| + peertube\.scic-tetris\.org| + peertube\.securitymadein\.lu| + peertube\.semweb\.pro| + peertube\.social\.my-wan\.de| + peertube\.soykaf\.org| + peertube\.stefofficiel\.me| + peertube\.stream| + peertube\.su| + peertube\.swrs\.net| + peertube\.takeko\.cyou| + peertube\.tangentfox\.com| + peertube\.taxinachtegel\.de| + peertube\.thenewoil\.xyz| + peertube\.ti-fr\.com| + peertube\.tiennot\.net| + peertube\.troback\.com| + peertube\.tspu\.edu\.ru| + peertube\.tux\.ovh| + peertube\.tv| + peertube\.tweb\.tv| + peertube\.ucy\.de| + peertube\.underworld\.fr| + peertube\.us\.to| + peertube\.ventresmous\.fr| + peertube\.vlaki\.cz| + peertube\.w\.utnw\.de| + peertube\.westring\.digital| + peertube\.xwiki\.com| + peertube\.zoz-serv\.org| + peervideo\.ru| + periscope\.numenaute\.org| + perron-tube\.de| + petitlutinartube\.fr| + phijkchu\.com| + pierre\.tube| + piraten\.space| + play\.rosano\.ca| + player\.ojamajo\.moe| + plextube\.nl| + pocketnetpeertube1\.nohost\.me| + pocketnetpeertube3\.nohost\.me| + pocketnetpeertube4\.nohost\.me| + pocketnetpeertube5\.nohost\.me| + 
pocketnetpeertube6\.nohost\.me| + pt\.24-7\.ro| + pt\.apathy\.top| + pt\.diaspodon\.fr| + pt\.fedi\.tech| + pt\.maciej\.website| + ptb\.lunarviews\.net| + ptmir1\.inter21\.net| + ptmir2\.inter21\.net| + ptmir3\.inter21\.net| + ptmir4\.inter21\.net| + ptmir5\.inter21\.net| + ptube\.horsentiers\.fr| + ptube\.xmanifesto\.club| + queermotion\.org| + re-wizja\.re-medium\.com| + regarder\.sans\.pub| + ruraletv\.ovh| + s1\.gegenstimme\.tv| + s2\.veezee\.tube| + sdmtube\.fr| + sender-fm\.veezee\.tube| + serv1\.wiki-tube\.de| + serv3\.wiki-tube\.de| + sickstream\.net| + sleepy\.tube| + sovran\.video| + spectra\.video| + stream\.elven\.pw| + stream\.k-prod\.fr| + stream\.shahab\.nohost\.me| + streamsource\.video| + studios\.racer159\.com| + testtube\.florimond\.eu| + tgi\.hosted\.spacebear\.ee| + thaitube\.in\.th| + the\.jokertv\.eu| + theater\.ethernia\.net| + thecool\.tube| + tilvids\.com| + toob\.bub\.org| + tpaw\.video| + truetube\.media| + tuba\.lhub\.pl| + tube-aix-marseille\.beta\.education\.fr| + tube-amiens\.beta\.education\.fr| + tube-besancon\.beta\.education\.fr| + tube-bordeaux\.beta\.education\.fr| + tube-clermont-ferrand\.beta\.education\.fr| + tube-corse\.beta\.education\.fr| + tube-creteil\.beta\.education\.fr| + tube-dijon\.beta\.education\.fr| + tube-education\.beta\.education\.fr| + tube-grenoble\.beta\.education\.fr| + tube-lille\.beta\.education\.fr| + tube-limoges\.beta\.education\.fr| + tube-montpellier\.beta\.education\.fr| + tube-nancy\.beta\.education\.fr| + tube-nantes\.beta\.education\.fr| + tube-nice\.beta\.education\.fr| + tube-normandie\.beta\.education\.fr| + tube-orleans-tours\.beta\.education\.fr| + tube-outremer\.beta\.education\.fr| + tube-paris\.beta\.education\.fr| + tube-poitiers\.beta\.education\.fr| + tube-reims\.beta\.education\.fr| + tube-rennes\.beta\.education\.fr| + tube-strasbourg\.beta\.education\.fr| + tube-toulouse\.beta\.education\.fr| + tube-versailles\.beta\.education\.fr| + tube1\.it\.tuwien\.ac\.at| + tube\.abolivier\.bzh| + tube\.ac-amiens\.fr| + tube\.aerztefueraufklaerung\.de| + tube\.alexx\.ml| + tube\.amic37\.fr| + tube\.anufrij\.de| + tube\.apolut\.net| + tube\.arkhalabs\.io| + tube\.arthack\.nz| + tube\.as211696\.net| + tube\.avensio\.de| + tube\.azbyka\.ru| + tube\.azkware\.net| + tube\.bachaner\.fr| + tube\.bmesh\.org| + tube\.borked\.host| + tube\.bstly\.de| + tube\.chaoszone\.tv| + tube\.chatelet\.ovh| + tube\.cloud-libre\.eu| + tube\.cms\.garden| + tube\.cowfee\.moe| + tube\.cryptography\.dog| + tube\.darknight-coffee\.org| + tube\.dev\.lhub\.pl| + tube\.distrilab\.fr| + tube\.dsocialize\.net| + tube\.ebin\.club| + tube\.fdn\.fr| + tube\.florimond\.eu| + tube\.foxarmy\.ml| + tube\.foxden\.party| + tube\.frischesicht\.de| + tube\.futuretic\.fr| + tube\.gnous\.eu| + tube\.grap\.coop| + tube\.graz\.social| + tube\.grin\.hu| + tube\.hackerscop\.org| + tube\.hordearii\.fr| + tube\.jeena\.net| + tube\.kai-stuht\.com| + tube\.kockatoo\.org| + tube\.kotur\.org| + tube\.lacaveatonton\.ovh| + tube\.linkse\.media| + tube\.lokad\.com| + tube\.lucie-philou\.com| + tube\.melonbread\.xyz| + tube\.mfraters\.net| + tube\.motuhake\.xyz| + tube\.mrbesen\.de| + tube\.nah\.re| + tube\.nchoco\.net| + tube\.novg\.net| + tube\.nox-rhea\.org| + tube\.nuagelibre\.fr| + tube\.nx12\.net| + tube\.octaplex\.net| + tube\.odat\.xyz| + tube\.oisux\.org| + tube\.opportunis\.me| + tube\.org\.il| + tube\.ortion\.xyz| + tube\.others\.social| + tube\.picasoft\.net| + tube\.plomlompom\.com| + tube\.pmj\.rocks| + tube\.portes-imaginaire\.org| + tube\.pyngu\.com| + 
tube\.rebellion\.global| + tube\.rhythms-of-resistance\.org| + tube\.rita\.moe| + tube\.rsi\.cnr\.it| + tube\.s1gm4\.eu| + tube\.saumon\.io| + tube\.schleuss\.online| + tube\.schule\.social| + tube\.seditio\.fr| + tube\.shanti\.cafe| + tube\.shela\.nu| + tube\.skrep\.in| + tube\.sp-codes\.de| + tube\.sp4ke\.com| + tube\.superseriousbusiness\.org| + tube\.systest\.eu| + tube\.tappret\.fr| + tube\.tardis\.world| + tube\.toontoet\.nl| + tube\.tpshd\.de| + tube\.troopers\.agency| + tube\.tylerdavis\.xyz| + tube\.undernet\.uy| + tube\.vigilian-consulting\.nl| + tube\.vraphim\.com| + tube\.wehost\.lgbt| + tube\.wien\.rocks| + tube\.wolfe\.casa| + tube\.xd0\.de| + tube\.xy-space\.de| + tube\.yapbreak\.fr| + tubedu\.org| + tubes\.jodh\.us| + tuktube\.com| + turkum\.me| + tututu\.tube| + tuvideo\.encanarias\.info| + tv1\.cocu\.cc| + tv1\.gomntu\.space| + tv2\.cocu\.cc| + tv\.adn\.life| + tv\.atmx\.ca| + tv\.bitma\.st| + tv\.generallyrubbish\.net\.au| + tv\.lumbung\.space| + tv\.mattchristiansenmedia\.com| + tv\.netwhood\.online| + tv\.neue\.city| + tv\.piejacker\.net| + tv\.pirateradio\.social| + tv\.undersco\.re| + tvox\.ru| + twctube\.twc-zone\.eu| + unfilter\.tube| + v\.basspistol\.org| + v\.kisombrella\.top| + v\.lastorder\.xyz| + v\.lor\.sh| + v\.phreedom\.club| + v\.sil\.sh| + v\.szy\.io| + v\.xxxapex\.com| + veezee\.tube| + vid\.dascoyote\.xyz| + vid\.garwood\.io| + vid\.ncrypt\.at| + vid\.pravdastalina\.info| + vid\.qorg11\.net| + vid\.rajeshtaylor\.com| + vid\.samtripoli\.com| + vid\.werefox\.dev| + vid\.wildeboer\.net| + video-cave-v2\.de| + video\.076\.ne\.jp| + video\.1146\.nohost\.me| + video\.altertek\.org| + video\.anartist\.org| + video\.apps\.thedoodleproject\.net| + video\.artist\.cx| + video\.asgardius\.company| + video\.balsillie\.net| + video\.bards\.online| + video\.binarydad\.com| + video\.blast-info\.fr| + video\.catgirl\.biz| + video\.cigliola\.com| + video\.cm-en-transition\.fr| + video\.cnt\.social| + video\.coales\.co| + video\.codingfield\.com| + video\.comptoir\.net| + video\.comune\.trento\.it| + video\.cpn\.so| + video\.csc49\.fr| + video\.cybre\.town| + video\.demokratischer-sommer\.de| + video\.discord-insoumis\.fr| + video\.dolphincastle\.com| + video\.dresden\.network| + video\.ecole-89\.com| + video\.elgrillolibertario\.org| + video\.emergeheart\.info| + video\.eradicatinglove\.xyz| + video\.ethantheenigma\.me| + video\.exodus-privacy\.eu\.org| + video\.fbxl\.net| + video\.fhtagn\.org| + video\.greenmycity\.eu| + video\.guerredeclasse\.fr| + video\.gyt\.is| + video\.hackers\.town| + video\.hardlimit\.com| + video\.hooli\.co| + video\.igem\.org| + video\.internet-czas-dzialac\.pl| + video\.islameye\.com| + video\.kicik\.fr| + video\.kuba-orlik\.name| + video\.kyushojitsu\.ca| + video\.lavolte\.net| + video\.lespoesiesdheloise\.fr| + video\.liberta\.vip| + video\.liege\.bike| + video\.linc\.systems| + video\.linux\.it| + video\.linuxtrent\.it| + video\.lokal\.social| + video\.lono\.space| + video\.lunasqu\.ee| + video\.lundi\.am| + video\.marcorennmaus\.de| + video\.mass-trespass\.uk| + video\.mugoreve\.fr| + video\.mundodesconocido\.com| + video\.mycrowd\.ca| + video\.nogafam\.es| + video\.odayacres\.farm| + video\.ozgurkon\.org| + video\.p1ng0ut\.social| + video\.p3x\.de| + video\.pcf\.fr| + video\.pony\.gallery| + video\.potate\.space| + video\.pourpenser\.pro| + video\.progressiv\.dev| + video\.resolutions\.it| + video\.rw501\.de| + video\.screamer\.wiki| + video\.sdm-tools\.net| + video\.sftblw\.moe| + video\.shitposter\.club| + video\.skyn3t\.in| + 
video\.soi\.ch| + video\.stuartbrand\.co\.uk| + video\.thinkof\.name| + video\.toot\.pt| + video\.triplea\.fr| + video\.turbo\.chat| + video\.vaku\.org\.ua| + video\.veloma\.org| + video\.violoncello\.ch| + video\.wilkie\.how| + video\.wsf2021\.info| + videorelay\.co| + videos-passages\.huma-num\.fr| + videos\.3d-wolf\.com| + videos\.ahp-numerique\.fr| + videos\.alexandrebadalo\.pt| + videos\.archigny\.net| + videos\.benjaminbrady\.ie| + videos\.buceoluegoexisto\.com| + videos\.capas\.se| + videos\.casually\.cat| + videos\.cloudron\.io| + videos\.coletivos\.org| + videos\.danksquad\.org| + videos\.denshi\.live| + videos\.fromouter\.space| + videos\.fsci\.in| + videos\.globenet\.org| + videos\.hauspie\.fr| + videos\.hush\.is| + videos\.john-livingston\.fr| + videos\.jordanwarne\.xyz| + videos\.lavoixdessansvoix\.org| + videos\.leslionsfloorball\.fr| + videos\.lucero\.top| + videos\.martyn\.berlin| + videos\.mastodont\.cat| + videos\.monstro1\.com| + videos\.npo\.city| + videos\.optoutpod\.com| + videos\.petch\.rocks| + videos\.pzelawski\.xyz| + videos\.rampin\.org| + videos\.scanlines\.xyz| + videos\.shmalls\.pw| + videos\.sibear\.fr| + videos\.stadtfabrikanten\.org| + videos\.tankernn\.eu| + videos\.testimonia\.org| + videos\.thisishowidontdisappear\.com| + videos\.traumaheilung\.net| + videos\.trom\.tf| + videos\.wakkerewereld\.nu| + videos\.weblib\.re| + videos\.yesil\.club| + vids\.roshless\.me| + vids\.tekdmn\.me| + vidz\.dou\.bet| + vod\.lumikko\.dev| + vs\.uniter\.network| + vulgarisation-informatique\.fr| + watch\.breadtube\.tv| + watch\.deranalyst\.ch| + watch\.ignorance\.eu| + watch\.krazy\.party| + watch\.libertaria\.space| + watch\.rt4mn\.org| + watch\.softinio\.com| + watch\.tubelab\.video| + web-fellow\.de| + webtv\.vandoeuvre\.net| + wechill\.space| + wikileaks\.video| + wiwi\.video| + worldofvids\.com| + wwtube\.net| + www4\.mir\.inter21\.net| + www\.birkeundnymphe\.de| + www\.captain-german\.com| + www\.wiki-tube\.de| + xxivproduction\.video| + xxx\.noho\.st| + + # from youtube-dl + peertube\.rainbowswingers\.net| + tube\.stanisic\.nl| + peer\.suiri\.us| + medias\.libox\.fr| + videomensoif\.ynh\.fr| + peertube\.travelpandas\.eu| + peertube\.rachetjay\.fr| + peertube\.montecsys\.fr| + tube\.eskuero\.me| + peer\.tube| + peertube\.umeahackerspace\.se| + tube\.nx-pod\.de| + video\.monsieurbidouille\.fr| + tube\.openalgeria\.org| + vid\.lelux\.fi| + video\.anormallostpod\.ovh| + tube\.crapaud-fou\.org| + peertube\.stemy\.me| + lostpod\.space| + exode\.me| + peertube\.snargol\.com| + vis\.ion\.ovh| + videosdulib\.re| + v\.mbius\.io| + videos\.judrey\.eu| + peertube\.osureplayviewer\.xyz| + peertube\.mathieufamily\.ovh| + www\.videos-libr\.es| + fightforinfo\.com| + peertube\.fediverse\.ru| + peertube\.oiseauroch\.fr| + video\.nesven\.eu| + v\.bearvideo\.win| + video\.qoto\.org| + justporn\.cc| + video\.vny\.fr| + peervideo\.club| + tube\.taker\.fr| + peertube\.chantierlibre\.org| + tube\.ipfixe\.info| + tube\.kicou\.info| + tube\.dodsorf\.as| + videobit\.cc| + video\.yukari\.moe| + videos\.elbinario\.net| + hkvideo\.live| + pt\.tux\.tf| + www\.hkvideo\.live| + FIGHTFORINFO\.com| + pt\.765racing\.com| + peertube\.gnumeria\.eu\.org| + nordenmedia\.com| + peertube\.co\.uk| + tube\.darfweb\.eu| + tube\.kalah-france\.org| + 0ch\.in| + vod\.mochi\.academy| + film\.node9\.org| + peertube\.hatthieves\.es| + video\.fitchfamily\.org| + peertube\.ddns\.net| + video\.ifuncle\.kr| + video\.fdlibre\.eu| + tube\.22decembre\.eu| + peertube\.harmoniescreatives\.com| + tube\.fabrigli\.fr| + 
video\.thedwyers\.co| + video\.bruitbruit\.com| + peertube\.foxfam\.club| + peer\.philoxweb\.be| + videos\.bugs\.social| + peertube\.malbert\.xyz| + peertube\.bilange\.ca| + libretube\.net| + diytelevision\.com| + peertube\.fedilab\.app| + libre\.video| + video\.mstddntfdn\.online| + us\.tv| + peertube\.sl-network\.fr| + peertube\.dynlinux\.io| + peertube\.david\.durieux\.family| + peertube\.linuxrocks\.online| + peerwatch\.xyz| + v\.kretschmann\.social| + tube\.otter\.sh| + yt\.is\.nota\.live| + tube\.dragonpsi\.xyz| + peertube\.boneheadmedia\.com| + videos\.funkwhale\.audio| + watch\.44con\.com| + peertube\.gcaillaut\.fr| + peertube\.icu| + pony\.tube| + spacepub\.space| + tube\.stbr\.io| + v\.mom-gay\.faith| + tube\.port0\.xyz| + peertube\.simounet\.net| + play\.jergefelt\.se| + peertube\.zeteo\.me| + tube\.danq\.me| + peertube\.kerenon\.com| + tube\.fab-l3\.org| + tube\.calculate\.social| + peertube\.mckillop\.org| + tube\.netzspielplatz\.de| + vod\.ksite\.de| + peertube\.laas\.fr| + tube\.govital\.net| + peertube\.stephenson\.cc| + bistule\.nohost\.me| + peertube\.kajalinifi\.de| + video\.ploud\.jp| + video\.omniatv\.com| + peertube\.ffs2play\.fr| + peertube\.leboulaire\.ovh| + peertube\.tronic-studio\.com| + peertube\.public\.cat| + peertube\.metalbanana\.net| + video\.1000i100\.fr| + peertube\.alter-nativ-voll\.de| + tube\.pasa\.tf| + tube\.worldofhauru\.xyz| + pt\.kamp\.site| + peertube\.teleassist\.fr| + videos\.mleduc\.xyz| + conf\.tube| + media\.privacyinternational\.org| + pt\.forty-two\.nl| + video\.halle-leaks\.de| + video\.grosskopfgames\.de| + peertube\.schaeferit\.de| + peertube\.jackbot\.fr| + tube\.extinctionrebellion\.fr| + peertube\.f-si\.org| + video\.subak\.ovh| + videos\.koweb\.fr| + peertube\.zergy\.net| + peertube\.roflcopter\.fr| + peertube\.floss-marketing-school\.com| + vloggers\.social| + peertube\.iriseden\.eu| + videos\.ubuntu-paris\.org| + peertube\.mastodon\.host| + armstube\.com| + peertube\.s2s\.video| + peertube\.lol| + tube\.open-plug\.eu| + open\.tube| + peertube\.ch| + peertube\.normandie-libre\.fr| + peertube\.slat\.org| + video\.lacaveatonton\.ovh| + peertube\.uno| + peertube\.servebeer\.com| + peertube\.fedi\.quebec| + tube\.h3z\.jp| + tube\.plus200\.com| + peertube\.eric\.ovh| + tube\.metadocs\.cc| + tube\.unmondemeilleur\.eu| + gouttedeau\.space| + video\.antirep\.net| + nrop\.cant\.at| + tube\.ksl-bmx\.de| + tube\.plaf\.fr| + tube\.tchncs\.de| + video\.devinberg\.com| + hitchtube\.fr| + peertube\.kosebamse\.com| + yunopeertube\.myddns\.me| + peertube\.varney\.fr| + peertube\.anon-kenkai\.com| + tube\.maiti\.info| + tubee\.fr| + videos\.dinofly\.com| + toobnix\.org| + videotape\.me| + voca\.tube| + video\.heromuster\.com| + video\.lemediatv\.fr| + video\.up\.edu\.ph| + balafon\.video| + video\.ivel\.fr| + thickrips\.cloud| + pt\.laurentkruger\.fr| + video\.monarch-pass\.net| + peertube\.artica\.center| + video\.alternanet\.fr| + indymotion\.fr| + fanvid\.stopthatimp\.net| + video\.farci\.org| + v\.lesterpig\.com| + video\.okaris\.de| + tube\.pawelko\.net| + peertube\.mablr\.org| + tube\.fede\.re| + pytu\.be| + evertron\.tv| + devtube\.dev-wiki\.de| + raptube\.antipub\.org| + video\.selea\.se| + peertube\.mygaia\.org| + video\.oh14\.de| + peertube\.livingutopia\.org| + peertube\.the-penguin\.de| + tube\.thechangebook\.org| + tube\.anjara\.eu| + pt\.pube\.tk| + video\.samedi\.pm| + mplayer\.demouliere\.eu| + widemus\.de| + peertube\.me| + peertube\.zapashcanon\.fr| + video\.latavernedejohnjohn\.fr| + peertube\.pcservice46\.fr| + 
peertube\.mazzonetto\.eu| + video\.irem\.univ-paris-diderot\.fr| + video\.livecchi\.cloud| + alttube\.fr| + video\.coop\.tools| + video\.cabane-libre\.org| + peertube\.openstreetmap\.fr| + videos\.alolise\.org| + irrsinn\.video| + video\.antopie\.org| + scitech\.video| + tube2\.nemsia\.org| + video\.amic37\.fr| + peertube\.freeforge\.eu| + video\.arbitrarion\.com| + video\.datsemultimedia\.com| + stoptrackingus\.tv| + peertube\.ricostrongxxx\.com| + docker\.videos\.lecygnenoir\.info| + peertube\.togart\.de| + tube\.postblue\.info| + videos\.domainepublic\.net| + peertube\.cyber-tribal\.com| + video\.gresille\.org| + peertube\.dsmouse\.net| + cinema\.yunohost\.support| + tube\.theocevaer\.fr| + repro\.video| + tube\.4aem\.com| + quaziinc\.com| + peertube\.metawurst\.space| + videos\.wakapo\.com| + video\.ploud\.fr| + video\.freeradical\.zone| + tube\.valinor\.fr| + refuznik\.video| + pt\.kircheneuenburg\.de| + peertube\.asrun\.eu| + peertube\.lagob\.fr| + videos\.side-ways\.net| + 91video\.online| + video\.valme\.io| + video\.taboulisme\.com| + videos-libr\.es| + tv\.mooh\.fr| + nuage\.acostey\.fr| + video\.monsieur-a\.fr| + peertube\.librelois\.fr| + videos\.pair2jeux\.tube| + videos\.pueseso\.club| + peer\.mathdacloud\.ovh| + media\.assassinate-you\.net| + vidcommons\.org| + ptube\.rousset\.nom\.fr| + tube\.cyano\.at| + videos\.squat\.net| + video\.iphodase\.fr| + peertube\.makotoworkshop\.org| + peertube\.serveur\.slv-valbonne\.fr| + vault\.mle\.party| + hostyour\.tv| + videos\.hack2g2\.fr| + libre\.tube| + pire\.artisanlogiciel\.net| + videos\.numerique-en-commun\.fr| + video\.netsyms\.com| + video\.die-partei\.social| + video\.writeas\.org| + peertube\.swarm\.solvingmaz\.es| + tube\.pericoloso\.ovh| + watching\.cypherpunk\.observer| + videos\.adhocmusic\.com| + tube\.rfc1149\.net| + peertube\.librelabucm\.org| + videos\.numericoop\.fr| + peertube\.koehn\.com| + peertube\.anarchmusicall\.net| + tube\.kampftoast\.de| + vid\.y-y\.li| + peertube\.xtenz\.xyz| + diode\.zone| + tube\.egf\.mn| + peertube\.nomagic\.uk| + visionon\.tv| + videos\.koumoul\.com| + video\.rastapuls\.com| + video\.mantlepro\.com| + video\.deadsuperhero\.com| + peertube\.musicstudio\.pro| + peertube\.we-keys\.fr| + artitube\.artifaille\.fr| + peertube\.ethernia\.net| + tube\.midov\.pl| + peertube\.fr| + watch\.snoot\.tube| + peertube\.donnadieu\.fr| + argos\.aquilenet\.fr| + tube\.nemsia\.org| + tube\.bruniau\.net| + videos\.darckoune\.moe| + tube\.traydent\.info| + dev\.videos\.lecygnenoir\.info| + peertube\.nayya\.org| + peertube\.live| + peertube\.mofgao\.space| + video\.lequerrec\.eu| + peertube\.amicale\.net| + aperi\.tube| + tube\.ac-lyon\.fr| + video\.lw1\.at| + www\.yiny\.org| + videos\.pofilo\.fr| + tube\.lou\.lt| + choob\.h\.etbus\.ch| + tube\.hoga\.fr| + peertube\.heberge\.fr| + video\.obermui\.de| + videos\.cloudfrancois\.fr| + betamax\.video| + video\.typica\.us| + tube\.piweb\.be| + video\.blender\.org| + peertube\.cat| + tube\.kdy\.ch| + pe\.ertu\.be| + peertube\.social| + videos\.lescommuns\.org| + tv\.datamol\.org| + videonaute\.fr| + dialup\.express| + peertube\.nogafa\.org| + megatube\.lilomoino\.fr| + peertube\.tamanoir\.foucry\.net| + peertube\.devosi\.org| + peertube\.1312\.media| + tube\.bootlicker\.party| + skeptikon\.fr| + video\.blueline\.mg| + tube\.homecomputing\.fr| + tube\.ouahpiti\.info| + video\.tedomum\.net| + video\.g3l\.org| + fontube\.fr| + peertube\.gaialabs\.ch| + tube\.kher\.nl| + peertube\.qtg\.fr| + video\.migennes\.net| + tube\.p2p\.legal| + troll\.tv| + videos\.iut-orsay\.fr| 
+ peertube\.solidev\.net| + videos\.cemea\.org| + video\.passageenseine\.fr| + videos\.festivalparminous\.org| + peertube\.touhoppai\.moe| + sikke\.fi| + peer\.hostux\.social| + share\.tube| + peertube\.walkingmountains\.fr| + videos\.benpro\.fr| + peertube\.parleur\.net| + peertube\.heraut\.eu| + tube\.aquilenet\.fr| + peertube\.gegeweb\.eu| + framatube\.org| + thinkerview\.video| + tube\.conferences-gesticulees\.net| + peertube\.datagueule\.tv| + video\.lqdn\.fr| + tube\.mochi\.academy| + media\.zat\.im| + video\.colibris-outilslibres\.org| + tube\.svnet\.fr| + peertube\.video| + peertube2\.cpy\.re| + peertube3\.cpy\.re| + videos\.tcit\.fr| + peertube\.cpy\.re| + canard\.tube + )''' + _UUID_RE = r'[\da-zA-Z]{22}|[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' + _API_BASE = 'https://%s/api/v1/videos/%s/%s' + _VALID_URL = r'''(?x) + (?: + peertube:(?P[^:]+):| + https?://(?P%s)/(?:videos/(?:watch|embed)|api/v\d/videos|w)/ + ) + (?P%s) + ''' % (_INSTANCES_RE, _UUID_RE) + _TESTS = [{ + 'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d', + 'md5': '8563064d245a4be5705bddb22bb00a28', + 'info_dict': { + 'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d', + 'ext': 'mp4', + 'title': 'What is PeerTube?', + 'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10', + 'thumbnail': r're:https?://.*\.(?:jpg|png)', + 'timestamp': 1538391166, + 'upload_date': '20181001', + 'uploader': 'Framasoft', + 'uploader_id': '3', + 'uploader_url': 'https://framatube.org/accounts/framasoft', + 'channel': 'A propos de PeerTube', + 'channel_id': '2215', + 'channel_url': 'https://framatube.org/video-channels/joinpeertube', + 'language': 'en', + 'license': 'Attribution - Share Alike', + 'duration': 113, + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'tags': ['framasoft', 'peertube'], + 'categories': ['Science & Technology'], + } + }, { + 'url': 'https://peertube2.cpy.re/w/122d093a-1ede-43bd-bd34-59d2931ffc5e', + 'info_dict': { + 'id': '122d093a-1ede-43bd-bd34-59d2931ffc5e', + 'ext': 'mp4', + 'title': 'E2E tests', + 'uploader_id': '37855', + 'timestamp': 1589276219, + 'upload_date': '20200512', + 'uploader': 'chocobozzz', + } + }, { + 'url': 'https://peertube2.cpy.re/w/3fbif9S3WmtTP8gGsC5HBd', + 'info_dict': { + 'id': '3fbif9S3WmtTP8gGsC5HBd', + 'ext': 'mp4', + 'title': 'E2E tests', + 'uploader_id': '37855', + 'timestamp': 1589276219, + 'upload_date': '20200512', + 'uploader': 'chocobozzz', + }, + }, { + 'url': 'https://peertube2.cpy.re/api/v1/videos/3fbif9S3WmtTP8gGsC5HBd', + 'info_dict': { + 'id': '3fbif9S3WmtTP8gGsC5HBd', + 'ext': 'mp4', + 'title': 'E2E tests', + 'uploader_id': '37855', + 'timestamp': 1589276219, + 'upload_date': '20200512', + 'uploader': 'chocobozzz', + }, + }, { + # Issue #26002 + 'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc', + 'info_dict': { + 'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc', + 'ext': 'mp4', + 'title': 'Dot matrix printer shell demo', + 'uploader_id': '3', + 'timestamp': 1587401293, + 'upload_date': '20200420', + 'uploader': 'Drew DeVault', + } + }, { + 'url': 'https://peertube.debian.social/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44', + 'only_matching': True, + }, { + # nsfw + 'url': 'https://vod.ksite.de/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39', + 'only_matching': True, + }, { + 'url': 'https://vod.ksite.de/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7', + 'only_matching': True, + }, { + 'url': 'https://peertube.tv/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8', + 
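# NOTE: 'only_matching' entries only assert that _VALID_URL accepts the URL; no request is made and no metadata is checked. +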
'only_matching': True, + }, { + 'url': 'peertube:framatube.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205', + 'only_matching': True, + }] + + @staticmethod + def _extract_peertube_url(webpage, source_url): + mobj = re.match( + r'https?://(?P[^/]+)/(?:videos/(?:watch|embed)|w)/(?P%s)' + % PeerTubeIE._UUID_RE, source_url) + if mobj and any(p in webpage for p in ( + 'meta property="og:platform" content="PeerTube"', + 'PeerTube<', + 'There will be other non JS-based clients to access PeerTube', + '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')): + return 'peertube:%s:%s' % mobj.group('host', 'id') + + @staticmethod + def _extract_urls(webpage, source_url): + entries = re.findall( + r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)''' + % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage) + if not entries: + peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url) + if peertube_url: + entries = [peertube_url] + return entries + + def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True): + return self._download_json( + self._API_BASE % (host, video_id, path), video_id, + note=note, errnote=errnote, fatal=fatal) + + def _get_subtitles(self, host, video_id): + captions = self._call_api( + host, video_id, 'captions', note='Downloading captions JSON', + fatal=False) + if not isinstance(captions, dict): + return + data = captions.get('data') + if not isinstance(data, list): + return + subtitles = {} + for e in data: + language_id = try_get(e, lambda x: x['language']['id'], compat_str) + caption_url = urljoin('https://%s' % host, e.get('captionPath')) + if not caption_url: + continue + subtitles.setdefault(language_id or 'en', []).append({ + 'url': caption_url, + }) + return subtitles + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + host = mobj.group('host') or mobj.group('host_2') + video_id = mobj.group('id') + + video = self._call_api( + host, video_id, '', note='Downloading video JSON') + + title = video['name'] + + formats = [] + files = video.get('files') or [] + for playlist in (video.get('streamingPlaylists') or []): + if not isinstance(playlist, dict): + continue + playlist_files = playlist.get('files') + if not (playlist_files and isinstance(playlist_files, list)): + continue + files.extend(playlist_files) + for file_ in files: + if not isinstance(file_, dict): + continue + file_url = url_or_none(file_.get('fileUrl')) + if not file_url: + continue + file_size = int_or_none(file_.get('size')) + format_id = try_get( + file_, lambda x: x['resolution']['label'], compat_str) + f = parse_resolution(format_id) + f.update({ + 'url': file_url, + 'format_id': format_id, + 'filesize': file_size, + }) + if format_id == '0p': + f['vcodec'] = 'none' + else: + f['fps'] = int_or_none(file_.get('fps')) + formats.append(f) + self._sort_formats(formats) + + description = video.get('description') + if description and len(description) >= 250: + # description is shortened + full_description = self._call_api( + host, video_id, 'description', note='Downloading description JSON', + fatal=False) + + if isinstance(full_description, dict): + description = str_or_none(full_description.get('description')) or description + + subtitles = self.extract_subtitles(host, video_id) + + def data(section, field, type_): + return try_get(video, lambda x: x[section][field], type_) + + def account_data(field, type_): + return data('account', field, type_) + + def channel_data(field, type_): + return data('channel', 
field, type_) + + category = data('category', 'label', compat_str) + categories = [category] if category else None + + nsfw = video.get('nsfw') + if isinstance(nsfw, bool): + age_limit = 18 if nsfw else 0 + else: + age_limit = None + + webpage_url = 'https://%s/videos/watch/%s' % (host, video_id) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': urljoin(webpage_url, video.get('thumbnailPath')), + 'timestamp': unified_timestamp(video.get('publishedAt')), + 'uploader': account_data('displayName', compat_str), + 'uploader_id': str_or_none(account_data('id', int)), + 'uploader_url': url_or_none(account_data('url', compat_str)), + 'channel': channel_data('displayName', compat_str), + 'channel_id': str_or_none(channel_data('id', int)), + 'channel_url': url_or_none(channel_data('url', compat_str)), + 'language': data('language', 'id', compat_str), + 'license': data('licence', 'label', compat_str), + 'duration': int_or_none(video.get('duration')), + 'view_count': int_or_none(video.get('views')), + 'like_count': int_or_none(video.get('likes')), + 'dislike_count': int_or_none(video.get('dislikes')), + 'age_limit': age_limit, + 'tags': try_get(video, lambda x: x['tags'], list), + 'categories': categories, + 'formats': formats, + 'subtitles': subtitles, + 'webpage_url': webpage_url, + } + + +class PeerTubePlaylistIE(InfoExtractor): + IE_NAME = 'PeerTube:Playlist' + _TYPES = { + 'a': 'accounts', + 'c': 'video-channels', + 'w/p': 'video-playlists', + } + _VALID_URL = r'''(?x) + https?://(?P<host>%s)/(?P<type>(?:%s))/ + (?P<id>[^/]+) + ''' % (PeerTubeIE._INSTANCES_RE, '|'.join(_TYPES.keys())) + _TESTS = [{ + 'url': 'https://peertube.tux.ovh/w/p/3af94cba-95e8-4b74-b37a-807ab6d82526', + 'info_dict': { + 'id': '3af94cba-95e8-4b74-b37a-807ab6d82526', + 'description': 'playlist', + 'timestamp': 1611171863, + 'title': 'playlist', + }, + 'playlist_mincount': 6, + }, { + 'url': 'https://peertube.tux.ovh/w/p/wkyqcQBnsvFxtUB2pkYc1e', + 'info_dict': { + 'id': 'wkyqcQBnsvFxtUB2pkYc1e', + 'description': 'Cette liste de vidéos contient uniquement les jeux qui peuvent être terminés en une seule vidéo.', + 'title': 'Let\'s Play', + 'timestamp': 1604147331, + }, + 'playlist_mincount': 6, + }, { + 'url': 'https://peertube.debian.social/w/p/hFdJoTuyhNJVa1cDWd1d12', + 'info_dict': { + 'id': 'hFdJoTuyhNJVa1cDWd1d12', + 'description': 'Diversas palestras do Richard Stallman no Brasil.', + 'title': 'Richard Stallman no Brasil', + 'timestamp': 1599676222, + }, + 'playlist_mincount': 9, + }, { + 'url': 'https://peertube2.cpy.re/a/chocobozzz/videos', + 'info_dict': { + 'id': 'chocobozzz', + 'timestamp': 1553874564, + 'title': 'chocobozzz', + }, + 'playlist_mincount': 2, + }, { + 'url': 'https://framatube.org/c/bf54d359-cfad-4935-9d45-9d6be93f63e8/videos', + 'info_dict': { + 'id': 'bf54d359-cfad-4935-9d45-9d6be93f63e8', + 'timestamp': 1519917377, + 'title': 'Les vidéos de Framasoft', + }, + 'playlist_mincount': 345, + }, { + 'url': 'https://peertube2.cpy.re/c/blender_open_movies@video.blender.org/videos', + 'info_dict': { + 'id': 'blender_open_movies@video.blender.org', + 'timestamp': 1542287810, + 'title': 'Official Blender Open Movies', + }, + 'playlist_mincount': 11, + }] + _API_BASE = 'https://%s/api/v1/%s/%s%s' + _PAGE_SIZE = 30 + + def call_api(self, host, name, path, base, **kwargs): + return self._download_json( + self._API_BASE % (host, base, name, path), name, **kwargs) + + def fetch_page(self, host, id, type, page): + page += 1 + video_data = self.call_api( + host, id, +
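# NOTE: OnDemandPagedList passes zero-based page indices; 'page' was incremented above, so the 'start' offset below uses (page - 1). +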
f'/videos?sort=-createdAt&start={self._PAGE_SIZE * (page - 1)}&count={self._PAGE_SIZE}&nsfw=both', + type, note=f'Downloading page {page}').get('data', []) + for video in video_data: + shortUUID = video.get('shortUUID') or try_get(video, lambda x: x['video']['shortUUID']) + video_title = video.get('name') or try_get(video, lambda x: x['video']['name']) + yield self.url_result( + f'https://{host}/w/{shortUUID}', PeerTubeIE.ie_key(), + video_id=shortUUID, video_title=video_title) + + def _extract_playlist(self, host, type, id): + info = self.call_api(host, id, '', type, note='Downloading playlist information', fatal=False) + + playlist_title = info.get('displayName') + playlist_description = info.get('description') + playlist_timestamp = unified_timestamp(info.get('createdAt')) + channel = try_get(info, lambda x: x['ownerAccount']['name']) or info.get('displayName') + channel_id = try_get(info, lambda x: x['ownerAccount']['id']) or info.get('id') + thumbnail = format_field(info, 'thumbnailPath', f'https://{host}%s') + + entries = OnDemandPagedList(functools.partial( + self.fetch_page, host, id, type), self._PAGE_SIZE) + + return self.playlist_result( + entries, id, playlist_title, playlist_description, + timestamp=playlist_timestamp, channel=channel, channel_id=channel_id, thumbnail=thumbnail) + + def _real_extract(self, url): + type, host, id = self._match_valid_url(url).group('type', 'host', 'id') + type = self._TYPES[type] + return self._extract_playlist(host, type, id) diff --git a/plugins/youtube_download/yt_dlp/extractor/peertv.py b/plugins/youtube_download/yt_dlp/extractor/peertv.py new file mode 100644 index 0000000..002d33a --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/peertv.py @@ -0,0 +1,57 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import js_to_json + + +class PeerTVIE(InfoExtractor): + IE_NAME = 'peer.tv' + _VALID_URL = r'https?://(?:www\.)?peer\.tv/(?:de|it|en)/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.peer.tv/de/841', + 'info_dict': { + 'id': '841', + 'ext': 'mp4', + 'title': 'Die Brunnenburg', + 'description': 'md5:4395f6142b090338340ab88a3aae24ed', + }, + }, { + 'url': 'https://www.peer.tv/it/404', + 'info_dict': { + 'id': '404', + 'ext': 'mp4', + 'title': 'Cascate di ghiaccio in Val Gardena', + 'description': 'md5:e8e5907f236171842674e8090e3577b8', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_key = self._html_search_regex(r'player\.peer\.tv/js/([a-zA-Z0-9]+)', webpage, 'video key') + + js = self._download_webpage(f'https://player.peer.tv/js/{video_key}/', video_id, + headers={'Referer': 'https://www.peer.tv/'}, note='Downloading session id') + + session_id = self._search_regex(r'["\']session_id["\']:\s*["\']([a-zA-Z0-9]+)["\']', js, 'session id') + + player_webpage = self._download_webpage( + f'https://player.peer.tv/jsc/{video_key}/{session_id}?jsr=aHR0cHM6Ly93d3cucGVlci50di9kZS84NDE=&cs=UTF-8&mq=2&ua=0&webm=p&mp4=p&hls=1', + video_id, note='Downloading player webpage') + + m3u8_url = self._search_regex(r'["\']playlist_url["\']:\s*(["\'][^"\']+["\'])', player_webpage, 'm3u8 url') + m3u8_url = self._parse_json(m3u8_url, video_id, transform_source=js_to_json) + + formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls') + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title').replace('\xa0', ' '), + 
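# NOTE: the title above is scraped from raw HTML, so U+00A0 (non-breaking space) is normalised to a plain space. +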
'formats': formats, + 'description': self._html_search_meta(('og:description', 'description'), webpage), + 'thumbnail': self._html_search_meta(('og:image', 'image'), webpage) + } diff --git a/plugins/youtube_download/yt_dlp/extractor/peloton.py b/plugins/youtube_download/yt_dlp/extractor/peloton.py new file mode 100644 index 0000000..7d83225 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/peloton.py @@ -0,0 +1,221 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..compat import ( + compat_HTTPError, + compat_urllib_parse, +) +from ..utils import ( + ExtractorError, + float_or_none, + str_or_none, + traverse_obj, + url_or_none, +) + + +class PelotonIE(InfoExtractor): + IE_NAME = 'peloton' + _NETRC_MACHINE = 'peloton' + _VALID_URL = r'https?://members\.onepeloton\.com/classes/player/(?P<id>[a-f0-9]+)' + _TESTS = [{ + 'url': 'https://members.onepeloton.com/classes/player/0e9653eb53544eeb881298c8d7a87b86', + 'info_dict': { + 'id': '0e9653eb53544eeb881298c8d7a87b86', + 'title': '20 min Chest & Back Strength', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.+\.jpg', + 'description': 'md5:fcd5be9b9eda0194b470e13219050a66', + 'creator': 'Chase Tucker', + 'release_timestamp': 1556141400, + 'timestamp': 1556141400, + 'upload_date': '20190424', + 'duration': 1389, + 'categories': ['Strength'], + 'tags': ['Workout Mat', 'Light Weights', 'Medium Weights'], + 'is_live': False, + 'chapters': 'count:1', + 'subtitles': {'en': [{ + 'url': r're:^https?://.+', + 'ext': 'vtt' + }]}, + }, 'params': { + 'skip_download': 'm3u8', + }, + '_skip': 'Account needed' + }, { + 'url': 'https://members.onepeloton.com/classes/player/26603d53d6bb4de1b340514864a6a6a8', + 'info_dict': { + 'id': '26603d53d6bb4de1b340514864a6a6a8', + 'title': '30 min Earth Day Run', + 'ext': 'm4a', + 'thumbnail': r're:https://.+\.jpg', + 'description': 'md5:adc065a073934d7ee0475d217afe0c3d', + 'creator': 'Selena Samuela', + 'release_timestamp': 1587567600, + 'timestamp': 1587567600, + 'upload_date': '20200422', + 'duration': 1802, + 'categories': ['Running'], + 'is_live': False, + 'chapters': 'count:3' + }, 'params': { + 'skip_download': 'm3u8', + }, + '_skip': 'Account needed' + }] + + _MANIFEST_URL_TEMPLATE = '%s?hdnea=%s' + + def _start_session(self, video_id): + self._download_webpage('https://api.onepeloton.com/api/started_client_session', video_id, note='Starting session') + + def _login(self, video_id): + username, password = self._get_login_info() + if not (username and password): + self.raise_login_required() + try: + self._download_json( + 'https://api.onepeloton.com/auth/login', video_id, note='Logging in', + data=json.dumps({ + 'username_or_email': username, + 'password': password, + 'with_pubsub': False + }).encode(), + headers={'Content-Type': 'application/json', 'User-Agent': 'web'}) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + json_string = self._webpage_read_content(e.cause, None, video_id) + res = self._parse_json(json_string, video_id) + raise ExtractorError(res['message'], expected=res['message'] == 'Login failed') + else: + raise + + def _get_token(self, video_id): + try: + subscription = self._download_json( + 'https://api.onepeloton.com/api/subscription/stream', video_id, note='Downloading token', + data=json.dumps({}).encode(), headers={'Content-Type': 'application/json'}) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + 
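# NOTE: a 403 from the stream-subscription endpoint signals Peloton's concurrent-stream cap; re-raise below with the API's own message. +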
json_string = self._webpage_read_content(e.cause, None, video_id) + res = self._parse_json(json_string, video_id) + raise ExtractorError(res['message'], expected=res['message'] == 'Stream limit reached') + else: + raise + return subscription['token'] + + def _real_extract(self, url): + video_id = self._match_id(url) + try: + self._start_session(video_id) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + self._login(video_id) + self._start_session(video_id) + else: + raise + + metadata = self._download_json('https://api.onepeloton.com/api/ride/%s/details?stream_source=multichannel' % video_id, video_id) + ride_data = metadata.get('ride') + if not ride_data: + raise ExtractorError('Missing stream metadata') + token = self._get_token(video_id) + + is_live = False + if ride_data.get('content_format') == 'audio': + url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('vod_stream_url'), compat_urllib_parse.quote(token)) + formats = [{ + 'url': url, + 'ext': 'm4a', + 'format_id': 'audio', + 'vcodec': 'none', + }] + subtitles = {} + else: + if ride_data.get('vod_stream_url'): + url = 'https://members.onepeloton.com/.netlify/functions/m3u8-proxy?displayLanguage=en&acceptedSubtitles=%s&url=%s?hdnea=%s' % ( + ','.join([re.sub('^([a-z]+)-([A-Z]+)$', r'\1', caption) for caption in ride_data['captions']]), + ride_data['vod_stream_url'], + compat_urllib_parse.quote(compat_urllib_parse.quote(token))) + elif ride_data.get('live_stream_url'): + url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('live_stream_url'), compat_urllib_parse.quote(token)) + is_live = True + else: + raise ExtractorError('Missing video URL') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4') + + if metadata.get('instructor_cues'): + subtitles['cues'] = [{ + 'data': json.dumps(metadata.get('instructor_cues')), + 'ext': 'json' + }] + + category = ride_data.get('fitness_discipline_display_name') + chapters = [{ + 'start_time': segment.get('start_time_offset'), + 'end_time': segment.get('start_time_offset') + segment.get('length'), + 'title': segment.get('name') + } for segment in traverse_obj(metadata, ('segments', 'segment_list'))] + + self._sort_formats(formats) + return { + 'id': video_id, + 'title': ride_data.get('title'), + 'formats': formats, + 'thumbnail': url_or_none(ride_data.get('image_url')), + 'description': str_or_none(ride_data.get('description')), + 'creator': traverse_obj(ride_data, ('instructor', 'name')), + 'release_timestamp': ride_data.get('original_air_time'), + 'timestamp': ride_data.get('original_air_time'), + 'subtitles': subtitles, + 'duration': float_or_none(ride_data.get('length')), + 'categories': [category] if category else None, + 'tags': traverse_obj(ride_data, ('equipment_tags', ..., 'name')), + 'is_live': is_live, + 'chapters': chapters + } + + +class PelotonLiveIE(InfoExtractor): + IE_NAME = 'peloton:live' + IE_DESC = 'Peloton Live' + _VALID_URL = r'https?://members\.onepeloton\.com/player/live/(?P<id>[a-f0-9]+)' + _TEST = { + 'url': 'https://members.onepeloton.com/player/live/eedee2d19f804a9788f53aa8bd38eb1b', + 'info_dict': { + 'id': '32edc92d28044be5bf6c7b6f1f8d1cbc', + 'title': '30 min HIIT Ride: Live from Home', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.+\.png', + 'description': 'md5:f0d7d8ed3f901b7ee3f62c1671c15817', + 'creator': 'Alex Toussaint', + 'release_timestamp': 1587736620, + 'timestamp': 1587736620, + 'upload_date': '20200424', + 'duration': 2014, + 'categories': ['Cycling'], + 'is_live': False, + 
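# NOTE: this live workout has ended, so it resolves to the regular class player (see _real_extract below) and is_live is False. +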
'chapters': 'count:3' + }, + 'params': { + 'skip_download': 'm3u8', + }, + '_skip': 'Account needed' + } + + def _real_extract(self, url): + workout_id = self._match_id(url) + peloton = self._download_json(f'https://api.onepeloton.com/api/peloton/{workout_id}', workout_id) + + if peloton.get('ride_id'): + if not peloton.get('is_live') or peloton.get('is_encore') or peloton.get('status') != 'PRE_START': + return self.url_result('https://members.onepeloton.com/classes/player/%s' % peloton['ride_id']) + else: + raise ExtractorError('Ride has not started', expected=True) + else: + raise ExtractorError('Missing video ID') diff --git a/plugins/youtube_download/yt_dlp/extractor/people.py b/plugins/youtube_download/yt_dlp/extractor/people.py new file mode 100644 index 0000000..6ca9571 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/people.py @@ -0,0 +1,32 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class PeopleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?people\.com/people/videos/0,,(?P<id>\d+),00\.html' + + _TEST = { + 'url': 'http://www.people.com/people/videos/0,,20995451,00.html', + 'info_dict': { + 'id': 'ref:20995451', + 'ext': 'mp4', + 'title': 'Astronaut Love Triangle Victim Speaks Out: “The Crime in 2007 Hasn’t Defined Us”', + 'description': 'Colleen Shipman speaks to PEOPLE for the first time about life after the attack', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 246.318, + 'timestamp': 1458720585, + 'upload_date': '20160323', + 'uploader_id': '416418724', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['BrightcoveNew'], + } + + def _real_extract(self, url): + return self.url_result( + 'http://players.brightcove.net/416418724/default_default/index.html?videoId=ref:%s' + % self._match_id(url), 'BrightcoveNew') diff --git a/plugins/youtube_download/yt_dlp/extractor/performgroup.py b/plugins/youtube_download/yt_dlp/extractor/performgroup.py new file mode 100644 index 0000000..c00d393 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/performgroup.py @@ -0,0 +1,82 @@ +# coding: utf-8 +from __future__ import unicode_literals + + +from .common import InfoExtractor +from ..utils import int_or_none + + +class PerformGroupIE(InfoExtractor): + _VALID_URL = r'https?://player\.performgroup\.com/eplayer(?:/eplayer\.html|\.js)#/?(?P<id>[0-9a-f]{26})\.(?P<auth_token>[0-9a-z]{26})' + _TESTS = [{ + # http://www.faz.net/aktuell/sport/fussball/wm-2018-playoffs-schweiz-besiegt-nordirland-1-0-15286104.html + 'url': 'http://player.performgroup.com/eplayer/eplayer.html#d478c41c5d192f56b9aa859de8.1w4crrej5w14e1ed4s1ce4ykab', + 'md5': '259cb03d142e2e52471e8837ecacb29f', + 'info_dict': { + 'id': 'xgrwobuzumes1lwjxtcdpwgxd', + 'ext': 'mp4', + 'title': 'Liga MX: Keine Einsicht nach Horrorfoul', + 'description': 'md5:7cd3b459c82725b021e046ab10bf1c5b', + 'timestamp': 1511533477, + 'upload_date': '20171124', + } + }] + + def _call_api(self, service, auth_token, content_id, referer_url): + return self._download_json( + 'http://ep3.performfeeds.com/ep%s/%s/%s/' % (service, auth_token, content_id), + content_id, headers={ + 'Referer': referer_url, + 'Origin': 'http://player.performgroup.com', + }, query={ + '_fmt': 'json', + }) + + def _real_extract(self, url): + player_id, auth_token = self._match_valid_url(url).groups() + bootstrap = self._call_api('bootstrap', auth_token, player_id, url) + video = bootstrap['config']['dataSource']['sourceItems'][0]['videos'][0] + video_id = video['uuid'] + vod = 
self._call_api('vod', auth_token, video_id, url) + media = vod['videos']['video'][0]['media'] + + formats = [] + hls_url = media.get('hls', {}).get('url') + if hls_url: + formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + + hds_url = media.get('hds', {}).get('url') + if hds_url: + formats.extend(self._extract_f4m_formats(hds_url + '?hdcore', video_id, f4m_id='hds', fatal=False)) + + for c in media.get('content', []): + c_url = c.get('url') + if not c_url: + continue + tbr = int_or_none(c.get('bitrate'), 1000) + format_id = 'http' + if tbr: + format_id += '-%d' % tbr + formats.append({ + 'format_id': format_id, + 'url': c_url, + 'tbr': tbr, + 'width': int_or_none(c.get('width')), + 'height': int_or_none(c.get('height')), + 'filesize': int_or_none(c.get('fileSize')), + 'vcodec': c.get('type'), + 'fps': int_or_none(c.get('videoFrameRate')), + 'vbr': int_or_none(c.get('videoRate'), 1000), + 'abr': int_or_none(c.get('audioRate'), 1000), + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video['title'], + 'description': video.get('description'), + 'thumbnail': video.get('poster'), + 'duration': int_or_none(video.get('duration')), + 'timestamp': int_or_none(video.get('publishedTime'), 1000), + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/periscope.py b/plugins/youtube_download/yt_dlp/extractor/periscope.py new file mode 100644 index 0000000..b93a02b --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/periscope.py @@ -0,0 +1,195 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_iso8601, + unescapeHTML, +) + + +class PeriscopeBaseIE(InfoExtractor): + _M3U8_HEADERS = { + 'Referer': 'https://www.periscope.tv/' + } + + def _call_api(self, method, query, item_id): + return self._download_json( + 'https://api.periscope.tv/api/v2/%s' % method, + item_id, query=query) + + def _parse_broadcast_data(self, broadcast, video_id): + title = broadcast.get('status') or 'Periscope Broadcast' + uploader = broadcast.get('user_display_name') or broadcast.get('username') + title = '%s - %s' % (uploader, title) if uploader else title + is_live = broadcast.get('state').lower() == 'running' + + thumbnails = [{ + 'url': broadcast[image], + } for image in ('image_url', 'image_url_small') if broadcast.get(image)] + + return { + 'id': broadcast.get('id') or video_id, + 'title': self._live_title(title) if is_live else title, + 'timestamp': parse_iso8601(broadcast.get('created_at')), + 'uploader': uploader, + 'uploader_id': broadcast.get('user_id') or broadcast.get('username'), + 'thumbnails': thumbnails, + 'view_count': int_or_none(broadcast.get('total_watched')), + 'tags': broadcast.get('tags'), + 'is_live': is_live, + } + + @staticmethod + def _extract_common_format_info(broadcast): + return broadcast.get('state').lower(), int_or_none(broadcast.get('width')), int_or_none(broadcast.get('height')) + + @staticmethod + def _add_width_and_height(f, width, height): + for key, val in (('width', width), ('height', height)): + if not f.get(key): + f[key] = val + + def _extract_pscp_m3u8_formats(self, m3u8_url, video_id, format_id, state, width, height, fatal=True): + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', + entry_protocol='m3u8_native' + if state in ('ended', 'timed_out') else 'm3u8', + m3u8_id=format_id, fatal=fatal, headers=self._M3U8_HEADERS) + if 
len(m3u8_formats) == 1: + self._add_width_and_height(m3u8_formats[0], width, height) + for f in m3u8_formats: + f.setdefault('http_headers', {}).update(self._M3U8_HEADERS) + return m3u8_formats + + +class PeriscopeIE(PeriscopeBaseIE): + IE_DESC = 'Periscope' + IE_NAME = 'periscope' + _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)' + # Alive example URLs can be found here https://www.periscope.tv/ + _TESTS = [{ + 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==', + 'md5': '65b57957972e503fcbbaeed8f4fa04ca', + 'info_dict': { + 'id': '56102209', + 'ext': 'mp4', + 'title': 'Bec Boop - 🚠✈️🇬🇧 Fly above #London in Emirates Air Line cable car at night 🇬🇧✈️🚠 #BoopScope 🎀💗', + 'timestamp': 1438978559, + 'upload_date': '20150807', + 'uploader': 'Bec Boop', + 'uploader_id': '1465763', + }, + 'skip': 'Expires in 24 hours', + }, { + 'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv', + 'only_matching': True, + }, { + 'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX', + 'only_matching': True, + }, { + 'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv', + 'only_matching': True, + }] + + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1', webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + token = self._match_id(url) + + stream = self._call_api( + 'accessVideoPublic', {'broadcast_id': token}, token) + + broadcast = stream['broadcast'] + info = self._parse_broadcast_data(broadcast, token) + + state = broadcast.get('state').lower() + width = int_or_none(broadcast.get('width')) + height = int_or_none(broadcast.get('height')) + + def add_width_and_height(f): + for key, val in (('width', width), ('height', height)): + if not f.get(key): + f[key] = val + + video_urls = set() + formats = [] + for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'): + video_url = stream.get(format_id + '_url') + if not video_url or video_url in video_urls: + continue + video_urls.add(video_url) + if format_id != 'rtmp': + m3u8_formats = self._extract_pscp_m3u8_formats( + video_url, token, format_id, state, width, height, False) + formats.extend(m3u8_formats) + continue + rtmp_format = { + 'url': video_url, + 'ext': 'flv' if format_id == 'rtmp' else 'mp4', + } + add_width_and_height(rtmp_format) + formats.append(rtmp_format) + self._sort_formats(formats) + + info['formats'] = formats + return info + + +class PeriscopeUserIE(PeriscopeBaseIE): + _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$' + IE_DESC = 'Periscope user videos' + IE_NAME = 'periscope:user' + + _TEST = { + 'url': 'https://www.periscope.tv/LularoeHusbandMike/', + 'info_dict': { + 'id': 'LularoeHusbandMike', + 'title': 'LULAROE HUSBAND MIKE', + 'description': 'md5:6cf4ec8047768098da58e446e82c82f0', + }, + # Periscope only shows videos in the last 24 hours, so it's possible to + # get 0 videos + 'playlist_mincount': 0, + } + + def _real_extract(self, url): + user_name = self._match_id(url) + + webpage = self._download_webpage(url, user_name) + + data_store = self._parse_json( + unescapeHTML(self._search_regex( + r'data-store=(["\'])(?P<data>.+?)\1', + webpage, 'data store', default='{}', group='data')), + user_name) + + user = list(data_store['UserCache']['users'].values())[0]['user'] + user_id = user['id'] + session_id = 
data_store['SessionToken']['public']['broadcastHistory']['token']['session_id'] + + broadcasts = self._call_api( + 'getUserBroadcastsPublic', + {'user_id': user_id, 'session_id': session_id}, + user_name)['broadcasts'] + + broadcast_ids = [ + broadcast['id'] for broadcast in broadcasts if broadcast.get('id')] + + title = user.get('display_name') or user.get('username') or user_name + description = user.get('description') + + entries = [ + self.url_result( + 'https://www.periscope.tv/%s/%s' % (user_name, broadcast_id)) + for broadcast_id in broadcast_ids] + + return self.playlist_result(entries, user_id, title, description) diff --git a/plugins/youtube_download/yt_dlp/extractor/philharmoniedeparis.py b/plugins/youtube_download/yt_dlp/extractor/philharmoniedeparis.py new file mode 100644 index 0000000..9f4899c --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/philharmoniedeparis.py @@ -0,0 +1,106 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + try_get, + urljoin, +) + + +class PhilharmonieDeParisIE(InfoExtractor): + IE_DESC = 'Philharmonie de Paris' + _VALID_URL = r'''(?x) + https?:// + (?: + live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|embed(?:app)?/|misc/Playlist\.ashx\?id=)| + pad\.philharmoniedeparis\.fr/doc/CIMU/ + ) + (?P<id>\d+) + ''' + _TESTS = [{ + 'url': 'http://pad.philharmoniedeparis.fr/doc/CIMU/1086697/jazz-a-la-villette-knower', + 'md5': 'a0a4b195f544645073631cbec166a2c2', + 'info_dict': { + 'id': '1086697', + 'ext': 'mp4', + 'title': 'Jazz à la Villette : Knower', + }, + }, { + 'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html', + 'info_dict': { + 'id': '1032066', + 'title': 'md5:0a031b81807b3593cffa3c9a87a167a0', + }, + 'playlist_mincount': 2, + }, { + 'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html', + 'only_matching': True, + }, { + 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr', + 'only_matching': True, + }, { + 'url': 'https://live.philharmoniedeparis.fr/embedapp/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR', + 'only_matching': True, + }, { + 'url': 'https://live.philharmoniedeparis.fr/embed/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR', + 'only_matching': True, + }] + _LIVE_URL = 'https://live.philharmoniedeparis.fr' + + def _real_extract(self, url): + video_id = self._match_id(url) + + config = self._download_json( + '%s/otoPlayer/config.ashx' % self._LIVE_URL, video_id, query={ + 'id': video_id, + 'lang': 'fr-FR', + }) + + def extract_entry(source): + if not isinstance(source, dict): + return + title = source.get('title') + if not title: + return + files = source.get('files') + if not isinstance(files, dict): + return + format_urls = set() + formats = [] + for format_id in ('mobile', 'desktop'): + format_url = try_get( + files, lambda x: x[format_id]['file'], compat_str) + if not format_url or format_url in format_urls: + continue + format_urls.add(format_url) + m3u8_url = urljoin(self._LIVE_URL, format_url) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + if not formats and not self.get_param('ignore_no_formats'): + return + self._sort_formats(formats) + return { + 'title': title, + 'formats': formats, + } + + thumbnail = urljoin(self._LIVE_URL, config.get('image')) + + info = extract_entry(config) + if info: + 
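# NOTE: when the top-level config itself yields an entry, it is returned as a single video; otherwise fall through and build a playlist from config['chapters'] below. +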
info.update({ + 'id': video_id, + 'thumbnail': thumbnail, + }) + return info + + entries = [] + for num, chapter in enumerate(config['chapters'], start=1): + entry = extract_entry(chapter) + if entry is None: + continue + entry['id'] = '%s-%d' % (video_id, num) + entries.append(entry) + + return self.playlist_result(entries, video_id, config.get('title')) diff --git a/plugins/youtube_download/yt_dlp/extractor/phoenix.py b/plugins/youtube_download/yt_dlp/extractor/phoenix.py new file mode 100644 index 0000000..e3ea014 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/phoenix.py @@ -0,0 +1,133 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .youtube import YoutubeIE +from .zdf import ZDFBaseIE +from ..compat import compat_str +from ..utils import ( + int_or_none, + merge_dicts, + try_get, + unified_timestamp, + urljoin, +) + + +class PhoenixIE(ZDFBaseIE): + IE_NAME = 'phoenix.de' + _VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html' + _TESTS = [{ + # Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html + 'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html', + 'md5': '34ec321e7eb34231fd88616c65c92db0', + 'info_dict': { + 'id': '210222_phx_nachgehakt_corona_protest', + 'ext': 'mp4', + 'title': 'Wohin führt der Protest in der Pandemie?', + 'description': 'md5:7d643fe7f565e53a24aac036b2122fbd', + 'duration': 1691, + 'timestamp': 1613902500, + 'upload_date': '20210221', + 'uploader': 'Phoenix', + 'series': 'corona nachgehakt', + 'episode': 'Wohin führt der Protest in der Pandemie?', + }, + }, { + # Youtube embed + 'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html', + 'info_dict': { + 'id': 'hMQtqFYjomk', + 'ext': 'mp4', + 'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?', + 'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd', + 'duration': 3509, + 'upload_date': '20201219', + 'uploader': 'phoenix', + 'uploader_id': 'phoenix', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html', + 'only_matching': True, + }, { + # no media + 'url': 'https://www.phoenix.de/sendungen/dokumentationen/mit-dem-jumbo-durch-die-nacht-a-89625.html', + 'only_matching': True, + }, { + # Same as https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html + 'url': 'https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche', + 'only_matching': True, + }] + + def _real_extract(self, url): + article_id = self._match_id(url) + + article = self._download_json( + 'https://www.phoenix.de/response/id/%s' % article_id, article_id, + 'Downloading article JSON') + + video = article['absaetze'][0] + title = video.get('titel') or article.get('subtitel') + + if video.get('typ') == 'video-youtube': + video_id = video['id'] + return self.url_result( + video_id, ie=YoutubeIE.ie_key(), video_id=video_id, + video_title=title) + + video_id = compat_str(video.get('basename') or video.get('content')) + + details = self._download_json( + 'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php', + video_id, 'Downloading details JSON', query={ + 'ak': 'web', + 'ptmd': 'true', + 'id': video_id, + 'profile': 'player2', + }) + + title = title or details['title'] + content_id = details['tracking']['nielsen']['content']['assetid'] + + info = 
self._extract_ptmd( + 'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id, + content_id, None, url) + + duration = int_or_none(try_get( + details, lambda x: x['tracking']['nielsen']['content']['length'])) + timestamp = unified_timestamp(details.get('editorialDate')) + series = try_get( + details, lambda x: x['tracking']['nielsen']['content']['program'], + compat_str) + episode = title if details.get('contentType') == 'episode' else None + + thumbnails = [] + teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {} + for thumbnail_key, thumbnail_url in teaser_images.items(): + thumbnail_url = urljoin(url, thumbnail_url) + if not thumbnail_url: + continue + thumbnail = { + 'url': thumbnail_url, + } + m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key) + if m: + thumbnail['width'] = int(m.group(1)) + thumbnail['height'] = int(m.group(2)) + thumbnails.append(thumbnail) + + return merge_dicts(info, { + 'id': content_id, + 'title': title, + 'description': details.get('leadParagraph'), + 'duration': duration, + 'thumbnails': thumbnails, + 'timestamp': timestamp, + 'uploader': details.get('tvService'), + 'series': series, + 'episode': episode, + }) diff --git a/plugins/youtube_download/yt_dlp/extractor/photobucket.py b/plugins/youtube_download/yt_dlp/extractor/photobucket.py new file mode 100644 index 0000000..53aebe2 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/photobucket.py @@ -0,0 +1,45 @@ +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote + + +class PhotobucketIE(InfoExtractor): + _VALID_URL = r'https?://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))' + _TEST = { + 'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', + 'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99', + 'info_dict': { + 'id': 'zpsc0c3b9fa', + 'ext': 'mp4', + 'timestamp': 1367669341, + 'upload_date': '20130504', + 'uploader': 'rachaneronas', + 'title': 'Tired of Link Building? 
Try BacklinkMyDomain.com!', + } + } + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + video_id = mobj.group('id') + video_extension = mobj.group('ext') + + webpage = self._download_webpage(url, video_id) + + # Extract URL, uploader, and title from webpage + self.report_extraction(video_id) + info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);', + webpage, 'info json') + info = json.loads(info_json) + url = compat_urllib_parse_unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url')) + return { + 'id': video_id, + 'url': url, + 'uploader': info['username'], + 'timestamp': info['creationDate'], + 'title': info['title'], + 'ext': video_extension, + 'thumbnail': info['thumbUrl'], + } diff --git a/plugins/youtube_download/yt_dlp/extractor/picarto.py b/plugins/youtube_download/yt_dlp/extractor/picarto.py new file mode 100644 index 0000000..adf21fd --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/picarto.py @@ -0,0 +1,127 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + js_to_json, +) + + +class PicartoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)' + _TEST = { + 'url': 'https://picarto.tv/Setz', + 'info_dict': { + 'id': 'Setz', + 'ext': 'mp4', + 'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'timestamp': int, + 'is_live': True + }, + 'skip': 'Stream is offline', + } + + @classmethod + def suitable(cls, url): + return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url) + + def _real_extract(self, url): + channel_id = self._match_id(url) + + data = self._download_json( + 'https://ptvintern.picarto.tv/ptvapi', channel_id, query={ + 'query': '''{ + channel(name: "%s") { + adult + id + online + stream_name + title + } + getLoadBalancerUrl(channel_name: "%s") { + url + } +}''' % (channel_id, channel_id), + })['data'] + metadata = data['channel'] + + if metadata.get('online') == 0: + raise ExtractorError('Stream is offline', expected=True) + title = metadata['title'] + + cdn_data = self._download_json( + data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js', + channel_id, 'Downloading load balancing info') + + formats = [] + for source in (cdn_data.get('source') or []): + source_url = source.get('url') + if not source_url: + continue + source_type = source.get('type') + if source_type == 'html5/application/vnd.apple.mpegurl': + formats.extend(self._extract_m3u8_formats( + source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False)) + elif source_type == 'html5/video/mp4': + formats.append({ + 'url': source_url, + }) + self._sort_formats(formats) + + mature = metadata.get('adult') + if mature is None: + age_limit = None + else: + age_limit = 18 if mature is True else 0 + + return { + 'id': channel_id, + 'title': title.strip(), + 'is_live': True, + 'channel': channel_id, + 'channel_id': metadata.get('id'), + 'channel_url': 'https://picarto.tv/%s' % channel_id, + 'age_limit': age_limit, + 'formats': formats, + } + + +class PicartoVodIE(InfoExtractor): + _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv', + 'md5': '3ab45ba4352c52ee841a28fb73f2d9ca', + 'info_dict': { + 'id': 'ArtofZod_2017.12.12.00.13.23.flv', + 'ext': 'mp4', + 'title': 'ArtofZod_2017.12.12.00.13.23.flv', + 'thumbnail': 
r're:^https?://.*\.jpg' + }, + }, { + 'url': 'https://picarto.tv/videopopout/Plague', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + vod_info = self._parse_json( + self._search_regex( + r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage, + 'vod player'), + video_id, transform_source=js_to_json) + + formats = self._extract_m3u8_formats( + vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_id, + 'thumbnail': vod_info.get('vodThumb'), + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/piksel.py b/plugins/youtube_download/yt_dlp/extractor/piksel.py new file mode 100644 index 0000000..84c3de2 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/piksel.py @@ -0,0 +1,183 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + dict_get, + ExtractorError, + int_or_none, + join_nonempty, + parse_iso8601, + try_get, + unescapeHTML, +) + + +class PikselIE(InfoExtractor): + _VALID_URL = r'''(?x)https?:// + (?: + (?: + player\. + (?: + olympusattelecom| + vibebyvista + )| + (?:api|player)\.multicastmedia| + (?:api-ovp|player)\.piksel + )\.com| + (?: + mz-edge\.stream\.co| + movie-s\.nhk\.or + )\.jp| + vidego\.baltimorecity\.gov + )/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)''' + _TESTS = [ + { + 'url': 'http://player.piksel.com/v/ums2867l', + 'md5': '34e34c8d89dc2559976a6079db531e85', + 'info_dict': { + 'id': 'ums2867l', + 'ext': 'mp4', + 'title': 'GX-005 with Caption', + 'timestamp': 1481335659, + 'upload_date': '20161210' + } + }, + { + # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al + 'url': 'https://player.piksel.com/v/v80kqp41', + 'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d', + 'info_dict': { + 'id': 'v80kqp41', + 'ext': 'mp4', + 'title': 'WAW- State of Washington vs. Donald J. Trump, et al', + 'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. 
Robart presiding.', + 'timestamp': 1486171129, + 'upload_date': '20170204' + } + }, + { + # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/ + 'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477', + 'only_matching': True, + } + ] + + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)', + webpage) + if mobj: + return mobj.group('url') + + def _call_api(self, app_token, resource, display_id, query, fatal=True): + response = (self._download_json( + 'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token), + display_id, query=query, fatal=fatal) or {}).get('response') + failure = try_get(response, lambda x: x['failure']['reason']) + if failure: + if fatal: + raise ExtractorError(failure, expected=True) + self.report_warning(failure) + return response + + def _real_extract(self, url): + ref_id, display_id = self._match_valid_url(url).groups() + webpage = self._download_webpage(url, display_id) + app_token = self._search_regex([ + r'clientAPI\s*:\s*"([^"]+)"', + r'data-de-api-key\s*=\s*"([^"]+)"' + ], webpage, 'app token') + query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id} + program = self._call_api( + app_token, 'program', display_id, query)['WsProgramResponse']['program'] + video_id = program['uuid'] + video_data = program['asset'] + title = video_data['title'] + asset_type = dict_get(video_data, ['assetType', 'asset_type']) + + formats = [] + + def process_asset_file(asset_file): + if not asset_file: + return + # TODO: extract rtmp formats + http_url = asset_file.get('http_url') + if not http_url: + return + tbr = None + vbr = int_or_none(asset_file.get('videoBitrate'), 1024) + abr = int_or_none(asset_file.get('audioBitrate'), 1024) + if asset_type == 'video': + tbr = vbr + abr + elif asset_type == 'audio': + tbr = abr + + formats.append({ + 'format_id': join_nonempty('http', tbr), + 'url': unescapeHTML(http_url), + 'vbr': vbr, + 'abr': abr, + 'width': int_or_none(asset_file.get('videoWidth')), + 'height': int_or_none(asset_file.get('videoHeight')), + 'filesize': int_or_none(asset_file.get('filesize')), + 'tbr': tbr, + }) + + def process_asset_files(asset_files): + for asset_file in (asset_files or []): + process_asset_file(asset_file) + + process_asset_files(video_data.get('assetFiles')) + process_asset_file(video_data.get('referenceFile')) + if not formats: + asset_id = video_data.get('assetid') or program.get('assetid') + if asset_id: + process_asset_files(try_get(self._call_api( + app_token, 'asset_file', display_id, { + 'assetid': asset_id, + }, False), lambda x: x['WsAssetFileResponse']['AssetFiles'])) + + m3u8_url = dict_get(video_data, [ + 'm3u8iPadURL', + 'ipadM3u8Url', + 'm3u8AndroidURL', + 'm3u8iPhoneURL', + 'iphoneM3u8Url']) + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + + smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil']) + if smil_url: + transform_source = None + if ref_id == 'nhkworld': + # TODO: figure out if this is something to be fixed in urljoin, + # _parse_smil_formats or keep it here + transform_source = lambda x: x.replace('src="/', 'src="').replace('/media"', '/media/"') + formats.extend(self._extract_smil_formats( + re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id, + transform_source=transform_source, fatal=False)) + + 
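# NOTE (editorial aside, not part of the upstream diff): the sort on the next
# line prefers total bitrate because Piksel asset files often lack usable
# width/height. A minimal sketch of the tbr derivation above, with
# hypothetical values:
#   vbr, abr = 1500, 128      # kbit/s after int_or_none(..., 1024) scaling
#   tbr = vbr + abr           # 1628 when asset_type == 'video'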
self._sort_formats(formats, ('tbr', )) # Incomplete resolution information + + subtitles = {} + for caption in video_data.get('captions', []): + caption_url = caption.get('url') + if caption_url: + subtitles.setdefault(caption.get('locale', 'en'), []).append({ + 'url': caption_url}) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'thumbnail': video_data.get('thumbnailUrl'), + 'timestamp': parse_iso8601(video_data.get('dateadd')), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/pinkbike.py b/plugins/youtube_download/yt_dlp/extractor/pinkbike.py new file mode 100644 index 0000000..9f3501f --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/pinkbike.py @@ -0,0 +1,97 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + remove_end, + remove_start, + str_to_int, + unified_strdate, +) + + +class PinkbikeIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www\.)?pinkbike\.com/video/|es\.pinkbike\.org/i/kvid/kvid-y5\.swf\?id=)(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://www.pinkbike.com/video/402811/', + 'md5': '4814b8ca7651034cd87e3361d5c2155a', + 'info_dict': { + 'id': '402811', + 'ext': 'mp4', + 'title': 'Brandon Semenuk - RAW 100', + 'description': 'Official release: www.redbull.ca/rupertwalker', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 100, + 'upload_date': '20150406', + 'uploader': 'revelco', + 'location': 'Victoria, British Columbia, Canada', + 'view_count': int, + 'comment_count': int, + } + }, { + 'url': 'http://es.pinkbike.org/i/kvid/kvid-y5.swf?id=406629', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'http://www.pinkbike.com/video/%s' % video_id, video_id) + + formats = [] + for _, format_id, src in re.findall( + r'data-quality=((?:\\)?["\'])(.+?)\1[^>]+src=\1(.+?)\1', webpage): + height = int_or_none(self._search_regex( + r'^(\d+)[pP]$', format_id, 'height', default=None)) + formats.append({ + 'url': src, + 'format_id': format_id, + 'height': height, + }) + self._sort_formats(formats) + + title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike') + description = self._html_search_regex( + r'(?s)id="media-description"[^>]*>(.+?)<', + webpage, 'description', default=None) or remove_start( + self._og_search_description(webpage), title + '. 
') + thumbnail = self._og_search_thumbnail(webpage) + duration = int_or_none(self._html_search_meta( + 'video:duration', webpage, 'duration')) + + uploader = self._search_regex( + r'<a[^>]+\brel=["\']author[^>]+>([^<]+)', webpage, + 'uploader', fatal=False) + upload_date = unified_strdate(self._search_regex( + r'class="fullTime"[^>]+title="([^"]+)"', + webpage, 'upload date', fatal=False)) + + location = self._html_search_regex( + r'(?s)<dt>Location</dt>\s*<dd>(.+?)<', + webpage, 'location', fatal=False) + + def extract_count(webpage, label): + return str_to_int(self._search_regex( + r'<span[^>]+class="stat-num"[^>]*>([\d,.]+)</span>\s*<span[^>]+class="stat-label"[^>]*>%s' % label, + webpage, label, fatal=False)) + + view_count = extract_count(webpage, 'Views') + comment_count = extract_count(webpage, 'Comments') + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'upload_date': upload_date, + 'uploader': uploader, + 'location': location, + 'view_count': view_count, + 'comment_count': comment_count, + 'formats': formats + } diff --git a/plugins/youtube_download/yt_dlp/extractor/pinterest.py b/plugins/youtube_download/yt_dlp/extractor/pinterest.py new file mode 100644 index 0000000..80e9cd0 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/pinterest.py @@ -0,0 +1,201 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + determine_ext, + float_or_none, + int_or_none, + try_get, + unified_timestamp, + url_or_none, +) + + +class PinterestBaseIE(InfoExtractor): + _VALID_URL_BASE = r'https?://(?:[^/]+\.)?pinterest\.(?:com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)' + + def _call_api(self, resource, video_id, options): + return self._download_json( + 'https://www.pinterest.com/resource/%sResource/get/' % resource, + video_id, 'Download %s JSON metadata' % resource, query={ + 'data': json.dumps({'options': options}) + })['resource_response'] + + def _extract_video(self, data, extract_formats=True): + video_id = data['id'] + + title = (data.get('title') or data.get('grid_title') or video_id).strip() + + urls = [] + formats = [] + duration = None + if extract_formats: + for format_id, format_dict in data['videos']['video_list'].items(): + if not isinstance(format_dict, dict): + continue + format_url = url_or_none(format_dict.get('url')) + if not format_url or format_url in urls: + continue + urls.append(format_url) + duration = float_or_none(format_dict.get('duration'), scale=1000) + ext = determine_ext(format_url) + if 'hls' in format_id.lower() or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=format_id, fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'width': int_or_none(format_dict.get('width')), + 'height': int_or_none(format_dict.get('height')), + 'duration': duration, + }) + self._sort_formats(formats) + + description = data.get('description') or data.get('description_html') or data.get('seo_description') + timestamp = unified_timestamp(data.get('created_at')) + + def _u(field): + return try_get(data, lambda x: x['closeup_attribution'][field], compat_str) + + uploader = _u('full_name') + uploader_id = _u('id') + + 
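# NOTE (editorial aside): try_get, used by the _u helper above, walks one or
# more lambda paths and returns None on KeyError/IndexError/TypeError, so
# missing Pinterest metadata degrades to None instead of raising. Hedged
# illustration with a hypothetical dict:
#   try_get({'a': {'b': 'x'}}, lambda d: d['a']['b'], compat_str)  # 'x'
#   try_get({'a': {}}, lambda d: d['a']['b'], compat_str)          # None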
repost_count = int_or_none(data.get('repin_count')) + comment_count = int_or_none(data.get('comment_count')) + categories = try_get(data, lambda x: x['pin_join']['visual_annotation'], list) + tags = data.get('hashtags') + + thumbnails = [] + images = data.get('images') + if isinstance(images, dict): + for thumbnail_id, thumbnail in images.items(): + if not isinstance(thumbnail, dict): + continue + thumbnail_url = url_or_none(thumbnail.get('url')) + if not thumbnail_url: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'width': int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + }) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'timestamp': timestamp, + 'thumbnails': thumbnails, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'repost_count': repost_count, + 'comment_count': comment_count, + 'categories': categories, + 'tags': tags, + 'formats': formats, + 'extractor_key': PinterestIE.ie_key(), + } + + +class PinterestIE(PinterestBaseIE): + _VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE + _TESTS = [{ + 'url': 'https://www.pinterest.com/pin/664281013778109217/', + 'md5': '6550c2af85d6d9f3fe3b88954d1577fc', + 'info_dict': { + 'id': '664281013778109217', + 'ext': 'mp4', + 'title': 'Origami', + 'description': 'md5:b9d90ddf7848e897882de9e73344f7dd', + 'duration': 57.7, + 'timestamp': 1593073622, + 'upload_date': '20200625', + 'uploader': 'Love origami -I am Dafei', + 'uploader_id': '586523688879454212', + 'repost_count': 50, + 'comment_count': 0, + 'categories': list, + 'tags': list, + }, + }, { + 'url': 'https://co.pinterest.com/pin/824721750502199491/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + data = self._call_api( + 'Pin', video_id, { + 'field_set_key': 'unauth_react_main_pin', + 'id': video_id, + })['data'] + return self._extract_video(data) + + +class PinterestCollectionIE(PinterestBaseIE): + _VALID_URL = r'%s/(?P<username>[^/]+)/(?P<id>[^/?#&]+)' % PinterestBaseIE._VALID_URL_BASE + _TESTS = [{ + 'url': 'https://www.pinterest.ca/mashal0407/cool-diys/', + 'info_dict': { + 'id': '585890301462791043', + 'title': 'cool diys', + }, + 'playlist_count': 8, + }, { + 'url': 'https://www.pinterest.ca/fudohub/videos/', + 'info_dict': { + 'id': '682858430939307450', + 'title': 'VIDEOS', + }, + 'playlist_mincount': 365, + 'skip': 'Test with extract_formats=False', + }] + + @classmethod + def suitable(cls, url): + return False if PinterestIE.suitable(url) else super( + PinterestCollectionIE, cls).suitable(url) + + def _real_extract(self, url): + username, slug = self._match_valid_url(url).groups() + board = self._call_api( + 'Board', slug, { + 'slug': slug, + 'username': username + })['data'] + board_id = board['id'] + options = { + 'board_id': board_id, + 'page_size': 250, + } + bookmark = None + entries = [] + while True: + if bookmark: + options['bookmarks'] = [bookmark] + board_feed = self._call_api('BoardFeed', board_id, options) + for item in (board_feed.get('data') or []): + if not isinstance(item, dict) or item.get('type') != 'pin': + continue + video_id = item.get('id') + if video_id: + # Some pins may not be available anonymously via pin URL + # video = self._extract_video(item, extract_formats=False) + # video.update({ + # '_type': 'url_transparent', + # 'url': 'https://www.pinterest.com/pin/%s/' % video_id, + # }) + # entries.append(video) + entries.append(self._extract_video(item)) + bookmark = 
board_feed.get('bookmark') + if not bookmark: + break + return self.playlist_result( + entries, playlist_id=board_id, playlist_title=board.get('name')) diff --git a/plugins/youtube_download/yt_dlp/extractor/pixivsketch.py b/plugins/youtube_download/yt_dlp/extractor/pixivsketch.py new file mode 100644 index 0000000..f0ad0b2 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/pixivsketch.py @@ -0,0 +1,122 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + traverse_obj, + unified_timestamp, +) + + +class PixivSketchBaseIE(InfoExtractor): + def _call_api(self, video_id, path, referer, note='Downloading JSON metadata'): + response = self._download_json(f'https://sketch.pixiv.net/api/{path}', video_id, note=note, headers={ + 'Referer': referer, + 'X-Requested-With': referer, + }) + errors = traverse_obj(response, ('errors', ..., 'message')) + if errors: + raise ExtractorError(' '.join(f'{e}.' for e in errors)) + return response.get('data') or {} + + +class PixivSketchIE(PixivSketchBaseIE): + IE_NAME = 'pixiv:sketch' + _VALID_URL = r'https?://sketch\.pixiv\.net/@(?P<uploader_id>[a-zA-Z0-9_-]+)/lives/(?P<id>\d+)/?' + _TESTS = [{ + 'url': 'https://sketch.pixiv.net/@nuhutya/lives/3654620468641830507', + 'info_dict': { + 'id': '7370666691623196569', + 'title': 'まにあえクリスマス!', + 'uploader': 'ぬふちゃ', + 'uploader_id': 'nuhutya', + 'channel_id': '9844815', + 'age_limit': 0, + 'timestamp': 1640351536, + }, + 'skip': True, + }, { + # these two (age_limit > 0) requires you to login on website, but it's actually not required for download + 'url': 'https://sketch.pixiv.net/@namahyou/lives/4393103321546851377', + 'info_dict': { + 'id': '4907995960957946943', + 'title': 'クリスマスなんて知らん🖕', + 'uploader': 'すゃもり', + 'uploader_id': 'suya2mori2', + 'channel_id': '31169300', + 'age_limit': 15, + 'timestamp': 1640347640, + }, + 'skip': True, + }, { + 'url': 'https://sketch.pixiv.net/@8aki/lives/3553803162487249670', + 'info_dict': { + 'id': '1593420639479156945', + 'title': 'おまけ本作業(リョナ有)', + 'uploader': 'おぶい / Obui', + 'uploader_id': 'oving', + 'channel_id': '17606', + 'age_limit': 18, + 'timestamp': 1640330263, + }, + 'skip': True, + }] + + def _real_extract(self, url): + video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id') + data = self._call_api(video_id, f'lives/{video_id}.json', url) + + if not traverse_obj(data, 'is_broadcasting'): + raise ExtractorError(f'This live is offline. Use https://sketch.pixiv.net/@{uploader_id} for ongoing live.', expected=True) + + m3u8_url = traverse_obj(data, ('owner', 'hls_movie', 'url')) + formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': data.get('name'), + 'formats': formats, + 'uploader': traverse_obj(data, ('user', 'name'), ('owner', 'user', 'name')), + 'uploader_id': traverse_obj(data, ('user', 'unique_name'), ('owner', 'user', 'unique_name')), + 'channel_id': str(traverse_obj(data, ('user', 'pixiv_user_id'), ('owner', 'user', 'pixiv_user_id'))), + 'age_limit': 18 if data.get('is_r18') else 15 if data.get('is_r15') else 0, + 'timestamp': unified_timestamp(data.get('created_at')), + 'is_live': True + } + + +class PixivSketchUserIE(PixivSketchBaseIE): + IE_NAME = 'pixiv:sketch:user' + _VALID_URL = r'https?://sketch\.pixiv\.net/@(?P<id>[a-zA-Z0-9_-]+)/?' 
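# NOTE (editorial aside): this unanchored pattern would also match live URLs,
# which is why suitable() below defers to PixivSketchIE first. Resulting
# dispatch, sketched with URLs from the tests:
#   https://sketch.pixiv.net/@nuhutya                          -> PixivSketchUserIE
#   https://sketch.pixiv.net/@nuhutya/lives/3654620468641830507 -> PixivSketchIE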
+ _TESTS = [{ + 'url': 'https://sketch.pixiv.net/@nuhutya', + 'only_matching': True, + }, { + 'url': 'https://sketch.pixiv.net/@namahyou', + 'only_matching': True, + }, { + 'url': 'https://sketch.pixiv.net/@8aki', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return super(PixivSketchUserIE, cls).suitable(url) and not PixivSketchIE.suitable(url) + + def _real_extract(self, url): + user_id = self._match_id(url) + data = self._call_api(user_id, f'lives/users/@{user_id}.json', url) + + if not traverse_obj(data, 'is_broadcasting'): + try: + self._call_api(user_id, 'users/current.json', url, 'Investigating reason for request failure') + except ExtractorError as ex: + if ex.cause and ex.cause.code == 401: + self.raise_login_required(f'Please log in, or use direct link like https://sketch.pixiv.net/@{user_id}/1234567890', method='cookies') + raise ExtractorError('This user is offline', expected=True) + + return self.url_result(f'https://sketch.pixiv.net/@{user_id}/lives/{data["id"]}') diff --git a/plugins/youtube_download/yt_dlp/extractor/pladform.py b/plugins/youtube_download/yt_dlp/extractor/pladform.py new file mode 100644 index 0000000..99ade85 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/pladform.py @@ -0,0 +1,149 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, + parse_qs, + xpath_text, + qualities, +) + + +class PladformIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + (?: + out\.pladform\.ru/player| + static\.pladform\.ru/player\.swf + ) + \?.*\bvideoid=| + video\.pladform\.ru/catalog/video/videoid/ + ) + (?P<id>\d+) + ''' + _TESTS = [{ + 'url': 'http://out.pladform.ru/player?pl=18079&type=html5&videoid=100231282', + 'info_dict': { + 'id': '6216d548e755edae6e8280667d774791', + 'ext': 'mp4', + 'timestamp': 1406117012, + 'title': 'Гарик Мартиросян и Гарик Харламов - Кастинг на концерт ко Дню милиции', + 'age_limit': 0, + 'upload_date': '20140723', + 'thumbnail': str, + 'view_count': int, + 'description': str, + 'category': list, + 'uploader_id': '12082', + 'uploader': 'Comedy Club', + 'duration': 367, + }, + 'expected_warnings': ['HTTP Error 404: Not Found'] + }, { + 'url': 'https://out.pladform.ru/player?pl=64471&videoid=3777899&vk_puid15=0&vk_puid34=0', + 'md5': '53362fac3a27352da20fa2803cc5cd6f', + 'info_dict': { + 'id': '3777899', + 'ext': 'mp4', + 'title': 'СТУДИЯ СОЮЗ • Шоу Студия Союз, 24 выпуск (01.02.2018) Нурлан Сабуров и Слава Комиссаренко', + 'description': 'md5:05140e8bf1b7e2d46e7ba140be57fd95', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 3190, + }, + }, { + 'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0', + 'only_matching': True, + }, { + 'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0', + 'only_matching': True, + }] + + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + video_id = self._match_id(url) + + qs = parse_qs(url) + pl = qs.get('pl', ['1'])[0] + + video = self._download_xml( + 'http://out.pladform.ru/getVideo', video_id, query={ + 'pl': pl, + 'videoid': video_id, + }, fatal=False) + + def fail(text): + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, text), + expected=True) + + if not video: + targetUrl = 
self._request_webpage(url, video_id, note='Resolving final URL').geturl() + if targetUrl == url: + raise ExtractorError('Can\'t parse page') + return self.url_result(targetUrl) + + if video.tag == 'error': + fail(video.text) + + quality = qualities(('ld', 'sd', 'hd')) + + formats = [] + for src in video.findall('./src'): + if src is None: + continue + format_url = src.text + if not format_url: + continue + if src.get('type') == 'hls' or determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': src.text, + 'format_id': src.get('quality'), + 'quality': quality(src.get('quality')), + }) + + if not formats: + error = xpath_text(video, './cap', 'error', default=None) + if error: + fail(error) + + self._sort_formats(formats) + + webpage = self._download_webpage( + 'http://video.pladform.ru/catalog/video/videoid/%s' % video_id, + video_id) + + title = self._og_search_title(webpage, fatal=False) or xpath_text( + video, './/title', 'title', fatal=True) + description = self._search_regex( + r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False) + thumbnail = self._og_search_thumbnail(webpage) or xpath_text( + video, './/cover', 'cover') + + duration = int_or_none(xpath_text(video, './/time', 'duration')) + age_limit = int_or_none(xpath_text(video, './/age18', 'age limit')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'age_limit': age_limit, + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/planetmarathi.py b/plugins/youtube_download/yt_dlp/extractor/planetmarathi.py new file mode 100644 index 0000000..07ac15b --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/planetmarathi.py @@ -0,0 +1,76 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + try_get, + unified_strdate, +) + + +class PlanetMarathiIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)' + _TESTS = [{ + 'url': 'https://www.planetmarathi.com/titles/ek-unad-divas', + 'playlist_mincount': 2, + 'info_dict': { + 'id': 'ek-unad-divas', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'ASSETS-MOVIE-ASSET-01_ek-unad-divas', + 'ext': 'mp4', + 'title': 'ek unad divas', + 'alt_title': 'चित्रपट', + 'description': 'md5:41c7ed6b041c2fea9820a3f3125bd881', + 'season_number': None, + 'episode_number': 1, + 'duration': 5539, + 'upload_date': '20210829', + }, + }] # Trailer skipped + }, { + 'url': 'https://www.planetmarathi.com/titles/baap-beep-baap-season-1', + 'playlist_mincount': 10, + 'info_dict': { + 'id': 'baap-beep-baap-season-1', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'ASSETS-CHARACTER-PROFILE-SEASON-01-ASSET-01_baap-beep-baap-season-1', + 'ext': 'mp4', + 'title': 'Manohar Kanhere', + 'alt_title': 'मनोहर कान्हेरे', + 'description': 'md5:285ed45d5c0ab5522cac9a043354ebc6', + 'season_number': 1, + 'episode_number': 1, + 'duration': 29, + 'upload_date': '20210829', + }, + }] # Trailers, Episodes, other Character profiles skipped + }] + + def _real_extract(self, url): + id = self._match_id(url) + entries = [] + json_data = self._download_json(f'https://www.planetmarathi.com/api/v1/titles/{id}/assets', id)['assets'] + for asset in json_data: + asset_title = asset['mediaAssetName']['en'] + if asset_title == 'Movie': + asset_title = id.replace('-', ' ') 
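# NOTE (editorial aside): generic "Movie" assets take their title from the URL
# slug; a quick sketch of that fallback using a slug from the tests above:
#   'ek-unad-divas'.replace('-', ' ')   # -> 'ek unad divas'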
+ asset_id = f'{asset["sk"]}_{id}'.replace('#', '-') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['mediaAssetURL'], asset_id) + self._sort_formats(formats) + entries.append({ + 'id': asset_id, + 'title': asset_title, + 'alt_title': try_get(asset, lambda x: x['mediaAssetName']['mr']), + 'description': try_get(asset, lambda x: x['mediaAssetDescription']['en']), + 'season_number': asset.get('mediaAssetSeason'), + 'episode_number': asset.get('mediaAssetIndexForAssetType'), + 'duration': asset.get('mediaAssetDurationInSeconds'), + 'upload_date': unified_strdate(asset.get('created')), + 'formats': formats, + 'subtitles': subtitles, + }) + return self.playlist_result(entries, playlist_id=id) diff --git a/plugins/youtube_download/yt_dlp/extractor/platzi.py b/plugins/youtube_download/yt_dlp/extractor/platzi.py new file mode 100644 index 0000000..23c8256 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/platzi.py @@ -0,0 +1,224 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_str, +) +from ..utils import ( + clean_html, + ExtractorError, + int_or_none, + str_or_none, + try_get, + url_or_none, + urlencode_postdata, + urljoin, +) + + +class PlatziBaseIE(InfoExtractor): + _LOGIN_URL = 'https://platzi.com/login/' + _NETRC_MACHINE = 'platzi' + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'email': username, + 'password': password, + }) + + urlh = self._request_webpage( + self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata(login_form), + headers={'Referer': self._LOGIN_URL}) + + # login succeeded + if 'platzi.com/login' not in urlh.geturl(): + return + + login_error = self._webpage_read_content( + urlh, self._LOGIN_URL, None, 'Downloading login error page') + + login = self._parse_json( + self._search_regex( + r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', login_error, 'login'), + None) + + for kind in ('error', 'password', 'nonFields'): + error = str_or_none(login.get('%sError' % kind)) + if error: + raise ExtractorError( + 'Unable to login: %s' % error, expected=True) + raise ExtractorError('Unable to log in') + + +class PlatziIE(PlatziBaseIE): + _VALID_URL = r'''(?x) + https?:// + (?: + platzi\.com/clases| # es version + courses\.platzi\.com/classes # en version + )/[^/]+/(?P<id>\d+)-[^/?\#&]+ + ''' + + _TESTS = [{ + 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/', + 'md5': '8f56448241005b561c10f11a595b37e3', + 'info_dict': { + 'id': '12074', + 'ext': 'mp4', + 'title': 'Creando nuestra primera página', + 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc', + 'duration': 420, + }, + 'skip': 'Requires platzi account credentials', + }, { + 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/', + 'info_dict': { + 'id': '13430', + 'ext': 'mp4', + 'title': 'Background', + 'description': 'md5:49c83c09404b15e6e71defaf87f6b305', + 'duration': 360, + }, + 'skip': 'Requires platzi account credentials', + 'params': { + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + lecture_id = self._match_id(url) + + webpage = self._download_webpage(url, lecture_id) + + data = self._parse_json( + self._search_regex( + # 
client_data may contain "};" so that we have to try more + # strict regex first + (r'client_data\s*=\s*({.+?})\s*;\s*\n', + r'client_data\s*=\s*({.+?})\s*;'), + webpage, 'client data'), + lecture_id) + + material = data['initialState']['material'] + desc = material['description'] + title = desc['title'] + + formats = [] + for server_id, server in material['videos'].items(): + if not isinstance(server, dict): + continue + for format_id in ('hls', 'dash'): + format_url = url_or_none(server.get(format_id)) + if not format_url: + continue + if format_id == 'hls': + formats.extend(self._extract_m3u8_formats( + format_url, lecture_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id=format_id, + note='Downloading %s m3u8 information' % server_id, + fatal=False)) + elif format_id == 'dash': + formats.extend(self._extract_mpd_formats( + format_url, lecture_id, mpd_id=format_id, + note='Downloading %s MPD manifest' % server_id, + fatal=False)) + self._sort_formats(formats) + + content = str_or_none(desc.get('content')) + description = (clean_html(compat_b64decode(content).decode('utf-8')) + if content else None) + duration = int_or_none(material.get('duration'), invscale=60) + + return { + 'id': lecture_id, + 'title': title, + 'description': description, + 'duration': duration, + 'formats': formats, + } + + +class PlatziCourseIE(PlatziBaseIE): + _VALID_URL = r'''(?x) + https?:// + (?: + platzi\.com/clases| # es version + courses\.platzi\.com/classes # en version + )/(?P<id>[^/?\#&]+) + ''' + _TESTS = [{ + 'url': 'https://platzi.com/clases/next-js/', + 'info_dict': { + 'id': '1311', + 'title': 'Curso de Next.js', + }, + 'playlist_count': 22, + }, { + 'url': 'https://courses.platzi.com/classes/communication-codestream/', + 'info_dict': { + 'id': '1367', + 'title': 'Codestream Course', + }, + 'playlist_count': 14, + }] + + @classmethod + def suitable(cls, url): + return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url) + + def _real_extract(self, url): + course_name = self._match_id(url) + + webpage = self._download_webpage(url, course_name) + + props = self._parse_json( + self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'), + course_name)['initialProps'] + + entries = [] + for chapter_num, chapter in enumerate(props['concepts'], 1): + if not isinstance(chapter, dict): + continue + materials = chapter.get('materials') + if not materials or not isinstance(materials, list): + continue + chapter_title = chapter.get('title') + chapter_id = str_or_none(chapter.get('id')) + for material in materials: + if not isinstance(material, dict): + continue + if material.get('material_type') != 'video': + continue + video_url = urljoin(url, material.get('url')) + if not video_url: + continue + entries.append({ + '_type': 'url_transparent', + 'url': video_url, + 'title': str_or_none(material.get('name')), + 'id': str_or_none(material.get('id')), + 'ie_key': PlatziIE.ie_key(), + 'chapter': chapter_title, + 'chapter_number': chapter_num, + 'chapter_id': chapter_id, + }) + + course_id = compat_str(try_get(props, lambda x: x['course']['id'])) + course_title = try_get(props, lambda x: x['course']['name'], compat_str) + + return self.playlist_result(entries, course_id, course_title) diff --git a/plugins/youtube_download/yt_dlp/extractor/playfm.py b/plugins/youtube_download/yt_dlp/extractor/playfm.py new file mode 100644 index 0000000..4298cbe --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/playfm.py @@ -0,0 +1,74 @@ +# coding: utf-8 +from __future__ import unicode_literals 
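# NOTE (editorial aside, refers to PlatziCourseIE just above): each playlist
# entry uses '_type': 'url_transparent', which delegates the heavy extraction
# to PlatziIE while the course pre-fills chapter metadata. Hypothetical entry:
#   {'_type': 'url_transparent', 'url': video_url, 'ie_key': 'Platzi',
#    'chapter': chapter_title, 'chapter_number': 1}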
+ + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + int_or_none, + parse_iso8601, +) + + +class PlayFMIE(InfoExtractor): + IE_NAME = 'play.fm' + _VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])' + + _TEST = { + 'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12', + 'md5': 'c505f8307825a245d0c7ad1850001f22', + 'info_dict': { + 'id': '71276', + 'ext': 'mp3', + 'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12', + 'description': '', + 'duration': 5627, + 'timestamp': 1406033781, + 'upload_date': '20140722', + 'uploader': 'Dan Drastic', + 'uploader_id': '71170', + 'view_count': int, + 'comment_count': int, + }, + } + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + video_id = mobj.group('id') + slug = mobj.group('slug') + + recordings = self._download_json( + 'http://v2api.play.fm/recordings/slug/%s' % slug, video_id) + + error = recordings.get('error') + if isinstance(error, dict): + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error.get('message')), + expected=True) + + audio_url = recordings['audio'] + video_id = compat_str(recordings.get('id') or video_id) + title = recordings['title'] + description = recordings.get('description') + duration = int_or_none(recordings.get('recordingDuration')) + timestamp = parse_iso8601(recordings.get('created_at')) + uploader = recordings.get('page', {}).get('title') + uploader_id = compat_str(recordings.get('page', {}).get('id')) + view_count = int_or_none(recordings.get('playCount')) + comment_count = int_or_none(recordings.get('commentCount')) + categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')] + + return { + 'id': video_id, + 'url': audio_url, + 'title': title, + 'description': description, + 'duration': duration, + 'timestamp': timestamp, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'view_count': view_count, + 'comment_count': comment_count, + 'categories': categories, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/playplustv.py b/plugins/youtube_download/yt_dlp/extractor/playplustv.py new file mode 100644 index 0000000..fd72a37 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/playplustv.py @@ -0,0 +1,108 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + clean_html, + ExtractorError, + int_or_none, + PUTRequest, +) + + +class PlayPlusTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?playplus\.(?:com|tv)/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})' + _TEST = { + 'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e', + 'md5': 'd078cb89d7ab6b9df37ce23c647aef72', + 'info_dict': { + 'id': 'db8d274a5163424e967f35a30ddafb8e', + 'ext': 'mp4', + 'title': 'Capítulo 179 - Final', + 'description': 'md5:01085d62d8033a1e34121d3c3cabc838', + 'timestamp': 1529992740, + 'upload_date': '20180626', + }, + 'skip': 'Requires account credential', + } + _NETRC_MACHINE = 'playplustv' + _GEO_COUNTRIES = ['BR'] + _token = None + _profile_id = None + + def _call_api(self, resource, video_id=None, query=None): + return self._download_json('https://api.playplus.tv/api/media/v2/get' + resource, video_id, headers={ + 'Authorization': 'Bearer ' + self._token, + }, query=query) + + def _real_initialize(self): + email, password = 
self._get_login_info() + if email is None: + self.raise_login_required() + + req = PUTRequest( + 'https://api.playplus.tv/api/web/login', json.dumps({ + 'email': email, + 'password': password, + }).encode(), { + 'Content-Type': 'application/json; charset=utf-8', + }) + + try: + self._token = self._download_json(req, None)['token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + raise ExtractorError(self._parse_json( + e.cause.read(), None)['errorMessage'], expected=True) + raise + + self._profile = self._call_api('Profiles')['list'][0]['_id'] + + def _real_extract(self, url): + project_id, media_id = self._match_valid_url(url).groups() + media = self._call_api( + 'Media', media_id, { + 'profileId': self._profile, + 'projectId': project_id, + 'mediaId': media_id, + })['obj'] + title = media['title'] + + formats = [] + for f in media.get('files', []): + f_url = f.get('url') + if not f_url: + continue + file_info = f.get('fileInfo') or {} + formats.append({ + 'url': f_url, + 'width': int_or_none(file_info.get('width')), + 'height': int_or_none(file_info.get('height')), + }) + self._sort_formats(formats) + + thumbnails = [] + for thumb in media.get('thumbs', []): + thumb_url = thumb.get('url') + if not thumb_url: + continue + thumbnails.append({ + 'url': thumb_url, + 'width': int_or_none(thumb.get('width')), + 'height': int_or_none(thumb.get('height')), + }) + + return { + 'id': media_id, + 'title': title, + 'formats': formats, + 'thumbnails': thumbnails, + 'description': clean_html(media.get('description')) or media.get('shortDescription'), + 'timestamp': int_or_none(media.get('publishDate'), 1000), + 'view_count': int_or_none(media.get('numberOfViews')), + 'comment_count': int_or_none(media.get('numberOfComments')), + 'tags': media.get('tags'), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/plays.py b/plugins/youtube_download/yt_dlp/extractor/plays.py new file mode 100644 index 0000000..ddfc6f1 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/plays.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class PlaysTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?plays\.tv/(?:video|embeds)/(?P<id>[0-9a-f]{18})' + _TESTS = [{ + 'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', + 'md5': 'dfeac1198506652b5257a62762cec7bc', + 'info_dict': { + 'id': '56af17f56c95335490', + 'ext': 'mp4', + 'title': 'Bjergsen - When you outplay the Azir wall', + 'description': 'Posted by Bjergsen', + } + }, { + 'url': 'https://plays.tv/embeds/56af17f56c95335490', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'https://plays.tv/video/%s' % video_id, video_id) + + info = self._search_json_ld(webpage, video_id,) + + mpd_url, sources = re.search( + r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>', + webpage).groups() + formats = self._extract_mpd_formats( + self._proto_relative_url(mpd_url), video_id, mpd_id='DASH') + for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources): + formats.append({ + 'url': self._proto_relative_url(format_url), + 'format_id': 'http-' + format_id, + 'height': int_or_none(height), + }) + self._sort_formats(formats) + + info.update({ + 'id': video_id, + 'description': self._og_search_description(webpage), + 'thumbnail': 
info.get('thumbnail') or self._og_search_thumbnail(webpage), + 'formats': formats, + }) + + return info diff --git a/plugins/youtube_download/yt_dlp/extractor/playstuff.py b/plugins/youtube_download/yt_dlp/extractor/playstuff.py new file mode 100644 index 0000000..5a32995 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/playstuff.py @@ -0,0 +1,65 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + smuggle_url, + try_get, +) + + +class PlayStuffIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?play\.stuff\.co\.nz/details/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://play.stuff.co.nz/details/608778ac1de1c4001a3fa09a', + 'md5': 'c82d3669e5247c64bc382577843e5bd0', + 'info_dict': { + 'id': '6250584958001', + 'ext': 'mp4', + 'title': 'Episode 1: Rotorua/Mt Maunganui/Tauranga', + 'description': 'md5:c154bafb9f0dd02d01fd4100fb1c1913', + 'uploader_id': '6005208634001', + 'timestamp': 1619491027, + 'upload_date': '20210427', + }, + 'add_ie': ['BrightcoveNew'], + }, { + # geo restricted, bypassable + 'url': 'https://play.stuff.co.nz/details/_6155660351001', + 'only_matching': True, + }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + state = self._parse_json( + self._search_regex( + r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'state'), + video_id) + + account_id = try_get( + state, lambda x: x['configurations']['accountId'], + compat_str) or '6005208634001' + player_id = try_get( + state, lambda x: x['configurations']['playerId'], + compat_str) or 'default' + + entries = [] + for item_id, video in state['items'].items(): + if not isinstance(video, dict): + continue + asset_id = try_get( + video, lambda x: x['content']['attributes']['assetId'], + compat_str) + if not asset_id: + continue + entries.append(self.url_result( + smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, asset_id), + {'geo_countries': ['NZ']}), + 'BrightcoveNew', video_id)) + + return self.playlist_result(entries, video_id) diff --git a/plugins/youtube_download/yt_dlp/extractor/playtvak.py b/plugins/youtube_download/yt_dlp/extractor/playtvak.py new file mode 100644 index 0000000..30c8a59 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/playtvak.py @@ -0,0 +1,189 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_urlparse, + compat_urllib_parse_urlencode, +) +from ..utils import ( + ExtractorError, + int_or_none, + parse_iso8601, + qualities, +) + + +class PlaytvakIE(InfoExtractor): + IE_DESC = 'Playtvak.cz, iDNES.cz and Lidovky.cz' + _VALID_URL = r'https?://(?:.+?\.)?(?:playtvak|idnes|lidovky|metro)\.cz/.*\?(?:c|idvideo)=(?P<id>[^&]+)' + _TESTS = [{ + 'url': 'http://www.playtvak.cz/vyzente-vosy-a-srsne-ze-zahrady-dn5-/hodinovy-manzel.aspx?c=A150730_150323_hodinovy-manzel_kuko', + 'md5': '4525ae312c324b4be2f4603cc78ceb4a', + 'info_dict': { + 'id': 'A150730_150323_hodinovy-manzel_kuko', + 'ext': 'mp4', + 'title': 'Vyžeňte vosy a sršně ze zahrady', + 'description': 'md5:4436e61b7df227a093778efb7e373571', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', + 'duration': 279, + 'timestamp': 1438732860, + 'upload_date': '20150805', + 'is_live': False, + } + }, { # live video test + 'url': 
'http://slowtv.playtvak.cz/planespotting-0pr-/planespotting.aspx?c=A150624_164934_planespotting_cat', + 'info_dict': { + 'id': 'A150624_164934_planespotting_cat', + 'ext': 'flv', + 'title': 're:^Planespotting [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze', + 'is_live': True, + }, + 'params': { + 'skip_download': True, # requires rtmpdump + }, + }, { # another live stream, this one without Misc.videoFLV + 'url': 'https://slowtv.playtvak.cz/zive-sledujte-vlaky-v-primem-prenosu-dwi-/hlavni-nadrazi.aspx?c=A151218_145728_hlavni-nadrazi_plap', + 'info_dict': { + 'id': 'A151218_145728_hlavni-nadrazi_plap', + 'ext': 'flv', + 'title': 're:^Hlavní nádraží [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'is_live': True, + }, + 'params': { + 'skip_download': True, # requires rtmpdump + }, + }, { # idnes.cz + 'url': 'http://zpravy.idnes.cz/pes-zavreny-v-aute-rozbijeni-okynek-v-aute-fj5-/domaci.aspx?c=A150809_104116_domaci_pku', + 'md5': '819832ba33cd7016e58a6658577fe289', + 'info_dict': { + 'id': 'A150809_104116_domaci_pku', + 'ext': 'mp4', + 'title': 'Zavřeli jsme mraženou pizzu do auta. Upekla se', + 'description': 'md5:01e73f02329e2e5760bd5eed4d42e3c2', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', + 'duration': 39, + 'timestamp': 1438969140, + 'upload_date': '20150807', + 'is_live': False, + } + }, { # lidovky.cz + 'url': 'http://www.lidovky.cz/dalsi-demonstrace-v-praze-o-migraci-duq-/video.aspx?c=A150808_214044_ln-video_ELE', + 'md5': 'c7209ac4ba9d234d4ad5bab7485bcee8', + 'info_dict': { + 'id': 'A150808_214044_ln-video_ELE', + 'ext': 'mp4', + 'title': 'Táhni! Demonstrace proti imigrantům budila emoce', + 'description': 'md5:97c81d589a9491fbfa323c9fa3cca72c', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', + 'timestamp': 1439052180, + 'upload_date': '20150808', + 'is_live': False, + } + }, { # metro.cz + 'url': 'http://www.metro.cz/video-pod-billboardem-se-na-vltavske-roztocil-kolotoc-deti-vozil-jen-par-hodin-1hx-/metro-extra.aspx?c=A141111_173251_metro-extra_row', + 'md5': '84fc1deedcac37b7d4a6ccae7c716668', + 'info_dict': { + 'id': 'A141111_173251_metro-extra_row', + 'ext': 'mp4', + 'title': 'Recesisté udělali z billboardu kolotoč', + 'description': 'md5:7369926049588c3989a66c9c1a043c4c', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', + 'timestamp': 1415725500, + 'upload_date': '20141111', + 'is_live': False, + } + }, { + 'url': 'http://www.playtvak.cz/embed.aspx?idvideo=V150729_141549_play-porad_kuko', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + info_url = self._html_search_regex( + r'Misc\.video(?:FLV)?\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url') + + parsed_url = compat_urlparse.urlparse(info_url) + + qs = compat_urlparse.parse_qs(parsed_url.query) + qs.update({ + 'reklama': ['0'], + 'type': ['js'], + }) + + info_url = compat_urlparse.urlunparse( + parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True))) + + json_info = self._download_json( + info_url, video_id, + transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1]) + + item = None + for i in json_info['items']: + if i.get('type') == 'video' or i.get('type') == 'stream': + item = i + break + if not item: + raise ExtractorError('No suitable stream found') + + quality = qualities(('low', 'middle', 'high')) + + formats = [] + for fmt in item['video']: + video_url = fmt.get('file') + if not video_url: + continue + + 
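# NOTE (editorial aside): each entry in item['video'] carries 'format' and
# 'quality' keys; qualities() turns the ordered ('low', 'middle', 'high')
# tuple into a ranking function used for format sorting. Minimal sketch:
#   quality = qualities(('low', 'middle', 'high'))
#   quality('high')   # 2, outranks quality('low') == 0; unknown keys -> -1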
format_ = fmt['format'] + format_id = '%s_%s' % (format_, fmt['quality']) + preference = None + + if format_ in ('mp4', 'webm'): + ext = format_ + elif format_ == 'rtmp': + ext = 'flv' + elif format_ == 'apple': + ext = 'mp4' + # Some streams have mp3 audio which does not play + # well with ffmpeg filter aac_adtstoasc + preference = -10 + elif format_ == 'adobe': # f4m manifest fails with 404 in 80% of requests + continue + else: # Other formats not supported yet + continue + + formats.append({ + 'url': video_url, + 'ext': ext, + 'format_id': format_id, + 'quality': quality(fmt.get('quality')), + 'preference': preference, + }) + self._sort_formats(formats) + + title = item['title'] + is_live = item['type'] == 'stream' + description = self._og_search_description(webpage, default=None) or self._html_search_meta( + 'description', webpage, 'description', default=None) + timestamp = None + duration = None + if not is_live: + duration = int_or_none(item.get('length')) + timestamp = item.get('published') + if timestamp: + timestamp = parse_iso8601(timestamp[:-5]) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': item.get('image'), + 'duration': duration, + 'timestamp': timestamp, + 'is_live': is_live, + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/playvid.py b/plugins/youtube_download/yt_dlp/extractor/playvid.py new file mode 100644 index 0000000..4aef186 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/playvid.py @@ -0,0 +1,99 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_unquote, + compat_urllib_parse_unquote_plus, +) +from ..utils import ( + clean_html, + ExtractorError, +) + + +class PlayvidIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' + _TESTS = [{ + 'url': 'http://www.playvid.com/watch/RnmBNgtrrJu', + 'md5': 'ffa2f6b2119af359f544388d8c01eb6c', + 'info_dict': { + 'id': 'RnmBNgtrrJu', + 'ext': 'mp4', + 'title': 'md5:9256d01c6317e3f703848b5906880dc8', + 'duration': 82, + 'age_limit': 18, + }, + 'skip': 'Video removed due to ToS', + }, { + 'url': 'http://www.playvid.com/watch/hwb0GpNkzgH', + 'md5': '39d49df503ad7b8f23a4432cbf046477', + 'info_dict': { + 'id': 'hwb0GpNkzgH', + 'ext': 'mp4', + 'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park', + 'age_limit': 18, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + m_error = re.search( + r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage) + if m_error: + raise ExtractorError(clean_html(m_error.group('msg')), expected=True) + + video_title = None + duration = None + video_thumbnail = None + formats = [] + + # most of the information is stored in the flashvars + flashvars = self._html_search_regex( + r'flashvars="(.+?)"', webpage, 'flashvars') + + infos = compat_urllib_parse_unquote(flashvars).split(r'&') + for info in infos: + videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info) + if videovars_match: + key = videovars_match.group(1) + val = videovars_match.group(2) + + if key == 'title': + video_title = compat_urllib_parse_unquote_plus(val) + if key == 'duration': + try: + duration = int(val) + except ValueError: + pass + if key == 'big_thumb': + video_thumbnail = val + + videourl_match = re.match( + 
r'^video_urls\]\[(?P<resolution>[0-9]+)p', key) + if videourl_match: + height = int(videourl_match.group('resolution')) + formats.append({ + 'height': height, + 'url': val, + }) + self._sort_formats(formats) + + # Extract title - should be in the flashvars; if not, look elsewhere + if video_title is None: + video_title = self._html_search_regex( + r'<title>(.*?)</title', webpage, 'title') + + return { + 'id': video_id, + 'formats': formats, + 'title': video_title, + 'thumbnail': video_thumbnail, + 'duration': duration, + 'description': None, + 'age_limit': 18 + } diff --git a/plugins/youtube_download/yt_dlp/extractor/playwire.py b/plugins/youtube_download/yt_dlp/extractor/playwire.py new file mode 100644 index 0000000..9c9e597 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/playwire.py @@ -0,0 +1,74 @@ +from __future__ import unicode_literals + + +from .common import InfoExtractor +from ..utils import ( + dict_get, + float_or_none, +) + + +class PlaywireIE(InfoExtractor): + _VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json', + 'md5': 'e6398701e3595888125729eaa2329ed9', + 'info_dict': { + 'id': '3353705', + 'ext': 'mp4', + 'title': 'S04_RM_UCL_Rus', + 'thumbnail': r're:^https?://.*\.png$', + 'duration': 145.94, + }, + }, { + # m3u8 in f4m + 'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json', + 'info_dict': { + 'id': '4840492', + 'ext': 'mp4', + 'title': 'ITV EL SHOW FULL', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # Multiple resolutions while bitrates missing + 'url': 'http://cdn.playwire.com/11625/embed/85228.html', + 'only_matching': True, + }, { + 'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json', + 'only_matching': True, + }, { + 'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + publisher_id, video_id = mobj.group('publisher_id'), mobj.group('id') + + player = self._download_json( + 'http://config.playwire.com/%s/videos/v2/%s/zeus.json' % (publisher_id, video_id), + video_id) + + title = player['settings']['title'] + duration = float_or_none(player.get('duration'), 1000) + + content = player['content'] + thumbnail = content.get('poster') + src = content['media']['f4m'] + + formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls') + for a_format in formats: + if not dict_get(a_format, ['tbr', 'width', 'height']): + a_format['quality'] = 1 if '-hd.' 
in a_format['url'] else 0 + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/pluralsight.py b/plugins/youtube_download/yt_dlp/extractor/pluralsight.py new file mode 100644 index 0000000..801057e --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/pluralsight.py @@ -0,0 +1,502 @@ +from __future__ import unicode_literals + +import collections +import json +import os +import random +import re + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urlparse, +) +from ..utils import ( + dict_get, + ExtractorError, + float_or_none, + int_or_none, + parse_duration, + parse_qs, + qualities, + srt_subtitles_timecode, + try_get, + update_url_query, + urlencode_postdata, +) + + +class PluralsightBaseIE(InfoExtractor): + _API_BASE = 'https://app.pluralsight.com' + + _GRAPHQL_EP = '%s/player/api/graphql' % _API_BASE + _GRAPHQL_HEADERS = { + 'Content-Type': 'application/json;charset=UTF-8', + } + _GRAPHQL_COURSE_TMPL = ''' +query BootstrapPlayer { + rpc { + bootstrapPlayer { + profile { + firstName + lastName + email + username + userHandle + authed + isAuthed + plan + } + course(courseId: "%s") { + name + title + courseHasCaptions + translationLanguages { + code + name + } + supportsWideScreenVideoFormats + timestamp + modules { + name + title + duration + formattedDuration + author + authorized + clips { + authorized + clipId + duration + formattedDuration + id + index + moduleIndex + moduleTitle + name + title + watched + } + } + } + } + } +}''' + + def _download_course(self, course_id, url, display_id): + try: + return self._download_course_rpc(course_id, url, display_id) + except ExtractorError: + # Old API fallback + return self._download_json( + 'https://app.pluralsight.com/player/user/api/v1/player/payload', + display_id, data=urlencode_postdata({'courseId': course_id}), + headers={'Referer': url}) + + def _download_course_rpc(self, course_id, url, display_id): + response = self._download_json( + self._GRAPHQL_EP, display_id, data=json.dumps({ + 'query': self._GRAPHQL_COURSE_TMPL % course_id, + 'variables': {} + }).encode('utf-8'), headers=self._GRAPHQL_HEADERS) + + course = try_get( + response, lambda x: x['data']['rpc']['bootstrapPlayer']['course'], + dict) + if course: + return course + + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, response['error']['message']), + expected=True) + + +class PluralsightIE(PluralsightBaseIE): + IE_NAME = 'pluralsight' + _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:training/)?player\?' 
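# NOTE (editorial aside): _download_course above tries the GraphQL player API
# first and only falls back to the legacy payload endpoint when it raises.
# Shape of that control flow, sketched with hypothetical helper names:
#   try:
#       course = download_course_rpc(course_id)       # GraphQL endpoint
#   except ExtractorError:
#       course = download_legacy_payload(course_id)   # old player API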
+ _LOGIN_URL = 'https://app.pluralsight.com/id/' + + _NETRC_MACHINE = 'pluralsight' + + _TESTS = [{ + 'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas', + 'md5': '4d458cf5cf4c593788672419a8dd4cf8', + 'info_dict': { + 'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04', + 'ext': 'mp4', + 'title': 'Demo Monitoring', + 'duration': 338, + }, + 'skip': 'Requires pluralsight account credentials', + }, { + 'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live', + 'only_matching': True, + }, { + # available without pluralsight account + 'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started', + 'only_matching': True, + }, { + 'url': 'https://app.pluralsight.com/player?course=ccna-intro-networking&author=ross-bagurdes&name=ccna-intro-networking-m06&clip=0', + 'only_matching': True, + }] + + GRAPHQL_VIEWCLIP_TMPL = ''' +query viewClip { + viewClip(input: { + author: "%(author)s", + clipIndex: %(clipIndex)d, + courseName: "%(courseName)s", + includeCaptions: %(includeCaptions)s, + locale: "%(locale)s", + mediaType: "%(mediaType)s", + moduleName: "%(moduleName)s", + quality: "%(quality)s" + }) { + urls { + url + cdn + rank + source + }, + status + } +}''' + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'Username': username, + 'Password': password, + }) + + post_url = self._search_regex( + r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, + 'post url', default=self._LOGIN_URL, group='url') + + if not post_url.startswith('http'): + post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + + response = self._download_webpage( + post_url, None, 'Logging in', + data=urlencode_postdata(login_form), + headers={'Content-Type': 'application/x-www-form-urlencoded'}) + + error = self._search_regex( + r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>', + response, 'error message', default=None) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + + if all(not re.search(p, response) for p in ( + r'__INITIAL_STATE__', r'["\']currentUser["\']', + # new layout? + r'>\s*Sign out\s*<')): + BLOCKED = 'Your account has been blocked due to suspicious activity' + if BLOCKED in response: + raise ExtractorError( + 'Unable to login: %s' % BLOCKED, expected=True) + MUST_AGREE = 'To continue using Pluralsight, you must agree to' + if any(p in response for p in (MUST_AGREE, '>Disagree<', '>Agree<')): + raise ExtractorError( + 'Unable to login: %s some documents. Go to pluralsight.com, ' + 'log in and agree with what Pluralsight requires.' 
+ % MUST_AGREE, expected=True) + + raise ExtractorError('Unable to log in') + + def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_id): + captions = None + if clip_id: + captions = self._download_json( + '%s/transcript/api/v1/caption/json/%s/%s' + % (self._API_BASE, clip_id, lang), video_id, + 'Downloading captions JSON', 'Unable to download captions JSON', + fatal=False) + if not captions: + captions_post = { + 'a': author, + 'cn': int(clip_idx), + 'lc': lang, + 'm': name, + } + captions = self._download_json( + '%s/player/retrieve-captions' % self._API_BASE, video_id, + 'Downloading captions JSON', 'Unable to download captions JSON', + fatal=False, data=json.dumps(captions_post).encode('utf-8'), + headers={'Content-Type': 'application/json;charset=utf-8'}) + if captions: + return { + lang: [{ + 'ext': 'json', + 'data': json.dumps(captions), + }, { + 'ext': 'srt', + 'data': self._convert_subtitles(duration, captions), + }] + } + + @staticmethod + def _convert_subtitles(duration, subs): + srt = '' + TIME_OFFSET_KEYS = ('displayTimeOffset', 'DisplayTimeOffset') + TEXT_KEYS = ('text', 'Text') + for num, current in enumerate(subs): + current = subs[num] + start, text = ( + float_or_none(dict_get(current, TIME_OFFSET_KEYS, skip_false_values=False)), + dict_get(current, TEXT_KEYS)) + if start is None or text is None: + continue + end = duration if num == len(subs) - 1 else float_or_none( + dict_get(subs[num + 1], TIME_OFFSET_KEYS, skip_false_values=False)) + if end is None: + continue + srt += os.linesep.join( + ( + '%d' % num, + '%s --> %s' % ( + srt_subtitles_timecode(start), + srt_subtitles_timecode(end)), + text, + os.linesep, + )) + return srt + + def _real_extract(self, url): + qs = parse_qs(url) + + author = qs.get('author', [None])[0] + name = qs.get('name', [None])[0] + clip_idx = qs.get('clip', [None])[0] + course_name = qs.get('course', [None])[0] + + if any(not f for f in (author, name, clip_idx, course_name,)): + raise ExtractorError('Invalid URL', expected=True) + + display_id = '%s-%s' % (name, clip_idx) + + course = self._download_course(course_name, url, display_id) + + collection = course['modules'] + + clip = None + + for module_ in collection: + if name in (module_.get('moduleName'), module_.get('name')): + for clip_ in module_.get('clips', []): + clip_index = clip_.get('clipIndex') + if clip_index is None: + clip_index = clip_.get('index') + if clip_index is None: + continue + if compat_str(clip_index) == clip_idx: + clip = clip_ + break + + if not clip: + raise ExtractorError('Unable to resolve clip') + + title = clip['title'] + clip_id = clip.get('clipName') or clip.get('name') or clip['clipId'] + + QUALITIES = { + 'low': {'width': 640, 'height': 480}, + 'medium': {'width': 848, 'height': 640}, + 'high': {'width': 1024, 'height': 768}, + 'high-widescreen': {'width': 1280, 'height': 720}, + } + + QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen',) + quality_key = qualities(QUALITIES_PREFERENCE) + + AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities']) + + ALLOWED_QUALITIES = ( + AllowedQuality('webm', ['high', ]), + AllowedQuality('mp4', ['low', 'medium', 'high', ]), + ) + + # Some courses also offer widescreen resolution for high quality (see + # https://github.com/ytdl-org/youtube-dl/issues/7766) + widescreen = course.get('supportsWideScreenVideoFormats') is True + best_quality = 'high-widescreen' if widescreen else 'high' + if widescreen: + for allowed_quality in ALLOWED_QUALITIES: + 
allowed_quality.qualities.append(best_quality) + + # In order to minimize the number of calls to ViewClip API and reduce + # the probability of being throttled or banned by Pluralsight we will request + # only single format until formats listing was explicitly requested. + if self.get_param('listformats', False): + allowed_qualities = ALLOWED_QUALITIES + else: + def guess_allowed_qualities(): + req_format = self.get_param('format') or 'best' + req_format_split = req_format.split('-', 1) + if len(req_format_split) > 1: + req_ext, req_quality = req_format_split + req_quality = '-'.join(req_quality.split('-')[:2]) + for allowed_quality in ALLOWED_QUALITIES: + if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities: + return (AllowedQuality(req_ext, (req_quality, )), ) + req_ext = 'webm' if self.get_param('prefer_free_formats') else 'mp4' + return (AllowedQuality(req_ext, (best_quality, )), ) + allowed_qualities = guess_allowed_qualities() + + formats = [] + for ext, qualities_ in allowed_qualities: + for quality in qualities_: + f = QUALITIES[quality].copy() + clip_post = { + 'author': author, + 'includeCaptions': 'false', + 'clipIndex': int(clip_idx), + 'courseName': course_name, + 'locale': 'en', + 'moduleName': name, + 'mediaType': ext, + 'quality': '%dx%d' % (f['width'], f['height']), + } + format_id = '%s-%s' % (ext, quality) + + try: + viewclip = self._download_json( + self._GRAPHQL_EP, display_id, + 'Downloading %s viewclip graphql' % format_id, + data=json.dumps({ + 'query': self.GRAPHQL_VIEWCLIP_TMPL % clip_post, + 'variables': {} + }).encode('utf-8'), + headers=self._GRAPHQL_HEADERS)['data']['viewClip'] + except ExtractorError: + # Still works but most likely will go soon + viewclip = self._download_json( + '%s/video/clips/viewclip' % self._API_BASE, display_id, + 'Downloading %s viewclip JSON' % format_id, fatal=False, + data=json.dumps(clip_post).encode('utf-8'), + headers={'Content-Type': 'application/json;charset=utf-8'}) + + # Pluralsight tracks multiple sequential calls to ViewClip API and start + # to return 429 HTTP errors after some time (see + # https://github.com/ytdl-org/youtube-dl/pull/6989). Moreover it may even lead + # to account ban (see https://github.com/ytdl-org/youtube-dl/issues/6842). + # To somewhat reduce the probability of these consequences + # we will sleep random amount of time before each call to ViewClip. + self._sleep( + random.randint(5, 10), display_id, + '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling') + + if not viewclip: + continue + + clip_urls = viewclip.get('urls') + if not isinstance(clip_urls, list): + continue + + for clip_url_data in clip_urls: + clip_url = clip_url_data.get('url') + if not clip_url: + continue + cdn = clip_url_data.get('cdn') + clip_f = f.copy() + clip_f.update({ + 'url': clip_url, + 'ext': ext, + 'format_id': '%s-%s' % (format_id, cdn) if cdn else format_id, + 'quality': quality_key(quality), + 'source_preference': int_or_none(clip_url_data.get('rank')), + }) + formats.append(clip_f) + + self._sort_formats(formats) + + duration = int_or_none( + clip.get('duration')) or parse_duration(clip.get('formattedDuration')) + + # TODO: other languages? 
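+        # NOTE: only the 'en' locale is requested here; the GraphQL course
+        # payload also exposes translationLanguages { code name }, which
+        # could be used to request captions in other languages.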
+ subtitles = self.extract_subtitles( + author, clip_idx, clip.get('clipId'), 'en', name, duration, display_id) + + return { + 'id': clip_id, + 'title': title, + 'duration': duration, + 'creator': author, + 'formats': formats, + 'subtitles': subtitles, + } + + +class PluralsightCourseIE(PluralsightBaseIE): + IE_NAME = 'pluralsight:course' + _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:library/)?courses/(?P<id>[^/]+)' + _TESTS = [{ + # Free course from Pluralsight Starter Subscription for Microsoft TechNet + # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz + 'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas', + 'info_dict': { + 'id': 'hosting-sql-server-windows-azure-iaas', + 'title': 'Hosting SQL Server in Microsoft Azure IaaS Fundamentals', + 'description': 'md5:61b37e60f21c4b2f91dc621a977d0986', + }, + 'playlist_count': 31, + }, { + # available without pluralsight account + 'url': 'https://www.pluralsight.com/courses/angularjs-get-started', + 'only_matching': True, + }, { + 'url': 'https://app.pluralsight.com/library/courses/understanding-microsoft-azure-amazon-aws/table-of-contents', + 'only_matching': True, + }] + + def _real_extract(self, url): + course_id = self._match_id(url) + + # TODO: PSM cookie + + course = self._download_course(course_id, url, course_id) + + title = course['title'] + course_name = course['name'] + course_data = course['modules'] + description = course.get('description') or course.get('shortDescription') + + entries = [] + for num, module in enumerate(course_data, 1): + author = module.get('author') + module_name = module.get('name') + if not author or not module_name: + continue + for clip in module.get('clips', []): + clip_index = int_or_none(clip.get('index')) + if clip_index is None: + continue + clip_url = update_url_query( + '%s/player' % self._API_BASE, query={ + 'mode': 'live', + 'course': course_name, + 'author': author, + 'name': module_name, + 'clip': clip_index, + }) + entries.append({ + '_type': 'url_transparent', + 'url': clip_url, + 'ie_key': PluralsightIE.ie_key(), + 'chapter': module.get('title'), + 'chapter_number': num, + 'chapter_id': module.get('moduleRef'), + }) + + return self.playlist_result(entries, course_id, title, description) diff --git a/plugins/youtube_download/yt_dlp/extractor/plutotv.py b/plugins/youtube_download/yt_dlp/extractor/plutotv.py new file mode 100644 index 0000000..26aff1a --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/plutotv.py @@ -0,0 +1,187 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import uuid + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urlparse, +) +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + try_get, + url_or_none, +) + + +class PlutoTVIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?://(?:www\.)?pluto\.tv(?:/[^/]+)?/on-demand + /(?P<video_type>movies|series) + /(?P<series_or_movie_slug>[^/]+) + (?: + (?:/seasons?/(?P<season_no>\d+))? + (?:/episode/(?P<episode_slug>[^/]+))? + )? 
+ /?(?:$|[#?])''' + + _INFO_URL = 'https://service-vod.clusters.pluto.tv/v3/vod/slugs/' + _INFO_QUERY_PARAMS = { + 'appName': 'web', + 'appVersion': 'na', + 'clientID': compat_str(uuid.uuid1()), + 'clientModelNumber': 'na', + 'serverSideAds': 'false', + 'deviceMake': 'unknown', + 'deviceModel': 'web', + 'deviceType': 'web', + 'deviceVersion': 'unknown', + 'sid': compat_str(uuid.uuid1()), + } + _TESTS = [ + { + 'url': 'https://pluto.tv/on-demand/series/i-love-money/season/2/episode/its-in-the-cards-2009-2-3', + 'md5': 'ebcdd8ed89aaace9df37924f722fd9bd', + 'info_dict': { + 'id': '5de6c598e9379ae4912df0a8', + 'ext': 'mp4', + 'title': 'It\'s In The Cards', + 'episode': 'It\'s In The Cards', + 'description': 'The teams face off against each other in a 3-on-2 soccer showdown. Strategy comes into play, though, as each team gets to select their opposing teams’ two defenders.', + 'series': 'I Love Money', + 'season_number': 2, + 'episode_number': 3, + 'duration': 3600, + } + }, { + 'url': 'https://pluto.tv/on-demand/series/i-love-money/season/1/', + 'playlist_count': 11, + 'info_dict': { + 'id': '5de6c582e9379ae4912dedbd', + 'title': 'I Love Money - Season 1', + } + }, { + 'url': 'https://pluto.tv/on-demand/series/i-love-money/', + 'playlist_count': 26, + 'info_dict': { + 'id': '5de6c582e9379ae4912dedbd', + 'title': 'I Love Money', + } + }, { + 'url': 'https://pluto.tv/on-demand/movies/arrival-2015-1-1', + 'md5': '3cead001d317a018bf856a896dee1762', + 'info_dict': { + 'id': '5e83ac701fa6a9001bb9df24', + 'ext': 'mp4', + 'title': 'Arrival', + 'description': 'When mysterious spacecraft touch down across the globe, an elite team - led by expert translator Louise Banks (Academy Award® nominee Amy Adams) – races against time to decipher their intent.', + 'duration': 9000, + } + }, { + 'url': 'https://pluto.tv/en/on-demand/series/manhunters-fugitive-task-force/seasons/1/episode/third-times-the-charm-1-1', + 'only_matching': True, + }, { + 'url': 'https://pluto.tv/it/on-demand/series/csi-vegas/episode/legacy-2021-1-1', + 'only_matching': True, + } + ] + + def _to_ad_free_formats(self, video_id, formats, subtitles): + ad_free_formats, ad_free_subtitles, m3u8_urls = [], {}, set() + for fmt in formats: + res = self._download_webpage( + fmt.get('url'), video_id, note='Downloading m3u8 playlist', + fatal=False) + if not res: + continue + first_segment_url = re.search( + r'^(https?://.*/)0\-(end|[0-9]+)/[^/]+\.ts$', res, + re.MULTILINE) + if first_segment_url: + m3u8_urls.add( + compat_urlparse.urljoin(first_segment_url.group(1), '0-end/master.m3u8')) + continue + first_segment_url = re.search( + r'^(https?://.*/).+\-0+\.ts$', res, + re.MULTILINE) + if first_segment_url: + m3u8_urls.add( + compat_urlparse.urljoin(first_segment_url.group(1), 'master.m3u8')) + continue + + for m3u8_url in m3u8_urls: + fmts, subs = self._extract_m3u8_formats_and_subtitles( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + ad_free_formats.extend(fmts) + ad_free_subtitles = self._merge_subtitles(ad_free_subtitles, subs) + if ad_free_formats: + formats, subtitles = ad_free_formats, ad_free_subtitles + else: + self.report_warning('Unable to find ad-free formats') + return formats, subtitles + + def _get_video_info(self, video_json, slug, series_name=None): + video_id = video_json.get('_id', slug) + formats, subtitles = [], {} + for video_url in try_get(video_json, lambda x: x['stitched']['urls'], list) or []: + if video_url.get('type') != 'hls': + continue + url = url_or_none(video_url.get('url')) + + fmts, subs 
= self._extract_m3u8_formats_and_subtitles( + url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + formats.extend(fmts) + subtitles = self._merge_subtitles(subtitles, subs) + + formats, subtitles = self._to_ad_free_formats(video_id, formats, subtitles) + self._sort_formats(formats) + + info = { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + 'title': video_json.get('name'), + 'description': video_json.get('description'), + 'duration': float_or_none(video_json.get('duration'), scale=1000), + } + if series_name: + info.update({ + 'series': series_name, + 'episode': video_json.get('name'), + 'season_number': int_or_none(video_json.get('season')), + 'episode_number': int_or_none(video_json.get('number')), + }) + return info + + def _real_extract(self, url): + mobj = self._match_valid_url(url).groupdict() + info_slug = mobj['series_or_movie_slug'] + video_json = self._download_json(self._INFO_URL + info_slug, info_slug, query=self._INFO_QUERY_PARAMS) + + if mobj['video_type'] == 'series': + series_name = video_json.get('name', info_slug) + season_number, episode_slug = mobj.get('season_number'), mobj.get('episode_slug') + + videos = [] + for season in video_json['seasons']: + if season_number is not None and season_number != int_or_none(season.get('number')): + continue + for episode in season['episodes']: + if episode_slug is not None and episode_slug != episode.get('slug'): + continue + videos.append(self._get_video_info(episode, episode_slug, series_name)) + if not videos: + raise ExtractorError('Failed to find any videos to extract') + if episode_slug is not None and len(videos) == 1: + return videos[0] + playlist_title = series_name + if season_number is not None: + playlist_title += ' - Season %d' % season_number + return self.playlist_result(videos, + playlist_id=video_json.get('_id', info_slug), + playlist_title=playlist_title) + return self._get_video_info(video_json, info_slug) diff --git a/plugins/youtube_download/yt_dlp/extractor/podomatic.py b/plugins/youtube_download/yt_dlp/extractor/podomatic.py new file mode 100644 index 0000000..673a3ab --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/podomatic.py @@ -0,0 +1,75 @@ +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..utils import int_or_none + + +class PodomaticIE(InfoExtractor): + IE_NAME = 'podomatic' + _VALID_URL = r'''(?x) + (?P<proto>https?):// + (?: + (?P<channel>[^.]+)\.podomatic\.com/entry| + (?:www\.)?podomatic\.com/podcasts/(?P<channel_2>[^/]+)/episodes + )/ + (?P<id>[^/?#&]+) + ''' + + _TESTS = [{ + 'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00', + 'md5': '84bb855fcf3429e6bf72460e1eed782d', + 'info_dict': { + 'id': '2009-01-02T16_03_35-08_00', + 'ext': 'mp3', + 'uploader': 'Science Teaching Tips', + 'uploader_id': 'scienceteachingtips', + 'title': '64. 
When the Moon Hits Your Eye', + 'duration': 446, + } + }, { + 'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00', + 'md5': 'd2cf443931b6148e27638650e2638297', + 'info_dict': { + 'id': '2013-11-15T16_31_21-08_00', + 'ext': 'mp3', + 'uploader': 'Ostbahnhof / Techno Mix', + 'uploader_id': 'ostbahnhof', + 'title': 'Einunddreizig', + 'duration': 3799, + } + }, { + 'url': 'https://www.podomatic.com/podcasts/scienceteachingtips/episodes/2009-01-02T16_03_35-08_00', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + video_id = mobj.group('id') + channel = mobj.group('channel') or mobj.group('channel_2') + + json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' + + '?permalink=true&rtmp=0') % + (mobj.group('proto'), channel, video_id)) + data_json = self._download_webpage( + json_url, video_id, 'Downloading video info') + data = json.loads(data_json) + + video_url = data['downloadLink'] + if not video_url: + video_url = '%s/%s' % (data['streamer'].replace('rtmp', 'http'), data['mediaLocation']) + uploader = data['podcast'] + title = data['title'] + thumbnail = data['imageLocation'] + duration = int_or_none(data.get('length'), 1000) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'uploader': uploader, + 'uploader_id': channel, + 'thumbnail': thumbnail, + 'duration': duration, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/pokemon.py b/plugins/youtube_download/yt_dlp/extractor/pokemon.py new file mode 100644 index 0000000..402b574 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/pokemon.py @@ -0,0 +1,140 @@ +# coding: utf-8 +from __future__ import unicode_literals + + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + extract_attributes, + int_or_none, + js_to_json, + merge_dicts, +) + + +class PokemonIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))' + _TESTS = [{ + 'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/', + 'md5': '2fe8eaec69768b25ef898cda9c43062e', + 'info_dict': { + 'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4', + 'ext': 'mp4', + 'title': 'The Ol’ Raise and Switch!', + 'description': 'md5:7db77f7107f98ba88401d3adc80ff7af', + }, + 'add_id': ['LimelightMedia'], + }, { + # no data-video-title + 'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008', + 'info_dict': { + 'id': 'dfbaf830d7e54e179837c50c0c6cc0e1', + 'ext': 'mp4', + 'title': "Pokémon : L'ascension de Darkrai", + 'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5', + }, + 'add_id': ['LimelightMedia'], + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2', + 'only_matching': True, + }, { + 'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/', + 'only_matching': True, + }, { + 'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id, display_id = self._match_valid_url(url).groups() + webpage = self._download_webpage(url, video_id or display_id) + video_data = extract_attributes(self._search_regex( + r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'), + webpage, 'video data element')) + video_id = video_data['data-video-id'] + title = video_data.get('data-video-title') or 
self._html_search_meta( + 'pkm-title', webpage, ' title', default=None) or self._search_regex( + r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title') + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': 'limelight:media:%s' % video_id, + 'title': title, + 'description': video_data.get('data-video-summary'), + 'thumbnail': video_data.get('data-video-poster'), + 'series': 'Pokémon', + 'season_number': int_or_none(video_data.get('data-video-season')), + 'episode': title, + 'episode_number': int_or_none(video_data.get('data-video-episode')), + 'ie_key': 'LimelightMedia', + } + + +class PokemonWatchIE(InfoExtractor): + _VALID_URL = r'https?://watch\.pokemon\.com/[a-z]{2}-[a-z]{2}/(?:#/)?player(?:\.html)?\?id=(?P<id>[a-z0-9]{32})' + _API_URL = 'https://www.pokemon.com/api/pokemontv/v2/channels/{0:}' + _TESTS = [{ + 'url': 'https://watch.pokemon.com/en-us/player.html?id=8309a40969894a8e8d5bc1311e9c5667', + 'md5': '62833938a31e61ab49ada92f524c42ff', + 'info_dict': { + 'id': '8309a40969894a8e8d5bc1311e9c5667', + 'ext': 'mp4', + 'title': 'Lillier and the Staff!', + 'description': 'md5:338841b8c21b283d24bdc9b568849f04', + } + }, { + 'url': 'https://watch.pokemon.com/en-us/#/player?id=3fe7752ba09141f0b0f7756d1981c6b2', + 'only_matching': True + }, { + 'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07', + 'only_matching': True + }] + + def _extract_media(self, channel_array, video_id): + for channel in channel_array: + for media in channel.get('media'): + if media.get('id') == video_id: + return media + return None + + def _real_extract(self, url): + video_id = self._match_id(url) + + info = { + '_type': 'url', + 'id': video_id, + 'url': 'limelight:media:%s' % video_id, + 'ie_key': 'LimelightMedia', + } + + # API call can be avoided entirely if we are listing formats + if self.get_param('listformats', False): + return info + + webpage = self._download_webpage(url, video_id) + build_vars = self._parse_json(self._search_regex( + r'(?s)buildVars\s*=\s*({.*?})', webpage, 'build vars'), + video_id, transform_source=js_to_json) + region = build_vars.get('region') + channel_array = self._download_json(self._API_URL.format(region), video_id) + video_data = self._extract_media(channel_array, video_id) + + if video_data is None: + raise ExtractorError( + 'Video %s does not exist' % video_id, expected=True) + + info['_type'] = 'url_transparent' + images = video_data.get('images') + + return merge_dicts(info, { + 'title': video_data.get('title'), + 'description': video_data.get('description'), + 'thumbnail': images.get('medium') or images.get('small'), + 'series': 'Pokémon', + 'season_number': int_or_none(video_data.get('season')), + 'episode': video_data.get('title'), + 'episode_number': int_or_none(video_data.get('episode')), + }) diff --git a/plugins/youtube_download/yt_dlp/extractor/pokergo.py b/plugins/youtube_download/yt_dlp/extractor/pokergo.py new file mode 100644 index 0000000..d27031c --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/pokergo.py @@ -0,0 +1,111 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + try_get, +) + + +class PokerGoBaseIE(InfoExtractor): + _NETRC_MACHINE = 'pokergo' + _AUTH_TOKEN = None + _PROPERTY_ID = '1dfb3940-7d53-4980-b0b0-f28b369a000d' + + def _login(self): + username, password = self._get_login_info() + if not username: + self.raise_login_required(method='password') + + self.report_login() + 
PokerGoBaseIE._AUTH_TOKEN = self._download_json( + f'https://subscription.pokergo.com/properties/{self._PROPERTY_ID}/sign-in', None, + headers={'authorization': f'Basic {base64.b64encode(f"{username}:{password}".encode()).decode()}'}, + data=b'')['meta']['token'] + if not self._AUTH_TOKEN: + raise ExtractorError('Unable to get Auth Token.', expected=True) + + def _real_initialize(self): + if not self._AUTH_TOKEN: + self._login() + + +class PokerGoIE(PokerGoBaseIE): + _VALID_URL = r'https?://(?:www\.)?pokergo\.com/videos/(?P<id>[^&$#/?]+)' + + _TESTS = [{ + 'url': 'https://www.pokergo.com/videos/2a70ec4e-4a80-414b-97ec-725d9b72a7dc', + 'info_dict': { + 'id': 'aVLOxDzY', + 'ext': 'mp4', + 'title': 'Poker After Dark | Season 12 (2020) | Cry Me a River | Episode 2', + 'description': 'md5:c7a8c29556cbfb6eb3c0d5d622251b71', + 'thumbnail': 'https://cdn.jwplayer.com/v2/media/aVLOxDzY/poster.jpg?width=720', + 'timestamp': 1608085715, + 'duration': 2700.12, + 'season_number': 12, + 'episode_number': 2, + 'series': 'poker after dark', + 'upload_date': '20201216', + 'season': 'Season 12', + 'episode': 'Episode 2', + 'display_id': '2a70ec4e-4a80-414b-97ec-725d9b72a7dc', + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + data_json = self._download_json(f'https://api.pokergo.com/v2/properties/{self._PROPERTY_ID}/videos/{id}', id, + headers={'authorization': f'Bearer {self._AUTH_TOKEN}'})['data'] + v_id = data_json['source'] + + thumbnails = [{ + 'url': image['url'], + 'id': image.get('label'), + 'width': image.get('width'), + 'height': image.get('height') + } for image in data_json.get('images') or [] if image.get('url')] + series_json = next(dct for dct in data_json.get('show_tags') or [] if dct.get('video_id') == id) or {} + + return { + '_type': 'url_transparent', + 'display_id': id, + 'title': data_json.get('title'), + 'description': data_json.get('description'), + 'duration': data_json.get('duration'), + 'thumbnails': thumbnails, + 'season_number': series_json.get('season'), + 'episode_number': series_json.get('episode_number'), + 'series': try_get(series_json, lambda x: x['tag']['name']), + 'url': f'https://cdn.jwplayer.com/v2/media/{v_id}' + } + + +class PokerGoCollectionIE(PokerGoBaseIE): + _VALID_URL = r'https?://(?:www\.)?pokergo\.com/collections/(?P<id>[^&$#/?]+)' + + _TESTS = [{ + 'url': 'https://www.pokergo.com/collections/19ffe481-5dae-481a-8869-75cc0e3c4700', + 'playlist_mincount': 13, + 'info_dict': { + 'id': '19ffe481-5dae-481a-8869-75cc0e3c4700', + }, + }] + + def _entries(self, id): + data_json = self._download_json(f'https://api.pokergo.com/v2/properties/{self._PROPERTY_ID}/collections/{id}?include=entities', + id, headers={'authorization': f'Bearer {self._AUTH_TOKEN}'})['data'] + for video in data_json.get('collection_video') or []: + video_id = video.get('id') + if video_id: + yield self.url_result( + f'https://www.pokergo.com/videos/{video_id}', + ie=PokerGoIE.ie_key(), video_id=video_id) + + def _real_extract(self, url): + id = self._match_id(url) + return self.playlist_result(self._entries(id), playlist_id=id) diff --git a/plugins/youtube_download/yt_dlp/extractor/polsatgo.py b/plugins/youtube_download/yt_dlp/extractor/polsatgo.py new file mode 100644 index 0000000..1e3f46c --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/polsatgo.py @@ -0,0 +1,90 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from uuid import uuid4 +import json + +from .common import InfoExtractor +from ..utils import ( + 
int_or_none, + try_get, + url_or_none, + ExtractorError, +) + + +class PolsatGoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?polsat(?:box)?go\.pl/.+/(?P<id>[0-9a-fA-F]+)(?:[/#?]|$)' + _TESTS = [{ + 'url': 'https://polsatgo.pl/wideo/seriale/swiat-wedlug-kiepskich/5024045/sezon-1/5028300/swiat-wedlug-kiepskich-odcinek-88/4121', + 'info_dict': { + 'id': '4121', + 'ext': 'mp4', + 'title': 'Świat według Kiepskich - Odcinek 88', + 'age_limit': 12, + }, + }] + + def _extract_formats(self, sources, video_id): + for source in sources or []: + if not source.get('id'): + continue + url = url_or_none(self._call_api( + 'drm', video_id, 'getPseudoLicense', + {'mediaId': video_id, 'sourceId': source['id']}).get('url')) + if not url: + continue + yield { + 'url': url, + 'height': int_or_none(try_get(source, lambda x: x['quality'][:-1])) + } + + def _real_extract(self, url): + video_id = self._match_id(url) + media = self._call_api('navigation', video_id, 'prePlayData', {'mediaId': video_id})['mediaItem'] + + formats = list(self._extract_formats( + try_get(media, lambda x: x['playback']['mediaSources']), video_id)) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': media['displayInfo']['title'], + 'formats': formats, + 'age_limit': int_or_none(media['displayInfo']['ageGroup']) + } + + def _call_api(self, endpoint, media_id, method, params): + rand_uuid = str(uuid4()) + res = self._download_json( + f'https://b2c-mobile.redefine.pl/rpc/{endpoint}/', media_id, + note=f'Downloading {method} JSON metadata', + data=json.dumps({ + 'method': method, + 'id': '2137', + 'jsonrpc': '2.0', + 'params': { + **params, + 'userAgentData': { + 'deviceType': 'mobile', + 'application': 'native', + 'os': 'android', + 'build': 10003, + 'widevine': False, + 'portal': 'pg', + 'player': 'cpplayer', + }, + 'deviceId': { + 'type': 'other', + 'value': rand_uuid, + }, + 'clientId': rand_uuid, + 'cpid': 1, + }, + }).encode('utf-8'), + headers={'Content-type': 'application/json'}) + if not res.get('result'): + if res['error']['code'] == 13404: + raise ExtractorError('This video is either unavailable in your region or is DRM protected', expected=True) + raise ExtractorError(f'Solorz said: {res["error"]["message"]} - {res["error"]["data"]["userMessage"]}') + return res['result'] diff --git a/plugins/youtube_download/yt_dlp/extractor/polskieradio.py b/plugins/youtube_download/yt_dlp/extractor/polskieradio.py new file mode 100644 index 0000000..b2b3eb2 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/polskieradio.py @@ -0,0 +1,432 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools +import json +import math +import re + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urllib_parse_unquote, + compat_urlparse +) +from ..utils import ( + extract_attributes, + ExtractorError, + InAdvancePagedList, + int_or_none, + js_to_json, + parse_iso8601, + strip_or_none, + unified_timestamp, + unescapeHTML, + url_or_none, +) + + +class PolskieRadioBaseExtractor(InfoExtractor): + def _extract_webpage_player_entries(self, webpage, playlist_id, base_data): + media_urls = set() + + for data_media in re.findall(r'<[^>]+data-media="?({[^>]+})"?', webpage): + media = self._parse_json(data_media, playlist_id, transform_source=unescapeHTML, fatal=False) + if not media.get('file') or not media.get('desc'): + continue + media_url = self._proto_relative_url(media['file']) + if media_url in media_urls: + continue + media_urls.add(media_url) + entry = base_data.copy() 
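+            # NOTE: base_data carries the page-level defaults supplied by the
+            # callers (title, timestamp, thumbnail); the per-clip fields from
+            # the data-media attribute are merged on top below.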
+ entry.update({ + 'id': compat_str(media['id']), + 'url': media_url, + 'duration': int_or_none(media.get('length')), + 'vcodec': 'none' if media.get('provider') == 'audio' else None, + }) + entry_title = compat_urllib_parse_unquote(media['desc']) + if entry_title: + entry['title'] = entry_title + yield entry + + +class PolskieRadioIE(PolskieRadioBaseExtractor): + _VALID_URL = r'https?://(?:www\.)?polskieradio(?:24)?\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)' + _TESTS = [{ # Old-style single broadcast. + 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie', + 'info_dict': { + 'id': '1587943', + 'title': 'Prof. Andrzej Nowak: o historii nie da się myśleć beznamiętnie', + 'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5', + }, + 'playlist': [{ + 'md5': '2984ee6ce9046d91fc233bc1a864a09a', + 'info_dict': { + 'id': '1540576', + 'ext': 'mp3', + 'title': 'md5:d4623290d4ac983bf924061c75c23a0d', + 'timestamp': 1456594200, + 'upload_date': '20160227', + 'duration': 2364, + 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$' + }, + }], + }, { # New-style single broadcast. + 'url': 'https://www.polskieradio.pl/8/2382/Artykul/2534482,Zagarysci-Poezja-jak-spoiwo', + 'info_dict': { + 'id': '2534482', + 'title': 'Żagaryści. Poezja jak spoiwo', + 'description': 'md5:f18d95d5dcba747a09b635e21a4c0695', + }, + 'playlist': [{ + 'md5': 'd07559829f61d5a93a75755987ded760', + 'info_dict': { + 'id': '2516679', + 'ext': 'mp3', + 'title': 'md5:c6e1234e0b747ad883cb91b7ad06b98c', + 'timestamp': 1592654400, + 'upload_date': '20200620', + 'duration': 1430, + 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$' + }, + }], + }, { + # PR4 audition - other frontend + 'url': 'https://www.polskieradio.pl/10/6071/Artykul/2610977,Poglos-29-pazdziernika-godz-2301', + 'info_dict': { + 'id': '2610977', + 'ext': 'mp3', + 'title': 'Pogłos 29 października godz. 
23:01', + }, + }, { + 'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis', + 'only_matching': True, + }, { + 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943', + 'only_matching': True, + }, { + # with mp4 video + 'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej', + 'only_matching': True, + }, { + 'url': 'https://polskieradio24.pl/130/4503/Artykul/2621876,Narusza-nasza-suwerennosc-Publicysci-o-uzaleznieniu-funduszy-UE-od-praworzadnosci', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + content = self._search_regex( + r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>', + webpage, 'content', default=None) + + timestamp = unified_timestamp(self._html_search_regex( + r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>', + webpage, 'timestamp', default=None)) + + thumbnail_url = self._og_search_thumbnail(webpage, default=None) + + title = self._og_search_title(webpage).strip() + + description = strip_or_none(self._og_search_description(webpage, default=None)) + description = description.replace('\xa0', ' ') if description is not None else None + + if not content: + return { + 'id': playlist_id, + 'url': self._proto_relative_url( + self._search_regex( + r"source:\s*'(//static\.prsa\.pl/[^']+)'", + webpage, 'audition record url')), + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'thumbnail': thumbnail_url, + } + + entries = self._extract_webpage_player_entries(content, playlist_id, { + 'title': title, + 'timestamp': timestamp, + 'thumbnail': thumbnail_url, + }) + + return self.playlist_result(entries, playlist_id, title, description) + + +class PolskieRadioCategoryIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(?:,[^/]+)?/(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA', + 'info_dict': { + 'id': '5102', + 'title': 'HISTORIA ŻYWA', + }, + 'playlist_mincount': 38, + }, { + 'url': 'http://www.polskieradio.pl/7/4807', + 'info_dict': { + 'id': '4807', + 'title': 'Vademecum 1050. 
rocznicy Chrztu Polski' + }, + 'playlist_mincount': 5 + }, { + 'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source', + 'only_matching': True + }, { + 'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow', + 'info_dict': { + 'id': '4143', + 'title': 'Kierunek Kraków', + }, + 'playlist_mincount': 61 + }, { + 'url': 'http://www.polskieradio.pl/10,czworka/214,muzyka', + 'info_dict': { + 'id': '214', + 'title': 'Muzyka', + }, + 'playlist_mincount': 61 + }, { + 'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA', + 'only_matching': True, + }, { + 'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if PolskieRadioIE.suitable(url) else super(PolskieRadioCategoryIE, cls).suitable(url) + + def _entries(self, url, page, category_id): + content = page + for page_num in itertools.count(2): + for a_entry, entry_id in re.findall( + r'(?s)<article[^>]+>.*?(<a[^>]+href=["\']/\d+/\d+/Artykul/(\d+)[^>]+>).*?</article>', + content): + entry = extract_attributes(a_entry) + href = entry.get('href') + if not href: + continue + yield self.url_result( + compat_urlparse.urljoin(url, href), PolskieRadioIE.ie_key(), + entry_id, entry.get('title')) + mobj = re.search( + r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', + content) + if not mobj: + break + next_url = compat_urlparse.urljoin(url, mobj.group('url')) + content = self._download_webpage( + next_url, category_id, 'Downloading page %s' % page_num) + + def _real_extract(self, url): + category_id = self._match_id(url) + webpage = self._download_webpage(url, category_id) + title = self._html_search_regex( + r'<title>([^<]+) - [^<]+ - [^<]+', + webpage, 'title', fatal=False) + return self.playlist_result( + self._entries(url, webpage, category_id), + category_id, title) + + +class PolskieRadioPlayerIE(InfoExtractor): + IE_NAME = 'polskieradio:player' + _VALID_URL = r'https?://player\.polskieradio\.pl/anteny/(?P[^/]+)' + + _BASE_URL = 'https://player.polskieradio.pl' + _PLAYER_URL = 'https://player.polskieradio.pl/main.bundle.js' + _STATIONS_API_URL = 'https://apipr.polskieradio.pl/api/stacje' + + _TESTS = [{ + 'url': 'https://player.polskieradio.pl/anteny/trojka', + 'info_dict': { + 'id': '3', + 'ext': 'm4a', + 'title': 'Trójka', + }, + 'params': { + 'format': 'bestaudio', + 'skip_download': 'endless stream', + }, + }] + + def _get_channel_list(self, channel_url='no_channel'): + player_code = self._download_webpage( + self._PLAYER_URL, channel_url, + note='Downloading js player') + channel_list = js_to_json(self._search_regex( + r';var r="anteny",a=(\[.+?\])},', player_code, 'channel list')) + return self._parse_json(channel_list, channel_url) + + def _real_extract(self, url): + channel_url = self._match_id(url) + channel_list = self._get_channel_list(channel_url) + + channel = next((c for c in channel_list if c.get('url') == channel_url), None) + + if not channel: + raise ExtractorError('Channel not found') + + station_list = self._download_json(self._STATIONS_API_URL, channel_url, + note='Downloading stream url list', + headers={ + 'Accept': 'application/json', + 'Referer': url, + 'Origin': self._BASE_URL, + }) + station = next((s for s in station_list + if s.get('Name') == (channel.get('streamName') or channel.get('name'))), None) + if not station: + raise ExtractorError('Station not found even though we extracted channel') + + formats = [] + for stream_url in station['Streams']: 
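+            # NOTE: dispatch on the manifest suffix; stream URLs matching no
+            # known manifest type fall through and are added as direct URLs.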
+ stream_url = self._proto_relative_url(stream_url) + if stream_url.endswith('/playlist.m3u8'): + formats.extend(self._extract_m3u8_formats(stream_url, channel_url, live=True)) + elif stream_url.endswith('/manifest.f4m'): + formats.extend(self._extract_mpd_formats(stream_url, channel_url)) + elif stream_url.endswith('/Manifest'): + formats.extend(self._extract_ism_formats(stream_url, channel_url)) + else: + formats.append({ + 'url': stream_url, + }) + + self._sort_formats(formats) + + return { + 'id': compat_str(channel['id']), + 'formats': formats, + 'title': channel.get('name') or channel.get('streamName'), + 'display_id': channel_url, + 'thumbnail': f'{self._BASE_URL}/images/{channel_url}-color-logo.png', + 'is_live': True, + } + + +class PolskieRadioPodcastBaseExtractor(InfoExtractor): + _API_BASE = 'https://apipodcasts.polskieradio.pl/api' + + def _parse_episode(self, data): + return { + 'id': data['guid'], + 'formats': [{ + 'url': data['url'], + 'filesize': int_or_none(data.get('fileSize')), + }], + 'title': data['title'], + 'description': data.get('description'), + 'duration': int_or_none(data.get('length')), + 'timestamp': parse_iso8601(data.get('publishDate')), + 'thumbnail': url_or_none(data.get('image')), + 'series': data.get('podcastTitle'), + 'episode': data['title'], + } + + +class PolskieRadioPodcastListIE(PolskieRadioPodcastBaseExtractor): + IE_NAME = 'polskieradio:podcast:list' + _VALID_URL = r'https?://podcasty\.polskieradio\.pl/podcast/(?P\d+)' + _TESTS = [{ + 'url': 'https://podcasty.polskieradio.pl/podcast/8/', + 'info_dict': { + 'id': '8', + 'title': 'Śniadanie w Trójce', + 'description': 'md5:57abcc27bc4c6a6b25baa3061975b9ef', + 'uploader': 'Beata Michniewicz', + }, + 'playlist_mincount': 714, + }] + _PAGE_SIZE = 10 + + def _call_api(self, podcast_id, page): + return self._download_json( + f'{self._API_BASE}/Podcasts/{podcast_id}/?pageSize={self._PAGE_SIZE}&page={page}', + podcast_id, f'Downloading page {page}') + + def _real_extract(self, url): + podcast_id = self._match_id(url) + data = self._call_api(podcast_id, 1) + + def get_page(page_num): + page_data = self._call_api(podcast_id, page_num + 1) if page_num else data + yield from (self._parse_episode(ep) for ep in page_data['items']) + + return { + '_type': 'playlist', + 'entries': InAdvancePagedList( + get_page, math.ceil(data['itemCount'] / self._PAGE_SIZE), self._PAGE_SIZE), + 'id': str(data['id']), + 'title': data['title'], + 'description': data.get('description'), + 'uploader': data.get('announcer'), + } + + +class PolskieRadioPodcastIE(PolskieRadioPodcastBaseExtractor): + IE_NAME = 'polskieradio:podcast' + _VALID_URL = r'https?://podcasty\.polskieradio\.pl/track/(?P[a-f\d]{8}(?:-[a-f\d]{4}){4}[a-f\d]{8})' + _TESTS = [{ + 'url': 'https://podcasty.polskieradio.pl/track/6eafe403-cb8f-4756-b896-4455c3713c32', + 'info_dict': { + 'id': '6eafe403-cb8f-4756-b896-4455c3713c32', + 'ext': 'mp3', + 'title': 'Theresa May rezygnuje. 
Co dalej z brexitem?', + 'description': 'md5:e41c409a29d022b70ef0faa61dbded60', + }, + }] + + def _real_extract(self, url): + podcast_id = self._match_id(url) + data = self._download_json( + f'{self._API_BASE}/audio', + podcast_id, 'Downloading podcast metadata', + data=json.dumps({ + 'guids': [podcast_id], + }).encode('utf-8'), + headers={ + 'Content-Type': 'application/json', + }) + return self._parse_episode(data[0]) + + +class PolskieRadioRadioKierowcowIE(PolskieRadioBaseExtractor): + _VALID_URL = r'https?://(?:www\.)?radiokierowcow\.pl/artykul/(?P[0-9]+)' + IE_NAME = 'polskieradio:kierowcow' + + _TESTS = [{ + 'url': 'https://radiokierowcow.pl/artykul/2694529', + 'info_dict': { + 'id': '2694529', + 'title': 'Zielona fala reliktem przeszłości?', + 'description': 'md5:343950a8717c9818fdfd4bd2b8ca9ff2', + }, + 'playlist_count': 3, + }] + + def _real_extract(self, url): + media_id = self._match_id(url) + webpage = self._download_webpage(url, media_id) + nextjs_build = self._search_nextjs_data(webpage, media_id)['buildId'] + article = self._download_json( + f'https://radiokierowcow.pl/_next/data/{nextjs_build}/artykul/{media_id}.json?articleId={media_id}', + media_id) + data = article['pageProps']['data'] + title = data['title'] + entries = self._extract_webpage_player_entries(data['content'], media_id, { + 'title': title, + }) + + return { + '_type': 'playlist', + 'id': media_id, + 'entries': entries, + 'title': title, + 'description': data.get('lead'), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/popcorntimes.py b/plugins/youtube_download/yt_dlp/extractor/popcorntimes.py new file mode 100644 index 0000000..5f9d0e7 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/popcorntimes.py @@ -0,0 +1,98 @@ +# coding: utf-8 +from __future__ import unicode_literals + + +from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_chr, +) +from ..utils import int_or_none + + +class PopcorntimesIE(InfoExtractor): + _VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P[^/]+)/(?P[^/?#&]+)' + _TEST = { + 'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy', + 'md5': '93f210991ad94ba8c3485950a2453257', + 'info_dict': { + 'id': 'A1XCFvz', + 'display_id': 'haensel-und-gretel-opera-fantasy', + 'ext': 'mp4', + 'title': 'Hänsel und Gretel', + 'description': 'md5:1b8146791726342e7b22ce8125cf6945', + 'thumbnail': r're:^https?://.*\.jpg$', + 'creator': 'John Paul', + 'release_date': '19541009', + 'duration': 4260, + 'tbr': 5380, + 'width': 720, + 'height': 540, + }, + } + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + video_id, display_id = mobj.group('id', 'display_id') + + webpage = self._download_webpage(url, display_id) + + title = self._search_regex( + r'
<h1>
([^<]+)', webpage, 'title', + default=None) or self._html_search_meta( + 'ya:ovs:original_name', webpage, 'title', fatal=True) + + loc = self._search_regex( + r'PCTMLOC\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, 'loc', + group='value') + + loc_b64 = '' + for c in loc: + c_ord = ord(c) + if ord('a') <= c_ord <= ord('z') or ord('A') <= c_ord <= ord('Z'): + upper = ord('Z') if c_ord <= ord('Z') else ord('z') + c_ord += 13 + if upper < c_ord: + c_ord -= 26 + loc_b64 += compat_chr(c_ord) + + video_url = compat_b64decode(loc_b64).decode('utf-8') + + description = self._html_search_regex( + r'(?s)]+class=["\']pt-movie-desc[^>]+>(.+?)', webpage, + 'description', fatal=False) + + thumbnail = self._search_regex( + r']+class=["\']video-preview[^>]+\bsrc=(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'thumbnail', default=None, + group='value') or self._og_search_thumbnail(webpage) + + creator = self._html_search_meta( + 'video:director', webpage, 'creator', default=None) + + release_date = self._html_search_meta( + 'video:release_date', webpage, default=None) + if release_date: + release_date = release_date.replace('-', '') + + def int_meta(name): + return int_or_none(self._html_search_meta( + name, webpage, default=None)) + + return { + 'id': video_id, + 'display_id': display_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'creator': creator, + 'release_date': release_date, + 'duration': int_meta('video:duration'), + 'tbr': int_meta('ya:ovs:bitrate'), + 'width': int_meta('og:video:width'), + 'height': int_meta('og:video:height'), + 'http_headers': { + 'Referer': url, + }, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/popcorntv.py b/plugins/youtube_download/yt_dlp/extractor/popcorntv.py new file mode 100644 index 0000000..66d2e50 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/popcorntv.py @@ -0,0 +1,75 @@ +from __future__ import unicode_literals + + +from .common import InfoExtractor +from ..utils import ( + extract_attributes, + int_or_none, + unified_timestamp, +) + + +class PopcornTVIE(InfoExtractor): + _VALID_URL = r'https?://[^/]+\.popcorntv\.it/guarda/(?P[^/]+)/(?P\d+)' + _TESTS = [{ + 'url': 'https://animemanga.popcorntv.it/guarda/food-wars-battaglie-culinarie-episodio-01/9183', + 'md5': '47d65a48d147caf692ab8562fe630b45', + 'info_dict': { + 'id': '9183', + 'display_id': 'food-wars-battaglie-culinarie-episodio-01', + 'ext': 'mp4', + 'title': 'Food Wars, Battaglie Culinarie | Episodio 01', + 'description': 'md5:b8bea378faae4651d3b34c6e112463d0', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1497610857, + 'upload_date': '20170616', + 'duration': 1440, + 'view_count': int, + }, + }, { + 'url': 'https://cinema.popcorntv.it/guarda/smash-cut/10433', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + display_id, video_id = mobj.group('display_id', 'id') + + webpage = self._download_webpage(url, display_id) + + m3u8_url = extract_attributes( + self._search_regex( + r'(]+itemprop=["\'](?:content|embed)Url[^>]*>)', + webpage, 'content' + ))['href'] + + formats = self._extract_m3u8_formats( + m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + + title = self._search_regex( + r']+itemprop=["\']name[^>]*>([^<]+)', webpage, + 'title', default=None) or self._og_search_title(webpage) + + description = self._html_search_regex( + r'(?s)]+itemprop=["\']description[^>]*>(.+?)', + webpage, 'description', fatal=False) + thumbnail = 
self._og_search_thumbnail(webpage) + timestamp = unified_timestamp(self._html_search_meta( + 'uploadDate', webpage, 'timestamp')) + duration = int_or_none(self._html_search_meta( + 'duration', webpage), invscale=60) + view_count = int_or_none(self._html_search_meta( + 'interactionCount', webpage, 'view count')) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'view_count': view_count, + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/porn91.py b/plugins/youtube_download/yt_dlp/extractor/porn91.py new file mode 100644 index 0000000..20eac64 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/porn91.py @@ -0,0 +1,63 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + int_or_none, + ExtractorError, +) + + +class Porn91IE(InfoExtractor): + IE_NAME = '91porn' + _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/.+?\?viewkey=(?P[\w\d]+)' + + _TEST = { + 'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134', + 'md5': '7fcdb5349354f40d41689bd0fa8db05a', + 'info_dict': { + 'id': '7e42283b4f5ab36da134', + 'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!', + 'ext': 'mp4', + 'duration': 431, + 'age_limit': 18, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + self._set_cookie('91porn.com', 'language', 'cn_CN') + + webpage = self._download_webpage( + 'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id) + + if '作为游客,你每天只可观看10个视频' in webpage: + raise ExtractorError('91 Porn says: Daily limit 10 videos exceeded', expected=True) + + title = self._search_regex( + r'
<div id="viewvideo-title">([^<]+)</div>
', webpage, 'title') + title = title.replace('\n', '') + + video_link_url = self._search_regex( + r']+id=["\']fm-video_link[^>]+>([^<]+)', + webpage, 'video link') + videopage = self._download_webpage(video_link_url, video_id) + + info_dict = self._parse_html5_media_entries(url, videopage, video_id)[0] + + duration = parse_duration(self._search_regex( + r'时长:\s*\s*(\d+:\d+)', webpage, 'duration', fatal=False)) + + comment_count = int_or_none(self._search_regex( + r'留言:\s*\s*(\d+)', webpage, 'comment count', fatal=False)) + + info_dict.update({ + 'id': video_id, + 'title': title, + 'duration': duration, + 'comment_count': comment_count, + 'age_limit': self._rta_search(webpage), + }) + + return info_dict diff --git a/plugins/youtube_download/yt_dlp/extractor/porncom.py b/plugins/youtube_download/yt_dlp/extractor/porncom.py new file mode 100644 index 0000000..83df221 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/porncom.py @@ -0,0 +1,103 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + int_or_none, + js_to_json, + parse_filesize, + str_to_int, +) + + +class PornComIE(InfoExtractor): + _VALID_URL = r'https?://(?:[a-zA-Z]+\.)?porn\.com/videos/(?:(?P[^/]+)-)?(?P\d+)' + _TESTS = [{ + 'url': 'http://www.porn.com/videos/teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec-2603339', + 'md5': '3f30ce76267533cd12ba999263156de7', + 'info_dict': { + 'id': '2603339', + 'display_id': 'teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec', + 'ext': 'mp4', + 'title': 'Teen grabs a dildo and fucks her pussy live on 1hottie, I rec', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 551, + 'view_count': int, + 'age_limit': 18, + 'categories': list, + 'tags': list, + }, + }, { + 'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id + + webpage = self._download_webpage(url, display_id) + + config = self._parse_json( + self._search_regex( + (r'=\s*({.+?})\s*;\s*v1ar\b', + r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*='), + webpage, 'config', default='{}'), + display_id, transform_source=js_to_json, fatal=False) + + if config: + title = config['title'] + formats = [{ + 'url': stream['url'], + 'format_id': stream.get('id'), + 'height': int_or_none(self._search_regex( + r'^(\d+)[pP]', stream.get('id') or '', 'height', default=None)) + } for stream in config['streams'] if stream.get('url')] + thumbnail = (compat_urlparse.urljoin( + config['thumbCDN'], config['poster']) + if config.get('thumbCDN') and config.get('poster') else None) + duration = int_or_none(config.get('length')) + else: + title = self._search_regex( + (r'([^<]+)', r']*>([^<]+)
</h1>
'), + webpage, 'title') + formats = [{ + 'url': compat_urlparse.urljoin(url, format_url), + 'format_id': '%sp' % height, + 'height': int(height), + 'filesize_approx': parse_filesize(filesize), + } for format_url, height, filesize in re.findall( + r']+href="(/download/[^"]+)">[^<]*?(\d+)p]*>(\d+\s*[a-zA-Z]+)<', + webpage)] + thumbnail = None + duration = None + + self._sort_formats(formats) + + view_count = str_to_int(self._search_regex( + (r'Views:\s*\s*\s*([\d,.]+)', + r'class=["\']views["\'][^>]*>
<p>
([\d,.]+)'), webpage, + 'view count', fatal=False)) + + def extract_list(kind): + s = self._search_regex( + (r'(?s)%s:\s*\s*(.+?)' % kind.capitalize(), + r'(?s)]*>%s:(.+?)
</p>
' % kind.capitalize()), + webpage, kind, fatal=False) + return re.findall(r']+>([^<]+)
', s or '') + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'view_count': view_count, + 'formats': formats, + 'age_limit': 18, + 'categories': extract_list('categories'), + 'tags': extract_list('tags'), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/pornez.py b/plugins/youtube_download/yt_dlp/extractor/pornez.py new file mode 100644 index 0000000..713dc00 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/pornez.py @@ -0,0 +1,43 @@ +# coding: utf-8 +from __future__ import unicode_literals +from .common import InfoExtractor +from ..utils import int_or_none + + +class PornezIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pornez\.net/video(?P[0-9]+)/' + _TEST = { + 'url': 'https://pornez.net/video344819/mistresst-funny_penis_names-wmv/', + 'md5': '2e19a0a1cff3a5dbea0ef1b9e80bcbbc', + 'info_dict': { + 'id': '344819', + 'ext': 'mp4', + 'title': r'mistresst funny_penis_names wmv', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + iframe_src = self._html_search_regex( + r']+src="(https?://pornez\.net/player/\?[^"]+)"', webpage, 'iframe', fatal=True) + title = self._html_search_meta(['name', 'twitter:title', 'og:title'], webpage, 'title', default=None) + if title is None: + title = self._search_regex(r'
<h1>(.*?)</h1>
', webpage, 'title', fatal=True)
+        thumbnail = self._html_search_meta(['thumbnailUrl'], webpage, 'title', default=None)
+        webpage = self._download_webpage(iframe_src, video_id)
+        entries = self._parse_html5_media_entries(iframe_src, webpage, video_id)[0]
+        for format in entries['formats']:
+            height = self._search_regex(r'_(\d+)\.m3u8', format['url'], 'height')
+            format['format_id'] = '%sp' % height
+            format['height'] = int_or_none(height)
+
+        entries.update({
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'age_limit': 18
+        })
+        return entries
diff --git a/plugins/youtube_download/yt_dlp/extractor/pornflip.py b/plugins/youtube_download/yt_dlp/extractor/pornflip.py
new file mode 100644
index 0000000..accf452
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/pornflip.py
@@ -0,0 +1,81 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_iso8601
+)
+
+
+class PornFlipIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:(embed|sv|v)/)?(?P<id>[^/]+)'
+    _TESTS = [
+        {
+            'url': 'https://www.pornflip.com/dzv9Mtw1qj2/sv/brazzers-double-dare-two-couples-fucked-jenna-reid-maya-bijou',
+            'info_dict': {
+                'id': 'dzv9Mtw1qj2',
+                'ext': 'mp4',
+                'title': 'Brazzers - Double Dare Two couples fucked Jenna Reid Maya Bijou',
+                'description': 'md5:d2b69e6cc743c5fd158e162aa7f05821',
+                'duration': 476,
+                'like_count': int,
+                'dislike_count': int,
+                'view_count': int,
+                'timestamp': 1617846819,
+                'upload_date': '20210408',
+                'uploader': 'Brazzers',
+                'age_limit': 18,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'https://www.pornflip.com/v/IrJEC40i21L',
+            'only_matching': True,
+        },
+        {
+            'url': 'https://www.pornflip.com/Z3jzbChC5-P/sexintaxi-e-sereyna-gomez-czech-naked-couple',
+            'only_matching': True,
+        },
+        {
+            'url': 'https://www.pornflip.com/embed/bLcDFxnrZnU',
+            'only_matching': True,
+        },
+    ]
+    _HOST = 'www.pornflip.com'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(
+            'https://{}/sv/{}'.format(self._HOST, video_id), video_id, headers={'host': self._HOST})
+        description = self._html_search_regex(r'&p\[summary\]=(.*?)\s*&p', webpage, 'description', fatal=False)
+        duration = self._search_regex(r'"duration":\s+"([^"]+)",', webpage, 'duration', fatal=False)
+        view_count = self._search_regex(r'"interactionCount":\s+"([^"]+)"', webpage, 'view_count', fatal=False)
+        title = self._html_search_regex(r'id="mediaPlayerTitleLink"[^>]*>(.+)</a>', webpage, 'title', fatal=False)
+        uploader = self._html_search_regex(r'class="title-chanel"[^>]*>[^<]*<a[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)
+        upload_date = self._search_regex(r'"uploadDate":\s+"([^"]+)",', webpage, 'upload_date', fatal=False)
+        likes = self._html_search_regex(
+            r'class="btn btn-up-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^<]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage, 'like_count', fatal=False)
+        dislikes = self._html_search_regex(
+            r'class="btn btn-down-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^<]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage, 'dislike_count', fatal=False)
+        mpd_url = self._search_regex(r'"([^"]+userscontent.net/dash/[0-9]+/manifest.mpd[^"]*)"', webpage, 'mpd_url').replace('&amp;', '&')
+        formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash')
+        self._sort_formats(formats)
+
+        return {
+            'age_limit': 18,
+            'description': description,
+            'dislike_count': int_or_none(dislikes),
+            'duration': parse_duration(duration),
+            'formats': formats,
+            'id': video_id,
+            'like_count': int_or_none(likes),
+            'timestamp': parse_iso8601(upload_date),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'title': title,
+            'uploader': uploader,
+            'view_count': int_or_none(view_count),
+        }
diff --git a/plugins/youtube_download/yt_dlp/extractor/pornhd.py b/plugins/youtube_download/yt_dlp/extractor/pornhd.py
new file mode 100644
index 0000000..9dbd72f
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/pornhd.py
@@ -0,0 +1,120 @@
+from __future__ import unicode_literals
+
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    ExtractorError,
+    int_or_none,
+    js_to_json,
+    merge_dicts,
+    urljoin,
+)
+
+
+class PornHdIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
+    _TESTS = [{
+        'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
+        'md5': '87f1540746c1d32ec7a2305c12b96b25',
+        'info_dict': {
+            'id': '9864',
+            'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
+            'ext': 'mp4',
+            'title': 'Restroom selfie masturbation',
+            'description': 'md5:3748420395e03e31ac96857a8f125b2b',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'view_count': int,
+            'like_count': int,
+            'age_limit': 18,
+        },
+        'skip': 'HTTP Error 404: Not Found',
+    }, {
+        'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
+        'md5': '1b7b3a40b9d65a8e5b25f7ab9ee6d6de',
+        'info_dict': {
+            'id': '1962',
+            'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
+            'ext': 'mp4',
+            'title': 'md5:98c6f8b2d9c229d0f0fde47f61a1a759',
+            'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'view_count': int,
+            'like_count': int,
+            'age_limit': 18,
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id or video_id)
+
+        title = self._html_search_regex(
+            [r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
+             r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
+
+        sources = self._parse_json(js_to_json(self._search_regex(
+            r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
+            webpage, 'sources', default='{}')), video_id)
+
+        info = {}
+        if not sources:
+            entries = self._parse_html5_media_entries(url, webpage, video_id)
+            if entries:
+                info = entries[0]
+
+        if not sources and not info:
+            message = self._html_search_regex(
+                r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1',
+                webpage, 'error message', group='value')
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
+
+        formats = []
+        for format_id, video_url in sources.items():
+            video_url = urljoin(url, video_url)
+            if not video_url:
+                continue
+            height = int_or_none(self._search_regex(
+                r'^(\d+)[pP]', format_id, 'height', default=None))
+            formats.append({
+                'url': video_url,
+                'ext': determine_ext(video_url, 'mp4'),
+                'format_id': format_id,
+                'height': height,
+            })
+        if formats:
+            info['formats'] = formats
+        self._sort_formats(info['formats'])
+
+        description = self._html_search_regex(
+            (r'(?s)<section[^>]+class=["\']video-description[^>]+>(?P<value>.+?)</section>',
+             r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1'),
+            webpage, 'description', fatal=False,
+            group='value') or self._html_search_meta(
+            'description', webpage, default=None) or self._og_search_description(webpage)
+
+        view_count = int_or_none(self._html_search_regex(
+            r'(\d+) views\s*<', webpage, 'view count', fatal=False))
+
+        thumbnail = self._search_regex(
+            r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
+            'thumbnail', default=None, group='url')
+
+        like_count = int_or_none(self._search_regex(
+            (r'(\d+)\s*likes',
+             r'(\d+)\s*<span[^>]+>(?:&nbsp;|\s)*\blikes',
+             r'class=["\']save-count["\'][^>]*>\s*(\d+)'),
+            webpage, 'like count', fatal=False))
+
+        return merge_dicts(info, {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'view_count': view_count,
+            'like_count': like_count,
+            'formats': formats,
+            'age_limit': 18,
+        })
diff --git a/plugins/youtube_download/yt_dlp/extractor/pornhub.py b/plugins/youtube_download/yt_dlp/extractor/pornhub.py
new file mode 100644
index 0000000..17c8c91
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/pornhub.py
@@ -0,0 +1,817 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import functools
+import itertools
+import math
+import operator
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_HTTPError,
+    compat_str,
+    compat_urllib_request,
+)
+from .openload import PhantomJSwrapper
+from ..utils import (
+    clean_html,
+    determine_ext,
+    ExtractorError,
+    format_field,
+    int_or_none,
+    merge_dicts,
+    NO_DEFAULT,
+    orderedSet,
+    remove_quotes,
+    str_to_int,
+    update_url_query,
+    urlencode_postdata,
+    url_or_none,
+)
+
+
+class PornHubBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'pornhub'
+    _PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd\.onion)'
+
+    def _download_webpage_handle(self, *args, **kwargs):
+        def dl(*args, **kwargs):
+            return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
+
+        ret = dl(*args, **kwargs)
+
+        if not ret:
+            return ret
+
+        webpage, urlh = ret
+
+        if any(re.search(p, webpage) for p in (
+                r'<body\b[^>]+\bonload=["\']go\(\)',
+                r'document\.cookie\s*=\s*["\']RNKEY=',
+                r'document\.location\.reload\(true\)')):
+            url_or_request = args[0]
+            url = (url_or_request.get_full_url()
+                   if isinstance(url_or_request, compat_urllib_request.Request)
+                   else url_or_request)
+            phantom = PhantomJSwrapper(self, required_version='2.0')
+            phantom.get(url, html=webpage)
+            webpage, urlh = dl(*args, **kwargs)
+
+        return webpage, urlh
+
+    def _real_initialize(self):
+        self._logged_in = False
+
+    def _login(self, host):
+        if self._logged_in:
+            return
+
+        site = host.split('.')[0]
+
+        # Both sites pornhub and pornhubpremium have separate accounts
+        # so there should be an option to provide credentials for both.
+        # At the same time some videos are available under the same video id
+        # on both sites so that we have to identify them as the same video.
+        # For that purpose we have to keep both in the same extractor
+        # but under different netrc machines.
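The comment above is the rationale for the per-site login plumbing: yt-dlp resolves credentials by netrc machine name, and `site = host.split('.')[0]` derives that name from the host. A standalone sketch of the mapping (illustrative only; the `~/.netrc` lines are assumed examples, not taken from this patch):

    # One netrc machine per site, mirroring `site = host.split('.')[0]` above.
    # A ~/.netrc could then hold both logins side by side:
    #   machine pornhub         login USER password PASS
    #   machine pornhubpremium  login USER password PASS
    def netrc_machine_for(host):
        return host.split('.')[0]

    assert netrc_machine_for('pornhub.com') == 'pornhub'
    assert netrc_machine_for('pornhubpremium.com') == 'pornhubpremium'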
+        username, password = self._get_login_info(netrc_machine=site)
+        if username is None:
+            return
+
+        login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '')
+        login_page = self._download_webpage(
+            login_url, None, 'Downloading %s login page' % site)
+
+        def is_logged(webpage):
+            return any(re.search(p, webpage) for p in (
+                r'class=["\']signOut',
+                r'>Sign\s+[Oo]ut\s*<'))
+
+        if is_logged(login_page):
+            self._logged_in = True
+            return
+
+        login_form = self._hidden_inputs(login_page)
+
+        login_form.update({
+            'username': username,
+            'password': password,
+        })
+
+        response = self._download_json(
+            'https://www.%s/front/authenticate' % host, None,
+            'Logging in to %s' % site,
+            data=urlencode_postdata(login_form),
+            headers={
+                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+                'Referer': login_url,
+                'X-Requested-With': 'XMLHttpRequest',
+            })
+
+        if response.get('success') == '1':
+            self._logged_in = True
+            return
+
+        message = response.get('message')
+        if message is not None:
+            raise ExtractorError(
+                'Unable to login: %s' % message, expected=True)
+
+        raise ExtractorError('Unable to log in')
+
+
+class PornHubIE(PornHubBaseIE):
+    IE_DESC = 'PornHub and Thumbzilla'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:[^/]+\.)?
+                            %s
+                            /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
+                            (?:www\.)?thumbzilla\.com/video/
+                        )
+                        (?P<id>[\da-z]+)
+                    ''' % PornHubBaseIE._PORNHUB_HOST_RE
+    _TESTS = [{
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
+        'md5': 'a6391306d050e4547f62b3f485dd9ba9',
+        'info_dict': {
+            'id': '648719015',
+            'ext': 'mp4',
+            'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
+            'uploader': 'Babes',
+            'upload_date': '20130628',
+            'timestamp': 1372447216,
+            'duration': 361,
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+            'age_limit': 18,
+            'tags': list,
+            'categories': list,
+            'cast': list,
+        },
+    }, {
+        # non-ASCII title
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
+        'info_dict': {
+            'id': '1331683002',
+            'ext': 'mp4',
+            'title': '重庆婷婷女王足交',
+            'upload_date': '20150213',
+            'timestamp': 1423804862,
+            'duration': 1753,
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+            'age_limit': 18,
+            'tags': list,
+            'categories': list,
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
+    }, {
+        # subtitles
+        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
+        'info_dict': {
+            'id': 'ph5af5fef7c2aa7',
+            'ext': 'mp4',
+            'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
+            'uploader': 'BFFs',
+            'duration': 622,
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+            'age_limit': 18,
+            'tags': list,
+            'categories': list,
+            'subtitles': {
+                'en': [{
+                    "ext": 'srt'
+                }]
+            },
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'skip': 'This video has been disabled',
+    }, {
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
+        'only_matching': True,
+    }, {
+        # removed at the request of cam4.com
+        'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
+        'only_matching': True,
+    }, {
+        # removed at the request of the copyright owner
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
+        'only_matching': True,
+    }, {
+        # removed by uploader
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
+        'only_matching': True,
+    }, {
+        # private video
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
+        'only_matching': True,
+    }, {
+        # Some videos are available with the same id on both premium
+        # and non-premium sites (e.g. this and the following test)
+        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
+        'only_matching': True,
+    }, {
+        # geo restricted
+        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
+        'only_matching': True,
+    }, {
+        'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/view_video.php?viewkey=ph5a9813bfa7156',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
+            webpage)
+
+    def _extract_count(self, pattern, webpage, name):
+        return str_to_int(self._search_regex(pattern, webpage, '%s count' % name, default=None))
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        host = mobj.group('host') or 'pornhub.com'
+        video_id = mobj.group('id')
+
+        self._login(host)
+
+        self._set_cookie(host, 'age_verified', '1')
+
+        def dl_webpage(platform):
+            self._set_cookie(host, 'platform', platform)
+            return self._download_webpage(
+                'https://www.%s/view_video.php?viewkey=%s' % (host, video_id),
+                video_id, 'Downloading %s webpage' % platform)
+
+        webpage = dl_webpage('pc')
+
+        error_msg = self._html_search_regex(
+            (r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+             r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
+            webpage, 'error message', default=None, group='error')
+        if error_msg:
+            error_msg = re.sub(r'\s+', ' ', error_msg)
+            raise ExtractorError(
+                'PornHub said: %s' % error_msg,
+                expected=True, video_id=video_id)
+
+        if any(re.search(p, webpage) for p in (
+                r'class=["\']geoBlocked["\']',
+                r'>\s*This content is unavailable in your country')):
+            self.raise_geo_restricted()
+
+        # video_title from flashvars contains whitespace instead of non-ASCII (see
+        # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
+        # on that anymore.
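Since flashvars' video_title cannot be trusted for non-ASCII names, the code that follows resolves the title from page markup instead, trying several sources in order until one yields a value. A toy sketch of that first-match-wins chaining (standalone; the markup and helper names are made up):

    import re

    page = '<meta name="twitter:title" content="非ASCII标题">'

    def meta_title(html):
        m = re.search(r'name="twitter:title"\s+content="([^"]*)"', html)
        return m.group(1) if m else None

    def h1_title(html):
        m = re.search(r'(?s)<h1[^>]*>(.+?)</h1>', html)
        return m.group(1) if m else None

    # The first source that yields a value wins, like
    # `_html_search_meta(...) or _html_search_regex(...)` below.
    print(meta_title(page) or h1_title(page))  # 非ASCII标题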
+        title = self._html_search_meta(
+            'twitter:title', webpage, default=None) or self._html_search_regex(
+            (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
+             r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
+             r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
+            webpage, 'title', group='title')
+
+        video_urls = []
+        video_urls_set = set()
+        subtitles = {}
+
+        flashvars = self._parse_json(
+            self._search_regex(
+                r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
+            video_id)
+        if flashvars:
+            subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
+            if subtitle_url:
+                subtitles.setdefault('en', []).append({
+                    'url': subtitle_url,
+                    'ext': 'srt',
+                })
+            thumbnail = flashvars.get('image_url')
+            duration = int_or_none(flashvars.get('video_duration'))
+            media_definitions = flashvars.get('mediaDefinitions')
+            if isinstance(media_definitions, list):
+                for definition in media_definitions:
+                    if not isinstance(definition, dict):
+                        continue
+                    video_url = definition.get('videoUrl')
+                    if not video_url or not isinstance(video_url, compat_str):
+                        continue
+                    if video_url in video_urls_set:
+                        continue
+                    video_urls_set.add(video_url)
+                    video_urls.append(
+                        (video_url, int_or_none(definition.get('quality'))))
+        else:
+            thumbnail, duration = [None] * 2
+
+        def extract_js_vars(webpage, pattern, default=NO_DEFAULT):
+            assignments = self._search_regex(
+                pattern, webpage, 'encoded url', default=default)
+            if not assignments:
+                return {}
+
+            assignments = assignments.split(';')
+
+            js_vars = {}
+
+            def parse_js_value(inp):
+                inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
+                if '+' in inp:
+                    inps = inp.split('+')
+                    return functools.reduce(
+                        operator.concat, map(parse_js_value, inps))
+                inp = inp.strip()
+                if inp in js_vars:
+                    return js_vars[inp]
+                return remove_quotes(inp)
+
+            for assn in assignments:
+                assn = assn.strip()
+                if not assn:
+                    continue
+                assn = re.sub(r'var\s+', '', assn)
+                vname, value = assn.split('=', 1)
+                js_vars[vname] = parse_js_value(value)
+            return js_vars
+
+        def add_video_url(video_url):
+            v_url = url_or_none(video_url)
+            if not v_url:
+                return
+            if v_url in video_urls_set:
+                return
+            video_urls.append((v_url, None))
+            video_urls_set.add(v_url)
+
+        def parse_quality_items(quality_items):
+            q_items = self._parse_json(quality_items, video_id, fatal=False)
+            if not isinstance(q_items, list):
+                return
+            for item in q_items:
+                if isinstance(item, dict):
+                    add_video_url(item.get('url'))
+
+        if not video_urls:
+            FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
+            js_vars = extract_js_vars(
+                webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
+                default=None)
+            if js_vars:
+                for key, format_url in js_vars.items():
+                    if key.startswith(FORMAT_PREFIXES[-1]):
+                        parse_quality_items(format_url)
+                    elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
+                        add_video_url(format_url)
+            if not video_urls and re.search(
+                    r'<[^>]+\bid=["\']lockedPlayer', webpage):
+                raise ExtractorError(
+                    'Video %s is locked' % video_id, expected=True)
+
+        if not video_urls:
+            js_vars = extract_js_vars(
+                dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
+            add_video_url(js_vars['mediastring'])
+
+        for mobj in re.finditer(
+                r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
+                webpage):
+            video_url = mobj.group('url')
+            if video_url not in video_urls_set:
+                video_urls.append((video_url, None))
+                video_urls_set.add(video_url)
+
+        upload_date = None
+        formats = []
+
+        def add_format(format_url, height=None):
+            ext = determine_ext(format_url)
+            if ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    format_url, video_id, mpd_id='dash', fatal=False))
+                return
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+                return
+            if not height:
+                height = int_or_none(self._search_regex(
+                    r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height',
+                    default=None))
+            formats.append({
+                'url': format_url,
+                'format_id': format_field(height, template='%dp'),
+                'height': height,
+            })
+
+        for video_url, height in video_urls:
+            if not upload_date:
+                upload_date = self._search_regex(
+                    r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
+                if upload_date:
+                    upload_date = upload_date.replace('/', '')
+            if '/video/get_media' in video_url:
+                medias = self._download_json(video_url, video_id, fatal=False)
+                if isinstance(medias, list):
+                    for media in medias:
+                        if not isinstance(media, dict):
+                            continue
+                        video_url = url_or_none(media.get('videoUrl'))
+                        if not video_url:
+                            continue
+                        height = int_or_none(media.get('quality'))
+                        add_format(video_url, height)
+                continue
+            add_format(video_url)
+
+        # field_preference is unnecessary here, but kept for code-similarity with youtube-dl
+        self._sort_formats(
+            formats, field_preference=('height', 'width', 'fps', 'format_id'))
+
+        video_uploader = self._html_search_regex(
+            r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
+            webpage, 'uploader', default=None)
+
+        def extract_vote_count(kind, name):
+            return self._extract_count(
+                (r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind,
+                 r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind),
+                webpage, name)
+
+        view_count = self._extract_count(
+            r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
+        like_count = extract_vote_count('Up', 'like')
+        dislike_count = extract_vote_count('Down', 'dislike')
+        comment_count = self._extract_count(
+            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
+
+        def extract_list(meta_key):
+            div = self._search_regex(
+                r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
+                % meta_key, webpage, meta_key, default=None)
+            if div:
+                return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)]
+
+        info = self._search_json_ld(webpage, video_id, default={})
+        # description provided in JSON-LD is irrelevant
+        info['description'] = None
+
+        return merge_dicts({
+            'id': video_id,
+            'uploader': video_uploader,
+            'upload_date': upload_date,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'view_count': view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'comment_count': comment_count,
+            'formats': formats,
+            'age_limit': 18,
+            'tags': extract_list('tags'),
+            'categories': extract_list('categories'),
+            'cast': extract_list('pornstars'),
+            'subtitles': subtitles,
+        }, info)
+
+
+class PornHubPlaylistBaseIE(PornHubBaseIE):
+    def _extract_page(self, url):
+        return int_or_none(self._search_regex(
+            r'\bpage=(\d+)', url, 'page', default=None))
+
+    def _extract_entries(self, webpage, host):
+        # Only process container div with main playlist content skipping
+        # drop-down menu that uses similar pattern for videos (see
+        # https://github.com/ytdl-org/youtube-dl/issues/11594).
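A toy illustration of the comment above (standalone sketch; the markup is made up): the drop-down menu repeats the same viewkey anchors as the main listing, so scoping the scan to the container div avoids duplicate entries.

    import re

    page = ('<ul class="dropdown"><a href="/view_video.php?viewkey=abc123" title="Clip"></a></ul>'
            '<div class="container"><a href="/view_video.php?viewkey=abc123" title="Clip"></a></div>')

    # Everything from the container div onward, as in the _search_regex call below.
    container = re.search(r'(?s)(<div[^>]+class=["\']container.+)', page).group(1)

    print(len(re.findall(r'viewkey=([\da-z]+)', page)))       # 2 - menu + listing
    print(len(re.findall(r'viewkey=([\da-z]+)', container)))  # 1 - listing only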
+        container = self._search_regex(
+            r'(?s)(<div[^>]+class=["\']container.+)', webpage,
+            'container', default=webpage)
+
+        return [
+            self.url_result(
+                'http://www.%s/%s' % (host, video_url),
+                PornHubIE.ie_key(), video_title=title)
+            for video_url, title in orderedSet(re.findall(
+                r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
+                container))
+        ]
+
+
+class PornHubUserIE(PornHubPlaylistBaseIE):
+    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
+    _TESTS = [{
+        'url': 'https://www.pornhub.com/model/zoe_ph',
+        'playlist_mincount': 118,
+    }, {
+        'url': 'https://www.pornhub.com/pornstar/liz-vicious',
+        'info_dict': {
+            'id': 'liz-vicious',
+        },
+        'playlist_mincount': 118,
+    }, {
+        'url': 'https://www.pornhub.com/users/russianveet69',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/channels/povd',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
+        'only_matching': True,
+    }, {
+        # Unavailable via /videos page, but available with direct pagination
+        # on pornstar page (see [1]), requires premium
+        # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
+        'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
+        'only_matching': True,
+    }, {
+        # Same as before, multi page
+        'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
+        'only_matching': True,
+    }, {
+        'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        user_id = mobj.group('id')
+        videos_url = '%s/videos' % mobj.group('url')
+        page = self._extract_page(url)
+        if page:
+            videos_url = update_url_query(videos_url, {'page': page})
+        return self.url_result(
+            videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id)
+
+
+class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
+    @staticmethod
+    def _has_more(webpage):
+        return re.search(
+            r'''(?x)
+                <li[^>]+\bclass=["\']page_next|
+                <link[^>]+\brel=["\']next|
+                <button[^>]+\bid=["\']moreDataBtn
+            ''', webpage) is not None
+
+    def _entries(self, url, host, item_id):
+        page = self._extract_page(url)
+
+        VIDEOS = '/videos'
+
+        def download_page(base_url, num, fallback=False):
+            note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '')
+            return self._download_webpage(
+                base_url, item_id, note, query={'page': num})
+
+        def is_404(e):
+            return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
+
+        base_url = url
+        has_page = page is not None
+        first_page = page if has_page else 1
+        for page_num in (first_page, ) if has_page else itertools.count(first_page):
+            try:
+                try:
+                    webpage = download_page(base_url, page_num)
+                except ExtractorError as e:
+                    # Some sources may not be available via /videos page,
+                    # trying to fallback to main page pagination (see [1])
+                    # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
+                    if is_404(e) and page_num == first_page and VIDEOS in base_url:
+                        base_url = base_url.replace(VIDEOS, '')
+                        webpage = download_page(base_url, page_num, fallback=True)
+                    else:
+                        raise
+            except ExtractorError as e:
+                if is_404(e) and page_num != first_page:
+                    break
+                raise
+            page_entries = self._extract_entries(webpage, host)
+            if not page_entries:
+                break
+            for e in page_entries:
+                yield e
+            if not self._has_more(webpage):
+                break
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        host = mobj.group('host')
+        item_id = mobj.group('id')
+
+        self._login(host)
+
+        return self.playlist_result(self._entries(url, host, item_id), item_id)
+
+
+class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
+    _VALID_URL = r'https?://(?:[^/]+\.)?%s/(?!playlist/)(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
+    _TESTS = [{
+        'url': 'https://www.pornhub.com/model/zoe_ph/videos',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.pornhub.com/users/rushandlia/videos',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos',
+        'info_dict': {
+            'id': 'pornstar/jenny-blighe/videos',
+        },
+        'playlist_mincount': 149,
+    }, {
+        'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3',
+        'info_dict': {
+            'id': 'pornstar/jenny-blighe/videos',
+        },
+        'playlist_mincount': 40,
+    }, {
+        # default sorting as Top Rated Videos
+        'url': 'https://www.pornhub.com/channels/povd/videos',
+        'info_dict': {
+            'id': 'channels/povd/videos',
+        },
+        'playlist_mincount': 293,
+    }, {
+        # Top Rated Videos
+        'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
+        'only_matching': True,
+    }, {
+        # Most Recent Videos
+        'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
+        'only_matching': True,
+    }, {
+        # Most Viewed Videos
+        'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
+        'only_matching': True,
+    }, {
+        # Most Viewed Videos
+        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv',
+        'only_matching': True,
+    }, {
+        # Top Rated Videos
+        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr',
+        'only_matching': True,
+    }, {
+        # Longest Videos
+        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg',
+        'only_matching': True,
+    }, {
+        # Newest Videos
+        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/video',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/video?page=3',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/video/search?search=123',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/categories/teen',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/categories/teen?page=3',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/hd',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/hd?page=3',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/described-video',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/described-video?page=2',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn',
+        'only_matching': True,
+    }, {
+        'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph/videos',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return (False
+                if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url)
+                else super(PornHubPagedVideoListIE, cls).suitable(url))
+
+
+class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
+    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE
+    _TESTS = [{
+        'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
+        'info_dict': {
+            'id': 'jenny-blighe',
+        },
+        'playlist_mincount': 129,
+    }, {
+        'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
+        'only_matching': True,
+    }, {
+        'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/pornstar/jenny-blighe/videos/upload',
+        'only_matching': True,
+    }]
+
+
+class PornHubPlaylistIE(PornHubPlaylistBaseIE):
+    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/playlist/(?P<id>[^/?#&]+))' % PornHubBaseIE._PORNHUB_HOST_RE
+    _TESTS = [{
+        'url': 'https://www.pornhub.com/playlist/44121572',
+        'info_dict': {
+            'id': '44121572',
+        },
+        'playlist_count': 77,
+    }, {
+        'url': 'https://www.pornhub.com/playlist/4667351',
+        'only_matching': True,
+    }, {
+        'url': 'https://de.pornhub.com/playlist/4667351',
+        'only_matching': True,
+    }, {
+        'url': 'https://de.pornhub.com/playlist/4667351?page=2',
+        'only_matching': True,
+    }]
+
+    def _entries(self, url, host, item_id):
+        webpage = self._download_webpage(url, item_id, 'Downloading page 1')
+        playlist_id = self._search_regex(r'var\s+playlistId\s*=\s*"([^"]+)"', webpage, 'playlist_id')
+        video_count = int_or_none(
+            self._search_regex(r'var\s+itemsCount\s*=\s*([0-9]+)\s*\|\|', webpage, 'video_count'))
+        token = self._search_regex(r'var\s+token\s*=\s*"([^"]+)"', webpage, 'token')
+        page_count = math.ceil((video_count - 36) / 40.) + 1
+        page_entries = self._extract_entries(webpage, host)
+
+        def download_page(page_num):
+            note = 'Downloading page {}'.format(page_num)
+            page_url = 'https://www.{}/playlist/viewChunked'.format(host)
+            return self._download_webpage(page_url, item_id, note, query={
+                'id': playlist_id,
+                'page': page_num,
+                'token': token,
+            })
+
+        for page_num in range(1, page_count + 1):
+            if page_num > 1:
+                webpage = download_page(page_num)
+                page_entries = self._extract_entries(webpage, host)
+            if not page_entries:
+                break
+            for e in page_entries:
+                yield e
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        host = mobj.group('host')
+        item_id = mobj.group('id')
+
+        self._login(host)
+
+        return self.playlist_result(self._entries(mobj.group('url'), host, item_id), item_id)
diff --git a/plugins/youtube_download/yt_dlp/extractor/pornotube.py b/plugins/youtube_download/yt_dlp/extractor/pornotube.py
new file mode 100644
index 0000000..1b5b9a3
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/pornotube.py
@@ -0,0 +1,85 @@
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class PornotubeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:\w+\.)?pornotube\.com/(?:[^?#]*?)/video/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.pornotube.com/orientation/straight/video/4964/title/weird-hot-and-wet-science',
+        'md5': '60fc5a4f0d93a97968fc7999d98260c9',
+        'info_dict': {
+            'id': '4964',
+            'ext': 'mp4',
+            'upload_date': '20141203',
+            'title': 'Weird Hot and Wet Science',
+            'description': 'md5:a8304bef7ef06cb4ab476ca6029b01b0',
+            'categories': ['Adult Humor', 'Blondes'],
+            'uploader': 'Alpha Blue Archives',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1417582800,
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        token = self._download_json(
+            'https://api.aebn.net/auth/v2/origins/authenticate',
+            video_id, note='Downloading token',
+            data=json.dumps({'credentials': 'Clip Application'}).encode('utf-8'),
+            headers={
+                'Content-Type': 'application/json',
+                'Origin': 'http://www.pornotube.com',
+            })['tokenKey']
+
+        video_url = self._download_json(
+            'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id,
+            video_id, note='Downloading delivery information',
+            headers={'Authorization': token})['mediaUrl']
+
+        FIELDS = (
+            'title', 'description', 'startSecond', 'endSecond', 'publishDate',
+            'studios{name}', 'categories{name}', 'movieId', 'primaryImageNumber'
+        )
+
+        info = self._download_json(
+            'https://api.aebn.net/content/v2/clips/%s?fields=%s'
+            % (video_id, ','.join(FIELDS)), video_id,
+            note='Downloading metadata',
+            headers={'Authorization': token})
+
+        if isinstance(info, list):
+            info = info[0]
+
+        title = info['title']
+
+        timestamp = int_or_none(info.get('publishDate'), scale=1000)
+        uploader = info.get('studios', [{}])[0].get('name')
+        movie_id = info.get('movieId')
+        primary_image_number = info.get('primaryImageNumber')
+        thumbnail = None
+        if movie_id and primary_image_number:
+            thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % (
+                movie_id, movie_id, primary_image_number)
+        start = int_or_none(info.get('startSecond'))
+        end = int_or_none(info.get('endSecond'))
+        duration = end - start if start and end else None
+        categories = [c['name'] for c in info.get('categories', []) if c.get('name')]
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': info.get('description'),
+            'duration': duration,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'age_limit': 18,
+        }
diff --git a/plugins/youtube_download/yt_dlp/extractor/pornovoisines.py b/plugins/youtube_download/yt_dlp/extractor/pornovoisines.py
new file mode 100644
index 0000000..18459fc
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/pornovoisines.py
@@ -0,0 +1,107 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    float_or_none,
+    unified_strdate,
+)
+
+
+class PornoVoisinesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/videos/show/(?P<id>\d+)/(?P<display_id>[^/.]+)'
+
+    _TEST = {
+        'url': 'http://www.pornovoisines.com/videos/show/919/recherche-appartement.html',
+        'md5': '6f8aca6a058592ab49fe701c8ba8317b',
+        'info_dict': {
+            'id': '919',
+            'display_id': 'recherche-appartement',
+            'ext': 'mp4',
+            'title': 'Recherche appartement',
+            'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'upload_date': '20140925',
+            'duration': 120,
+            'view_count': int,
+            'average_rating': float,
+            'categories': ['Débutante', 'Débutantes', 'Scénario', 'Sodomie'],
+            'age_limit': 18,
+            'subtitles': {
+                'fr': [{
+                    'ext': 'vtt',
+                }]
+            },
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        settings_url = self._download_json(
+            'http://www.pornovoisines.com/api/video/%s/getsettingsurl/' % video_id,
+            video_id, note='Getting settings URL')['video_settings_url']
+        settings = self._download_json(settings_url, video_id)['data']
+
+        formats = []
+        for kind, data in settings['variants'].items():
+            if kind == 'HLS':
+                formats.extend(self._extract_m3u8_formats(
+                    data, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls'))
+            elif kind == 'MP4':
+                for item in data:
+                    formats.append({
+                        'url': item['url'],
+                        'height': item.get('height'),
+                        'bitrate': item.get('bitrate'),
+                    })
+        self._sort_formats(formats)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+
+        # The webpage has a bug - there's no space between "thumb" and src=
+        thumbnail = self._html_search_regex(
+            r'<img[^>]+class=([\'"])thumb\1[^>]*src=([\'"])(?P<url>[^"]+)\2',
+            webpage, 'thumbnail', fatal=False, group='url')
+
+        upload_date = unified_strdate(self._search_regex(
+            r'Le\s*<b>([\d/]+)', webpage, 'upload date', fatal=False))
+        duration = settings.get('main', {}).get('duration')
+        view_count = int_or_none(self._search_regex(
+            r'(\d+) vues', webpage, 'view count', fatal=False))
+        average_rating = self._search_regex(
+            r'Note\s*:\s*(\d+(?:,\d+)?)', webpage, 'average rating', fatal=False)
+        if average_rating:
+            average_rating = float_or_none(average_rating.replace(',', '.'))
+
+        categories = self._html_search_regex(
+            r'(?s)Catégories\s*:\s*<b>(.+?)</b>', webpage, 'categories', fatal=False)
+        if categories:
+            categories = [category.strip() for category in categories.split(',')]
+
+        subtitles = {'fr': [{
+            'url': subtitle,
+        } for subtitle in settings.get('main', {}).get('vtt_tracks', {}).values()]}
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'formats': formats,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'duration': duration,
+            'view_count': view_count,
+            'average_rating': average_rating,
+            'categories': categories,
+            'age_limit': 18,
+            'subtitles': subtitles,
+        }
diff --git a/plugins/youtube_download/yt_dlp/extractor/pornoxo.py b/plugins/youtube_download/yt_dlp/extractor/pornoxo.py
new file mode 100644
index 0000000..489dc2b
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/pornoxo.py
@@ -0,0 +1,57 @@
+from __future__ import unicode_literals
+
+
+from .common import InfoExtractor
+from ..utils import (
+    str_to_int,
+)
+
+
+class PornoXOIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
+    _TEST = {
+        'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html',
+        'md5': '582f28ecbaa9e6e24cb90f50f524ce87',
+        'info_dict': {
+            'id': '7564',
+            'ext': 'flv',
+            'title': 'Striptease From Sexy Secretary!',
+            'display_id': 'striptease-from-sexy-secretary',
+            'description': 'md5:0ee35252b685b3883f4a1d38332f9980',
+            'categories': list,  # NSFW
+            'thumbnail': r're:https?://.*\.jpg$',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        video_id, display_id = mobj.groups()
+
+        webpage = self._download_webpage(url, video_id)
+        video_data = self._extract_jwplayer_data(webpage, video_id, require_title=False)
+
+        title = self._html_search_regex(
+            r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title')
+
+        view_count = str_to_int(self._html_search_regex(
+            r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False))
+
+        categories_str = self._html_search_regex(
+            r'<meta name="description" content=".*featuring\s*([^"]+)"',
+            webpage, 'categories', fatal=False)
+        categories = (
+            None if categories_str is None
+            else categories_str.split(','))
+
+        video_data.update({
+            'id': video_id,
+            'title': title,
+            'display_id': display_id,
+            'description': self._html_search_meta('description', webpage),
+            'categories': categories,
+            'view_count': view_count,
+            'age_limit': 18,
+        })
+
+        return video_data
diff --git a/plugins/youtube_download/yt_dlp/extractor/presstv.py b/plugins/youtube_download/yt_dlp/extractor/presstv.py
new file mode 100644
index 0000000..bfb2eb7
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/presstv.py
@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+
+from .common import InfoExtractor
+from ..utils import remove_start
+
+
+class PressTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?presstv\.ir/[^/]+/(?P<y>\d+)/(?P<m>\d+)/(?P<d>\d+)/(?P<id>\d+)/(?P<display_id>[^/]+)?'
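The y/m/d groups captured by `_VALID_URL` above are later folded into yt-dlp's YYYYMMDD `upload_date` field in `_real_extract`. A standalone sketch of that conversion (the sample path segment is made up):

    import re

    m = re.match(r'(?P<y>\d+)/(?P<m>\d+)/(?P<d>\d+)', '2016/04/09')
    upload_date = '%04d%02d%02d' % (int(m.group('y')), int(m.group('m')), int(m.group('d')))
    print(upload_date)  # 20160409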
+
+    _TEST = {
+        'url': 'http://www.presstv.ir/Detail/2016/04/09/459911/Australian-sewerage-treatment-facility-/',
+        'md5': '5d7e3195a447cb13e9267e931d8dd5a5',
+        'info_dict': {
+            'id': '459911',
+            'display_id': 'Australian-sewerage-treatment-facility-',
+            'ext': 'mp4',
+            'title': 'Organic mattresses used to clean waste water',
+            'upload_date': '20160409',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'description': 'md5:20002e654bbafb6908395a5c0cfcd125'
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id') or video_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        # extract video URL from webpage
+        video_url = self._hidden_inputs(webpage)['inpPlayback']
+
+        # build list of available formats
+        # specified in http://www.presstv.ir/Scripts/playback.js
+        base_url = 'http://192.99.219.222:82/presstv'
+        _formats = [
+            (180, '_low200.mp4'),
+            (360, '_low400.mp4'),
+            (720, '_low800.mp4'),
+            (1080, '.mp4')
+        ]
+
+        formats = [{
+            'url': base_url + video_url[:-4] + extension,
+            'format_id': '%dp' % height,
+            'height': height,
+        } for height, extension in _formats]
+
+        # extract video metadata
+        title = remove_start(
+            self._html_search_meta('title', webpage, fatal=True), 'PressTV-')
+
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._og_search_description(webpage)
+
+        upload_date = '%04d%02d%02d' % (
+            int(mobj.group('y')),
+            int(mobj.group('m')),
+            int(mobj.group('d')),
+        )
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'description': description
+        }
diff --git a/plugins/youtube_download/yt_dlp/extractor/projectveritas.py b/plugins/youtube_download/yt_dlp/extractor/projectveritas.py
new file mode 100644
index 0000000..9e9867b
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/projectveritas.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    traverse_obj,
+    unified_strdate,
+)
+
+
+class ProjectVeritasIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://www.projectveritas.com/news/exclusive-inside-the-new-york-and-new-jersey-hospitals-battling-coronavirus/',
+        'info_dict': {
+            'id': '51910aab-365a-5cf1-88f2-8eb1ca5fd3c6',
+            'ext': 'mp4',
+            'title': 'Exclusive: Inside The New York and New Jersey Hospitals Battling Coronavirus',
+            'upload_date': '20200327',
+            'thumbnail': 'md5:6076477fe50b03eb8708be9415e18e1c',
+        }
+    }, {
+        'url': 'https://www.projectveritas.com/video/ilhan-omar-connected-ballot-harvester-in-cash-for-ballots-scheme-car-is-full/',
+        'info_dict': {
+            'id': 'c5aab304-a56b-54b1-9f0b-03b77bc5f2f6',
+            'ext': 'mp4',
+            'title': 'Ilhan Omar connected Ballot Harvester in cash-for-ballots scheme: "Car is full" of absentee ballots',
+            'upload_date': '20200927',
+            'thumbnail': 'md5:194b8edf0e2ba64f25500ff4378369a4',
+        }
+    }]
+
+    def _real_extract(self, url):
+        id, type = self._match_valid_url(url).group('id', 'type')
+        api_url = f'https://www.projectveritas.com/page-data/{type}/{id}/page-data.json'
+        data_json = self._download_json(api_url, id)['result']['data']
+        main_data = traverse_obj(data_json, 'video', 'post')
+        video_id = main_data['id']
+        thumbnail = traverse_obj(main_data, ('image', 'ogImage', 'src'))
+        mux_asset = traverse_obj(main_data,
+                                 'muxAsset', ('body', 'json', 'content', ..., 'data', 'target', 'fields', 'muxAsset'),
+                                 get_all=False, expected_type=dict)
+        if not mux_asset:
+            raise ExtractorError('No video on the provided url.', expected=True)
+        playback_id = traverse_obj(mux_asset, 'playbackId', ('en-US', 'playbackId'))
+        formats = self._extract_m3u8_formats(f'https://stream.mux.com/{playback_id}.m3u8', video_id)
+        self._sort_formats(formats)
+        return {
+            'id': video_id,
+            'title': main_data['title'],
+            'upload_date': unified_strdate(main_data.get('date')),
+            'thumbnail': thumbnail.replace('//', ''),
+            'formats': formats,
+        }
diff --git a/plugins/youtube_download/yt_dlp/extractor/prosiebensat1.py b/plugins/youtube_download/yt_dlp/extractor/prosiebensat1.py
new file mode 100644
index 0000000..e89bbfd
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/prosiebensat1.py
@@ -0,0 +1,500 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from hashlib import sha1
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    float_or_none,
+    int_or_none,
+    merge_dicts,
+    unified_strdate,
+)
+
+
+class ProSiebenSat1BaseIE(InfoExtractor):
+    _GEO_BYPASS = False
+    _ACCESS_ID = None
+    _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear'
+    _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get'
+
+    def _extract_video_info(self, url, clip_id):
+        client_location = url
+
+        video = self._download_json(
+            'http://vas.sim-technik.de/vas/live/v2/videos',
+            clip_id, 'Downloading videos JSON', query={
+                'access_token': self._TOKEN,
+                'client_location': client_location,
+                'client_name': self._CLIENT_NAME,
+                'ids': clip_id,
+            })[0]
+
+        if not self.get_param('allow_unplayable_formats') and video.get('is_protected') is True:
+            self.report_drm(clip_id)
+
+        formats = []
+        if self._ACCESS_ID:
+            raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID
+            protocols = self._download_json(
+                self._V4_BASE_URL + 'protocols', clip_id,
+                'Downloading protocols JSON',
+                headers=self.geo_verification_headers(), query={
+                    'access_id': self._ACCESS_ID,
+                    'client_token': sha1((raw_ct).encode()).hexdigest(),
+                    'video_id': clip_id,
+                }, fatal=False, expected_status=(403,)) or {}
+            error = protocols.get('error') or {}
+            if error.get('title') == 'Geo check failed':
+                self.raise_geo_restricted(countries=['AT', 'CH', 'DE'])
+            server_token = protocols.get('server_token')
+            if server_token:
+                urls = (self._download_json(
+                    self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={
+                        'access_id': self._ACCESS_ID,
+                        'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
+                        'protocols': self._SUPPORTED_PROTOCOLS,
+                        'server_token': server_token,
+                        'video_id': clip_id,
+                    }, fatal=False) or {}).get('urls') or {}
+                for protocol, variant in urls.items():
+                    source_url = variant.get('clear', {}).get('url')
+                    if not source_url:
+                        continue
+                    if protocol == 'dash':
+                        formats.extend(self._extract_mpd_formats(
+                            source_url, clip_id, mpd_id=protocol, fatal=False))
+                    elif protocol == 'hls':
+                        formats.extend(self._extract_m3u8_formats(
+                            source_url, clip_id, 'mp4', 'm3u8_native',
+                            m3u8_id=protocol, fatal=False))
+                    else:
+                        formats.append({
+                            'url': source_url,
+                            'format_id': protocol,
+                        })
+        if not formats:
+            source_ids = [compat_str(source['id']) for source in video['sources']]
+
+            client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+
+            sources = self._download_json(
+                'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
+                clip_id, 'Downloading sources JSON', query={
+                    'access_token': self._TOKEN,
+                    'client_id': client_id,
+                    'client_location': client_location,
+                    'client_name': self._CLIENT_NAME,
+                })
+            server_id = sources['server_id']
+
+            def fix_bitrate(bitrate):
+                bitrate = int_or_none(bitrate)
+                if not bitrate:
+                    return None
+                return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
+
+            for source_id in source_ids:
+                client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+                urls = self._download_json(
+                    'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
+                    clip_id, 'Downloading urls JSON', fatal=False, query={
+                        'access_token': self._TOKEN,
+                        'client_id': client_id,
+                        'client_location': client_location,
+                        'client_name': self._CLIENT_NAME,
+                        'server_id': server_id,
+                        'source_ids': source_id,
+                    })
+                if not urls:
+                    continue
+                if urls.get('status_code') != 0:
+                    raise ExtractorError('This video is unavailable', expected=True)
+                urls_sources = urls['sources']
+                if isinstance(urls_sources, dict):
+                    urls_sources = urls_sources.values()
+                for source in urls_sources:
+                    source_url = source.get('url')
+                    if not source_url:
+                        continue
+                    protocol = source.get('protocol')
+                    mimetype = source.get('mimetype')
+                    if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
+                        formats.extend(self._extract_f4m_formats(
+                            source_url, clip_id, f4m_id='hds', fatal=False))
+                    elif mimetype == 'application/x-mpegURL':
+                        formats.extend(self._extract_m3u8_formats(
+                            source_url, clip_id, 'mp4', 'm3u8_native',
+                            m3u8_id='hls', fatal=False))
+                    elif mimetype == 'application/dash+xml':
+                        formats.extend(self._extract_mpd_formats(
+                            source_url, clip_id, mpd_id='dash', fatal=False))
+                    else:
+                        tbr = fix_bitrate(source['bitrate'])
+                        if protocol in ('rtmp', 'rtmpe'):
+                            mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
+                            if not mobj:
+                                continue
+                            path = mobj.group('path')
+                            mp4colon_index = path.rfind('mp4:')
+                            app = path[:mp4colon_index]
+                            play_path = path[mp4colon_index:]
+                            formats.append({
+                                'url': '%s/%s' % (mobj.group('url'), app),
+                                'app': app,
+                                'play_path': play_path,
+                                'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
+                                'page_url': 'http://www.prosieben.de',
+                                'tbr': tbr,
+                                'ext': 'flv',
+                                'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
+                            })
+                        else:
+                            formats.append({
+                                'url': source_url,
+                                'tbr': tbr,
+                                'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
+                            })
+        self._sort_formats(formats)
+
+        return {
+            'duration': float_or_none(video.get('duration')),
+            'formats': formats,
+        }
+
+
+class ProSiebenSat1IE(ProSiebenSat1BaseIE):
+    IE_NAME = 'prosiebensat1'
+    IE_DESC = 'ProSiebenSat.1 Digital'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?
+                        (?:
+                            (?:beta\.)?
+                            (?:
+                                prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|advopedia
+                            )\.(?:de|at|ch)|
+                            ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
+                        )
+                        /(?P<id>.+)
+                    '''
+
+    _TESTS = [
+        {
+            # Tests changes introduced in https://github.com/ytdl-org/youtube-dl/pull/6242
+            # in response to fixing https://github.com/ytdl-org/youtube-dl/issues/6215:
+            # - malformed f4m manifest support
+            # - proper handling of URLs starting with `https?://` in 2.0 manifests
+            # - recursive child f4m manifests extraction
+            'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
+            'info_dict': {
+                'id': '2104602',
+                'ext': 'mp4',
+                'title': 'CIRCUS HALLIGALLI - Episode 18 - Staffel 2',
+                'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
+                'upload_date': '20131231',
+                'duration': 5845.04,
+                'series': 'CIRCUS HALLIGALLI',
+                'season_number': 2,
+                'episode': 'Episode 18 - Staffel 2',
+                'episode_number': 18,
+            },
+        },
+        {
+            'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html',
+            'info_dict': {
+                'id': '2570327',
+                'ext': 'mp4',
+                'title': 'Lady-Umstyling für Audrina',
+                'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d',
+                'upload_date': '20131014',
+                'duration': 606.76,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+            'skip': 'Seems to be broken',
+        },
+        {
+            'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge',
+            'info_dict': {
+                'id': '2429369',
+                'ext': 'mp4',
+                'title': 'Countdown für die Autowerkstatt',
+                'description': 'md5:809fc051a457b5d8666013bc40698817',
+                'upload_date': '20140223',
+                'duration': 2595.04,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+            'skip': 'This video is unavailable',
+        },
+        {
+            'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip',
+            'info_dict': {
+                'id': '2904997',
+                'ext': 'mp4',
+                'title': 'Sexy laufen in Ugg Boots',
+                'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6',
+                'upload_date': '20140122',
+                'duration': 245.32,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+            'skip': 'This video is unavailable',
+        },
+        {
+            'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip',
+            'info_dict': {
+                'id': '2906572',
+                'ext': 'mp4',
+                'title': 'Im Interview: Kai Wiesinger',
+                'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
+                'upload_date': '20140203',
+                'duration': 522.56,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+            'skip': 'This video is unavailable',
+        },
+        {
+            'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge',
+            'info_dict': {
+                'id': '2992323',
+                'ext': 'mp4',
+                'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
+                'description': 'md5:2669cde3febe9bce13904f701e774eb6',
+                'upload_date': '20141014',
+                'duration': 2410.44,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+            'skip': 'This video is unavailable',
+        },
+        {
+            'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge',
+            'info_dict': {
+                'id': '3004256',
+                'ext': 'mp4',
+                'title': 'Schalke: Tönnies möchte Raul zurück',
+                'description': 'md5:4b5b271d9bcde223b54390754c8ece3f',
+                'upload_date': '20140226',
+                'duration': 228.96,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+            'skip': 'This video is unavailable',
+        },
+        {
+            'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
+            'info_dict': {
+                'id': '2572814',
+                'ext': 'mp4',
+                'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man',
+                'description': 'md5:6ddb02b0781c6adf778afea606652e38',
+                'timestamp': 1382041620,
+                'upload_date': '20131017',
+                'duration': 469.88,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.fem.com/videos/beauty-lifestyle/kurztrips-zum-valentinstag',
+            'info_dict': {
+                'id': '2156342',
+                'ext': 'mp4',
+                'title': 'Kurztrips zum Valentinstag',
+                'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
+                'duration': 307.24,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
+            'info_dict': {
+                'id': '439664',
+                'title': 'Episode 8 - Ganze Folge - Playlist',
+                'description': 'md5:63b8963e71f481782aeea877658dec84',
+            },
+            'playlist_count': 2,
+            'skip': 'This video is unavailable',
+        },
+        {
+            # title in <h2 class="subtitle">
+            'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip',
+            'info_dict': {
+                'id': '4895826',
+                'ext': 'mp4',
+                'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe',
+                'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9',
+                'upload_date': '20170302',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'skip': 'geo restricted to Germany',
+        },
+        {
+            # geo restricted to Germany
+            'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge',
+            'only_matching': True,
+        },
+        {
+            # geo restricted to Germany
+            'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
+            'only_matching': True,
+        },
+        {
+            # geo restricted to Germany
+            'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage',
+            'only_matching': True,
+        },
+    ]
+
+    _TOKEN = 'prosieben'
+    _SALT = '01!8d8F_)r9]4s[qeuXfP%'
+    _CLIENT_NAME = 'kolibri-2.0.19-splec4'
+
+    _ACCESS_ID = 'x_prosiebenmaxx-de'
+    _ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag'
+    _IV = 'Aeluchoc6aevechuipiexeeboowedaok'
+
+    _CLIPID_REGEXES = [
+        r'"clip_id"\s*:\s+"(\d+)"',
+        r'clipid: "(\d+)"',
+        r'clip[iI]d=(\d+)',
+        r'clip[iI][dD]\s*=\s*["\'](\d+)',
+        r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
+        r'proMamsId"\s*:\s*"(\d+)',
+        r'proMamsId"\s*:\s*"(\d+)',
+    ]
+    _TITLE_REGEXES = [
+        r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
+        r'<header class="clearfix">\s*<h3>(.+?)</h3>',
+        r'<!-- start video -->\s*<h1>(.+?)</h1>',
+        r'<h1 class="att-name">\s*(.+?)</h1>',
+        r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
+        r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
+        r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
+        r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>',
+    ]
+    _DESCRIPTION_REGEXES = [
+        r'<p itemprop="description">\s*(.+?)</p>',
+        r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
+        r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
+        r'<p class="att-description">\s*(.+?)\s*</p>',
+        r'<p class="video-description" itemprop="description">\s*(.+?)</p>',
+        r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
+    ]
+    _UPLOAD_DATE_REGEXES = [
+        r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
+        r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
+        r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
+        r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
+    ]
+    _PAGE_TYPE_REGEXES = [
+        r'<meta name="page_type" content="([^"]+)">',
+        r"'itemType'\s*:\s*'([^']*)'",
+    ]
+    _PLAYLIST_ID_REGEXES = [
+        r'content[iI]d=(\d+)',
+        r"'itemId'\s*:\s*'([^']*)'",
+    ]
+    _PLAYLIST_CLIP_REGEXES = [
+        r'(?s)data-qvt=.+?<a href="([^"]+)"',
+    ]
+
+    def _extract_clip(self, url, webpage):
+        clip_id = self._html_search_regex(
+            self._CLIPID_REGEXES, webpage, 'clip id')
+        title = self._html_search_regex(
+            self._TITLE_REGEXES, webpage, 'title',
+            default=None) or self._og_search_title(webpage)
+        info = self._extract_video_info(url, clip_id)
+        description = self._html_search_regex(
+            self._DESCRIPTION_REGEXES, webpage, 'description', default=None)
+        if description is None:
+            description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+        upload_date = unified_strdate(
+            self._html_search_meta('og:published_time', webpage,
+                                   'upload date', default=None)
+            or self._html_search_regex(self._UPLOAD_DATE_REGEXES,
+                                       webpage, 'upload date', default=None))
+
+        json_ld = self._search_json_ld(webpage, clip_id, default={})
+
+        return merge_dicts(info, {
+            'id': clip_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+        }, json_ld)
+
+    def _extract_playlist(self, url, webpage):
+        playlist_id = self._html_search_regex(
+            self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
+        playlist = self._parse_json(
+            self._search_regex(
+                r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script',
+                webpage, 'playlist'),
+            playlist_id)
+        entries = []
+        for item in playlist:
+            clip_id = item.get('id') or item.get('upc')
+            if not clip_id:
+                continue
+            info = self._extract_video_info(url, clip_id)
+            info.update({
+                'id': clip_id,
+                'title': item.get('title') or item.get('teaser', {}).get('headline'),
+                'description': item.get('teaser', {}).get('description'),
+                'thumbnail': item.get('poster'),
+                'duration': float_or_none(item.get('duration')),
+                'series': item.get('tvShowTitle'),
+                'uploader': item.get('broadcastPublisher'),
+            })
+            entries.append(info)
+        return self.playlist_result(entries, playlist_id)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        page_type = self._search_regex(
+            self._PAGE_TYPE_REGEXES, webpage,
+            'page type', default='clip').lower()
+        if page_type == 'clip':
+            return self._extract_clip(url, webpage)
+        elif page_type == 'playlist':
+            return self._extract_playlist(url, webpage)
+        else:
+            raise ExtractorError(
+                'Unsupported page type %s' % page_type, expected=True)
diff --git a/plugins/youtube_download/yt_dlp/extractor/prx.py b/plugins/youtube_download/yt_dlp/extractor/prx.py
new file mode 100644
index 0000000..80561b8
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/prx.py
@@ -0,0 +1,431 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+from .common import InfoExtractor, SearchInfoExtractor
+from ..utils import (
+    urljoin,
+    traverse_obj,
+    int_or_none,
+    mimetype2ext,
+    clean_html,
+    url_or_none,
+    unified_timestamp,
+    str_or_none,
+)
+
+
+class PRXBaseIE(InfoExtractor):
+    PRX_BASE_URL_RE = r'https?://(?:(?:beta|listen)\.)?prx.org/%s'
+
+    def _call_api(self, item_id, path, query=None, fatal=True, note='Downloading CMS API JSON'):
+        return self._download_json(
+            urljoin('https://cms.prx.org/api/v1/', path), item_id, query=query, fatal=fatal, note=note)
+
+    @staticmethod
+    def _get_prx_embed_response(response, section):
+        return traverse_obj(response, ('_embedded', f'prx:{section}'))
+
+    @staticmethod
+    def _extract_file_link(response):
+        return url_or_none(traverse_obj(
+            response, ('_links', 'enclosure', 'href'), expected_type=str))
+
+    @classmethod
+    def _extract_image(cls, image_response):
+        if not isinstance(image_response, dict):
+            return
+        return {
+            'id': str_or_none(image_response.get('id')),
+            'filesize': image_response.get('size'),
+            'width': image_response.get('width'),
+            'height': image_response.get('height'),
+            'url': cls._extract_file_link(image_response)
+        }
+
+    @classmethod
+    def _extract_base_info(cls, response):
+        if not isinstance(response, dict):
+            return
+        item_id = str_or_none(response.get('id'))
+        if not item_id:
+            return
+        thumbnail_dict = cls._extract_image(cls._get_prx_embed_response(response, 'image'))
+        description = (
+            clean_html(response.get('description'))
+            or response.get('shortDescription'))
+        return {
+            'id': item_id,
+            'title': response.get('title') or item_id,
+            'thumbnails': [thumbnail_dict] if thumbnail_dict else None,
+            'description': description,
+            'release_timestamp': unified_timestamp(response.get('releasedAt')),
+            'timestamp': unified_timestamp(response.get('createdAt')),
+            'modified_timestamp': unified_timestamp(response.get('updatedAt')),
+            'duration': int_or_none(response.get('duration')),
+            'tags': response.get('tags'),
+            'episode_number': int_or_none(response.get('episodeIdentifier')),
+            'season_number': int_or_none(response.get('seasonIdentifier'))
+        }
+
+    @classmethod
+    def _extract_series_info(cls, series_response):
+        base_info = cls._extract_base_info(series_response)
+        if not base_info:
+            return
+        account_info = cls._extract_account_info(
+            cls._get_prx_embed_response(series_response, 'account')) or {}
+        return {
+            **base_info,
+            'channel_id': account_info.get('channel_id'),
+            'channel_url': account_info.get('channel_url'),
+            'channel': account_info.get('channel'),
+            'series': base_info.get('title'),
+            'series_id': base_info.get('id'),
+        }
+
+    @classmethod
+    def _extract_account_info(cls, account_response):
+        base_info = cls._extract_base_info(account_response)
+        if not base_info:
+            return
+        name = account_response.get('name')
+        return {
+            **base_info,
+            'title': name,
+            'channel_id': base_info.get('id'),
+            'channel_url': 'https://beta.prx.org/accounts/%s' % base_info.get('id'),
+            'channel': name,
+        }
+
+    @classmethod
+    def _extract_story_info(cls, story_response):
+        base_info = cls._extract_base_info(story_response)
+        if not base_info:
+            return
+        series = cls._extract_series_info(
+            cls._get_prx_embed_response(story_response, 'series')) or {}
+        account = cls._extract_account_info(
+            cls._get_prx_embed_response(story_response, 'account')) or {}
+        return {
+            **base_info,
+            'series': series.get('series'),
+            'series_id': series.get('series_id'),
+            'channel_id': account.get('channel_id'),
+            'channel_url': account.get('channel_url'),
+            'channel': account.get('channel')
+        }
+
+    def _entries(self, item_id, endpoint, entry_func, query=None):
+        """
+        Extract entries from paginated list API
+        @param entry_func: Function to generate entry from response item
+        """
+        total = 0
+        for page in itertools.count(1):
+            response = self._call_api(f'{item_id}: page {page}', endpoint, query={
+                **(query or {}),
+                'page': page,
+                'per': 100
+            })
+            items = self._get_prx_embed_response(response, 'items')
+            if not response or not items:
+                break
+
+            yield from filter(None, map(entry_func, items))
+
+            total += response['count']
+            if total >= response['total']:
+                break
+
+    def _story_playlist_entry(self, response):
+        story = self._extract_story_info(response)
+        if not story:
+            return
+        story.update({
+            '_type': 'url',
+            'url': 'https://beta.prx.org/stories/%s' % story['id'],
+            'ie_key': PRXStoryIE.ie_key()
+        })
+        return story
+
+    def _series_playlist_entry(self, response):
+        series = self._extract_series_info(response)
+        if not series:
+            return
+        series.update({
+            '_type': 'url',
+            'url': 'https://beta.prx.org/series/%s' % series['id'],
+            'ie_key': PRXSeriesIE.ie_key()
+        })
+        return series
+
+
+class PRXStoryIE(PRXBaseIE):
+    _VALID_URL = PRXBaseIE.PRX_BASE_URL_RE % r'stories/(?P<id>\d+)'
+
+    _TESTS = [
+        {
+            # Story with season and episode details
+            'url': 'https://beta.prx.org/stories/399200',
+            'info_dict': {
+                'id': '399200',
+                'title': 'Fly Me To The Moon',
+                'description': 'md5:43230168390b95d3322048d8a56bf2bb',
+                'release_timestamp': 1640250000,
+                'timestamp': 1640208972,
+                'modified_timestamp': 1641318202,
+                'duration': 1004,
+                'tags': 'count:7',
+                'episode_number': 8,
+                'season_number': 5,
+                'series': 'AirSpace',
+                'series_id': '38057',
+                'channel_id': '220986',
+                'channel_url': 'https://beta.prx.org/accounts/220986',
+                'channel': 'Air and Space Museum',
+            },
+            'playlist': [{
+                'info_dict': {
+                    'id': '399200_part1',
+                    'title': 'Fly Me To The Moon',
+                    'description': 'md5:43230168390b95d3322048d8a56bf2bb',
+                    'release_timestamp': 1640250000,
+                    'timestamp': 1640208972,
+                    'modified_timestamp': 1641318202,
+                    'duration': 530,
+                    'tags': 'count:7',
+                    'episode_number': 8,
+                    'season_number': 5,
+                    'series': 'AirSpace',
+                    'series_id': '38057',
+                    'channel_id': '220986',
+                    'channel_url': 'https://beta.prx.org/accounts/220986',
+                    'channel': 'Air and Space Museum',
+                    'ext': 'mp3',
+                    'upload_date': '20211222',
+                    'episode': 'Episode 8',
+                    'release_date': '20211223',
+                    'season': 'Season 5',
+                    'modified_date': '20220104'
+                }
+            }, {
+                'info_dict': {
+                    'id': '399200_part2',
+                    'title': 'Fly Me To The Moon',
+                    'description': 'md5:43230168390b95d3322048d8a56bf2bb',
+                    'release_timestamp': 1640250000,
+                    'timestamp': 1640208972,
+                    'modified_timestamp': 1641318202,
+                    'duration': 474,
+                    'tags': 'count:7',
+                    'episode_number': 8,
+                    'season_number': 5,
+                    'series': 'AirSpace',
+                    'series_id': '38057',
+                    'channel_id': '220986',
+                    'channel_url': 'https://beta.prx.org/accounts/220986',
+                    'channel': 'Air and Space Museum',
+                    'ext': 'mp3',
+                    'upload_date': '20211222',
+                    'episode': 'Episode 8',
+                    'release_date': '20211223',
+                    'season': 'Season 5',
+                    'modified_date': '20220104'
+                }
+            }
+
+            ]
+        }, {
+            # Story with only split audio
+            'url': 'https://beta.prx.org/stories/326414',
+            'info_dict': {
+                'id': '326414',
+                'title': 'Massachusetts v EPA',
+                'description': 'md5:744fffba08f19f4deab69fa8d49d5816',
+                'timestamp': 1592509124,
+                'modified_timestamp': 1592510457,
+                'duration': 3088,
+                'tags': 'count:0',
+                'series': 'Outside/In',
+                'series_id': '36252',
+                'channel_id': '206',
+                'channel_url': 'https://beta.prx.org/accounts/206',
+                'channel': 'New Hampshire Public Radio',
+            },
+            'playlist_count': 4
+
}, { + # Story with single combined audio + 'url': 'https://beta.prx.org/stories/400404', + 'info_dict': { + 'id': '400404', + 'title': 'Cafe Chill (Episode 2022-01)', + 'thumbnails': 'count:1', + 'description': 'md5:9f1b5a3cbd64fb159d08c3baa31f1539', + 'timestamp': 1641233952, + 'modified_timestamp': 1641234248, + 'duration': 3540, + 'series': 'Café Chill', + 'series_id': '37762', + 'channel_id': '5767', + 'channel_url': 'https://beta.prx.org/accounts/5767', + 'channel': 'C89.5 - KNHC Seattle', + 'ext': 'mp3', + 'tags': 'count:0', + 'thumbnail': r're:https?://cms\.prx\.org/pub/\w+/0/web/story_image/767965/medium/Aurora_Over_Trees\.jpg', + 'upload_date': '20220103', + 'modified_date': '20220103' + } + }, { + 'url': 'https://listen.prx.org/stories/399200', + 'only_matching': True + } + ] + + def _extract_audio_pieces(self, audio_response): + return [{ + 'format_id': str_or_none(piece_response.get('id')), + 'format_note': str_or_none(piece_response.get('label')), + 'filesize': int_or_none(piece_response.get('size')), + 'duration': int_or_none(piece_response.get('duration')), + 'ext': mimetype2ext(piece_response.get('contentType')), + 'asr': int_or_none(piece_response.get('frequency'), scale=1000), + 'abr': int_or_none(piece_response.get('bitRate')), + 'url': self._extract_file_link(piece_response), + 'vcodec': 'none' + } for piece_response in sorted( + self._get_prx_embed_response(audio_response, 'items') or [], + key=lambda p: int_or_none(p.get('position')))] + + def _extract_story(self, story_response): + info = self._extract_story_info(story_response) + if not info: + return + audio_pieces = self._extract_audio_pieces( + self._get_prx_embed_response(story_response, 'audio')) + if len(audio_pieces) == 1: + return { + 'formats': audio_pieces, + **info + } + + entries = [{ + **info, + 'id': '%s_part%d' % (info['id'], (idx + 1)), + 'formats': [fmt], + } for idx, fmt in enumerate(audio_pieces)] + return { + '_type': 'multi_video', + 'entries': entries, + **info + } + + def _real_extract(self, url): + story_id = self._match_id(url) + response = self._call_api(story_id, f'stories/{story_id}') + return self._extract_story(response) + + +class PRXSeriesIE(PRXBaseIE): + _VALID_URL = PRXBaseIE.PRX_BASE_URL_RE % r'series/(?P<id>\d+)' + _TESTS = [ + { + 'url': 'https://beta.prx.org/series/36252', + 'info_dict': { + 'id': '36252', + 'title': 'Outside/In', + 'thumbnails': 'count:1', + 'description': 'md5:a6bedc5f810777bcb09ab30ff9059114', + 'timestamp': 1470684964, + 'modified_timestamp': 1582308830, + 'channel_id': '206', + 'channel_url': 'https://beta.prx.org/accounts/206', + 'channel': 'New Hampshire Public Radio', + 'series': 'Outside/In', + 'series_id': '36252' + }, + 'playlist_mincount': 39 + }, { + # Blank series + 'url': 'https://beta.prx.org/series/25038', + 'info_dict': { + 'id': '25038', + 'title': '25038', + 'timestamp': 1207612800, + 'modified_timestamp': 1207612800, + 'channel_id': '206', + 'channel_url': 'https://beta.prx.org/accounts/206', + 'channel': 'New Hampshire Public Radio', + 'series': '25038', + 'series_id': '25038' + }, + 'playlist_count': 0 + } + ] + + def _extract_series(self, series_response): + info = self._extract_series_info(series_response) + return { + '_type': 'playlist', + 'entries': self._entries(info['id'], 'series/%s/stories' % info['id'], self._story_playlist_entry), + **info + } + + def _real_extract(self, url): + series_id = self._match_id(url) + response = self._call_api(series_id, f'series/{series_id}') + return self._extract_series(response) + + +class 
PRXAccountIE(PRXBaseIE): + _VALID_URL = PRXBaseIE.PRX_BASE_URL_RE % r'accounts/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://beta.prx.org/accounts/206', + 'info_dict': { + 'id': '206', + 'title': 'New Hampshire Public Radio', + 'description': 'md5:277f2395301d0aca563c80c70a18ee0a', + 'channel_id': '206', + 'channel_url': 'https://beta.prx.org/accounts/206', + 'channel': 'New Hampshire Public Radio', + 'thumbnails': 'count:1' + }, + 'playlist_mincount': 380 + }] + + def _extract_account(self, account_response): + info = self._extract_account_info(account_response) + series = self._entries( + info['id'], f'accounts/{info["id"]}/series', self._series_playlist_entry) + stories = self._entries( + info['id'], f'accounts/{info["id"]}/stories', self._story_playlist_entry) + return { + '_type': 'playlist', + 'entries': itertools.chain(series, stories), + **info + } + + def _real_extract(self, url): + account_id = self._match_id(url) + response = self._call_api(account_id, f'accounts/{account_id}') + return self._extract_account(response) + + +class PRXStoriesSearchIE(PRXBaseIE, SearchInfoExtractor): + IE_DESC = 'PRX Stories Search' + IE_NAME = 'prxstories:search' + _SEARCH_KEY = 'prxstories' + + def _search_results(self, query): + yield from self._entries( + f'query {query}', 'stories/search', self._story_playlist_entry, query={'q': query}) + + +class PRXSeriesSearchIE(PRXBaseIE, SearchInfoExtractor): + IE_DESC = 'PRX Series Search' + IE_NAME = 'prxseries:search' + _SEARCH_KEY = 'prxseries' + + def _search_results(self, query): + yield from self._entries( + f'query {query}', 'series/search', self._series_playlist_entry, query={'q': query}) diff --git a/plugins/youtube_download/yt_dlp/extractor/puhutv.py b/plugins/youtube_download/yt_dlp/extractor/puhutv.py new file mode 100644 index 0000000..ca71665 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/puhutv.py @@ -0,0 +1,239 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_HTTPError, + compat_str, +) +from ..utils import ( + ExtractorError, + int_or_none, + float_or_none, + parse_resolution, + str_or_none, + try_get, + unified_timestamp, + url_or_none, + urljoin, +) + + +class PuhuTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle' + IE_NAME = 'puhutv' + _TESTS = [{ + # film + 'url': 'https://puhutv.com/sut-kardesler-izle', + 'md5': 'a347470371d56e1585d1b2c8dab01c96', + 'info_dict': { + 'id': '5085', + 'display_id': 'sut-kardesler', + 'ext': 'mp4', + 'title': 'Süt Kardeşler', + 'description': 'md5:ca09da25b7e57cbb5a9280d6e48d17aa', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 4832.44, + 'creator': 'Arzu Film', + 'timestamp': 1561062602, + 'upload_date': '20190620', + 'release_year': 1976, + 'view_count': int, + 'tags': list, + }, + }, { + # episode, geo restricted, bypassable with --geo-verification-proxy + 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle', + 'only_matching': True, + }, { + # 4k, with subtitles + 'url': 'https://puhutv.com/dip-1-bolum-izle', + 'only_matching': True, + }] + _SUBTITLE_LANGS = { + 'English': 'en', + 'Deutsch': 'de', + 'عربى': 'ar' + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + info = self._download_json( + urljoin(url, '/api/slug/%s-izle' % display_id), + display_id)['data'] + + video_id = compat_str(info['id']) + show = info.get('title') or {} + title = info.get('name') or show['name'] + if info.get('display_name'): + title = '%s %s' % (title, 
info['display_name']) + + try: + videos = self._download_json( + 'https://puhutv.com/api/assets/%s/videos' % video_id, + display_id, 'Downloading video JSON', + headers=self.geo_verification_headers()) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + self.raise_geo_restricted() + raise + + urls = [] + formats = [] + + for video in videos['data']['videos']: + media_url = url_or_none(video.get('url')) + if not media_url or media_url in urls: + continue + urls.append(media_url) + + playlist = video.get('is_playlist') + if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url: + formats.extend(self._extract_m3u8_formats( + media_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + continue + + quality = int_or_none(video.get('quality')) + f = { + 'url': media_url, + 'ext': 'mp4', + 'height': quality + } + video_format = video.get('video_format') + is_hls = (video_format == 'hls' or '/hls/' in media_url or '/chunklist.m3u8' in media_url) and playlist is False + if is_hls: + format_id = 'hls' + f['protocol'] = 'm3u8_native' + elif video_format == 'mp4': + format_id = 'http' + else: + continue + if quality: + format_id += '-%sp' % quality + f['format_id'] = format_id + formats.append(f) + self._sort_formats(formats) + + creator = try_get( + show, lambda x: x['producer']['name'], compat_str) + + content = info.get('content') or {} + + images = try_get( + content, lambda x: x['images']['wide'], dict) or {} + thumbnails = [] + for image_id, image_url in images.items(): + if not isinstance(image_url, compat_str): + continue + if not image_url.startswith(('http', '//')): + image_url = 'https://%s' % image_url + t = parse_resolution(image_id) + t.update({ + 'id': image_id, + 'url': image_url + }) + thumbnails.append(t) + + tags = [] + for genre in show.get('genres') or []: + if not isinstance(genre, dict): + continue + genre_name = genre.get('name') + if genre_name and isinstance(genre_name, compat_str): + tags.append(genre_name) + + subtitles = {} + for subtitle in content.get('subtitles') or []: + if not isinstance(subtitle, dict): + continue + lang = subtitle.get('language') + sub_url = url_or_none(subtitle.get('url') or subtitle.get('file')) + if not lang or not isinstance(lang, compat_str) or not sub_url: + continue + subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ + 'url': sub_url + }] + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': info.get('description') or show.get('description'), + 'season_id': str_or_none(info.get('season_id')), + 'season_number': int_or_none(info.get('season_number')), + 'episode_number': int_or_none(info.get('episode_number')), + 'release_year': int_or_none(show.get('released_at')), + 'timestamp': unified_timestamp(info.get('created_at')), + 'creator': creator, + 'view_count': int_or_none(content.get('watch_count')), + 'duration': float_or_none(content.get('duration_in_ms'), 1000), + 'tags': tags, + 'subtitles': subtitles, + 'thumbnails': thumbnails, + 'formats': formats + } + + +class PuhuTVSerieIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay' + IE_NAME = 'puhutv:serie' + _TESTS = [{ + 'url': 'https://puhutv.com/deniz-yildizi-detay', + 'info_dict': { + 'title': 'Deniz Yıldızı', + 'id': 'deniz-yildizi', + }, + 'playlist_mincount': 205, + }, { + # a film detail page which is using same url with serie page + 'url': 'https://puhutv.com/kaybedenler-kulubu-detay', + 
'only_matching': True, + }] + + def _extract_entries(self, seasons): + for season in seasons: + season_id = season.get('id') + if not season_id: + continue + page = 1 + has_more = True + while has_more is True: + season = self._download_json( + 'https://galadriel.puhutv.com/seasons/%s' % season_id, + season_id, 'Downloading page %s' % page, query={ + 'page': page, + 'per': 40, + }) + episodes = season.get('episodes') + if isinstance(episodes, list): + for ep in episodes: + slug_path = str_or_none(ep.get('slugPath')) + if not slug_path: + continue + video_id = str_or_none(int_or_none(ep.get('id'))) + yield self.url_result( + 'https://puhutv.com/%s' % slug_path, + ie=PuhuTVIE.ie_key(), video_id=video_id, + video_title=ep.get('name') or ep.get('eventLabel')) + page += 1 + has_more = season.get('hasMore') + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + info = self._download_json( + urljoin(url, '/api/slug/%s-detay' % playlist_id), + playlist_id)['data'] + + seasons = info.get('seasons') + if seasons: + return self.playlist_result( + self._extract_entries(seasons), playlist_id, info.get('name')) + + # For films, these are using same url with series + video_id = info.get('slug') or info['assets'][0]['slug'] + return self.url_result( + 'https://puhutv.com/%s-izle' % video_id, + PuhuTVIE.ie_key(), video_id) diff --git a/plugins/youtube_download/yt_dlp/extractor/puls4.py b/plugins/youtube_download/yt_dlp/extractor/puls4.py new file mode 100644 index 0000000..80091b8 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/puls4.py @@ -0,0 +1,57 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .prosiebensat1 import ProSiebenSat1BaseIE +from ..utils import ( + unified_strdate, + parse_duration, + compat_str, +) + + +class Puls4IE(ProSiebenSat1BaseIE): + _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)' + _TESTS = [{ + 'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118', + 'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03', + 'info_dict': { + 'id': '118118', + 'ext': 'flv', + 'title': 'Tobias Homberger von myclubs im #2min2miotalk', + 'description': 'md5:f9def7c5e8745d6026d8885487d91955', + 'upload_date': '20160830', + 'uploader': 'PULS_4', + }, + }, { + 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer', + 'only_matching': True, + }, { + 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598', + 'only_matching': True, + }] + _TOKEN = 'puls4' + _SALT = '01!kaNgaiNgah1Ie4AeSha' + _CLIENT_NAME = '' + + def _real_extract(self, url): + path = self._match_id(url) + content_path = self._download_json( + 'http://www.puls4.com/api/json-fe/page/' + path, path)['content'][0]['url'] + media = self._download_json( + 'http://www.puls4.com' + content_path, + content_path)['mediaCurrent'] + player_content = media['playerContent'] + info = self._extract_video_info(url, player_content['id']) + info.update({ + 'id': compat_str(media['objectId']), + 'title': player_content['title'], + 'description': media.get('description'), + 'thumbnail': media.get('previewLink'), + 'upload_date': unified_strdate(media.get('date')), + 'duration': parse_duration(player_content.get('duration')), + 'episode': player_content.get('episodePartName'), + 'show': media.get('channel'), + 'season_id': player_content.get('seasonId'), + 'uploader': 
player_content.get('sourceCompany'), + }) + return info diff --git a/plugins/youtube_download/yt_dlp/extractor/pyvideo.py b/plugins/youtube_download/yt_dlp/extractor/pyvideo.py new file mode 100644 index 0000000..8696197 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/pyvideo.py @@ -0,0 +1,72 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none + + +class PyvideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)' + + _TESTS = [{ + 'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html', + 'info_dict': { + 'id': 'become-a-logging-expert-in-30-minutes', + }, + 'playlist_count': 2, + }, { + 'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html', + 'md5': '5fe1c7e0a8aa5570330784c847ff6d12', + 'info_dict': { + 'id': '2542', + 'ext': 'm4v', + 'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v', + }, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + category = mobj.group('category') + video_id = mobj.group('id') + + entries = [] + + data = self._download_json( + 'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json' + % (category, video_id), video_id, fatal=False) + + if data: + for video in data['videos']: + video_url = video.get('url') + if video_url: + if video.get('type') == 'youtube': + entries.append(self.url_result(video_url, 'Youtube')) + else: + entries.append({ + 'id': compat_str(data.get('id') or video_id), + 'url': video_url, + 'title': data['title'], + 'description': data.get('description') or data.get('summary'), + 'thumbnail': data.get('thumbnail_url'), + 'duration': int_or_none(data.get('duration')), + }) + else: + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + media_urls = self._search_regex( + r'(?s)Media URL:(.+?)</li>', webpage, 'media urls') + for m in re.finditer( + r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls): + media_url = m.group('url') + if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url): + entries.append(self.url_result(media_url, 'Youtube')) + else: + entries.append({ + 'id': video_id, + 'url': media_url, + 'title': title, + }) + + return self.playlist_result(entries, video_id) diff --git a/plugins/youtube_download/yt_dlp/extractor/qqmusic.py b/plugins/youtube_download/yt_dlp/extractor/qqmusic.py new file mode 100644 index 0000000..0106d16 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/qqmusic.py @@ -0,0 +1,369 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import random +import re +import time + +from .common import InfoExtractor +from ..utils import ( + clean_html, + ExtractorError, + strip_jsonp, + unescapeHTML, +) + + +class QQMusicIE(InfoExtractor): + IE_NAME = 'qqmusic' + IE_DESC = 'QQ音乐' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P<id>[0-9A-Za-z]+)\.html' + _TESTS = [{ + 'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html', + 'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8', + 'info_dict': { + 'id': '004295Et37taLD', + 'ext': 'mp3', + 'title': '可惜没如果', + 'release_date': '20141227', + 'creator': '林俊杰', + 'description': 'md5:d85afb3051952ecc50a1ee8a286d1eac', + 'thumbnail': r're:^https?://.*\.jpg$', + } + }, { + 'note': 'There is no mp3-320 version of this song.', + 'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html', + 'md5': 'fa3926f0c585cda0af8fa4f796482e3e', + 
'info_dict': {
+            'id': '004MsGEo3DdNxV',
+            'ext': 'mp3',
+            'title': '如果',
+            'release_date': '20050626',
+            'creator': '李季美',
+            'description': 'md5:46857d5ed62bc4ba84607a805dccf437',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }, {
+        'note': 'lyrics not in .lrc format',
+        'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html',
+        'info_dict': {
+            'id': '001JyApY11tIp6',
+            'ext': 'mp3',
+            'title': 'Shadows Over Transylvania',
+            'release_date': '19970225',
+            'creator': 'Dark Funeral',
+            'description': 'md5:c9b20210587cbcd6836a1c597bab4525',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    _FORMATS = {
+        'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320},
+        'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128},
+        'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10}
+    }
+
+    # Reference: m_r_GetRUin() in top_player.js
+    # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
+    @staticmethod
+    def m_r_get_ruin():
+        curMs = int(time.time() * 1000) % 1000
+        return int(round(random.random() * 2147483647) * curMs % 1E10)
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        detail_info_page = self._download_webpage(
+            'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
+            mid, note='Download song detail info',
+            errnote='Unable to get song detail info', encoding='gbk')
+
+        song_name = self._html_search_regex(
+            r"songname:\s*'([^']+)'", detail_info_page, 'song name')
+
+        publish_time = self._html_search_regex(
+            r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
+            'publish time', default=None)
+        if publish_time:
+            publish_time = publish_time.replace('-', '')
+
+        singer = self._html_search_regex(
+            r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)
+
+        lrc_content = self._html_search_regex(
+            r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
+            detail_info_page, 'LRC lyrics', default=None)
+        if lrc_content:
+            lrc_content = lrc_content.replace('\\n', '\n')
+
+        thumbnail_url = None
+        albummid = self._search_regex(
+            [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
+            detail_info_page, 'album mid', default=None)
+        if albummid:
+            thumbnail_url = 'http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg' \
+                            % (albummid[-2:-1], albummid[-1], albummid)
+
+        guid = self.m_r_get_ruin()
+
+        vkey = self._download_json(
+            'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
+            mid, note='Retrieve vkey', errnote='Unable to get vkey',
+            transform_source=strip_jsonp)['key']
+
+        formats = []
+        for format_id, details in self._FORMATS.items():
+            formats.append({
+                'url': 'http://cc.stream.qqmusic.qq.com/%s%s.%s?vkey=%s&guid=%s&fromtag=0'
+                       % (details['prefix'], mid, details['ext'], vkey, guid),
+                'format': format_id,
+                'format_id': format_id,
+                'quality': details['preference'],
+                'abr': details.get('abr'),
+            })
+        self._check_formats(formats, mid)
+        self._sort_formats(formats)
+
+        # lrc_content may be None for songs without lyrics; guard the regex scan
+        actual_lrc_lyrics = ''.join(
+            line + '\n' for line in re.findall(
+                r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content or ''))
+
+        info_dict = {
+            'id': mid,
+            'formats': formats,
+            'title': song_name,
+            'release_date': publish_time,
+            'creator': singer,
+            'description': lrc_content,
+            'thumbnail': thumbnail_url
+        }
+        if actual_lrc_lyrics:
+            info_dict['subtitles'] = {
+                'origin': [{
+                    'ext': 'lrc',
+                    'data': actual_lrc_lyrics,
+                }]
+            }
+        return info_dict
+
+
+class QQPlaylistBaseIE(InfoExtractor):
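+    # helpers shared by the singer/album/toplist/playlist extractors below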
+    @staticmethod
+    def qq_static_url(category, mid):
+        return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)
+
+    def get_singer_all_songs(self, singmid, num):
+        return self._download_webpage(
+            r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg', singmid,
+            query={
+                'format': 'json',
+                'inCharset': 'utf8',
+                'outCharset': 'utf-8',
+                'platform': 'yqq',
+                'needNewCode': 0,
+                'singermid': singmid,
+                'order': 'listen',
+                'begin': 0,
+                'num': num,
+                'songstatus': 1,
+            })
+
+    def get_entries_from_page(self, singmid):
+        entries = []
+
+        default_num = 1
+        json_text = self.get_singer_all_songs(singmid, default_num)
+        json_obj_all_songs = self._parse_json(json_text, singmid)
+
+        if json_obj_all_songs['code'] == 0:
+            total = json_obj_all_songs['data']['total']
+            json_text = self.get_singer_all_songs(singmid, total)
+            json_obj_all_songs = self._parse_json(json_text, singmid)
+
+        for item in json_obj_all_songs['data']['list']:
+            if item['musicData'].get('songmid') is not None:
+                songmid = item['musicData']['songmid']
+                entries.append(self.url_result(
+                    r'https://y.qq.com/n/yqq/song/%s.html' % songmid, 'QQMusic', songmid))
+
+        return entries
+
+
+class QQMusicSingerIE(QQPlaylistBaseIE):
+    IE_NAME = 'qqmusic:singer'
+    IE_DESC = 'QQ音乐 - 歌手'
+    _VALID_URL = r'https?://y\.qq\.com/n/yqq/singer/(?P<id>[0-9A-Za-z]+)\.html'
+    _TEST = {
+        'url': 'https://y.qq.com/n/yqq/singer/001BLpXF2DyJe2.html',
+        'info_dict': {
+            'id': '001BLpXF2DyJe2',
+            'title': '林俊杰',
+            'description': 'md5:870ec08f7d8547c29c93010899103751',
+        },
+        'playlist_mincount': 12,
+    }
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        entries = self.get_entries_from_page(mid)
+        singer_page = self._download_webpage(url, mid, 'Download singer page')
+        singer_name = self._html_search_regex(
+            r"singername\s*:\s*'(.*?)'", singer_page, 'singer name', default=None)
+        singer_desc = None
+
+        if mid:
+            singer_desc_page = self._download_xml(
+                'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg', mid,
+                'Download singer description XML',
+                query={'utf8': 1, 'outCharset': 'utf-8', 'format': 'xml', 'singermid': mid},
+                headers={'Referer': 'https://y.qq.com/n/yqq/singer/'})
+
+            singer_desc = singer_desc_page.find('./data/info/desc').text
+
+        return self.playlist_result(entries, mid, singer_name, singer_desc)
+
+
+class QQMusicAlbumIE(QQPlaylistBaseIE):
+    IE_NAME = 'qqmusic:album'
+    IE_DESC = 'QQ音乐 - 专辑'
+    _VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P<id>[0-9A-Za-z]+)\.html'
+
+    _TESTS = [{
+        'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html',
+        'info_dict': {
+            'id': '000gXCTb2AhRR1',
+            'title': '我们都是这样长大的',
+            'description': 'md5:179c5dce203a5931970d306aa9607ea6',
+        },
+        'playlist_count': 4,
+    }, {
+        'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html',
+        'info_dict': {
+            'id': '002Y5a3b3AlCu3',
+            'title': '그리고...',
+            'description': 'md5:a48823755615508a95080e81b51ba729',
+        },
+        'playlist_count': 8,
+    }]
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        album = self._download_json(
+            'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=%s&format=json' % mid,
+            mid, 'Download album page')['data']
+
+        entries = [
+            self.url_result(
+                'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid']
+            ) for song in album['list']
+        ]
+        album_name = album.get('name')
+        album_detail = album.get('desc')
+        if album_detail is not None:
+            album_detail = album_detail.strip()
+
+        return self.playlist_result(entries, mid, album_name,
album_detail) + + +class QQMusicToplistIE(QQPlaylistBaseIE): + IE_NAME = 'qqmusic:toplist' + IE_DESC = 'QQ音乐 - 排行榜' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P<id>[0-9]+)\.html' + + _TESTS = [{ + 'url': 'https://y.qq.com/n/yqq/toplist/123.html', + 'info_dict': { + 'id': '123', + 'title': '美国iTunes榜', + 'description': 'md5:89db2335fdbb10678dee2d43fe9aba08', + }, + 'playlist_count': 100, + }, { + 'url': 'https://y.qq.com/n/yqq/toplist/3.html', + 'info_dict': { + 'id': '3', + 'title': '巅峰榜·欧美', + 'description': 'md5:5a600d42c01696b26b71f8c4d43407da', + }, + 'playlist_count': 100, + }, { + 'url': 'https://y.qq.com/n/yqq/toplist/106.html', + 'info_dict': { + 'id': '106', + 'title': '韩国Mnet榜', + 'description': 'md5:cb84b325215e1d21708c615cac82a6e7', + }, + 'playlist_count': 50, + }] + + def _real_extract(self, url): + list_id = self._match_id(url) + + toplist_json = self._download_json( + 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id, + note='Download toplist page', + query={'type': 'toplist', 'topid': list_id, 'format': 'json'}) + + entries = [self.url_result( + 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic', + song['data']['songmid']) + for song in toplist_json['songlist']] + + topinfo = toplist_json.get('topinfo', {}) + list_name = topinfo.get('ListName') + list_description = topinfo.get('info') + return self.playlist_result(entries, list_id, list_name, list_description) + + +class QQMusicPlaylistIE(QQPlaylistBaseIE): + IE_NAME = 'qqmusic:playlist' + IE_DESC = 'QQ音乐 - 歌单' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P<id>[0-9]+)\.html' + + _TESTS = [{ + 'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html', + 'info_dict': { + 'id': '3462654915', + 'title': '韩国5月新歌精选下旬', + 'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4', + }, + 'playlist_count': 40, + 'skip': 'playlist gone', + }, { + 'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html', + 'info_dict': { + 'id': '1374105607', + 'title': '易入人心的华语民谣', + 'description': '民谣的歌曲易于传唱、、歌词朗朗伤口、旋律简单温馨。属于那种才入耳孔。却上心头的感觉。没有太多的复杂情绪。简单而直接地表达乐者的情绪,就是这样的简单才易入人心。', + }, + 'playlist_count': 20, + }] + + def _real_extract(self, url): + list_id = self._match_id(url) + + list_json = self._download_json( + 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg', + list_id, 'Download list page', + query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id}, + transform_source=strip_jsonp) + if not len(list_json.get('cdlist', [])): + if list_json.get('code'): + raise ExtractorError( + 'QQ Music said: error %d in fetching playlist info' % list_json['code'], + expected=True) + raise ExtractorError('Unable to get playlist info') + + cdlist = list_json['cdlist'][0] + entries = [self.url_result( + 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid']) + for song in cdlist['songlist']] + + list_name = cdlist.get('dissname') + list_description = clean_html(unescapeHTML(cdlist.get('desc'))) + return self.playlist_result(entries, list_id, list_name, list_description) diff --git a/plugins/youtube_download/yt_dlp/extractor/r7.py b/plugins/youtube_download/yt_dlp/extractor/r7.py new file mode 100644 index 0000000..e2202d6 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/r7.py @@ -0,0 +1,112 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class R7IE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + 
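# three URL shapes are accepted: subdomain /idmedia/ pages, noticias articles and the bare player
+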
(?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/| + noticias\.r7\.com(?:/[^/]+)+/[^/]+-| + player\.r7\.com/video/i/ + ) + (?P<id>[\da-f]{24}) + ''' + _TESTS = [{ + 'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html', + 'md5': '403c4e393617e8e8ddc748978ee8efde', + 'info_dict': { + 'id': '54e7050b0cf2ff57e0279389', + 'ext': 'mp4', + 'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"', + 'description': 'md5:01812008664be76a6479aa58ec865b72', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 98, + 'like_count': int, + 'view_count': int, + }, + }, { + 'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html', + 'only_matching': True, + }, { + 'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/', + 'only_matching': True, + }, { + 'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'http://player-api.r7.com/video/i/%s' % video_id, video_id) + + title = video['title'] + + formats = [] + media_url_hls = video.get('media_url_hls') + if media_url_hls: + formats.extend(self._extract_m3u8_formats( + media_url_hls, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + media_url = video.get('media_url') + if media_url: + f = { + 'url': media_url, + 'format_id': 'http', + } + # m3u8 format always matches the http format, let's copy metadata from + # one to another + m3u8_formats = list(filter( + lambda f: f.get('vcodec') != 'none', formats)) + if len(m3u8_formats) == 1: + f_copy = m3u8_formats[0].copy() + f_copy.update(f) + f_copy['protocol'] = 'http' + f = f_copy + formats.append(f) + self._sort_formats(formats) + + description = video.get('description') + thumbnail = video.get('thumb') + duration = int_or_none(video.get('media_duration')) + like_count = int_or_none(video.get('likes')) + view_count = int_or_none(video.get('views')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'like_count': like_count, + 'view_count': view_count, + 'formats': formats, + } + + +class R7ArticleIE(InfoExtractor): + _VALID_URL = r'https?://(?:[a-zA-Z]+)\.r7\.com/(?:[^/]+/)+[^/?#&]+-(?P<id>\d+)' + _TEST = { + 'url': 'http://tv.r7.com/record-play/balanco-geral/videos/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-16102015', + 'only_matching': True, + } + + @classmethod + def suitable(cls, url): + return False if R7IE.suitable(url) else super(R7ArticleIE, cls).suitable(url) + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + r'<div[^>]+(?:id=["\']player-|class=["\']embed["\'][^>]+id=["\'])([\da-f]{24})', + webpage, 'video id') + + return 
self.url_result('http://player.r7.com/video/i/%s' % video_id, R7IE.ie_key()) diff --git a/plugins/youtube_download/yt_dlp/extractor/radiko.py b/plugins/youtube_download/yt_dlp/extractor/radiko.py new file mode 100644 index 0000000..1e60de1 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/radiko.py @@ -0,0 +1,234 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import base64 +import calendar +import datetime + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + update_url_query, + clean_html, + unified_timestamp, +) +from ..compat import compat_urllib_parse + + +class RadikoBaseIE(InfoExtractor): + _FULL_KEY = None + + def _auth_client(self): + auth_cache = self._downloader.cache.load('radiko', 'auth_data') + if auth_cache: + return auth_cache + + _, auth1_handle = self._download_webpage_handle( + 'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page', + headers={ + 'x-radiko-app': 'pc_html5', + 'x-radiko-app-version': '0.0.1', + 'x-radiko-device': 'pc', + 'x-radiko-user': 'dummy_user', + }) + auth1_header = auth1_handle.info() + + auth_token = auth1_header['X-Radiko-AuthToken'] + kl = int(auth1_header['X-Radiko-KeyLength']) + ko = int(auth1_header['X-Radiko-KeyOffset']) + raw_partial_key = self._extract_full_key()[ko:ko + kl] + partial_key = base64.b64encode(raw_partial_key).decode() + + area_id = self._download_webpage( + 'https://radiko.jp/v2/api/auth2', None, 'Authenticating', + headers={ + 'x-radiko-device': 'pc', + 'x-radiko-user': 'dummy_user', + 'x-radiko-authtoken': auth_token, + 'x-radiko-partialkey': partial_key, + }).split(',')[0] + + auth_data = (auth_token, area_id) + self._downloader.cache.store('radiko', 'auth_data', auth_data) + return auth_data + + def _extract_full_key(self): + if self._FULL_KEY: + return self._FULL_KEY + + jscode = self._download_webpage( + 'https://radiko.jp/apps/js/playerCommon.js', None, + note='Downloading player js code') + full_key = self._search_regex( + (r"RadikoJSPlayer\([^,]*,\s*(['\"])pc_html5\1,\s*(['\"])(?P<fullkey>[0-9a-f]+)\2,\s*{"), + jscode, 'full key', fatal=False, group='fullkey') + + if full_key: + full_key = full_key.encode() + else: # use full key ever known + full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa' + + self._FULL_KEY = full_key + return full_key + + def _find_program(self, video_id, station, cursor): + station_program = self._download_xml( + 'https://radiko.jp/v3/program/station/weekly/%s.xml' % station, video_id, + note='Downloading radio program for %s station' % station) + + prog = None + for p in station_program.findall('.//prog'): + ft_str, to_str = p.attrib['ft'], p.attrib['to'] + ft = unified_timestamp(ft_str, False) + to = unified_timestamp(to_str, False) + if ft <= cursor and cursor < to: + prog = p + break + if not prog: + raise ExtractorError('Cannot identify radio program to download!') + assert ft, to + return prog, station_program, ft, ft_str, to_str + + def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, area_id, query): + m3u8_playlist_data = self._download_xml( + 'https://radiko.jp/v3/station/stream/pc_html5/%s.xml' % station, video_id, + note='Downloading m3u8 information') + m3u8_urls = m3u8_playlist_data.findall('.//url') + + formats = [] + found = set() + for url_tag in m3u8_urls: + pcu = url_tag.find('playlist_create_url') + url_attrib = url_tag.attrib + playlist_url = update_url_query(pcu.text, { + 'station_id': station, + **query, + 'l': '15', + 'lsid': 
'77d0678df93a1034659c14d6fc89f018', + 'type': 'b', + }) + if playlist_url in found: + continue + else: + found.add(playlist_url) + + time_to_skip = None if is_onair else cursor - ft + + subformats = self._extract_m3u8_formats( + playlist_url, video_id, ext='m4a', + live=True, fatal=False, m3u8_id=None, + headers={ + 'X-Radiko-AreaId': area_id, + 'X-Radiko-AuthToken': auth_token, + }) + for sf in subformats: + domain = sf['format_id'] = compat_urllib_parse.urlparse(sf['url']).netloc + if re.match(r'^[cf]-radiko\.smartstream\.ne\.jp$', domain): + # Prioritize live radio vs playback based on extractor + sf['preference'] = 100 if is_onair else -100 + if not is_onair and url_attrib['timefree'] == '1' and time_to_skip: + sf['_ffmpeg_args'] = ['-ss', time_to_skip] + formats.extend(subformats) + + self._sort_formats(formats) + return formats + + +class RadikoIE(RadikoBaseIE): + _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)' + + _TESTS = [{ + # QRR (文化放送) station provides <desc> + 'url': 'https://radiko.jp/#!/ts/QRR/20210425101300', + 'only_matching': True, + }, { + # FMT (TOKYO FM) station does not provide <desc> + 'url': 'https://radiko.jp/#!/ts/FMT/20210810150000', + 'only_matching': True, + }, { + 'url': 'https://radiko.jp/#!/ts/JOAK-FM/20210509090000', + 'only_matching': True, + }] + + def _real_extract(self, url): + station, video_id = self._match_valid_url(url).groups() + vid_int = unified_timestamp(video_id, False) + + auth_token, area_id = self._auth_client() + + prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int) + + title = prog.find('title').text + description = clean_html(prog.find('info').text) + station_name = station_program.find('.//name').text + + formats = self._extract_formats( + video_id=video_id, station=station, is_onair=False, + ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id, + query={ + 'start_at': radio_begin, + 'ft': radio_begin, + 'end_at': radio_end, + 'to': radio_end, + 'seek': video_id, + }) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'uploader': station_name, + 'uploader_id': station, + 'timestamp': vid_int, + 'formats': formats, + 'is_live': True, + } + + +class RadikoRadioIE(RadikoBaseIE): + _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/live/(?P<id>[A-Z0-9-]+)' + + _TESTS = [{ + # QRR (文化放送) station provides <desc> + 'url': 'https://radiko.jp/#!/live/QRR', + 'only_matching': True, + }, { + # FMT (TOKYO FM) station does not provide <desc> + 'url': 'https://radiko.jp/#!/live/FMT', + 'only_matching': True, + }, { + 'url': 'https://radiko.jp/#!/live/JOAK-FM', + 'only_matching': True, + }] + + def _real_extract(self, url): + station = self._match_id(url) + self.report_warning('Downloader will not stop at the end of the program! 
Press Ctrl+C to stop')
+
+        auth_token, area_id = self._auth_client()
+        # get current time in JST (GMT+9:00 w/o DST)
+        vid_now = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=9)))
+        vid_now = calendar.timegm(vid_now.timetuple())
+
+        prog, station_program, ft, _, _ = self._find_program(station, station, vid_now)
+
+        title = prog.find('title').text
+        description = clean_html(prog.find('info').text)
+        station_name = station_program.find('.//name').text
+
+        formats = self._extract_formats(
+            video_id=station, station=station, is_onair=True,
+            ft=ft, cursor=vid_now, auth_token=auth_token, area_id=area_id,
+            query={})
+
+        return {
+            'id': station,
+            'title': title,
+            'description': description,
+            'uploader': station_name,
+            'uploader_id': station,
+            'timestamp': ft,
+            'formats': formats,
+            'is_live': True,
+        }
diff --git a/plugins/youtube_download/yt_dlp/extractor/radiobremen.py b/plugins/youtube_download/yt_dlp/extractor/radiobremen.py
new file mode 100644
index 0000000..2c35f98
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/radiobremen.py
@@ -0,0 +1,63 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class RadioBremenIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P<id>[0-9]+)'
+    IE_NAME = 'radiobremen'
+
+    _TEST = {
+        'url': 'http://www.radiobremen.de/mediathek/?id=141876',
+        'info_dict': {
+            'id': '141876',
+            'ext': 'mp4',
+            'duration': 178,
+            'width': 512,
+            'title': 'Druck auf Patrick Öztürk',
+            'thumbnail': r're:https?://.*\.jpg$',
+            'description': 'Gegen den SPD-Bürgerschaftsabgeordneten Patrick Öztürk wird wegen Beihilfe zum gewerbsmäßigen Betrug ermittelt. 
Am Donnerstagabend sollte er dem Vorstand des SPD-Unterbezirks Bremerhaven dazu Rede und Antwort stehen.', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + meta_url = 'http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s' % video_id + meta_doc = self._download_webpage( + meta_url, video_id, 'Downloading metadata') + title = self._html_search_regex( + r'<h1.*>(?P<title>.+)</h1>', meta_doc, 'title') + description = self._html_search_regex( + r'<p>(?P<description>.*)</p>', meta_doc, 'description', fatal=False) + duration = parse_duration(self._html_search_regex( + r'Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>', + meta_doc, 'duration', fatal=False)) + + page_doc = self._download_webpage( + url, video_id, 'Downloading video information') + mobj = re.search( + r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)", + page_doc) + video_url = ( + "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" % + (video_id, video_id, mobj.group("secret"), mobj.group('width'))) + + formats = [{ + 'url': video_url, + 'ext': 'mp4', + 'width': int(mobj.group('width')), + }] + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'formats': formats, + 'thumbnail': mobj.group('thumbnail'), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/radiocanada.py b/plugins/youtube_download/yt_dlp/extractor/radiocanada.py new file mode 100644 index 0000000..4b4445c --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/radiocanada.py @@ -0,0 +1,170 @@ +# coding: utf-8 +from __future__ import unicode_literals + + +from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, + unified_strdate, +) + + +class RadioCanadaIE(InfoExtractor): + IE_NAME = 'radiocanada' + _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)' + _TESTS = [ + { + 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', + 'info_dict': { + 'id': '7184272', + 'ext': 'mp4', + 'title': 'Le parcours du tireur capté sur vidéo', + 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', + 'upload_date': '20141023', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, + { + # empty Title + 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/', + 'info_dict': { + 'id': '7754998', + 'ext': 'mp4', + 'title': 'letelejournal22h', + 'description': 'INTEGRALE WEB 22H-TJ', + 'upload_date': '20170720', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + # with protectionType but not actually DRM protected + 'url': 'radiocanada:toutv:140872', + 'info_dict': { + 'id': '140872', + 'title': 'Épisode 1', + 'series': 'District 31', + }, + 'only_matching': True, + } + ] + _GEO_COUNTRIES = ['CA'] + _access_token = None + _claims = None + + def _call_api(self, path, video_id=None, app_code=None, query=None): + if not query: + query = {} + query.update({ + 'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb', + 'output': 'json', + }) + if video_id: + query.update({ + 'appCode': app_code, + 'idMedia': video_id, + }) + if self._access_token: + query['access_token'] = self._access_token + try: + return self._download_json( + 'https://services.radio-canada.ca/media/' + path, 
video_id, query=query)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422):
+                data = self._parse_json(e.cause.read().decode(), None)
+                error = data.get('error_description') or data['errorMessage']['text']
+                raise ExtractorError(error, expected=True)
+            raise
+
+    def _extract_info(self, app_code, video_id):
+        metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']
+
+        def get_meta(name):
+            for meta in metas:
+                if meta.get('name') == name:
+                    text = meta.get('text')
+                    if text:
+                        return text
+
+        # protectionType does not necessarily mean the video is DRM protected (see
+        # https://github.com/ytdl-org/youtube-dl/pull/18609).
+        if get_meta('protectionType'):
+            self.report_warning('This video is probably DRM protected.')
+
+        query = {
+            'connectionType': 'hd',
+            'deviceType': 'ipad',
+            'multibitrate': 'true',
+        }
+        if self._claims:
+            query['claims'] = self._claims
+        v_data = self._call_api('validation/v2/', video_id, app_code, query)
+        v_url = v_data.get('url')
+        if not v_url:
+            error = v_data['message']
+            if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
+                raise self.raise_geo_restricted(error, self._GEO_COUNTRIES)
+            if error == 'Le contenu sélectionné est disponible seulement en premium':
+                self.raise_login_required(error)
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, error), expected=True)
+        formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
+        self._sort_formats(formats)
+
+        subtitles = {}
+        closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
+        if closed_caption_url:
+            subtitles['fr'] = [{
+                'url': closed_caption_url,
+                'ext': determine_ext(closed_caption_url, 'vtt'),
+            }]
+
+        return {
+            'id': video_id,
+            'title': get_meta('Title') or get_meta('AV-nomEmission'),
+            'description': get_meta('Description') or get_meta('ShortDescription'),
+            'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
+            'duration': int_or_none(get_meta('length')),
+            'series': get_meta('Emission'),
+            'season_number': int_or_none(get_meta('SrcSaison')),
+            'episode_number': int_or_none(get_meta('SrcEpisode')),
+            'upload_date': unified_strdate(get_meta('Date')),
+            'subtitles': subtitles,
+            'formats': formats,
+        }
+
+    def _real_extract(self, url):
+        return self._extract_info(*self._match_valid_url(url).groups())
+
+
+class RadioCanadaAudioVideoIE(InfoExtractor):
+    IE_NAME = 'radiocanada:audiovideo'
+    _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
+        'info_dict': {
+            'id': '7527184',
+            'ext': 'mp4',
+            'title': 'Barack Obama au Vietnam',
+            'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
+            'upload_date': '20160523',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
diff --git a/plugins/youtube_download/yt_dlp/extractor/radiode.py b/plugins/youtube_download/yt_dlp/extractor/radiode.py
new file mode 100644
index 0000000..0382873
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/radiode.py
@@ -0,0 +1,52 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class RadioDeIE(InfoExtractor):
+    IE_NAME = 
'radio.de' + _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)' + _TEST = { + 'url': 'http://ndr2.radio.de/', + 'info_dict': { + 'id': 'ndr2', + 'ext': 'mp3', + 'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:591c49c702db1a33751625ebfb67f273', + 'thumbnail': r're:^https?://.*\.png', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + radio_id = self._match_id(url) + webpage = self._download_webpage(url, radio_id) + jscode = self._search_regex( + r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n", + webpage, 'broadcast') + + broadcast = self._parse_json(jscode, radio_id) + title = broadcast['name'] + description = broadcast.get('description') or broadcast.get('shortDescription') + thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100') + + formats = [{ + 'url': stream['streamUrl'], + 'ext': stream['streamContentFormat'].lower(), + 'acodec': stream['streamContentFormat'], + 'abr': stream['bitRate'], + 'asr': stream['sampleRate'] + } for stream in broadcast['streamUrls']] + self._sort_formats(formats) + + return { + 'id': radio_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'is_live': True, + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/radiofrance.py b/plugins/youtube_download/yt_dlp/extractor/radiofrance.py new file mode 100644 index 0000000..082238b --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/radiofrance.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class RadioFranceIE(InfoExtractor): + _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)' + IE_NAME = 'radiofrance' + + _TEST = { + 'url': 'http://maison.radiofrance.fr/radiovisions/one-one', + 'md5': 'bdbb28ace95ed0e04faab32ba3160daf', + 'info_dict': { + 'id': 'one-one', + 'ext': 'ogg', + 'title': 'One to one', + 'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.", + 'uploader': 'Thomas Hercouët', + }, + } + + def _real_extract(self, url): + m = self._match_valid_url(url) + video_id = m.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title') + description = self._html_search_regex( + r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>', + webpage, 'description', fatal=False) + uploader = self._html_search_regex( + r'<div class="credit">  © (.*?)</div>', + webpage, 'uploader', fatal=False) + + formats_str = self._html_search_regex( + r'class="jp-jplayer[^"]*" data-source="([^"]+)">', + webpage, 'audio URLs') + formats = [ + { + 'format_id': fm[0], + 'url': fm[1], + 'vcodec': 'none', + 'quality': i, + } + for i, fm in + enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str)) + ] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': description, + 'uploader': uploader, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/radiojavan.py b/plugins/youtube_download/yt_dlp/extractor/radiojavan.py new file mode 100644 index 0000000..3f74f0c --- /dev/null +++ 
b/plugins/youtube_download/yt_dlp/extractor/radiojavan.py @@ -0,0 +1,83 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_resolution, + str_to_int, + unified_strdate, + urlencode_postdata, + urljoin, +) + + +class RadioJavanIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?' + _TEST = { + 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam', + 'md5': 'e85208ffa3ca8b83534fca9fe19af95b', + 'info_dict': { + 'id': 'chaartaar-ashoobam', + 'ext': 'mp4', + 'title': 'Chaartaar - Ashoobam', + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'upload_date': '20150215', + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + download_host = self._download_json( + 'https://www.radiojavan.com/videos/video_host', video_id, + data=urlencode_postdata({'id': video_id}), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'Referer': url, + }).get('host', 'https://host1.rjmusicmedia.com') + + webpage = self._download_webpage(url, video_id) + + formats = [] + for format_id, _, video_path in re.findall( + r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2', + webpage): + f = parse_resolution(format_id) + f.update({ + 'url': urljoin(download_host, video_path), + 'format_id': format_id, + }) + formats.append(f) + self._sort_formats(formats) + + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) + + upload_date = unified_strdate(self._search_regex( + r'class="date_added">Date added: ([^<]+)<', + webpage, 'upload date', fatal=False)) + + view_count = str_to_int(self._search_regex( + r'class="views">Plays: ([\d,]+)', + webpage, 'view count', fatal=False)) + like_count = str_to_int(self._search_regex( + r'class="rating">([\d,]+) likes', + webpage, 'like count', fatal=False)) + dislike_count = str_to_int(self._search_regex( + r'class="rating">([\d,]+) dislikes', + webpage, 'dislike count', fatal=False)) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'view_count': view_count, + 'like_count': like_count, + 'dislike_count': dislike_count, + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/radiokapital.py b/plugins/youtube_download/yt_dlp/extractor/radiokapital.py new file mode 100644 index 0000000..2e93e03 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/radiokapital.py @@ -0,0 +1,99 @@ +# coding: utf-8 + +from .common import InfoExtractor +from ..utils import ( + clean_html, + traverse_obj, + unescapeHTML, +) + +import itertools +from urllib.parse import urlencode + + +class RadioKapitalBaseIE(InfoExtractor): + def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs={}): + return self._download_json( + f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urlencode(qs)}', + video_id, note=note) + + def _parse_episode(self, data): + release = '%s%s%s' % (data['published'][6:11], data['published'][3:6], data['published'][:3]) + return { + '_type': 'url_transparent', + 'url': data['mixcloud_url'], + 'ie_key': 'Mixcloud', + 'title': unescapeHTML(data['title']), + 'description': clean_html(data.get('content')), + 'tags': traverse_obj(data, ('tags', ..., 'name')), + 'release_date': release, + 'series': traverse_obj(data, ('show', 'title')), + } + + +class RadioKapitalIE(RadioKapitalBaseIE): + IE_NAME = 
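# NOTE: A minimal, runnable sketch of the per-quality scrape RadioJavanIE performs
# above: one re.findall() picks up every "RJ.video<quality> = '<path>'" assignment
# in the page, and the quality token doubles as the format_id. The sample page
# text, the host prefix and the tiny parse_resolution stand-in are invented here
# for illustration; the real parse_resolution lives in yt-dlp's utils.
import re

def parse_resolution(label):  # stand-in: only understands '<N>p' labels
    m = re.match(r'(\d+)[pP]', label)
    return {'height': int(m.group(1))} if m else {}

page = "RJ.video480p = 'media/480.mp4'; RJ.video720p = 'media/720.mp4';"
formats = []
for format_id, _, video_path in re.findall(
        r"RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*([\"'])(?P<url>(?:(?!\2).)+)\2", page):
    f = parse_resolution(format_id)
    f.update({'url': 'https://host1.example.com/' + video_path, 'format_id': format_id})
    formats.append(f)
print([f['format_id'] for f in formats])  # ['480p', '720p']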
'radiokapital' + _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/[a-z\d-]+/(?P<id>[a-z\d-]+)' + + _TESTS = [{ + 'url': 'https://radiokapital.pl/shows/tutaj-sa-smoki/5-its-okay-to-be-immaterial', + 'info_dict': { + 'id': 'radiokapital_radio-kapitał-tutaj-są-smoki-5-its-okay-to-be-immaterial-2021-05-20', + 'ext': 'm4a', + 'title': '#5: It’s okay to\xa0be\xa0immaterial', + 'description': 'md5:2499da5fbfb0e88333b7d37ec8e9e4c4', + 'uploader': 'Radio Kapitał', + 'uploader_id': 'radiokapital', + 'timestamp': 1621640164, + 'upload_date': '20210521', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + episode = self._call_api('episodes/%s' % video_id, video_id) + return self._parse_episode(episode) + + +class RadioKapitalShowIE(RadioKapitalBaseIE): + IE_NAME = 'radiokapital:show' + _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/(?P<id>[a-z\d-]+)/?(?:$|[?#])' + + _TESTS = [{ + 'url': 'https://radiokapital.pl/shows/wesz', + 'info_dict': { + 'id': '100', + 'title': 'WĘSZ', + 'description': 'md5:3a557a1e0f31af612b0dcc85b1e0ca5c', + }, + 'playlist_mincount': 17, + }] + + def _get_episode_list(self, series_id, page_no): + return self._call_api( + 'episodes', series_id, + f'Downloading episode list page #{page_no}', qs={ + 'show': series_id, + 'page': page_no, + }) + + def _entries(self, series_id): + for page_no in itertools.count(1): + episode_list = self._get_episode_list(series_id, page_no) + yield from (self._parse_episode(ep) for ep in episode_list['items']) + if episode_list['next'] is None: + break + + def _real_extract(self, url): + series_id = self._match_id(url) + + show = self._call_api(f'shows/{series_id}', series_id, 'Downloading show metadata') + entries = self._entries(series_id) + return { + '_type': 'playlist', + 'entries': entries, + 'id': str(show['id']), + 'title': show.get('title'), + 'description': clean_html(show.get('content')), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/radiozet.py b/plugins/youtube_download/yt_dlp/extractor/radiozet.py new file mode 100644 index 0000000..2e1ff36 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/radiozet.py @@ -0,0 +1,51 @@ +# coding: utf-8 +from .common import InfoExtractor +from ..utils import ( + traverse_obj, + strip_or_none, +) + + +class RadioZetPodcastIE(InfoExtractor): + _VALID_URL = r'https?://player\.radiozet\.pl\/Podcasty/.*?/(?P<id>.+)' + _TEST = { + 'url': 'https://player.radiozet.pl/Podcasty/Nie-Ma-Za-Co/O-przedmiotach-szkolnych-ktore-przydaja-sie-w-zyciu', + 'md5': 'e03665c316b4fbc5f6a8f232948bbba3', + 'info_dict': { + 'id': '42154', + 'display_id': 'O-przedmiotach-szkolnych-ktore-przydaja-sie-w-zyciu', + 'title': 'O przedmiotach szkolnych, które przydają się w życiu', + 'description': 'md5:fa72bed49da334b09e5b2f79851f185c', + 'release_timestamp': 1592985480, + 'ext': 'mp3', + 'thumbnail': r're:^https?://.*\.png$', + 'duration': 83, + 'series': 'Nie Ma Za Co', + 'creator': 'Katarzyna Pakosińska', + } + } + + def _call_api(self, podcast_id, display_id): + return self._download_json( + f'https://player.radiozet.pl/api/podcasts/getPodcast/(node)/{podcast_id}/(station)/radiozet', + display_id) + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + podcast_id = self._html_search_regex(r'<div.*?\sid="player".*?\sdata-id=[\'"]([^\'"]+)[\'"]', + webpage, 'podcast id') + data = self._call_api(podcast_id, display_id)['data'][0] + + return { + 'id': podcast_id, + 'display_id': display_id, + 'title': 
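# NOTE: A compact, runnable sketch of the paging loop RadioKapitalShowIE uses
# above: walk page numbers with itertools.count() and stop as soon as the API
# reports no next page. fetch_page() below fakes the real
# _call_api('episodes', ...) transport for illustration.
import itertools

def fetch_page(page_no):
    pages = {1: {'items': ['ep1', 'ep2'], 'next': 2},
             2: {'items': ['ep3'], 'next': None}}
    return pages[page_no]

def entries():
    for page_no in itertools.count(1):
        episode_list = fetch_page(page_no)
        yield from episode_list['items']
        if episode_list['next'] is None:
            break

print(list(entries()))  # ['ep1', 'ep2', 'ep3']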
strip_or_none(data.get('title')), + 'description': strip_or_none(traverse_obj(data, ('program', 'desc'))), + 'release_timestamp': data.get('published_date'), + 'url': traverse_obj(data, ('player', 'stream')), + 'thumbnail': traverse_obj(data, ('program', 'image', 'original')), + 'duration': traverse_obj(data, ('player', 'duration')), + 'series': strip_or_none(traverse_obj(data, ('program', 'title'))), + 'creator': strip_or_none(traverse_obj(data, ('presenter', 0, 'title'))), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/radlive.py b/plugins/youtube_download/yt_dlp/extractor/radlive.py new file mode 100644 index 0000000..dc98973 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/radlive.py @@ -0,0 +1,185 @@ +import json + +from ..utils import ( + ExtractorError, + format_field, + traverse_obj, + try_get, + unified_timestamp +) +from .common import InfoExtractor + + +class RadLiveIE(InfoExtractor): + IE_NAME = 'radlive' + _VALID_URL = r'https?://(?:www\.)?rad\.live/content/(?P<content_type>feature|episode)/(?P<id>[a-f0-9-]+)' + _TESTS = [{ + 'url': 'https://rad.live/content/feature/dc5acfbc-761b-4bec-9564-df999905116a', + 'md5': '6219d5d31d52de87d21c9cf5b7cb27ff', + 'info_dict': { + 'id': 'dc5acfbc-761b-4bec-9564-df999905116a', + 'ext': 'mp4', + 'title': 'Deathpact - Digital Mirage 2 [Full Set]', + 'language': 'en', + 'thumbnail': 'https://static.12core.net/cb65ae077a079c68380e38f387fbc438.png', + 'description': '', + 'release_timestamp': 1600185600.0, + 'channel': 'Proximity', + 'channel_id': '9ce6dd01-70a4-4d59-afb6-d01f807cd009', + 'channel_url': 'https://rad.live/content/channel/9ce6dd01-70a4-4d59-afb6-d01f807cd009', + } + }, { + 'url': 'https://rad.live/content/episode/bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf', + 'md5': '40b2175f347592125d93e9a344080125', + 'info_dict': { + 'id': 'bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf', + 'ext': 'mp4', + 'title': 'E01: Bad Jokes 1', + 'language': 'en', + 'thumbnail': 'https://lsp.littlstar.com/channels/WHISTLE/BAD_JOKES/SEASON_1/BAD_JOKES_101/poster.jpg', + 'description': 'Bad Jokes - Champions, Adam Pally, Super Troopers, Team Edge and 2Hype', + 'release_timestamp': None, + 'channel': None, + 'channel_id': None, + 'channel_url': None, + 'episode': 'E01: Bad Jokes 1', + 'episode_number': 1, + 'episode_id': '336', + }, + }] + + def _real_extract(self, url): + content_type, video_id = self._match_valid_url(url).groups() + + webpage = self._download_webpage(url, video_id) + + content_info = json.loads(self._search_regex( + r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>', + webpage, 'video info', group='json'))['props']['pageProps']['initialContentData'] + video_info = content_info[content_type] + + if not video_info: + raise ExtractorError('Unable to extract video info, make sure the URL is valid') + + formats = self._extract_m3u8_formats(video_info['assets']['videos'][0]['url'], video_id) + self._sort_formats(formats) + + data = video_info.get('structured_data', {}) + + release_date = unified_timestamp(traverse_obj(data, ('releasedEvent', 'startDate'))) + channel = next(iter(content_info.get('channels', [])), {}) + channel_id = channel.get('lrn', '').split(':')[-1] or None + + result = { + 'id': video_id, + 'title': video_info['title'], + 'formats': formats, + 'language': traverse_obj(data, ('potentialAction', 'target', 'inLanguage')), + 'thumbnail': traverse_obj(data, ('image', 'contentUrl')), + 'description': data.get('description'), + 'release_timestamp': release_date, + 'channel': channel.get('name'), + 
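# NOTE: RadioZetPodcastIE above leans on yt-dlp's traverse_obj() to walk nested
# JSON without raising on missing keys or empty lists. A rough plain-Python
# equivalent of calls like traverse_obj(data, ('program', 'image', 'original')),
# against an invented sample payload:
from functools import reduce

def traverse(data, path):
    def step(obj, key):
        if isinstance(obj, dict):
            return obj.get(key)
        if isinstance(obj, (list, tuple)) and isinstance(key, int) and key < len(obj):
            return obj[key]
        return None
    return reduce(step, path, data)

data = {'program': {'image': {'original': 'https://example.com/a.png'}}, 'presenter': []}
print(traverse(data, ('program', 'image', 'original')))  # https://example.com/a.png
print(traverse(data, ('presenter', 0, 'title')))         # None: missing data is tolerated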
'channel_id': channel_id, + 'channel_url': format_field(channel_id, template='https://rad.live/content/channel/%s'), + + } + if content_type == 'episode': + result.update({ + # TODO: Get season number when downloading single episode + 'episode': video_info.get('title'), + 'episode_number': video_info.get('number'), + 'episode_id': video_info.get('id'), + }) + + return result + + +class RadLiveSeasonIE(RadLiveIE): + IE_NAME = 'radlive:season' + _VALID_URL = r'https?://(?:www\.)?rad\.live/content/season/(?P<id>[a-f0-9-]+)' + _TESTS = [{ + 'url': 'https://rad.live/content/season/08a290f7-c9ef-4e22-9105-c255995a2e75', + 'md5': '40b2175f347592125d93e9a344080125', + 'info_dict': { + 'id': '08a290f7-c9ef-4e22-9105-c255995a2e75', + 'title': 'Bad Jokes - Season 1', + }, + 'playlist_mincount': 5, + }] + + @classmethod + def suitable(cls, url): + return False if RadLiveIE.suitable(url) else super(RadLiveSeasonIE, cls).suitable(url) + + def _real_extract(self, url): + season_id = self._match_id(url) + webpage = self._download_webpage(url, season_id) + + content_info = json.loads(self._search_regex( + r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>', + webpage, 'video info', group='json'))['props']['pageProps']['initialContentData'] + video_info = content_info['season'] + + entries = [{ + '_type': 'url_transparent', + 'id': episode['structured_data']['url'].split('/')[-1], + 'url': episode['structured_data']['url'], + 'series': try_get(content_info, lambda x: x['series']['title']), + 'season': video_info['title'], + 'season_number': video_info.get('number'), + 'season_id': video_info.get('id'), + 'ie_key': RadLiveIE.ie_key(), + } for episode in video_info['episodes']] + + return self.playlist_result(entries, season_id, video_info.get('title')) + + +class RadLiveChannelIE(RadLiveIE): + IE_NAME = 'radlive:channel' + _VALID_URL = r'https?://(?:www\.)?rad\.live/content/channel/(?P<id>[a-f0-9-]+)' + _TESTS = [{ + 'url': 'https://rad.live/content/channel/5c4d8df4-6fa0-413c-81e3-873479b49274', + 'md5': '625156a08b7f2b0b849f234e664457ac', + 'info_dict': { + 'id': '5c4d8df4-6fa0-413c-81e3-873479b49274', + 'title': 'Whistle Sports', + }, + 'playlist_mincount': 7, + }] + + _QUERY = ''' +query WebChannelListing ($lrn: ID!) 
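# NOTE: RadLiveSeasonIE and RadLiveChannelIE above override suitable() so any URL
# already claimed by RadLiveIE is left to it; the broader extractor only
# volunteers when the more specific one declines. The same precedence scheme in
# miniature (classes and regexes invented for illustration):
import re

class ItemIE:
    _VALID_URL = r'https?://example\.com/item/\d+'

    @classmethod
    def suitable(cls, url):
        return re.match(cls._VALID_URL, url) is not None

class ListIE(ItemIE):
    _VALID_URL = r'https?://example\.com/.+'

    @classmethod
    def suitable(cls, url):
        return False if ItemIE.suitable(url) else re.match(cls._VALID_URL, url) is not None

print(ItemIE.suitable('https://example.com/item/42'))  # True
print(ListIE.suitable('https://example.com/item/42'))  # False: the item IE wins
print(ListIE.suitable('https://example.com/feed'))     # True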
{ + channel (id:$lrn) { + name + features { + structured_data + } + } +}''' + + @classmethod + def suitable(cls, url): + return False if RadLiveIE.suitable(url) else super(RadLiveChannelIE, cls).suitable(url) + + def _real_extract(self, url): + channel_id = self._match_id(url) + + graphql = self._download_json( + 'https://content.mhq.12core.net/graphql', channel_id, + headers={'Content-Type': 'application/json'}, + data=json.dumps({ + 'query': self._QUERY, + 'variables': {'lrn': f'lrn:12core:media:content:channel:{channel_id}'} + }).encode('utf-8')) + + data = traverse_obj(graphql, ('data', 'channel')) + if not data: + raise ExtractorError('Unable to extract video info, make sure the URL is valid') + + entries = [{ + '_type': 'url_transparent', + 'url': feature['structured_data']['url'], + 'ie_key': RadLiveIE.ie_key(), + } for feature in data['features']] + + return self.playlist_result(entries, channel_id, data.get('name')) diff --git a/plugins/youtube_download/yt_dlp/extractor/rai.py b/plugins/youtube_download/yt_dlp/extractor/rai.py new file mode 100644 index 0000000..34f1272 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/rai.py @@ -0,0 +1,738 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urlparse, +) +from ..utils import ( + determine_ext, + ExtractorError, + find_xpath_attr, + fix_xml_ampersands, + GeoRestrictedError, + HEADRequest, + int_or_none, + join_nonempty, + parse_duration, + remove_start, + strip_or_none, + traverse_obj, + try_get, + unified_strdate, + unified_timestamp, + update_url_query, + urljoin, + xpath_text, +) + + +class RaiBaseIE(InfoExtractor): + _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' + _GEO_COUNTRIES = ['IT'] + _GEO_BYPASS = False + + def _extract_relinker_info(self, relinker_url, video_id, audio_only=False): + if not re.match(r'https?://', relinker_url): + return {'formats': [{'url': relinker_url}]} + + formats = [] + geoprotection = None + is_live = None + duration = None + + for platform in ('mon', 'flash', 'native'): + relinker = self._download_xml( + relinker_url, video_id, + note='Downloading XML metadata for platform %s' % platform, + transform_source=fix_xml_ampersands, + query={'output': 45, 'pl': platform}, + headers=self.geo_verification_headers()) + + if not geoprotection: + geoprotection = xpath_text( + relinker, './geoprotection', default=None) == 'Y' + + if not is_live: + is_live = xpath_text( + relinker, './is_live', default=None) == 'Y' + if not duration: + duration = parse_duration(xpath_text( + relinker, './duration', default=None)) + + url_elem = find_xpath_attr(relinker, './url', 'type', 'content') + if url_elem is None: + continue + + media_url = url_elem.text + + # This does not imply geo restriction (e.g. 
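# NOTE: RadLiveChannelIE above fetches its channel listing with a plain GraphQL
# POST: the query text plus a variables dict, JSON-encoded into the request body,
# with the channel id wrapped in an lrn identifier. The payload assembly on its
# own (the channel id comes from the test above; nothing is sent anywhere):
import json

_QUERY = '''
query WebChannelListing ($lrn: ID!) {
  channel (id:$lrn) { name features { structured_data } }
}'''

channel_id = '5c4d8df4-6fa0-413c-81e3-873479b49274'
body = json.dumps({
    'query': _QUERY,
    'variables': {'lrn': f'lrn:12core:media:content:channel:{channel_id}'},
}).encode('utf-8')
print(body.decode()[:36])  # {"query": "\nquery WebChannelListing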
+ # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html) + if '/video_no_available.mp4' in media_url: + continue + + ext = determine_ext(media_url) + if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'): + continue + + if ext == 'mp3': + formats.append({ + 'url': media_url, + 'vcodec': 'none', + 'acodec': 'mp3', + 'format_id': 'http-mp3', + }) + break + elif ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon': + formats.extend(self._extract_m3u8_formats( + media_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif ext == 'f4m' or platform == 'flash': + manifest_url = update_url_query( + media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'), + {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'}) + formats.extend(self._extract_f4m_formats( + manifest_url, video_id, f4m_id='hds', fatal=False)) + else: + bitrate = int_or_none(xpath_text(relinker, 'bitrate')) + formats.append({ + 'url': media_url, + 'tbr': bitrate if bitrate > 0 else None, + 'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http', + }) + + if not formats and geoprotection is True: + self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) + + if not audio_only: + formats.extend(self._create_http_urls(relinker_url, formats)) + + return dict((k, v) for k, v in { + 'is_live': is_live, + 'duration': duration, + 'formats': formats, + }.items() if v is not None) + + def _create_http_urls(self, relinker_url, fmts): + _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\d+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?' + _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s' + _QUALITY = { + # tbr: w, h + '250': [352, 198], + '400': [512, 288], + '700': [512, 288], + '800': [700, 394], + '1200': [736, 414], + '1800': [1024, 576], + '2400': [1280, 720], + '3200': [1440, 810], + '3600': [1440, 810], + '5000': [1920, 1080], + '10000': [1920, 1080], + } + + def test_url(url): + resp = self._request_webpage( + HEADRequest(url), None, headers={'User-Agent': 'Rai'}, + fatal=False, errnote=False, note=False) + + if resp is False: + return False + + if resp.code == 200: + return False if resp.url == url else resp.url + return None + + # filter out audio-only formats + fmts = [f for f in fmts if not f.get('vcodec') == 'none'] + + def get_format_info(tbr): + import math + br = int_or_none(tbr) + if len(fmts) == 1 and not br: + br = fmts[0].get('tbr') + if br > 300: + tbr = compat_str(math.floor(br / 100) * 100) + else: + tbr = '250' + + # try extracting info from available m3u8 formats + format_copy = None + for f in fmts: + if f.get('tbr'): + br_limit = math.floor(br / 100) + if br_limit - 1 <= math.floor(f['tbr'] / 100) <= br_limit + 1: + format_copy = f.copy() + return { + 'width': format_copy.get('width'), + 'height': format_copy.get('height'), + 'tbr': format_copy.get('tbr'), + 'vcodec': format_copy.get('vcodec'), + 'acodec': format_copy.get('acodec'), + 'fps': format_copy.get('fps'), + 'format_id': 'https-%s' % tbr, + } if format_copy else { + 'width': _QUALITY[tbr][0], + 'height': _QUALITY[tbr][1], + 'format_id': 'https-%s' % tbr, + 'tbr': int(tbr), + } + + loc = test_url(_MP4_TMPL % (relinker_url, '*')) + if not isinstance(loc, compat_str): + return [] + + mobj = re.match( + _RELINKER_REG, + test_url(relinker_url) or '') + if not mobj: + return [] + + available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*'] + 
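# NOTE: get_format_info() above buckets a raw bitrate onto one of the fixed
# _QUALITY rungs by rounding down to the nearest 100 kbps (anything at or below
# 300 falls back to '250'), then reads a canned width/height pair off the table.
# The rounding step in isolation, with the table shortened for illustration:
import math

_QUALITY = {'250': [352, 198], '400': [512, 288], '1800': [1024, 576], '5000': [1920, 1080]}

def bucket(tbr):
    return str(math.floor(tbr / 100) * 100) if tbr and tbr > 300 else '250'

for raw in (180, 415, 1840, 5090):
    key = bucket(raw)
    width, height = _QUALITY.get(key, (None, None))
    print(raw, '->', key, (width, height))
# 180 -> 250 (352, 198); 415 -> 400 (512, 288); 1840 -> 1800 (1024, 576); 5090 -> 5000 (1920, 1080)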
available_qualities = [i for i in available_qualities if i] + + formats = [] + for q in available_qualities: + fmt = { + 'url': _MP4_TMPL % (relinker_url, q), + 'protocol': 'https', + 'ext': 'mp4', + } + fmt.update(get_format_info(q)) + formats.append(fmt) + return formats + + @staticmethod + def _extract_subtitles(url, video_data): + STL_EXT = 'stl' + SRT_EXT = 'srt' + subtitles = {} + subtitles_array = video_data.get('subtitlesArray') or [] + for k in ('subtitles', 'subtitlesUrl'): + subtitles_array.append({'url': video_data.get(k)}) + for subtitle in subtitles_array: + sub_url = subtitle.get('url') + if sub_url and isinstance(sub_url, compat_str): + sub_lang = subtitle.get('language') or 'it' + sub_url = urljoin(url, sub_url) + sub_ext = determine_ext(sub_url, SRT_EXT) + subtitles.setdefault(sub_lang, []).append({ + 'ext': sub_ext, + 'url': sub_url, + }) + if STL_EXT == sub_ext: + subtitles[sub_lang].append({ + 'ext': SRT_EXT, + 'url': sub_url[:-len(STL_EXT)] + SRT_EXT, + }) + return subtitles + + +class RaiPlayIE(RaiBaseIE): + _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE + _TESTS = [{ + 'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', + 'md5': '8970abf8caf8aef4696e7b1f2adfc696', + 'info_dict': { + 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', + 'ext': 'mp4', + 'title': 'Report del 07/04/2014', + 'alt_title': 'St 2013/14 - Report - Espresso nel caffè - 07/04/2014', + 'description': 'md5:d730c168a58f4bb35600fc2f881ec04e', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Rai Gulp', + 'duration': 6160, + 'series': 'Report', + 'season': '2013/14', + 'subtitles': { + 'it': 'count:4', + }, + }, + 'params': { + 'skip_download': True, + }, + }, { + # 1080p direct mp4 url + 'url': 'https://www.raiplay.it/video/2021/11/Blanca-S1E1-Senza-occhi-b1255a4a-8e72-4a2f-b9f3-fc1308e00736.html', + 'md5': 'aeda7243115380b2dd5e881fd42d949a', + 'info_dict': { + 'id': 'b1255a4a-8e72-4a2f-b9f3-fc1308e00736', + 'ext': 'mp4', + 'title': 'Blanca - S1E1 - Senza occhi', + 'alt_title': 'St 1 Ep 1 - Blanca - Senza occhi', + 'description': 'md5:75f95d5c030ec8bac263b1212322e28c', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Rai 1', + 'duration': 6493, + 'series': 'Blanca', + 'season': 'Season 1', + }, + }, { + 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', + 'only_matching': True, + }, { + # subtitles at 'subtitlesArray' key (see #27698) + 'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html', + 'only_matching': True, + }, { + # DRM protected + 'url': 'https://www.raiplay.it/video/2020/09/Lo-straordinario-mondo-di-Zoey-S1E1-Lo-straordinario-potere-di-Zoey-ed493918-1d32-44b7-8454-862e473d00ff.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + base, video_id = self._match_valid_url(url).groups() + + media = self._download_json( + base + '.json', video_id, 'Downloading video JSON') + + if not self.get_param('allow_unplayable_formats'): + if try_get( + media, + (lambda x: x['rights_management']['rights']['drm'], + lambda x: x['program_info']['rights_management']['rights']['drm']), + dict): + self.report_drm(video_id) + + title = media['name'] + video = media['video'] + + relinker_info = self._extract_relinker_info(video['content_url'], video_id) + self._sort_formats(relinker_info['formats']) + + thumbnails = [] + for _, value in media.get('images', 
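# NOTE: _extract_subtitles() above registers an .srt twin next to every .stl
# subtitle it finds by swapping the extension suffix on the same URL. The string
# surgery shown standalone (URL invented):
STL_EXT, SRT_EXT = 'stl', 'srt'
sub_url = 'https://sub.example.com/ep01.stl'
entries = [{'ext': STL_EXT, 'url': sub_url}]
if sub_url.endswith(STL_EXT):
    entries.append({'ext': SRT_EXT, 'url': sub_url[:-len(STL_EXT)] + SRT_EXT})
print(entries[1]['url'])  # https://sub.example.com/ep01.srt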
{}).items(): + if value: + thumbnails.append({ + 'url': urljoin(url, value), + }) + + date_published = media.get('date_published') + time_published = media.get('time_published') + if date_published and time_published: + date_published += ' ' + time_published + + subtitles = self._extract_subtitles(url, video) + + program_info = media.get('program_info') or {} + season = media.get('season') + + alt_title = join_nonempty(media.get('subtitle'), media.get('toptitle'), delim=' - ') + + info = { + 'id': remove_start(media.get('id'), 'ContentItem-') or video_id, + 'display_id': video_id, + 'title': title, + 'alt_title': strip_or_none(alt_title), + 'description': media.get('description'), + 'uploader': strip_or_none(media.get('channel')), + 'creator': strip_or_none(media.get('editor') or None), + 'duration': parse_duration(video.get('duration')), + 'timestamp': unified_timestamp(date_published), + 'thumbnails': thumbnails, + 'series': program_info.get('name'), + 'season_number': int_or_none(season), + 'season': season if (season and not season.isdigit()) else None, + 'episode': media.get('episode_title'), + 'episode_number': int_or_none(media.get('episode')), + 'subtitles': subtitles, + } + + info.update(relinker_info) + return info + + +class RaiPlayLiveIE(RaiPlayIE): + _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))' + _TESTS = [{ + 'url': 'http://www.raiplay.it/dirette/rainews24', + 'info_dict': { + 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c', + 'display_id': 'rainews24', + 'ext': 'mp4', + 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497', + 'uploader': 'Rai News 24', + 'creator': 'Rai News 24', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + }, + }] + + +class RaiPlayPlaylistIE(InfoExtractor): + _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?' 
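# NOTE: RaiPlayIE above stitches media['date_published'] and media['time_published']
# into a single string before handing it to yt-dlp's unified_timestamp(). The same
# move with only the standard library (sample values invented; the real helper
# accepts many more layouts):
from datetime import datetime, timezone

date_published, time_published = '03-11-2016', '20:00'
if date_published and time_published:
    date_published += ' ' + time_published

dt = datetime.strptime(date_published, '%d-%m-%Y %H:%M').replace(tzinfo=timezone.utc)
print(int(dt.timestamp()))  # 1478203200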
+ _TESTS = [{ + 'url': 'https://www.raiplay.it/programmi/nondirloalmiocapo/', + 'info_dict': { + 'id': 'nondirloalmiocapo', + 'title': 'Non dirlo al mio capo', + 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b', + }, + 'playlist_mincount': 12, + }, { + 'url': 'https://www.raiplay.it/programmi/nondirloalmiocapo/episodi/stagione-2/', + 'info_dict': { + 'id': 'nondirloalmiocapo', + 'title': 'Non dirlo al mio capo - Stagione 2', + 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b', + }, + 'playlist_mincount': 12, + }] + + def _real_extract(self, url): + base, playlist_id, extra_id = self._match_valid_url(url).groups() + + program = self._download_json( + base + '.json', playlist_id, 'Downloading program JSON') + + if extra_id: + extra_id = extra_id.upper().rstrip('/') + + playlist_title = program.get('name') + entries = [] + for b in (program.get('blocks') or []): + for s in (b.get('sets') or []): + if extra_id: + if extra_id != join_nonempty( + b.get('name'), s.get('name'), delim='/').replace(' ', '-').upper(): + continue + playlist_title = join_nonempty(playlist_title, s.get('name'), delim=' - ') + + s_id = s.get('id') + if not s_id: + continue + medias = self._download_json( + '%s/%s.json' % (base, s_id), s_id, + 'Downloading content set JSON', fatal=False) + if not medias: + continue + for m in (medias.get('items') or []): + path_id = m.get('path_id') + if not path_id: + continue + video_url = urljoin(url, path_id) + entries.append(self.url_result( + video_url, ie=RaiPlayIE.ie_key(), + video_id=RaiPlayIE._match_id(video_url))) + + return self.playlist_result( + entries, playlist_id, playlist_title, + try_get(program, lambda x: x['program_info']['description'])) + + +class RaiPlaySoundIE(RaiBaseIE): + _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE + _TESTS = [{ + 'url': 'https://www.raiplaysound.it/audio/2021/12/IL-RUGGITO-DEL-CONIGLIO-1ebae2a7-7cdb-42bb-842e-fe0d193e9707.html', + 'md5': '8970abf8caf8aef4696e7b1f2adfc696', + 'info_dict': { + 'id': '1ebae2a7-7cdb-42bb-842e-fe0d193e9707', + 'ext': 'mp3', + 'title': 'Il Ruggito del Coniglio del 10/12/2021', + 'description': 'md5:2a17d2107e59a4a8faa0e18334139ee2', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'rai radio 2', + 'duration': 5685, + 'series': 'Il Ruggito del Coniglio', + }, + 'params': { + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + base, audio_id = self._match_valid_url(url).group('base', 'id') + media = self._download_json(f'{base}.json', audio_id, 'Downloading audio JSON') + uid = try_get(media, lambda x: remove_start(remove_start(x['uniquename'], 'ContentItem-'), 'Page-')) + + info = {} + formats = [] + relinkers = set(traverse_obj(media, (('downloadable_audio', 'audio', ('live', 'cards', 0, 'audio')), 'url'))) + for r in relinkers: + info = self._extract_relinker_info(r, audio_id, True) + formats.extend(info.get('formats')) + + date_published = try_get(media, (lambda x: f'{x["create_date"]} {x.get("create_time") or ""}', + lambda x: x['live']['create_date'])) + + podcast_info = traverse_obj(media, 'podcast_info', ('live', 'cards', 0)) or {} + thumbnails = [{ + 'url': urljoin(url, thumb_url), + } for thumb_url in (podcast_info.get('images') or {}).values() if thumb_url] + + return { + **info, + 'id': uid or audio_id, + 'display_id': audio_id, + 'title': traverse_obj(media, 'title', 'episode_title'), + 'alt_title': traverse_obj(media, ('track_info', 'media_name')), + 'description': media.get('description'), + 'uploader': 
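# NOTE: RaiPlayPlaylistIE above matches the URL's trailing path against a
# '<block>/<set>' key built with yt-dlp's join_nonempty(), spaces dashed and
# uppercased. A stdlib rendering of that normalisation (the helper below is a
# simplified stand-in; block/set names invented):
def join_path(*vals, delim='/'):
    return delim.join(str(v) for v in vals if v)

block, s = {'name': 'Episodi'}, {'name': 'Stagione 2'}
key = join_path(block.get('name'), s.get('name')).replace(' ', '-').upper()
print(key)  # EPISODI/STAGIONE-2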
traverse_obj(media, ('track_info', 'channel'), expected_type=strip_or_none), + 'creator': traverse_obj(media, ('track_info', 'editor'), expected_type=strip_or_none), + 'timestamp': unified_timestamp(date_published), + 'thumbnails': thumbnails, + 'series': podcast_info.get('title'), + 'season_number': int_or_none(media.get('season')), + 'episode': media.get('episode_title'), + 'episode_number': int_or_none(media.get('episode')), + 'formats': formats, + } + + +class RaiPlaySoundLiveIE(RaiPlaySoundIE): + _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?P<id>[^/?#&]+)$)' + _TESTS = [{ + 'url': 'https://www.raiplaysound.it/radio2', + 'info_dict': { + 'id': 'b00a50e6-f404-4af6-8f8c-ff3b9af73a44', + 'display_id': 'radio2', + 'ext': 'mp4', + 'title': 'Rai Radio 2', + 'uploader': 'rai radio 2', + 'creator': 'raiplaysound', + 'is_live': True, + }, + 'params': { + 'skip_download': 'live', + }, + }] + + +class RaiPlaySoundPlaylistIE(InfoExtractor): + _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?:programmi|playlist|audiolibri)/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?' + _TESTS = [{ + 'url': 'https://www.raiplaysound.it/programmi/ilruggitodelconiglio', + 'info_dict': { + 'id': 'ilruggitodelconiglio', + 'title': 'Il Ruggito del Coniglio', + 'description': 'md5:1bbaf631245a7ab1ec4d9fbb3c7aa8f3', + }, + 'playlist_mincount': 65, + }, { + 'url': 'https://www.raiplaysound.it/programmi/ilruggitodelconiglio/puntate/prima-stagione-1995', + 'info_dict': { + 'id': 'ilruggitodelconiglio_puntate_prima-stagione-1995', + 'title': 'Prima Stagione 1995', + }, + 'playlist_count': 1, + }] + + def _real_extract(self, url): + base, playlist_id, extra_id = self._match_valid_url(url).group('base', 'id', 'extra_id') + url = f'{base}.json' + program = self._download_json(url, playlist_id, 'Downloading program JSON') + + if extra_id: + extra_id = extra_id.rstrip('/') + playlist_id += '_' + extra_id.replace('/', '_') + path = next(c['path_id'] for c in program.get('filters') or [] if extra_id in c.get('weblink')) + program = self._download_json( + urljoin('https://www.raiplaysound.it', path), playlist_id, 'Downloading program secondary JSON') + + entries = [ + self.url_result(urljoin(base, c['path_id']), ie=RaiPlaySoundIE.ie_key()) + for c in traverse_obj(program, 'cards', ('block', 'cards')) or [] + if c.get('path_id')] + + return self.playlist_result(entries, playlist_id, program.get('title'), + traverse_obj(program, ('podcast_info', 'description'))) + + +class RaiIE(RaiBaseIE): + _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE + _TESTS = [{ + # var uniquename = "ContentItem-..." + # data-id="ContentItem-..." 
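# NOTE: RaiPlaySoundIE above returns {**info, 'id': ..., 'formats': formats}:
# whatever _extract_relinker_info() produced becomes the base dict, and every key
# listed after the unpacking wins on collision. The precedence in two lines:
info = {'is_live': False, 'duration': 5685, 'formats': ['relinker-fmt']}
merged = {**info, 'duration': 9999, 'id': 'abc'}
print(merged)  # {'is_live': False, 'duration': 9999, 'formats': ['relinker-fmt'], 'id': 'abc'}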
+ 'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', + 'info_dict': { + 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', + 'ext': 'mp4', + 'title': 'TG PRIMO TEMPO', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1758, + 'upload_date': '20140612', + }, + 'skip': 'This content is available only in Italy', + }, { + # with ContentItem in many metas + 'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html', + 'info_dict': { + 'id': '1632c009-c843-4836-bb65-80c33084a64b', + 'ext': 'mp4', + 'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"', + 'description': 'I film in uscita questa settimana.', + 'thumbnail': r're:^https?://.*\.png$', + 'duration': 833, + 'upload_date': '20161103', + } + }, { + # with ContentItem in og:url + 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html', + 'md5': '06345bd97c932f19ffb129973d07a020', + 'info_dict': { + 'id': 'efb17665-691c-45d5-a60c-5301333cbb0c', + 'ext': 'mp4', + 'title': 'TG1 ore 20:00 del 03/11/2016', + 'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2214, + 'upload_date': '20161103', + } + }, { + # initEdizione('ContentItem-...' + 'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined', + 'info_dict': { + 'id': 'c2187016-8484-4e3a-8ac8-35e475b07303', + 'ext': 'mp4', + 'title': r're:TG1 ore \d{2}:\d{2} del \d{2}/\d{2}/\d{4}', + 'duration': 2274, + 'upload_date': '20170401', + }, + 'skip': 'Changes daily', + }, { + # HLS live stream with ContentItem in og:url + 'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html', + 'info_dict': { + 'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9', + 'ext': 'mp4', + 'title': 'La diretta di Rainews24', + }, + 'params': { + 'skip_download': True, + }, + }, { + # Direct MMS URL + 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html', + 'only_matching': True, + }, { + 'url': 'https://www.rainews.it/tgr/marche/notiziari/video/2019/02/ContentItem-6ba945a2-889c-4a80-bdeb-8489c70a8db9.html', + 'only_matching': True, + }] + + def _extract_from_content_id(self, content_id, url): + media = self._download_json( + 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id, + content_id, 'Downloading video JSON') + + title = media['name'].strip() + + media_type = media['type'] + if 'Audio' in media_type: + relinker_info = { + 'formats': [{ + 'format_id': media.get('formatoAudio'), + 'url': media['audioUrl'], + 'ext': media.get('formatoAudio'), + }] + } + elif 'Video' in media_type: + relinker_info = self._extract_relinker_info(media['mediaUri'], content_id) + else: + raise ExtractorError('not a media file') + + self._sort_formats(relinker_info['formats']) + + thumbnails = [] + for image_type in ('image', 'image_medium', 'image_300'): + thumbnail_url = media.get(image_type) + if thumbnail_url: + thumbnails.append({ + 'url': compat_urlparse.urljoin(url, thumbnail_url), + }) + + subtitles = self._extract_subtitles(url, media) + + info = { + 'id': content_id, + 'title': title, + 'description': strip_or_none(media.get('desc')), + 'thumbnails': thumbnails, + 'uploader': media.get('author'), + 
'upload_date': unified_strdate(media.get('date')), + 'duration': parse_duration(media.get('length')), + 'subtitles': subtitles, + } + + info.update(relinker_info) + + return info + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + content_item_id = None + + content_item_url = self._html_search_meta( + ('og:url', 'og:video', 'og:video:secure_url', 'twitter:url', + 'twitter:player', 'jsonlink'), webpage, default=None) + if content_item_url: + content_item_id = self._search_regex( + r'ContentItem-(%s)' % self._UUID_RE, content_item_url, + 'content item id', default=None) + + if not content_item_id: + content_item_id = self._search_regex( + r'''(?x) + (?: + (?:initEdizione|drawMediaRaiTV)\(| + <(?:[^>]+\bdata-id|var\s+uniquename)=| + <iframe[^>]+\bsrc= + ) + (["\']) + (?:(?!\1).)*\bContentItem-(?P<id>%s) + ''' % self._UUID_RE, + webpage, 'content item id', default=None, group='id') + + content_item_ids = set() + if content_item_id: + content_item_ids.add(content_item_id) + if video_id not in content_item_ids: + content_item_ids.add(video_id) + + for content_item_id in content_item_ids: + try: + return self._extract_from_content_id(content_item_id, url) + except GeoRestrictedError: + raise + except ExtractorError: + pass + + relinker_url = self._proto_relative_url(self._search_regex( + r'''(?x) + (?: + var\s+videoURL| + mediaInfo\.mediaUri + )\s*=\s* + ([\'"]) + (?P<url> + (?:https?:)? + //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\? + (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1 + ''', + webpage, 'relinker URL', group='url')) + + relinker_info = self._extract_relinker_info( + urljoin(url, relinker_url), video_id) + self._sort_formats(relinker_info['formats']) + + title = self._search_regex( + r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1', + webpage, 'title', group='title', + default=None) or self._og_search_title(webpage) + + info = { + 'id': video_id, + 'title': title, + } + + info.update(relinker_info) + + return info diff --git a/plugins/youtube_download/yt_dlp/extractor/raywenderlich.py b/plugins/youtube_download/yt_dlp/extractor/raywenderlich.py new file mode 100644 index 0000000..f04d51f --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/raywenderlich.py @@ -0,0 +1,179 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .vimeo import VimeoIE +from ..compat import compat_str +from ..utils import ( + ExtractorError, + int_or_none, + merge_dicts, + try_get, + unescapeHTML, + unified_timestamp, + urljoin, +) + + +class RayWenderlichIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + videos\.raywenderlich\.com/courses| + (?:www\.)?raywenderlich\.com + )/ + (?P<course_id>[^/]+)/lessons/(?P<id>\d+) + ''' + + _TESTS = [{ + 'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1', + 'info_dict': { + 'id': '248377018', + 'ext': 'mp4', + 'title': 'Introduction', + 'description': 'md5:804d031b3efa9fcb49777d512d74f722', + 'timestamp': 1513906277, + 'upload_date': '20171222', + 'duration': 133, + 'uploader': 'Ray Wenderlich', + 'uploader_id': 'user3304672', + }, + 'params': { + 'noplaylist': True, + 'skip_download': True, + }, + 'add_ie': [VimeoIE.ie_key()], + 'expected_warnings': ['HTTP Error 403: Forbidden'], + }, { + 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1', + 'only_matching': True, + }] + + @staticmethod + def _extract_video_id(data, lesson_id): + if not data: + return + groups = try_get(data, 
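# NOTE: RaiIE._real_extract() above collects every plausible ContentItem id and
# tries each in turn, swallowing ordinary extraction failures but re-raising
# geo-restriction immediately. The control flow in miniature (both exception
# classes and the resolver below are stand-ins):
class GeoRestrictedError(Exception):
    pass

class ExtractorError(Exception):
    pass

def extract(content_id):
    if content_id == 'good':
        return {'id': content_id}
    raise ExtractorError('no media for %s' % content_id)

def try_candidates(candidates):
    for content_id in candidates:
        try:
            return extract(content_id)
        except GeoRestrictedError:
            raise
        except ExtractorError:
            pass  # fall through to the next candidate

print(try_candidates(['bogus', 'good']))  # {'id': 'good'}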
lambda x: x['groups'], list) or [] + if not groups: + return + for group in groups: + if not isinstance(group, dict): + continue + contents = try_get(data, lambda x: x['contents'], list) or [] + for content in contents: + if not isinstance(content, dict): + continue + ordinal = int_or_none(content.get('ordinal')) + if ordinal != lesson_id: + continue + video_id = content.get('identifier') + if video_id: + return compat_str(video_id) + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + course_id, lesson_id = mobj.group('course_id', 'id') + display_id = '%s/%s' % (course_id, lesson_id) + + webpage = self._download_webpage(url, display_id) + + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._html_search_meta( + 'twitter:image', webpage, 'thumbnail') + + if '>Subscribe to unlock' in webpage: + raise ExtractorError( + 'This content is only available for subscribers', + expected=True) + + info = { + 'thumbnail': thumbnail, + } + + vimeo_id = self._search_regex( + r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None) + + if not vimeo_id: + data = self._parse_json( + self._search_regex( + r'data-collection=(["\'])(?P<data>{.+?})\1', webpage, + 'data collection', default='{}', group='data'), + display_id, transform_source=unescapeHTML, fatal=False) + video_id = self._extract_video_id( + data, lesson_id) or self._search_regex( + r'/videos/(\d+)/', thumbnail, 'video id') + headers = { + 'Referer': url, + 'X-Requested-With': 'XMLHttpRequest', + } + csrf_token = self._html_search_meta( + 'csrf-token', webpage, 'csrf token', default=None) + if csrf_token: + headers['X-CSRF-Token'] = csrf_token + video = self._download_json( + 'https://videos.raywenderlich.com/api/v1/videos/%s.json' + % video_id, display_id, headers=headers)['video'] + vimeo_id = video['clips'][0]['provider_id'] + info.update({ + '_type': 'url_transparent', + 'title': video.get('name'), + 'description': video.get('description') or video.get( + 'meta_description'), + 'duration': int_or_none(video.get('duration')), + 'timestamp': unified_timestamp(video.get('created_at')), + }) + + return merge_dicts(info, self.url_result( + VimeoIE._smuggle_referrer( + 'https://player.vimeo.com/video/%s' % vimeo_id, url), + ie=VimeoIE.ie_key(), video_id=vimeo_id)) + + +class RayWenderlichCourseIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + videos\.raywenderlich\.com/courses| + (?:www\.)?raywenderlich\.com + )/ + (?P<id>[^/]+) + ''' + + _TEST = { + 'url': 'https://www.raywenderlich.com/3530-testing-in-ios', + 'info_dict': { + 'title': 'Testing in iOS', + 'id': '3530-testing-in-ios', + }, + 'params': { + 'noplaylist': False, + }, + 'playlist_count': 29, + } + + @classmethod + def suitable(cls, url): + return False if RayWenderlichIE.suitable(url) else super( + RayWenderlichCourseIE, cls).suitable(url) + + def _real_extract(self, url): + course_id = self._match_id(url) + + webpage = self._download_webpage(url, course_id) + + entries = [] + lesson_urls = set() + for lesson_url in re.findall( + r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage): + if lesson_url in lesson_urls: + continue + lesson_urls.add(lesson_url) + entries.append(self.url_result( + urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key())) + + title = self._og_search_title( + webpage, default=None) or self._html_search_meta( + 'twitter:title', webpage, 'title', default=None) + + return self.playlist_result(entries, course_id, title) diff --git a/plugins/youtube_download/yt_dlp/extractor/rbmaradio.py 
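# NOTE: RayWenderlichIE._extract_video_id() above walks the embedded collection
# data and keeps the entry whose 'ordinal' equals the lesson number from the URL.
# The scan reduced to plain data (sample collection invented; the group handling
# is folded away for brevity):
def extract_video_id(data, lesson_id):
    for content in (data or {}).get('contents') or []:
        if isinstance(content, dict) and content.get('ordinal') == lesson_id:
            video_id = content.get('identifier')
            if video_id:
                return str(video_id)

data = {'contents': [{'ordinal': 1, 'identifier': 248377018}, {'ordinal': 2}]}
print(extract_video_id(data, 1))  # '248377018'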
b/plugins/youtube_download/yt_dlp/extractor/rbmaradio.py new file mode 100644 index 0000000..9642fbb --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/rbmaradio.py @@ -0,0 +1,71 @@ +from __future__ import unicode_literals + + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + clean_html, + int_or_none, + unified_timestamp, + update_url_query, +) + + +class RBMARadioIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?:rbmaradio|redbullradio)\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011', + 'md5': '6bc6f9bcb18994b4c983bc3bf4384d95', + 'info_dict': { + 'id': 'ford-lopatin-live-at-primavera-sound-2011', + 'ext': 'mp3', + 'title': 'Main Stage - Ford & Lopatin at Primavera Sound', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 2452, + 'timestamp': 1307103164, + 'upload_date': '20110603', + }, + } + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + show_id = mobj.group('show_id') + episode_id = mobj.group('id') + + webpage = self._download_webpage(url, episode_id) + + episode = self._parse_json( + self._search_regex( + r'__INITIAL_STATE__\s*=\s*({.+?})\s*</script>', + webpage, 'json data'), + episode_id)['episodes'][show_id][episode_id] + + title = episode['title'] + + show_title = episode.get('showTitle') + if show_title: + title = '%s - %s' % (show_title, title) + + formats = [{ + 'url': update_url_query(episode['audioURL'], query={'cbr': abr}), + 'format_id': compat_str(abr), + 'abr': abr, + 'vcodec': 'none', + } for abr in (96, 128, 192, 256)] + self._check_formats(formats, episode_id) + + description = clean_html(episode.get('longTeaser')) + thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape')) + duration = int_or_none(episode.get('duration')) + timestamp = unified_timestamp(episode.get('publishedAt')) + + return { + 'id': episode_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'timestamp': timestamp, + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/rcs.py b/plugins/youtube_download/yt_dlp/extractor/rcs.py new file mode 100644 index 0000000..ace611b --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/rcs.py @@ -0,0 +1,427 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + ExtractorError, + js_to_json, + base_url, + url_basename, + urljoin, +) + + +class RCSBaseIE(InfoExtractor): + # based on VideoPlayerLoader.prototype.getVideoSrc + # and VideoPlayerLoader.prototype.transformSrc from + # https://js2.corriereobjects.it/includes2013/LIBS/js/corriere_video.sjs + _ALL_REPLACE = { + 'media2vam.corriere.it.edgesuite.net': + 'media2vam-corriere-it.akamaized.net', + 'media.youreporter.it.edgesuite.net': + 'media-youreporter-it.akamaized.net', + 'corrierepmd.corriere.it.edgesuite.net': + 'corrierepmd-corriere-it.akamaized.net', + 'media2vam-corriere-it.akamaized.net/fcs.quotidiani/vr/videos/': + 'video.corriere.it/vr360/videos/', + '.net//': '.net/', + } + _MP4_REPLACE = { + 'media2vam.corbologna.corriere.it.edgesuite.net': + 'media2vam-bologna-corriere-it.akamaized.net', + 'media2vam.corfiorentino.corriere.it.edgesuite.net': + 'media2vam-fiorentino-corriere-it.akamaized.net', + 
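# NOTE: RBMARadioIE above fans a single audio URL out into one format per fixed
# CBR rung by appending ?cbr=<abr>; update_url_query() is yt-dlp's helper, so a
# stdlib spelling of the same move is shown here (audio URL invented):
from urllib.parse import urlencode, urlparse, urlunparse, parse_qs

def with_query(url, extra):
    parts = urlparse(url)
    query = {**parse_qs(parts.query), **{k: [str(v)] for k, v in extra.items()}}
    return urlunparse(parts._replace(query=urlencode(query, doseq=True)))

audio_url = 'https://audio.example.com/show/episode.mp3'
formats = [{
    'url': with_query(audio_url, {'cbr': abr}),
    'format_id': str(abr),
    'abr': abr,
    'vcodec': 'none',
} for abr in (96, 128, 192, 256)]
print(formats[0]['url'])  # https://audio.example.com/show/episode.mp3?cbr=96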
'media2vam.cormezzogiorno.corriere.it.edgesuite.net': + 'media2vam-mezzogiorno-corriere-it.akamaized.net', + 'media2vam.corveneto.corriere.it.edgesuite.net': + 'media2vam-veneto-corriere-it.akamaized.net', + 'media2.oggi.it.edgesuite.net': + 'media2-oggi-it.akamaized.net', + 'media2.quimamme.it.edgesuite.net': + 'media2-quimamme-it.akamaized.net', + 'media2.amica.it.edgesuite.net': + 'media2-amica-it.akamaized.net', + 'media2.living.corriere.it.edgesuite.net': + 'media2-living-corriere-it.akamaized.net', + 'media2.style.corriere.it.edgesuite.net': + 'media2-style-corriere-it.akamaized.net', + 'media2.iodonna.it.edgesuite.net': + 'media2-iodonna-it.akamaized.net', + 'media2.leitv.it.edgesuite.net': + 'media2-leitv-it.akamaized.net', + } + _MIGRATION_MAP = { + 'videoamica-vh.akamaihd': 'amica', + 'media2-amica-it.akamaized': 'amica', + 'corrierevam-vh.akamaihd': 'corriere', + 'media2vam-corriere-it.akamaized': 'corriere', + 'cormezzogiorno-vh.akamaihd': 'corrieredelmezzogiorno', + 'media2vam-mezzogiorno-corriere-it.akamaized': 'corrieredelmezzogiorno', + 'corveneto-vh.akamaihd': 'corrieredelveneto', + 'media2vam-veneto-corriere-it.akamaized': 'corrieredelveneto', + 'corbologna-vh.akamaihd': 'corrieredibologna', + 'media2vam-bologna-corriere-it.akamaized': 'corrieredibologna', + 'corfiorentino-vh.akamaihd': 'corrierefiorentino', + 'media2vam-fiorentino-corriere-it.akamaized': 'corrierefiorentino', + 'corinnovazione-vh.akamaihd': 'corriereinnovazione', + 'media2-gazzanet-gazzetta-it.akamaized': 'gazzanet', + 'videogazzanet-vh.akamaihd': 'gazzanet', + 'videogazzaworld-vh.akamaihd': 'gazzaworld', + 'gazzettavam-vh.akamaihd': 'gazzetta', + 'media2vam-gazzetta-it.akamaized': 'gazzetta', + 'videoiodonna-vh.akamaihd': 'iodonna', + 'media2-leitv-it.akamaized': 'leitv', + 'videoleitv-vh.akamaihd': 'leitv', + 'videoliving-vh.akamaihd': 'living', + 'media2-living-corriere-it.akamaized': 'living', + 'media2-oggi-it.akamaized': 'oggi', + 'videooggi-vh.akamaihd': 'oggi', + 'media2-quimamme-it.akamaized': 'quimamme', + 'quimamme-vh.akamaihd': 'quimamme', + 'videorunning-vh.akamaihd': 'running', + 'media2-style-corriere-it.akamaized': 'style', + 'style-vh.akamaihd': 'style', + 'videostyle-vh.akamaihd': 'style', + 'media2-stylepiccoli-it.akamaized': 'stylepiccoli', + 'stylepiccoli-vh.akamaihd': 'stylepiccoli', + 'doveviaggi-vh.akamaihd': 'viaggi', + 'media2-doveviaggi-it.akamaized': 'viaggi', + 'media2-vivimilano-corriere-it.akamaized': 'vivimilano', + 'vivimilano-vh.akamaihd': 'vivimilano', + 'media2-youreporter-it.akamaized': 'youreporter' + } + _MIGRATION_MEDIA = { + 'advrcs-vh.akamaihd': '', + 'corriere-f.akamaihd': '', + 'corrierepmd-corriere-it.akamaized': '', + 'corrprotetto-vh.akamaihd': '', + 'gazzetta-f.akamaihd': '', + 'gazzettapmd-gazzetta-it.akamaized': '', + 'gazzprotetto-vh.akamaihd': '', + 'periodici-f.akamaihd': '', + 'periodicisecure-vh.akamaihd': '', + 'videocoracademy-vh.akamaihd': '' + } + + def _get_video_src(self, video): + mediaFiles = video.get('mediaProfile').get('mediaFile') + src = {} + # audio + if video.get('mediaType') == 'AUDIO': + for aud in mediaFiles: + # todo: check + src['mp3'] = aud.get('value') + # video + else: + for vid in mediaFiles: + if vid.get('mimeType') == 'application/vnd.apple.mpegurl': + src['m3u8'] = vid.get('value') + if vid.get('mimeType') == 'video/mp4': + src['mp4'] = vid.get('value') + + # replace host + for t in src: + for s, r in self._ALL_REPLACE.items(): + src[t] = src[t].replace(s, r) + for s, r in self._MP4_REPLACE.items(): + src[t] = 
src[t].replace(s, r) + + # switch cdn + if 'mp4' in src and 'm3u8' in src: + if ('-lh.akamaihd' not in src.get('m3u8') + and 'akamai' in src.get('mp4')): + if 'm3u8' in src: + matches = re.search(r'(?:https*:)?\/\/(?P<host>.*)\.net\/i(?P<path>.*)$', src.get('m3u8')) + src['m3u8'] = 'https://vod.rcsobjects.it/hls/%s%s' % ( + self._MIGRATION_MAP[matches.group('host')], + matches.group('path').replace( + '///', '/').replace( + '//', '/').replace( + '.csmil', '.urlset' + ) + ) + if 'mp4' in src: + matches = re.search(r'(?:https*:)?\/\/(?P<host>.*)\.net\/i(?P<path>.*)$', src.get('mp4')) + if matches: + if matches.group('host') in self._MIGRATION_MEDIA: + vh_stream = 'https://media2.corriereobjects.it' + if src.get('mp4').find('fcs.quotidiani_!'): + vh_stream = 'https://media2-it.corriereobjects.it' + src['mp4'] = '%s%s' % ( + vh_stream, + matches.group('path').replace( + '///', '/').replace( + '//', '/').replace( + '/fcs.quotidiani/mediacenter', '').replace( + '/fcs.quotidiani_!/mediacenter', '').replace( + 'corriere/content/mediacenter/', '').replace( + 'gazzetta/content/mediacenter/', '') + ) + else: + src['mp4'] = 'https://vod.rcsobjects.it/%s%s' % ( + self._MIGRATION_MAP[matches.group('host')], + matches.group('path').replace('///', '/').replace('//', '/') + ) + + if 'mp3' in src: + src['mp3'] = src.get('mp3').replace( + 'media2vam-corriere-it.akamaized.net', + 'vod.rcsobjects.it/corriere') + if 'mp4' in src: + if src.get('mp4').find('fcs.quotidiani_!'): + src['mp4'] = src.get('mp4').replace('vod.rcsobjects', 'vod-it.rcsobjects') + if 'm3u8' in src: + if src.get('m3u8').find('fcs.quotidiani_!'): + src['m3u8'] = src.get('m3u8').replace('vod.rcsobjects', 'vod-it.rcsobjects') + + if 'geoblocking' in video.get('mediaProfile'): + if 'm3u8' in src: + src['m3u8'] = src.get('m3u8').replace('vod.rcsobjects', 'vod-it.rcsobjects') + if 'mp4' in src: + src['mp4'] = src.get('mp4').replace('vod.rcsobjects', 'vod-it.rcsobjects') + if 'm3u8' in src: + if src.get('m3u8').find('csmil') and src.get('m3u8').find('vod'): + src['m3u8'] = src.get('m3u8').replace('.csmil', '.urlset') + + return src + + def _create_formats(self, urls, video_id): + formats = [] + formats = self._extract_m3u8_formats( + urls.get('m3u8'), video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False) + + if urls.get('mp4'): + formats.append({ + 'format_id': 'http-mp4', + 'url': urls['mp4'] + }) + self._sort_formats(formats) + return formats + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + video_id = mobj.group('id') + + if 'cdn' not in mobj.groupdict(): + raise ExtractorError('CDN not found in url: %s' % url) + + # for leitv/youreporter/viaggi don't use the embed page + if ((mobj.group('cdn') not in ['leitv.it', 'youreporter.it']) + and (mobj.group('vid') == 'video')): + url = 'https://video.%s/video-embed/%s' % (mobj.group('cdn'), video_id) + + page = self._download_webpage(url, video_id) + + video_data = None + # look for json video data url + json = self._search_regex( + r'''(?x)url\s*=\s*(["']) + (?P<url> + (?:https?:)?//video\.rcs\.it + /fragment-includes/video-includes/.+?\.json + )\1;''', + page, video_id, group='url', default=None) + if json: + if json.startswith('//'): + json = 'https:%s' % json + video_data = self._download_json(json, video_id) + + # if json url not found, look for json video data directly in the page + else: + # RCS normal pages and most of the embeds + json = self._search_regex( + r'[\s;]video\s*=\s*({[\s\S]+?})(?:;|,playlist=)', + page, video_id, default=None) + if not 
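# NOTE: _get_video_src() above rewrites legacy Akamai hostnames onto the current
# vod.rcsobjects.it layout: regex-match the host, look it up in _MIGRATION_MAP,
# collapse duplicate slashes and swap .csmil for .urlset. A trimmed-down version
# of that rewrite (map reduced to a single entry for illustration):
import re

_MIGRATION_MAP = {'corrierevam-vh.akamaihd': 'corriere'}

def migrate(m3u8_url):
    m = re.search(r'(?:https?:)?//(?P<host>.+?)\.net/i(?P<path>.+)$', m3u8_url)
    if not m or m.group('host') not in _MIGRATION_MAP:
        return m3u8_url
    path = re.sub(r'/{2,}', '/', m.group('path')).replace('.csmil', '.urlset')
    return 'https://vod.rcsobjects.it/hls/%s%s' % (_MIGRATION_MAP[m.group('host')], path)

print(migrate('https://corrierevam-vh.akamaihd.net/i//video//2020/clip.csmil/master.m3u8'))
# https://vod.rcsobjects.it/hls/corriere/video/2020/clip.urlset/master.m3u8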
json and 'video-embed' in url: + page = self._download_webpage(url.replace('video-embed', 'video-json'), video_id) + json = self._search_regex( + r'##start-video##({[\s\S]+?})##end-video##', + page, video_id, default=None) + if not json: + # if no video data found try search for iframes + emb = RCSEmbedsIE._extract_url(page) + if emb: + return { + '_type': 'url_transparent', + 'url': emb, + 'ie_key': RCSEmbedsIE.ie_key() + } + if json: + video_data = self._parse_json( + json, video_id, transform_source=js_to_json) + + if not video_data: + raise ExtractorError('Video data not found in the page') + + formats = self._create_formats( + self._get_video_src(video_data), video_id) + + description = (video_data.get('description') + or clean_html(video_data.get('htmlDescription')) + or self._html_search_meta('description', page)) + uploader = video_data.get('provider') or mobj.group('cdn') + + return { + 'id': video_id, + 'title': video_data.get('title'), + 'description': description, + 'uploader': uploader, + 'formats': formats + } + + +class RCSEmbedsIE(RCSBaseIE): + _VALID_URL = r'''(?x) + https?://(?P<vid>video)\. + (?P<cdn> + (?: + rcs| + (?:corriere\w+\.)?corriere| + (?:gazzanet\.)?gazzetta + )\.it) + /video-embed/(?P<id>[^/=&\?]+?)(?:$|\?)''' + _TESTS = [{ + 'url': 'https://video.rcs.it/video-embed/iodonna-0001585037', + 'md5': '623ecc8ffe7299b2d0c1046d8331a9df', + 'info_dict': { + 'id': 'iodonna-0001585037', + 'ext': 'mp4', + 'title': 'Sky Arte racconta Madonna nella serie "Artist to icon"', + 'description': 'md5:65b09633df9ffee57f48b39e34c9e067', + 'uploader': 'rcs.it', + } + }, { + # redownload the page changing 'video-embed' in 'video-json' + 'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789', + 'md5': 'a043e3fecbe4d9ed7fc5d888652a5440', + 'info_dict': { + 'id': 'gazzanet-mo05-0000260789', + 'ext': 'mp4', + 'title': 'Valentino Rossi e papà Graziano si divertono col drifting', + 'description': 'md5:a8bf90d6adafd9815f70fc74c0fc370a', + 'uploader': 'rcd', + } + }, { + 'url': 'https://video.corriere.it/video-embed/b727632a-f9d0-11ea-91b0-38d50a849abb?player', + 'match_only': True + }, { + 'url': 'https://video.gazzetta.it/video-embed/49612410-00ca-11eb-bcd8-30d4253e0140', + 'match_only': True + }] + + @staticmethod + def _sanitize_urls(urls): + # add protocol if missing + for i, e in enumerate(urls): + if e.startswith('//'): + urls[i] = 'https:%s' % e + # clean iframes urls + for i, e in enumerate(urls): + urls[i] = urljoin(base_url(e), url_basename(e)) + return urls + + @staticmethod + def _extract_urls(webpage): + entries = [ + mobj.group('url') + for mobj in re.finditer(r'''(?x) + (?: + data-frame-src=| + <iframe[^\n]+src= + ) + (["']) + (?P<url>(?:https?:)?//video\. + (?: + rcs| + (?:corriere\w+\.)?corriere| + (?:gazzanet\.)?gazzetta + ) + \.it/video-embed/.+?) + \1''', webpage)] + return RCSEmbedsIE._sanitize_urls(entries) + + @staticmethod + def _extract_url(webpage): + urls = RCSEmbedsIE._extract_urls(webpage) + return urls[0] if urls else None + + +class RCSIE(RCSBaseIE): + _VALID_URL = r'''(?x)https?://(?P<vid>video|viaggi)\. + (?P<cdn> + (?: + corrieredelmezzogiorno\. + |corrieredelveneto\. + |corrieredibologna\. + |corrierefiorentino\. 
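# NOTE: RCSBaseIE._real_extract() above hunts for the video JSON in several
# places in order: a fragment-includes URL, an inline "video = {...}" assignment,
# then the ##start-video##...##end-video## block of the /video-json/ variant,
# before falling back to embedded iframes. The regex cascade as a standalone
# helper (page text invented):
import re

def find_video_json(page):
    for pattern in (
            r'[\s;]video\s*=\s*({[\s\S]+?})(?:;|,playlist=)',
            r'##start-video##({[\s\S]+?})##end-video##'):
        m = re.search(pattern, page)
        if m:
            return m.group(1)

page = 'head ##start-video##{"title": "demo"}##end-video## tail'
print(find_video_json(page))  # {"title": "demo"}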
+ )?corriere\.it + |(?:gazzanet\.)?gazzetta\.it) + /(?!video-embed/).+?/(?P<id>[^/\?]+)(?=\?|/$|$)''' + _TESTS = [{ + 'url': 'https://video.corriere.it/sport/formula-1/vettel-guida-ferrari-sf90-mugello-suo-fianco-c-elecrerc-bendato-video-esilarante/b727632a-f9d0-11ea-91b0-38d50a849abb', + 'md5': '0f4ededc202b0f00b6e509d831e2dcda', + 'info_dict': { + 'id': 'b727632a-f9d0-11ea-91b0-38d50a849abb', + 'ext': 'mp4', + 'title': 'Vettel guida la Ferrari SF90 al Mugello e al suo fianco c\'è Leclerc (bendato): il video è esilarante', + 'description': 'md5:93b51c9161ac8a64fb2f997b054d0152', + 'uploader': 'Corriere Tv', + } + }, { + # video data inside iframe + 'url': 'https://viaggi.corriere.it/video/norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen/', + 'md5': 'da378e4918d2afbf7d61c35abb948d4c', + 'info_dict': { + 'id': '5b7cd134-e2c1-11ea-89b3-b56dd0df2aa2', + 'ext': 'mp4', + 'title': 'La nuova spettacolare attrazione in Norvegia: il ponte sopra Vøringsfossen', + 'description': 'md5:18b35a291f6746c0c8dacd16e5f5f4f8', + 'uploader': 'DOVE Viaggi', + } + }, { + 'url': 'https://video.gazzetta.it/video-motogp-catalogna-cadute-dovizioso-vale-rossi/49612410-00ca-11eb-bcd8-30d4253e0140?vclk=Videobar', + 'md5': 'eedc1b5defd18e67383afef51ff7bdf9', + 'info_dict': { + 'id': '49612410-00ca-11eb-bcd8-30d4253e0140', + 'ext': 'mp4', + 'title': 'Dovizioso, il contatto con Zarco e la caduta. E anche Vale finisce a terra', + 'description': 'md5:8c6e905dc3b9413218beca11ebd69778', + 'uploader': 'AMorici', + } + }, { + 'url': 'https://video.corriere.it/video-360/metro-copenaghen-tutta-italiana/a248a7f0-e2db-11e9-9830-af2de6b1f945', + 'match_only': True + }] + + +class RCSVariousIE(RCSBaseIE): + _VALID_URL = r'''(?x)https?://www\. + (?P<cdn> + leitv\.it| + youreporter\.it + )/(?:[^/]+/)?(?P<id>[^/]+?)(?:$|\?|/)''' + _TESTS = [{ + 'url': 'https://www.leitv.it/benessere/mal-di-testa-come-combatterlo-ed-evitarne-la-comparsa/', + 'md5': '92b4e63667b8f95acb0a04da25ae28a1', + 'info_dict': { + 'id': 'mal-di-testa-come-combatterlo-ed-evitarne-la-comparsa', + 'ext': 'mp4', + 'title': 'Cervicalgia e mal di testa, il video con i suggerimenti dell\'esperto', + 'description': 'md5:ae21418f34cee0b8d02a487f55bcabb5', + 'uploader': 'leitv.it', + } + }, { + 'url': 'https://www.youreporter.it/fiume-sesia-3-ottobre-2020/', + 'md5': '8dccd436b47a830bab5b4a88232f391a', + 'info_dict': { + 'id': 'fiume-sesia-3-ottobre-2020', + 'ext': 'mp4', + 'title': 'Fiume Sesia 3 ottobre 2020', + 'description': 'md5:0070eef1cc884d13c970a4125063de55', + 'uploader': 'youreporter.it', + } + }] diff --git a/plugins/youtube_download/yt_dlp/extractor/rcti.py b/plugins/youtube_download/yt_dlp/extractor/rcti.py new file mode 100644 index 0000000..ac42e58 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/rcti.py @@ -0,0 +1,378 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import random +import time + +from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + dict_get, + ExtractorError, + strip_or_none, + traverse_obj, + try_get +) + + +class RCTIPlusBaseIE(InfoExtractor): + def _real_initialize(self): + self._AUTH_KEY = self._download_json( + 'https://api.rctiplus.com/api/v1/visitor?platform=web', # platform can be web, mweb, android, ios + None, 'Fetching authorization key')['data']['access_token'] + + def _call_api(self, url, video_id, note=None): + json = self._download_json( + url, video_id, note=note, headers={'Authorization': self._AUTH_KEY}) + if 
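# NOTE: RCTIPlusBaseIE above is a two-step client: one anonymous call to the
# /api/v1/visitor endpoint yields an access token, every later request carries it
# in the Authorization header, and non-zero status codes are promoted to errors.
# A skeleton of that handshake with a stubbed transport (download_json below
# fakes the network; the real class uses _download_json):
def download_json(url, headers=None):
    if url.endswith('/visitor?platform=web'):
        return {'data': {'access_token': 'tok-123'}}
    assert headers and headers['Authorization'] == 'tok-123', 'missing auth'
    return {'status': {'code': 0}, 'data': {'ok': True}}

class Client:
    def __init__(self):
        self.auth_key = download_json(
            'https://api.example.com/api/v1/visitor?platform=web')['data']['access_token']

    def call_api(self, url):
        resp = download_json(url, headers={'Authorization': self.auth_key})
        if resp.get('status', {}).get('code', 0) != 0:
            raise RuntimeError(resp['status'].get('message_client'))
        return resp.get('data')

print(Client().call_api('https://api.example.com/api/v1/episode/1/url'))  # {'ok': True}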
json.get('status', {}).get('code', 0) != 0: + raise ExtractorError(f'{self.IE_NAME} said: {json["status"]["message_client"]}', cause=json) + return json.get('data'), json.get('meta') + + +class RCTIPlusIE(RCTIPlusBaseIE): + _VALID_URL = r'https://www\.rctiplus\.com/(?:programs/\d+?/.*?/)?(?P<type>episode|clip|extra|live-event|missed-event)/(?P<id>\d+)/(?P<display_id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.rctiplus.com/programs/1259/kiko-untuk-lola/episode/22124/untuk-lola', + 'md5': '56ed45affad45fa18d5592a1bc199997', + 'info_dict': { + 'id': 'v_e22124', + 'title': 'Untuk Lola', + 'display_id': 'untuk-lola', + 'description': 'md5:2b809075c0b1e071e228ad6d13e41deb', + 'ext': 'mp4', + 'duration': 1400, + 'timestamp': 1615978800, + 'upload_date': '20210317', + 'series': 'Kiko : Untuk Lola', + 'season_number': 1, + 'episode_number': 1, + 'channel': 'RCTI', + }, + 'params': { + 'fixup': 'never', + }, + }, { # Clip; Series title doesn't appear on metadata JSON + 'url': 'https://www.rctiplus.com/programs/316/cahaya-terindah/clip/3921/make-a-wish', + 'md5': 'd179b2ff356f0e91a53bcc6a4d8504f0', + 'info_dict': { + 'id': 'v_c3921', + 'title': 'Make A Wish', + 'display_id': 'make-a-wish', + 'description': 'Make A Wish', + 'ext': 'mp4', + 'duration': 288, + 'timestamp': 1571652600, + 'upload_date': '20191021', + 'series': 'Cahaya Terindah', + 'channel': 'RCTI', + }, + 'params': { + 'fixup': 'never', + }, + }, { # Extra + 'url': 'https://www.rctiplus.com/programs/616/inews-malam/extra/9438/diungkapkan-melalui-surat-terbuka-ceo-ruangguru-belva-devara-mundur-dari-staf-khusus-presiden', + 'md5': 'c48106afdbce609749f5e0c007d9278a', + 'info_dict': { + 'id': 'v_ex9438', + 'title': 'md5:2ede828c0f8bde249e0912be150314ca', + 'display_id': 'md5:62b8d4e9ff096db527a1ad797e8a9933', + 'description': 'md5:2ede828c0f8bde249e0912be150314ca', + 'ext': 'mp4', + 'duration': 93, + 'timestamp': 1587561540, + 'upload_date': '20200422', + 'series': 'iNews Malam', + 'channel': 'INews', + }, + }, { # Missed event/replay + 'url': 'https://www.rctiplus.com/missed-event/2507/mou-signing-ceremony-27-juli-2021-1400-wib', + 'md5': '649c5f27250faed1452ca8b91e06922d', + 'info_dict': { + 'id': 'v_pe2507', + 'title': 'MOU Signing Ceremony | 27 Juli 2021 | 14.00 WIB', + 'display_id': 'mou-signing-ceremony-27-juli-2021-1400-wib', + 'ext': 'mp4', + 'timestamp': 1627142400, + 'upload_date': '20210724', + 'was_live': True, + 'release_timestamp': 1627369200, + }, + 'params': { + 'fixup': 'never', + }, + }, { # Live event; Cloudfront CDN + 'url': 'https://www.rctiplus.com/live-event/2530/dai-muda-charging-imun-dengan-iman-4-agustus-2021-1600-wib', + 'info_dict': { + 'id': 'v_le2530', + 'title': 'Dai Muda : Charging Imun dengan Iman | 4 Agustus 2021 | 16.00 WIB', + 'display_id': 'dai-muda-charging-imun-dengan-iman-4-agustus-2021-1600-wib', + 'ext': 'mp4', + 'timestamp': 1627898400, + 'upload_date': '20210802', + 'release_timestamp': 1628067600, + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'This live event has ended.', + }, { # TV; live_at is null + 'url': 'https://www.rctiplus.com/live-event/1/rcti', + 'info_dict': { + 'id': 'v_lt1', + 'title': 'RCTI', + 'display_id': 'rcti', + 'ext': 'mp4', + 'timestamp': 1546344000, + 'upload_date': '20190101', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + }, + }] + _CONVIVA_JSON_TEMPLATE = { + 't': 'CwsSessionHb', + 'cid': 'ff84ae928c3b33064b76dec08f12500465e59a6f', + 'clid': '0', + 'sid': 0, + 'seq': 0, + 'caps': 0, + 'sf': 7, + 'sdk': True, + } + + def 
_real_extract(self, url): + match = self._match_valid_url(url).groupdict() + video_type, video_id, display_id = match['type'], match['id'], match['display_id'] + + url_api_version = 'v2' if video_type == 'missed-event' else 'v1' + appier_id = '23984824_' + str(random.randint(0, 10000000000)) # Based on the webpage's uuidRandom generator + video_json = self._call_api( + f'https://api.rctiplus.com/api/{url_api_version}/{video_type}/{video_id}/url?appierid={appier_id}', display_id, 'Downloading video URL JSON')[0] + video_url = video_json['url'] + + is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['live_at']) + if is_upcoming is None: + is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['start_date']) + if is_upcoming: + self.raise_no_formats( + 'This event will start at %s.' % video_json['live_label'] if video_json.get('live_label') else 'This event has not started yet.', expected=True) + if 'akamaized' in video_url: + # For some videos hosted on Akamai's CDN (possibly AES-encrypted ones?), a session needs to at least be made via Conviva's API + conviva_json_data = { + **self._CONVIVA_JSON_TEMPLATE, + 'url': video_url, + 'sst': int(time.time()) + } + conviva_json_res = self._download_json( + 'https://ff84ae928c3b33064b76dec08f12500465e59a6f.cws.conviva.com/0/wsg', display_id, + 'Creating Conviva session', 'Failed to create Conviva session', + fatal=False, data=json.dumps(conviva_json_data).encode('utf-8')) + if conviva_json_res and conviva_json_res.get('err') != 'ok': + self.report_warning('Conviva said: %s' % str(conviva_json_res.get('err'))) + + video_meta, meta_paths = self._call_api( + 'https://api.rctiplus.com/api/v1/%s/%s' % (video_type, video_id), display_id, 'Downloading video metadata') + + thumbnails, image_path = [], meta_paths.get('image_path', 'https://rstatic.akamaized.net/media/') + if video_meta.get('portrait_image'): + thumbnails.append({ + 'id': 'portrait_image', + 'url': '%s%d%s' % (image_path, 2000, video_meta['portrait_image']) # 2000px seems to be the highest resolution that can be given + }) + if video_meta.get('landscape_image'): + thumbnails.append({ + 'id': 'landscape_image', + 'url': '%s%d%s' % (image_path, 2000, video_meta['landscape_image']) + }) + try: + formats = self._extract_m3u8_formats(video_url, display_id, 'mp4', headers={'Referer': 'https://www.rctiplus.com/'}) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + self.raise_geo_restricted(countries=['ID'], metadata_available=True) + else: + raise e + for f in formats: + if 'akamaized' in f['url'] or 'cloudfront' in f['url']: + f.setdefault('http_headers', {})['Referer'] = 'https://www.rctiplus.com/' # Referer header is required for akamai/cloudfront CDNs + + self._sort_formats(formats) + + return { + 'id': video_meta.get('product_id') or video_json.get('product_id'), + 'title': dict_get(video_meta, ('title', 'name')) or dict_get(video_json, ('content_name', 'assets_name')), + 'display_id': display_id, + 'description': video_meta.get('summary'), + 'timestamp': video_meta.get('release_date') or video_json.get('start_date'), + 'duration': video_meta.get('duration'), + 'categories': [video_meta['genre']] if video_meta.get('genre') else None, + 'average_rating': video_meta.get('star_rating'), + 'series': video_meta.get('program_title') or video_json.get('program_title'), + 'season_number': video_meta.get('season'), + 'episode_number': video_meta.get('episode'), + 'channel': video_json.get('tv_name'), + 'channel_id': 
video_json.get('tv_id'), + 'formats': formats, + 'thumbnails': thumbnails, + 'is_live': video_type == 'live-event' and not is_upcoming, + 'was_live': video_type == 'missed-event', + 'live_status': 'is_upcoming' if is_upcoming else None, + 'release_timestamp': video_json.get('live_at'), + } + + +class RCTIPlusSeriesIE(RCTIPlusBaseIE): + _VALID_URL = r'https://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)(?:/(?P<type>episodes|extras|clips))?' + _TESTS = [{ + 'url': 'https://www.rctiplus.com/programs/829/putri-untuk-pangeran', + 'playlist_mincount': 1019, + 'info_dict': { + 'id': '829', + 'title': 'Putri Untuk Pangeran', + 'description': 'md5:aca7b54d05bd95a67d4f4613cc1d622d', + 'age_limit': 2, + 'cast': ['Verrel Bramasta', 'Ranty Maria', 'Riza Syah', 'Ivan Fadilla', 'Nicole Parham', 'Dll', 'Aviv Elham'], + 'display_id': 'putri-untuk-pangeran', + 'tag': 'count:18', + }, + }, { # No episodes + 'url': 'https://www.rctiplus.com/programs/615/inews-pagi', + 'playlist_mincount': 388, + 'info_dict': { + 'id': '615', + 'title': 'iNews Pagi', + 'description': 'md5:f18ee3d4643cfb41c358e5a9b693ee04', + 'age_limit': 2, + 'tag': 'count:11', + 'display_id': 'inews-pagi', + } + }] + _AGE_RATINGS = { # Based off https://id.wikipedia.org/wiki/Sistem_rating_konten_televisi with additional ratings + 'S-SU': 2, + 'SU': 2, + 'P': 2, + 'A': 7, + 'R': 13, + 'R-R/1': 17, # Labelled as 17+ despite being R + 'D': 18, + } + + @classmethod + def suitable(cls, url): + return False if RCTIPlusIE.suitable(url) else super(RCTIPlusSeriesIE, cls).suitable(url) + + def _entries(self, url, display_id=None, note='Downloading entries JSON', metadata={}): + total_pages = 0 + try: + total_pages = self._call_api( + '%s&length=20&page=0' % url, + display_id, note)[1]['pagination']['total_page'] + except ExtractorError as e: + if 'not found' in str(e): + return [] + raise e + if total_pages <= 0: + return [] + + for page_num in range(1, total_pages + 1): + episode_list = self._call_api( + '%s&length=20&page=%s' % (url, page_num), + display_id, '%s page %s' % (note, page_num))[0] or [] + + for video_json in episode_list: + yield { + '_type': 'url', + 'url': video_json['share_link'], + 'ie_key': RCTIPlusIE.ie_key(), + 'id': video_json.get('product_id'), + 'title': video_json.get('title'), + 'display_id': video_json.get('title_code').replace('_', '-'), + 'description': video_json.get('summary'), + 'timestamp': video_json.get('release_date'), + 'duration': video_json.get('duration'), + 'season_number': video_json.get('season'), + 'episode_number': video_json.get('episode'), + **metadata + } + + def _series_entries(self, series_id, display_id=None, video_type=None, metadata={}): + if not video_type or video_type in 'episodes': + try: + seasons_list = self._call_api( + f'https://api.rctiplus.com/api/v1/program/{series_id}/season', + display_id, 'Downloading seasons list JSON')[0] + except ExtractorError as e: + if 'not found' not in str(e): + raise + seasons_list = [] + for season in seasons_list: + yield from self._entries( + f'https://api.rctiplus.com/api/v2/program/{series_id}/episode?season={season["season"]}', + display_id, f'Downloading season {season["season"]} episode entries', metadata) + if not video_type or video_type in 'extras': + yield from self._entries( + f'https://api.rctiplus.com/api/v2/program/{series_id}/extra?content_id=0', + display_id, 'Downloading extra entries', metadata) + if not video_type or video_type in 'clips': + yield from self._entries( + 
f'https://api.rctiplus.com/api/v2/program/{series_id}/clip?content_id=0', + display_id, 'Downloading clip entries', metadata) + + def _real_extract(self, url): + series_id, display_id, video_type = self._match_valid_url(url).group('id', 'display_id', 'type') + if video_type: + self.report_warning( + f'Only {video_type} will be downloaded. ' + f'To download everything from the series, remove "/{video_type}" from the URL') + + series_meta, meta_paths = self._call_api( + f'https://api.rctiplus.com/api/v1/program/{series_id}/detail', display_id, 'Downloading series metadata') + metadata = { + 'age_limit': try_get(series_meta, lambda x: self._AGE_RATINGS[x['age_restriction'][0]['code']]), + 'cast': traverse_obj(series_meta, (('starring', 'creator', 'writer'), ..., 'name'), + expected_type=lambda x: strip_or_none(x) or None), + 'tag': traverse_obj(series_meta, ('tag', ..., 'name'), + expected_type=lambda x: strip_or_none(x) or None), + } + return self.playlist_result( + self._series_entries(series_id, display_id, video_type, metadata), series_id, + series_meta.get('title'), series_meta.get('summary'), display_id=display_id, **metadata) + + +class RCTIPlusTVIE(RCTIPlusBaseIE): + _VALID_URL = r'https://www\.rctiplus\.com/((tv/(?P<tvname>\w+))|(?P<eventname>live-event|missed-event))' + _TESTS = [{ + 'url': 'https://www.rctiplus.com/tv/rcti', + 'info_dict': { + 'id': 'v_lt1', + 'title': 'RCTI', + 'ext': 'mp4', + 'timestamp': 1546344000, + 'upload_date': '20190101', + }, + 'params': { + 'skip_download': True, + } + }, { + # Returned video will always change + 'url': 'https://www.rctiplus.com/live-event', + 'only_matching': True, + }, { + # Returned video will also always change + 'url': 'https://www.rctiplus.com/missed-event', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if RCTIPlusIE.suitable(url) else super(RCTIPlusTVIE, cls).suitable(url) + + def _real_extract(self, url): + match = self._match_valid_url(url).groupdict() + tv_id = match.get('tvname') or match.get('eventname') + webpage = self._download_webpage(url, tv_id) + video_type, video_id = self._search_regex( + r'url\s*:\s*["\']https://api\.rctiplus\.com/api/v./(?P<type>[^/]+)/(?P<id>\d+)/url', + webpage, 'video link', group=('type', 'id')) + return self.url_result(f'https://www.rctiplus.com/{video_type}/{video_id}/{tv_id}', 'RCTIPlus') diff --git a/plugins/youtube_download/yt_dlp/extractor/rds.py b/plugins/youtube_download/yt_dlp/extractor/rds.py new file mode 100644 index 0000000..0c49785 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/rds.py @@ -0,0 +1,70 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + parse_iso8601, + js_to_json, +) +from ..compat import compat_str + + +class RDSIE(InfoExtractor): + IE_DESC = 'RDS.ca' + _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+' + + _TESTS = [{ + # has two 9c9media ContentPackages, the web player selects the first ContentPackage + 'url': 'https://www.rds.ca/videos/Hockey/NationalHockeyLeague/teams/9/forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande-3.1377606', + 'info_dict': { + 'id': '2083309', + 'display_id': 'forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande', + 'ext': 'flv', + 'title': 'Forum du 5 à 7 : Kotkaniemi de retour de Finlande', + 'description': 'md5:83fa38ecc4a79b19e433433254077f25', + 'timestamp': 1606129030, + 'upload_date': '20201123', + 'duration': 773.039, + } + }, { + 
'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json) + video_id = compat_str(item['id']) + title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta( + 'title', webpage, 'title', fatal=True) + description = self._og_search_description(webpage) or self._html_search_meta( + 'description', webpage, 'description') + thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex( + [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"', + r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'], + webpage, 'thumbnail', fatal=False) + timestamp = parse_iso8601(self._search_regex( + r'<span[^>]+itemprop="uploadDate"[^>]+content="([^"]+)"', + webpage, 'upload date', fatal=False)) + duration = parse_duration(self._search_regex( + r'<span[^>]+itemprop="duration"[^>]+content="([^"]+)"', + webpage, 'duration', fatal=False)) + age_limit = self._family_friendly_search(webpage) + + return { + '_type': 'url_transparent', + 'id': video_id, + 'display_id': display_id, + 'url': '9c9media:rds_web:%s' % video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'age_limit': age_limit, + 'ie_key': 'NineCNineMedia', + } diff --git a/plugins/youtube_download/yt_dlp/extractor/redbulltv.py b/plugins/youtube_download/yt_dlp/extractor/redbulltv.py new file mode 100644 index 0000000..756a366 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/redbulltv.py @@ -0,0 +1,229 @@ +# coding: utf-8 +from __future__ import unicode_literals + + +from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + float_or_none, + ExtractorError, +) + + +class RedBullTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live|(?:film|episode)s)/(?P<id>AP-\w+)' + _TESTS = [{ + # film + 'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11', + 'md5': 'fb0445b98aa4394e504b413d98031d1f', + 'info_dict': { + 'id': 'AP-1Q6XCDTAN1W11', + 'ext': 'mp4', + 'title': 'ABC of... WRC - ABC of... 
S1E6', + 'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31', + 'duration': 1582.04, + }, + }, { + # episode + 'url': 'https://www.redbull.tv/video/AP-1PMHKJFCW1W11', + 'info_dict': { + 'id': 'AP-1PMHKJFCW1W11', + 'ext': 'mp4', + 'title': 'Grime - Hashtags S2E4', + 'description': 'md5:5546aa612958c08a98faaad4abce484d', + 'duration': 904, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.redbull.com/int-en/tv/video/AP-1UWHCAR9S1W11/rob-meets-sam-gaze?playlist=playlists::3f81040a-2f31-4832-8e2e-545b1d39d173', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/us-en/videos/AP-1YM9QCYE52111', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/us-en/events/AP-1XV2K61Q51W11/live/AP-1XUJ86FDH1W11', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/films/AP-1ZSMAW8FH2111', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/episodes/AP-1TQWK7XE11W11', + 'only_matching': True, + }] + + def extract_info(self, video_id): + session = self._download_json( + 'https://api.redbull.tv/v3/session', video_id, + note='Downloading access token', query={ + 'category': 'personal_computer', + 'os_family': 'http', + }) + if session.get('code') == 'error': + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, session['message'])) + token = session['token'] + + try: + video = self._download_json( + 'https://api.redbull.tv/v3/products/' + video_id, + video_id, note='Downloading video information', + headers={'Authorization': token} + ) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: + error_message = self._parse_json( + e.cause.read().decode(), video_id)['error'] + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, error_message), expected=True) + raise + + title = video['title'].strip() + + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + 'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (video_id, token), + video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + for resource in video.get('resources', []): + if resource.startswith('closed_caption_'): + splitted_resource = resource.split('_') + if splitted_resource[2]: + subtitles.setdefault('en', []).append({ + 'url': 'https://resources.redbull.tv/%s/%s' % (video_id, resource), + 'ext': splitted_resource[2], + }) + + subheading = video.get('subheading') + if subheading: + title += ' - %s' % subheading + + return { + 'id': video_id, + 'title': title, + 'description': video.get('long_description') or video.get( + 'short_description'), + 'duration': float_or_none(video.get('duration'), scale=1000), + 'formats': formats, + 'subtitles': subtitles, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + return self.extract_info(video_id) + + +class RedBullEmbedIE(RedBullTVIE): + _VALID_URL = r'https?://(?:www\.)?redbull\.com/embed/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}:[a-z]{2}-[A-Z]{2,3})' + _TESTS = [{ + # HLS manifest accessible only using assetId + 'url': 'https://www.redbull.com/embed/rrn:content:episode-videos:f3021f4f-3ed4-51ac-915a-11987126e405:en-INT', + 'only_matching': True, + }] + _VIDEO_ESSENSE_TMPL = '''... 
on %s { + videoEssence { + attributes + } + }''' + + def _real_extract(self, url): + rrn_id = self._match_id(url) + asset_id = self._download_json( + 'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql', + rrn_id, headers={ + 'Accept': 'application/json', + 'API-KEY': 'e90a1ff11335423998b100c929ecc866', + }, query={ + 'query': '''{ + resource(id: "%s", enforceGeoBlocking: false) { + %s + %s + } +}''' % (rrn_id, self._VIDEO_ESSENSE_TMPL % 'LiveVideo', self._VIDEO_ESSENSE_TMPL % 'VideoResource'), + })['data']['resource']['videoEssence']['attributes']['assetId'] + return self.extract_info(asset_id) + + +class RedBullTVRrnContentIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P<region>[a-z]{2,3})-(?P<lang>[a-z]{2})/tv/(?:video|live|film)/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _TESTS = [{ + 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:live-videos:e3e6feb4-e95f-50b7-962a-c70f8fd13c73/mens-dh-finals-fort-william', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:videos:a36a0f36-ff1b-5db8-a69d-ee11a14bf48b/tn-ts-style?playlist=rrn:content:event-profiles:83f05926-5de8-5389-b5e4-9bb312d715e8:extras', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/tv/film/rrn:content:films:d1f4d00e-4c04-5d19-b510-a805ffa2ab83/follow-me', + 'only_matching': True, + }] + + def _real_extract(self, url): + region, lang, rrn_id = self._match_valid_url(url).groups() + rrn_id += ':%s-%s' % (lang, region.upper()) + return self.url_result( + 'https://www.redbull.com/embed/' + rrn_id, + RedBullEmbedIE.ie_key(), rrn_id) + + +class RedBullIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P<region>[a-z]{2,3})-(?P<lang>[a-z]{2})/(?P<type>(?:episode|film|(?:(?:recap|trailer)-)?video)s|live)/(?!AP-|rrn:content:)(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.redbull.com/int-en/episodes/grime-hashtags-s02-e04', + 'md5': 'db8271a7200d40053a1809ed0dd574ff', + 'info_dict': { + 'id': 'AA-1MT8DQWA91W14', + 'ext': 'mp4', + 'title': 'Grime - Hashtags S2E4', + 'description': 'md5:5546aa612958c08a98faaad4abce484d', + }, + }, { + 'url': 'https://www.redbull.com/int-en/films/kilimanjaro-mountain-of-greatness', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/recap-videos/uci-mountain-bike-world-cup-2017-mens-xco-finals-from-vallnord', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/trailer-videos/kings-of-content', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/videos/tnts-style-red-bull-dance-your-style-s1-e12', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/live/mens-dh-finals-fort-william', + 'only_matching': True, + }, { + # only available on the int-en website so a fallback is need for the API + # https://www.redbull.com/v3/api/graphql/v1/v3/query/en-GB>en-INT?filter[uriSlug]=fia-wrc-saturday-recap-estonia&rb3Schema=v1:hero + 'url': 'https://www.redbull.com/gb-en/live/fia-wrc-saturday-recap-estonia', + 'only_matching': True, + }] + _INT_FALLBACK_LIST = ['de', 'en', 'es', 'fr'] + _LAT_FALLBACK_MAP = ['ar', 'bo', 'car', 'cl', 'co', 'mx', 'pe'] + + def _real_extract(self, url): + region, lang, filter_type, display_id = self._match_valid_url(url).groups() + if filter_type == 'episodes': + filter_type = 'episode-videos' + elif filter_type == 'live': + filter_type = 'live-videos' + + regions = [region.upper()] + if region != 'int': + if region in 
self._LAT_FALLBACK_MAP: + regions.append('LAT') + if lang in self._INT_FALLBACK_LIST: + regions.append('INT') + locale = '>'.join(['%s-%s' % (lang, reg) for reg in regions]) + + rrn_id = self._download_json( + 'https://www.redbull.com/v3/api/graphql/v1/v3/query/' + locale, + display_id, query={ + 'filter[type]': filter_type, + 'filter[uriSlug]': display_id, + 'rb3Schema': 'v1:hero', + })['data']['id'] + + return self.url_result( + 'https://www.redbull.com/embed/' + rrn_id, + RedBullEmbedIE.ie_key(), rrn_id) diff --git a/plugins/youtube_download/yt_dlp/extractor/reddit.py b/plugins/youtube_download/yt_dlp/extractor/reddit.py new file mode 100644 index 0000000..a042a59 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/reddit.py @@ -0,0 +1,167 @@ +import random + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + float_or_none, + try_get, + unescapeHTML, + url_or_none, + traverse_obj +) + + +class RedditIE(InfoExtractor): + _VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))' + _TESTS = [{ + 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', + 'info_dict': { + 'id': 'zv89llsvexdz', + 'ext': 'mp4', + 'title': 'That small heart attack.', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:4', + 'timestamp': 1501941939, + 'upload_date': '20170805', + 'uploader': 'Antw87', + 'duration': 12, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'age_limit': 0, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj', + 'only_matching': True, + }, { + # imgur + 'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', + 'only_matching': True, + }, { + # imgur @ old reddit + 'url': 'https://old.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', + 'only_matching': True, + }, { + # streamable + 'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/', + 'only_matching': True, + }, { + # youtube + 'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/', + 'only_matching': True, + }, { + # reddit video @ nm reddit + 'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/', + 'only_matching': True, + }, { + 'url': 'https://www.redditmedia.com/r/serbia/comments/pu9wbx/ako_vu%C4%8Di%C4%87_izgubi_izbore_ja_%C4%87u_da_crknem/', + 'only_matching': True, + }] + + @staticmethod + def _gen_session_id(): + id_length = 16 + rand_max = 1 << (id_length * 4) + return '%0.*x' % (id_length, random.randrange(rand_max)) + + def _real_extract(self, url): + subdomain, slug, video_id = self._match_valid_url(url).group('subdomain', 'slug', 'id') + + self._set_cookie('.reddit.com', 'reddit_session', self._gen_session_id()) + self._set_cookie('.reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D') + data = self._download_json(f'https://{subdomain}reddit.com/r/{slug}/.json', video_id, fatal=False) + if not data: + # Fall back to old.reddit.com in case the requested subdomain fails + data = self._download_json(f'https://old.reddit.com/r/{slug}/.json', video_id) + data = data[0]['data']['children'][0]['data'] + video_url = data['url'] + + # Avoid recursing into the same reddit URL + if 'reddit.com/' in video_url and '/%s/' % video_id in video_url: + raise ExtractorError('No media found', expected=True) + + 
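+        # NOTE: over_18 is tri-state: True -> age_limit 18, False -> 0,
+        # absent/None -> age limit unknown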
over_18 = data.get('over_18') + if over_18 is True: + age_limit = 18 + elif over_18 is False: + age_limit = 0 + else: + age_limit = None + + thumbnails = [] + + def add_thumbnail(src): + if not isinstance(src, dict): + return + thumbnail_url = url_or_none(src.get('url')) + if not thumbnail_url: + return + thumbnails.append({ + 'url': unescapeHTML(thumbnail_url), + 'width': int_or_none(src.get('width')), + 'height': int_or_none(src.get('height')), + }) + + for image in try_get(data, lambda x: x['preview']['images']) or []: + if not isinstance(image, dict): + continue + add_thumbnail(image.get('source')) + resolutions = image.get('resolutions') + if isinstance(resolutions, list): + for resolution in resolutions: + add_thumbnail(resolution) + + info = { + 'title': data.get('title'), + 'thumbnails': thumbnails, + 'timestamp': float_or_none(data.get('created_utc')), + 'uploader': data.get('author'), + 'like_count': int_or_none(data.get('ups')), + 'dislike_count': int_or_none(data.get('downs')), + 'comment_count': int_or_none(data.get('num_comments')), + 'age_limit': age_limit, + } + + # Check if media is hosted on reddit: + reddit_video = traverse_obj(data, (('media', 'secure_media'), 'reddit_video'), get_all=False) + if reddit_video: + playlist_urls = [ + try_get(reddit_video, lambda x: unescapeHTML(x[y])) + for y in ('dash_url', 'hls_url') + ] + + # Update video_id + display_id = video_id + video_id = self._search_regex( + r'https?://v\.redd\.it/(?P<id>[^/?#&]+)', reddit_video['fallback_url'], + 'video_id', default=display_id) + + dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd' + hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8' + + formats = self._extract_m3u8_formats( + hls_playlist_url, display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + formats.extend(self._extract_mpd_formats( + dash_playlist_url, display_id, mpd_id='dash', fatal=False)) + self._sort_formats(formats) + + return { + **info, + 'id': video_id, + 'display_id': display_id, + 'formats': formats, + 'duration': int_or_none(reddit_video.get('duration')), + } + + # Not hosted on reddit, must continue extraction + return { + **info, + 'display_id': video_id, + '_type': 'url_transparent', + 'url': video_url, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/redgifs.py b/plugins/youtube_download/yt_dlp/extractor/redgifs.py new file mode 100644 index 0000000..55196b7 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/redgifs.py @@ -0,0 +1,232 @@ +# coding: utf-8 +import functools + +from .common import InfoExtractor +from ..compat import compat_parse_qs +from ..utils import ( + ExtractorError, + int_or_none, + qualities, + try_get, + OnDemandPagedList, +) + + +class RedGifsBaseInfoExtractor(InfoExtractor): + _FORMATS = { + 'gif': 250, + 'sd': 480, + 'hd': None, + } + + def _parse_gif_data(self, gif_data): + video_id = gif_data.get('id') + quality = qualities(tuple(self._FORMATS.keys())) + + orig_height = int_or_none(gif_data.get('height')) + aspect_ratio = try_get(gif_data, lambda x: orig_height / x['width']) + + formats = [] + for format_id, height in self._FORMATS.items(): + video_url = gif_data['urls'].get(format_id) + if not video_url: + continue + height = min(orig_height, height or orig_height) + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'width': height / aspect_ratio if aspect_ratio else None, + 'height': height, + 'quality': quality(format_id), + }) + self._sort_formats(formats)
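+        # NOTE: _parse_gif_data reads no title field from the payload; the return
+        # below synthesizes a title from the tag list, falling back to 'RedGifs'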
+ + return { + 'id': video_id, + 'webpage_url': f'https://redgifs.com/watch/{video_id}', + 'ie_key': RedGifsIE.ie_key(), + 'extractor': 'RedGifs', + 'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs', + 'timestamp': int_or_none(gif_data.get('createDate')), + 'uploader': gif_data.get('userName'), + 'duration': int_or_none(gif_data.get('duration')), + 'view_count': int_or_none(gif_data.get('views')), + 'like_count': int_or_none(gif_data.get('likes')), + 'categories': gif_data.get('tags') or [], + 'tags': gif_data.get('tags'), + 'age_limit': 18, + 'formats': formats, + } + + def _call_api(self, ep, video_id, *args, **kwargs): + data = self._download_json( + f'https://api.redgifs.com/v2/{ep}', video_id, *args, **kwargs) + if 'error' in data: + raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id) + return data + + def _fetch_page(self, ep, video_id, query, page): + query['page'] = page + 1 + data = self._call_api( + ep, video_id, query=query, note=f'Downloading JSON metadata page {page + 1}') + + for entry in data['gifs']: + yield self._parse_gif_data(entry) + + def _prepare_api_query(self, query, fields): + api_query = [ + (field_name, query.get(field_name, (default,))[0]) + for field_name, default in fields.items()] + + return {key: val for key, val in api_query if val is not None} + + def _paged_entries(self, ep, item_id, query, fields): + page = int_or_none(query.get('page', (None,))[0]) + page_fetcher = functools.partial( + self._fetch_page, ep, item_id, self._prepare_api_query(query, fields)) + return page_fetcher(page) if page else OnDemandPagedList(page_fetcher, self._PAGE_SIZE) + + +class RedGifsIE(RedGifsBaseInfoExtractor): + _VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)' + _TESTS = [{ + 'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent', + 'info_dict': { + 'id': 'squeakyhelplesswisent', + 'ext': 'mp4', + 'title': 'Hotwife Legs Thick', + 'timestamp': 1636287915, + 'upload_date': '20211107', + 'uploader': 'ignored52', + 'duration': 16, + 'view_count': int, + 'like_count': int, + 'categories': list, + 'age_limit': 18, + } + }, { + 'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0', + 'info_dict': { + 'id': 'squeakyhelplesswisent', + 'ext': 'mp4', + 'title': 'Hotwife Legs Thick', + 'timestamp': 1636287915, + 'upload_date': '20211107', + 'uploader': 'ignored52', + 'duration': 16, + 'view_count': int, + 'like_count': int, + 'categories': list, + 'age_limit': 18, + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url).lower() + video_info = self._call_api( + f'gifs/{video_id}', video_id, note='Downloading video info') + return self._parse_gif_data(video_info['gif']) + + +class RedGifsSearchIE(RedGifsBaseInfoExtractor): + IE_DESC = 'Redgifs search' + _VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)' + _PAGE_SIZE = 80 + _TESTS = [ + { + 'url': 'https://www.redgifs.com/browse?tags=Lesbian', + 'info_dict': { + 'id': 'tags=Lesbian', + 'title': 'Lesbian', + 'description': 'RedGifs search for Lesbian, ordered by trending' + }, + 'playlist_mincount': 100, + }, + { + 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian', + 'info_dict': { + 'id': 'type=g&order=latest&tags=Lesbian', + 'title': 'Lesbian', + 'description': 'RedGifs search for Lesbian, ordered by latest' + }, + 'playlist_mincount': 100, + }, + { + 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2', + 'info_dict': { + 
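+                # NOTE: an explicit page= query pins extraction to that single page,
+                # hence the exact playlist_count below instead of a mincount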
'id': 'type=g&order=latest&tags=Lesbian&page=2', + 'title': 'Lesbian', + 'description': 'RedGifs search for Lesbian, ordered by latest' + }, + 'playlist_count': 80, + } + ] + + def _real_extract(self, url): + query_str = self._match_valid_url(url).group('query') + query = compat_parse_qs(query_str) + if not query.get('tags'): + raise ExtractorError('Invalid query tags', expected=True) + + tags = query.get('tags')[0] + order = query.get('order', ('trending',))[0] + + query['search_text'] = [tags] + entries = self._paged_entries('gifs/search', query_str, query, { + 'search_text': None, + 'order': 'trending', + 'type': None, + }) + + return self.playlist_result( + entries, query_str, tags, f'RedGifs search for {tags}, ordered by {order}') + + +class RedGifsUserIE(RedGifsBaseInfoExtractor): + IE_DESC = 'Redgifs user' + _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?' + _PAGE_SIZE = 30 + _TESTS = [ + { + 'url': 'https://www.redgifs.com/users/lamsinka89', + 'info_dict': { + 'id': 'lamsinka89', + 'title': 'lamsinka89', + 'description': 'RedGifs user lamsinka89, ordered by recent' + }, + 'playlist_mincount': 100, + }, + { + 'url': 'https://www.redgifs.com/users/lamsinka89?page=3', + 'info_dict': { + 'id': 'lamsinka89?page=3', + 'title': 'lamsinka89', + 'description': 'RedGifs user lamsinka89, ordered by recent' + }, + 'playlist_count': 30, + }, + { + 'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g', + 'info_dict': { + 'id': 'lamsinka89?order=best&type=g', + 'title': 'lamsinka89', + 'description': 'RedGifs user lamsinka89, ordered by best' + }, + 'playlist_mincount': 100, + } + ] + + def _real_extract(self, url): + username, query_str = self._match_valid_url(url).group('username', 'query') + playlist_id = f'{username}?{query_str}' if query_str else username + + query = compat_parse_qs(query_str) + order = query.get('order', ('recent',))[0] + + entries = self._paged_entries(f'users/{username}/search', playlist_id, query, { + 'order': 'recent', + 'type': None, + }) + + return self.playlist_result( + entries, playlist_id, username, f'RedGifs user {username}, ordered by {order}') diff --git a/plugins/youtube_download/yt_dlp/extractor/redtube.py b/plugins/youtube_download/yt_dlp/extractor/redtube.py new file mode 100644 index 0000000..7fee54f --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/redtube.py @@ -0,0 +1,150 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, + merge_dicts, + str_to_int, + unified_strdate, + url_or_none, +) + + +class RedTubeIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'https://www.redtube.com/38864951', + 'md5': '4fba70cbca3aefd25767ab4b523c9878', + 'info_dict': { + 'id': '38864951', + 'ext': 'mp4', + 'title': 'Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu', + 'description': 'Watch video Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu on Redtube, home of free Blowjob porn videos and Blonde sex movies online. 
Video length: (10:46) - Uploaded by leolulu - Verified User - Starring Pornstar: Leolulu', + 'upload_date': '20210111', + 'timestamp': 1610343109, + 'duration': 646, + 'view_count': int, + 'age_limit': 18, + 'thumbnail': r're:https://\wi-ph\.rdtcdn\.com/videos/.+/.+\.jpg', + }, + }, { + 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286', + 'only_matching': True, + }, { + 'url': 'http://it.redtube.com/66418', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)', + webpage) + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://www.redtube.com/%s' % video_id, video_id) + + ERRORS = ( + (('video-deleted-info', '>This video has been removed'), 'has been removed'), + (('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'), + ) + + for patterns, message in ERRORS: + if any(p in webpage for p in patterns): + raise ExtractorError( + 'Video %s %s' % (video_id, message), expected=True) + + info = self._search_json_ld(webpage, video_id, default={}) + + if not info.get('title'): + info['title'] = self._html_search_regex( + (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle|video_title)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>', + r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',), + webpage, 'title', group='title', + default=None) or self._og_search_title(webpage) + + formats = [] + sources = self._parse_json( + self._search_regex( + r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'), + video_id, fatal=False) + if sources and isinstance(sources, dict): + for format_id, format_url in sources.items(): + if format_url: + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'height': int_or_none(format_id), + }) + medias = self._parse_json( + self._search_regex( + r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage, + 'media definitions', default='{}'), + video_id, fatal=False) + for media in medias if isinstance(medias, list) else []: + format_url = url_or_none(media.get('videoUrl')) + if not format_url: + continue + format_id = media.get('format') + quality = media.get('quality') + if format_id == 'hls' or (format_id == 'mp4' and not quality): + more_media = self._download_json(format_url, video_id, fatal=False) + else: + more_media = [media] + for media in more_media if isinstance(more_media, list) else []: + format_url = url_or_none(media.get('videoUrl')) + if not format_url: + continue + format_id = media.get('format') + if format_id == 'hls' or determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id=format_id or 'hls', + fatal=False)) + continue + format_id = media.get('quality') + formats.append({ + 'url': format_url, + 'ext': 'mp4', + 'format_id': format_id, + 'height': int_or_none(format_id), + }) + if not formats: + video_url = self._html_search_regex( + r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL') + formats.append({'url': video_url, 'ext': 'mp4'}) + self._sort_formats(formats) + + thumbnail = self._og_search_thumbnail(webpage) + upload_date = unified_strdate(self._search_regex( + r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<', + webpage, 'upload date', default=None)) + duration = int_or_none(self._og_search_property( + 'video:duration', webpage, default=None) or self._search_regex( + 
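+            # NOTE: falls back to the inline player's videoDuration field when the
+            # og video:duration property is absent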
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None)) + view_count = str_to_int(self._search_regex( + (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)', + r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)', + r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'), + webpage, 'view count', default=None)) + + # No self-labeling, but they describe themselves as + # "Home of Videos Porno" + age_limit = 18 + + return merge_dicts(info, { + 'id': video_id, + 'ext': 'mp4', + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'duration': duration, + 'view_count': view_count, + 'age_limit': age_limit, + 'formats': formats, + }) diff --git a/plugins/youtube_download/yt_dlp/extractor/regiotv.py b/plugins/youtube_download/yt_dlp/extractor/regiotv.py new file mode 100644 index 0000000..e250a52 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/regiotv.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import ( + sanitized_Request, + xpath_text, + xpath_with_ns, +) + + +class RegioTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?regio-tv\.de/video/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://www.regio-tv.de/video/395808.html', + 'info_dict': { + 'id': '395808', + 'ext': 'mp4', + 'title': 'Wir in Ludwigsburg', + 'description': 'Mit unseren zuckersüßen Adventskindern, außerdem besuchen wir die Abendsterne!', + } + }, { + 'url': 'http://www.regio-tv.de/video/395808', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + key = self._search_regex( + r'key\s*:\s*(["\'])(?P<key>.+?)\1', webpage, 'key', group='key') + title = self._og_search_title(webpage) + + SOAP_TEMPLATE = '<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><{0} xmlns="http://v.telvi.de/"><key xsi:type="xsd:string">{1}</key></{0}></soap:Body></soap:Envelope>' + + request = sanitized_Request( + 'http://v.telvi.de/', + SOAP_TEMPLATE.format('GetHTML5VideoData', key).encode('utf-8')) + video_data = self._download_xml(request, video_id, 'Downloading video XML') + + NS_MAP = { + 'xsi': 'http://www.w3.org/2001/XMLSchema-instance', + 'soap': 'http://schemas.xmlsoap.org/soap/envelope/', + } + + video_url = xpath_text( + video_data, xpath_with_ns('.//video', NS_MAP), 'video url', fatal=True) + thumbnail = xpath_text( + video_data, xpath_with_ns('.//image', NS_MAP), 'thumbnail') + description = self._og_search_description( + webpage) or self._html_search_meta('description', webpage) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/rentv.py b/plugins/youtube_download/yt_dlp/extractor/rentv.py new file mode 100644 index 0000000..7c8909d --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/rentv.py @@ -0,0 +1,106 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + determine_ext, + int_or_none, + url_or_none, +) + + +class RENTVIE(InfoExtractor): + _VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://ren.tv/video/epizod/118577', + 'md5': 
'd91851bf9af73c0ad9b2cdf76c127fbb', + 'info_dict': { + 'id': '118577', + 'ext': 'mp4', + 'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"', + 'timestamp': 1472230800, + 'upload_date': '20160826', + } + }, { + 'url': 'http://ren.tv/player/118577', + 'only_matching': True, + }, { + 'url': 'rentv:118577', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id) + config = self._parse_json(self._search_regex( + r'config\s*=\s*({.+})\s*;', webpage, 'config'), video_id) + title = config['title'] + formats = [] + for video in config['src']: + src = url_or_none(video.get('src')) + if not src: + continue + ext = determine_ext(src) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': src, + }) + self._sort_formats(formats) + return { + 'id': video_id, + 'title': title, + 'description': config.get('description'), + 'thumbnail': config.get('image'), + 'duration': int_or_none(config.get('duration')), + 'timestamp': int_or_none(config.get('date')), + 'formats': formats, + } + + +class RENTVArticleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ren\.tv/novosti/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'http://ren.tv/novosti/2016-10-26/video-mikroavtobus-popavshiy-v-dtp-s-gruzovikami-v-podmoskove-prevratilsya-v', + 'md5': 'ebd63c4680b167693745ab91343df1d6', + 'info_dict': { + 'id': '136472', + 'ext': 'mp4', + 'title': 'Видео: микроавтобус, попавший в ДТП с грузовиками в Подмосковье, превратился в груду металла', + 'description': 'Жертвами столкновения двух фур и микроавтобуса, по последним данным, стали семь человек.', + } + }, { + # TODO: invalid m3u8 + 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video', + 'info_dict': { + 'id': 'playlist', + 'ext': 'mp4', + 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. 
ВИДЕО | РЕН ТВ', + 'uploader': 'ren.tv', + }, + 'params': { + # m3u8 downloads + 'skip_download': True, + }, + 'skip': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + drupal_settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), display_id) + + entries = [] + for config_profile in drupal_settings.get('ren_jwplayer', {}).values(): + media_id = config_profile.get('mediaid') + if not media_id: + continue + media_id = compat_str(media_id) + entries.append(self.url_result('rentv:' + media_id, 'RENTV', media_id)) + return self.playlist_result(entries, display_id) diff --git a/plugins/youtube_download/yt_dlp/extractor/restudy.py b/plugins/youtube_download/yt_dlp/extractor/restudy.py new file mode 100644 index 0000000..d47fb45 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/restudy.py @@ -0,0 +1,44 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class RestudyIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|portal)\.)?restudy\.dk/video/[^/]+/id/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'https://www.restudy.dk/video/play/id/1637', + 'info_dict': { + 'id': '1637', + 'ext': 'flv', + 'title': 'Leiden-frosteffekt', + 'description': 'Denne video er et eksperiment med flydende kvælstof.', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + 'url': 'https://portal.restudy.dk/video/leiden-frosteffekt/id/1637', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage).strip() + description = self._og_search_description(webpage).strip() + + formats = self._extract_smil_formats( + 'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_%s.xml' % video_id, + video_id) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/reuters.py b/plugins/youtube_download/yt_dlp/extractor/reuters.py new file mode 100644 index 0000000..9dc482d --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/reuters.py @@ -0,0 +1,69 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + js_to_json, + int_or_none, + unescapeHTML, +) + + +class ReutersIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562', + 'md5': '8015113643a0b12838f160b0b81cc2ee', + 'info_dict': { + 'id': '368575562', + 'ext': 'mp4', + 'title': 'San Francisco police chief resigns', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id) + video_data = js_to_json(self._search_regex( + r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);', + webpage, 'video data')) + + def get_json_value(key, fatal=False): + return self._search_regex(r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal) + + title = unescapeHTML(get_json_value('title', fatal=True)) + mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups() + + mas_data 
= self._download_json( + 'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid), + video_id, transform_source=js_to_json) + formats = [] + for f in mas_data: + f_url = f.get('url') + if not f_url: + continue + method = f.get('method') + if method == 'hls': + formats.extend(self._extract_m3u8_formats( + f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + else: + container = f.get('container') + ext = '3gp' if method == 'mobile' else container + formats.append({ + 'format_id': ext, + 'url': f_url, + 'ext': ext, + 'container': container if method != 'mobile' else None, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': get_json_value('thumb'), + 'duration': int_or_none(get_json_value('seconds')), + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/reverbnation.py b/plugins/youtube_download/yt_dlp/extractor/reverbnation.py new file mode 100644 index 0000000..4cb99c2 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/reverbnation.py @@ -0,0 +1,53 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + qualities, + str_or_none, +) + + +class ReverbNationIE(InfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$' + _TESTS = [{ + 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', + 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645', + 'info_dict': { + 'id': '16965047', + 'ext': 'mp3', + 'title': 'MONA LISA', + 'uploader': 'ALKILADOS', + 'uploader_id': '216429', + 'thumbnail': r're:^https?://.*\.jpg', + }, + }] + + def _real_extract(self, url): + song_id = self._match_id(url) + + api_res = self._download_json( + 'https://api.reverbnation.com/song/%s' % song_id, + song_id, + note='Downloading information of song %s' % song_id + ) + + THUMBNAILS = ('thumbnail', 'image') + quality = qualities(THUMBNAILS) + thumbnails = [] + for thumb_key in THUMBNAILS: + if api_res.get(thumb_key): + thumbnails.append({ + 'url': api_res[thumb_key], + 'preference': quality(thumb_key) + }) + + return { + 'id': song_id, + 'title': api_res['name'], + 'url': api_res['url'], + 'uploader': api_res.get('artist', {}).get('name'), + 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), + 'thumbnails': thumbnails, + 'ext': 'mp3', + 'vcodec': 'none', + } diff --git a/plugins/youtube_download/yt_dlp/extractor/rice.py b/plugins/youtube_download/yt_dlp/extractor/rice.py new file mode 100644 index 0000000..cf2bb1b --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/rice.py @@ -0,0 +1,116 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_parse_qs +from ..utils import ( + xpath_text, + xpath_element, + int_or_none, + parse_iso8601, + ExtractorError, +) + + +class RICEIE(InfoExtractor): + _VALID_URL = r'https?://mediahub\.rice\.edu/app/[Pp]ortal/video\.aspx\?(?P<query>.+)' + _TEST = { + 'url': 'https://mediahub.rice.edu/app/Portal/video.aspx?PortalID=25ffd62c-3d01-4b29-8c70-7c94270efb3e&DestinationID=66bc9434-03bd-4725-b47e-c659d8d809db&ContentID=YEWIvbhb40aqdjMD1ALSqw', + 'md5': '9b83b4a2eead4912dc3b7fac7c449b6a', + 'info_dict': { + 'id': 'YEWIvbhb40aqdjMD1ALSqw', + 'ext': 'mp4', + 'title': 'Active Learning in Archeology', + 'upload_date': '20140616', + 'timestamp': 1402926346, + } + } + _NS = 'http://schemas.datacontract.org/2004/07/ensembleVideo.Data.Service.Contracts.Models.Player.Config' + + def 
_real_extract(self, url): + qs = compat_parse_qs(self._match_valid_url(url).group('query')) + if not qs.get('PortalID') or not qs.get('DestinationID') or not qs.get('ContentID'): + raise ExtractorError('Invalid URL', expected=True) + + portal_id = qs['PortalID'][0] + playlist_id = qs['DestinationID'][0] + content_id = qs['ContentID'][0] + + content_data = self._download_xml('https://mediahub.rice.edu/api/portal/GetContentTitle', content_id, query={ + 'portalId': portal_id, + 'playlistId': playlist_id, + 'contentId': content_id + }) + metadata = xpath_element(content_data, './/metaData', fatal=True) + title = xpath_text(metadata, 'primaryTitle', fatal=True) + encodings = xpath_element(content_data, './/encodings', fatal=True) + player_data = self._download_xml('https://mediahub.rice.edu/api/player/GetPlayerConfig', content_id, query={ + 'temporaryLinkId': xpath_text(encodings, 'temporaryLinkId', fatal=True), + 'contentId': content_id, + }) + + common_fmt = {} + dimensions = xpath_text(encodings, 'dimensions') + if dimensions: + wh = dimensions.split('x') + if len(wh) == 2: + common_fmt.update({ + 'width': int_or_none(wh[0]), + 'height': int_or_none(wh[1]), + }) + + formats = [] + rtsp_path = xpath_text(player_data, self._xpath_ns('RtspPath', self._NS)) + if rtsp_path: + fmt = { + 'url': rtsp_path, + 'format_id': 'rtsp', + } + fmt.update(common_fmt) + formats.append(fmt) + for source in player_data.findall(self._xpath_ns('.//Source', self._NS)): + video_url = xpath_text(source, self._xpath_ns('File', self._NS)) + if not video_url: + continue + if '.m3u8' in video_url: + formats.extend(self._extract_m3u8_formats(video_url, content_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + else: + fmt = { + 'url': video_url, + 'format_id': video_url.split(':')[0], + } + fmt.update(common_fmt) + rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url) + if rtmp: + fmt.update({ + 'url': rtmp.group('url'), + 'play_path': rtmp.group('playpath'), + 'app': rtmp.group('app'), + 'ext': 'flv', + }) + formats.append(fmt) + self._sort_formats(formats) + + thumbnails = [] + for content_asset in content_data.findall('.//contentAssets'): + asset_type = xpath_text(content_asset, 'type') + if asset_type == 'image': + image_url = xpath_text(content_asset, 'httpPath') + if not image_url: + continue + thumbnails.append({ + 'id': xpath_text(content_asset, 'ID'), + 'url': image_url, + }) + + return { + 'id': content_id, + 'title': title, + 'description': xpath_text(metadata, 'abstract'), + 'duration': int_or_none(xpath_text(metadata, 'duration')), + 'timestamp': parse_iso8601(xpath_text(metadata, 'dateUpdated')), + 'thumbnails': thumbnails, + 'formats': formats, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/rmcdecouverte.py b/plugins/youtube_download/yt_dlp/extractor/rmcdecouverte.py new file mode 100644 index 0000000..8bfce34 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/rmcdecouverte.py @@ -0,0 +1,75 @@ +# coding: utf-8 +from __future__ import unicode_literals + + +from .common import InfoExtractor +from .brightcove import BrightcoveLegacyIE +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) +from ..utils import smuggle_url + + +class RMCDecouverteIE(InfoExtractor): + _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:[^?#]*_(?P<id>\d+)|mediaplayer-direct)/?(?:[#?]|$)' + + _TESTS = [{ + 'url': 'https://rmcdecouverte.bfmtv.com/vestiges-de-guerre_22240/les-bunkers-secrets-domaha-beach_25303/', + 'info_dict': { + 'id': '6250879771001', + 
'ext': 'mp4', + 'title': 'LES BUNKERS SECRETS D´OMAHA BEACH', + 'uploader_id': '1969646226001', + 'description': 'md5:aed573ca24abde62a148e0eba909657d', + 'timestamp': 1619622984, + 'upload_date': '20210428', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/', + 'info_dict': { + 'id': '5983675500001', + 'ext': 'mp4', + 'title': 'CORVETTE', + 'description': 'md5:c1e8295521e45ffebf635d6a7658f506', + 'uploader_id': '1969646226001', + 'upload_date': '20181226', + 'timestamp': 1545861635, + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'only available for a week', + }, { + 'url': 'https://rmcdecouverte.bfmtv.com/avions-furtifs-la-technologie-de-lextreme_10598', + 'only_matching': True, + }, { + # The website accepts any URL as long as it has _\d+ at the end + 'url': 'https://rmcdecouverte.bfmtv.com/any/thing/can/go/here/_10598', + 'only_matching': True, + }, { + # live, geo restricted, bypassable + 'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/', + 'only_matching': True, + }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s' + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + display_id = mobj.group('id') or 'direct' + webpage = self._download_webpage(url, display_id) + brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) + if brightcove_legacy_url: + brightcove_id = compat_parse_qs(compat_urlparse.urlparse( + brightcove_legacy_url).query)['@videoPlayer'][0] + else: + brightcove_id = self._search_regex( + r'data-video-id=["\'](\d+)', webpage, 'brightcove id') + return self.url_result( + smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': ['FR']}), + 'BrightcoveNew', brightcove_id) diff --git a/plugins/youtube_download/yt_dlp/extractor/ro220.py b/plugins/youtube_download/yt_dlp/extractor/ro220.py new file mode 100644 index 0000000..69934ef --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/ro220.py @@ -0,0 +1,43 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote + + +class Ro220IE(InfoExtractor): + IE_NAME = '220.ro' + _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<id>[^/]+)' + _TEST = { + 'url': 'http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/', + 'md5': '03af18b73a07b4088753930db7a34add', + 'info_dict': { + 'id': 'LYV6doKo7f', + 'ext': 'mp4', + 'title': 'Luati-le Banii sez 4 ep 1', + 'description': r're:^Iata-ne reveniti dupa o binemeritata vacanta\. 
Va astept si pe Facebook cu pareri si comentarii.$',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        url = compat_urllib_parse_unquote(self._search_regex(
+            r'(?s)clip\s*:\s*{.*?url\s*:\s*\'([^\']+)\'', webpage, 'url'))
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        formats = [{
+            'format_id': 'sd',
+            'url': url,
+            'ext': 'mp4',
+        }]
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
diff --git a/plugins/youtube_download/yt_dlp/extractor/rockstargames.py b/plugins/youtube_download/yt_dlp/extractor/rockstargames.py
new file mode 100644
index 0000000..cd6904b
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/rockstargames.py
@@ -0,0 +1,69 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_iso8601,
+)
+
+
+class RockstarGamesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos(?:/video/|#?/?\?.*\bvideo=)(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.rockstargames.com/videos/video/11544/',
+        'md5': '03b5caa6e357a4bd50e3143fc03e5733',
+        'info_dict': {
+            'id': '11544',
+            'ext': 'mp4',
+            'title': 'Further Adventures in Finance and Felony Trailer',
+            'description': 'md5:6d31f55f30cb101b5476c4a379e324a3',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1464876000,
+            'upload_date': '20160602',
+        }
+    }, {
+        'url': 'http://www.rockstargames.com/videos#/?video=48',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        video = self._download_json(
+            'https://www.rockstargames.com/videoplayer/videos/get-video.json',
+            video_id, query={
+                'id': video_id,
+                'locale': 'en_us',
+            })['video']
+
+        title = video['title']
+
+        formats = []
+        for video in video['files_processed']['video/mp4']:
+            if not video.get('src'):
+                continue
+            resolution = video.get('resolution')
+            height = int_or_none(self._search_regex(
+                r'^(\d+)[pP]$', resolution or '', 'height', default=None))
+            formats.append({
+                'url': self._proto_relative_url(video['src']),
+                'format_id': resolution,
+                'height': height,
+            })
+
+        if not formats:
+            youtube_id = video.get('youtube_id')
+            if youtube_id:
+                return self.url_result(youtube_id, 'Youtube')
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video.get('description'),
+            'thumbnail': self._proto_relative_url(video.get('screencap')),
+            'timestamp': parse_iso8601(video.get('created')),
+            'formats': formats,
+        }
diff --git a/plugins/youtube_download/yt_dlp/extractor/roosterteeth.py b/plugins/youtube_download/yt_dlp/extractor/roosterteeth.py
new file mode 100644
index 0000000..652fdd1
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/roosterteeth.py
@@ -0,0 +1,219 @@
+# coding: utf-8
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    join_nonempty,
+    LazyList,
+    parse_qs,
+    str_or_none,
+    traverse_obj,
+    url_or_none,
+    urlencode_postdata,
+    urljoin,
+    update_url_query,
+)
+
+
+class RoosterTeethBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'roosterteeth'
+    _API_BASE = 'https://svod-be.roosterteeth.com'
+    _API_BASE_URL = f'{_API_BASE}/api/v1'
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is 
None: + return + if self._get_cookies(self._API_BASE_URL).get('rt_access_token'): + return + + try: + self._download_json( + 'https://auth.roosterteeth.com/oauth/token', + None, 'Logging in', data=urlencode_postdata({ + 'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5', + 'grant_type': 'password', + 'username': username, + 'password': password, + })) + except ExtractorError as e: + msg = 'Unable to login' + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + resp = self._parse_json(e.cause.read().decode(), None, fatal=False) + if resp: + error = resp.get('extra_info') or resp.get('error_description') or resp.get('error') + if error: + msg += ': ' + error + self.report_warning(msg) + + def _real_initialize(self): + self._login() + + def _extract_video_info(self, data): + thumbnails = [] + for image in traverse_obj(data, ('included', 'images')): + if image.get('type') not in ('episode_image', 'bonus_feature_image'): + continue + thumbnails.extend([{ + 'id': name, + 'url': url, + } for name, url in (image.get('attributes') or {}).items() if url_or_none(url)]) + + attributes = data.get('attributes') or {} + title = traverse_obj(attributes, 'title', 'display_title') + sub_only = attributes.get('is_sponsors_only') + + return { + 'id': str(data.get('id')), + 'display_id': attributes.get('slug'), + 'title': title, + 'description': traverse_obj(attributes, 'description', 'caption'), + 'series': attributes.get('show_title'), + 'season_number': int_or_none(attributes.get('season_number')), + 'season_id': attributes.get('season_id'), + 'episode': title, + 'episode_number': int_or_none(attributes.get('number')), + 'episode_id': str_or_none(data.get('uuid')), + 'channel_id': attributes.get('channel_id'), + 'duration': int_or_none(attributes.get('length')), + 'thumbnails': thumbnails, + 'availability': self._availability( + needs_premium=sub_only, needs_subscription=sub_only, needs_auth=sub_only, + is_private=False, is_unlisted=False), + 'tags': attributes.get('genres') + } + + +class RoosterTeethIE(RoosterTeethBaseIE): + _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', + 'info_dict': { + 'id': '9156', + 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', + 'ext': 'mp4', + 'title': 'Million Dollars, But... The Game Announcement', + 'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5', + 'thumbnail': r're:^https?://.*\.png$', + 'series': 'Million Dollars, But...', + 'episode': 'Million Dollars, But... 
The Game Announcement', + }, + 'params': {'skip_download': True}, + }, { + 'url': 'https://roosterteeth.com/watch/rwby-bonus-25', + 'info_dict': { + 'id': '40432', + 'display_id': 'rwby-bonus-25', + 'title': 'Grimm', + 'description': 'md5:f30ff570741213418a8d2c19868b93ab', + 'episode': 'Grimm', + 'channel_id': '92f780eb-ebfe-4bf5-a3b5-c6ad5460a5f1', + 'thumbnail': r're:^https?://.*\.(png|jpe?g)$', + 'ext': 'mp4', + }, + 'params': {'skip_download': True}, + }, { + 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', + 'only_matching': True, + }, { + 'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts', + 'only_matching': True, + }, { + 'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow', + 'only_matching': True, + }, { + 'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better', + 'only_matching': True, + }, { + # only available for FIRST members + 'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one', + 'only_matching': True, + }, { + 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + api_episode_url = f'{self._API_BASE_URL}/watch/{display_id}' + + try: + video_data = self._download_json( + api_episode_url + '/videos', display_id, + 'Downloading video JSON metadata')['data'][0] + m3u8_url = video_data['attributes']['url'] + # XXX: additional URL at video_data['links']['download'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + if self._parse_json(e.cause.read().decode(), display_id).get('access') is False: + self.raise_login_required( + '%s is only available for FIRST members' % display_id) + raise + + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + episode = self._download_json( + api_episode_url, display_id, + 'Downloading episode JSON metadata')['data'][0] + + return { + 'display_id': display_id, + 'formats': formats, + 'subtitles': subtitles, + **self._extract_video_info(episode) + } + + +class RoosterTeethSeriesIE(RoosterTeethBaseIE): + _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/series/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://roosterteeth.com/series/rwby?season=7', + 'playlist_count': 13, + 'info_dict': { + 'id': 'rwby-7', + 'title': 'RWBY - Season 7', + } + }, { + 'url': 'https://roosterteeth.com/series/role-initiative', + 'playlist_mincount': 16, + 'info_dict': { + 'id': 'role-initiative', + 'title': 'Role Initiative', + } + }, { + 'url': 'https://roosterteeth.com/series/let-s-play-minecraft?season=9', + 'playlist_mincount': 50, + 'info_dict': { + 'id': 'let-s-play-minecraft-9', + 'title': 'Let\'s Play Minecraft - Season 9', + } + }] + + def _entries(self, series_id, season_number): + display_id = join_nonempty(series_id, season_number) + # TODO: extract bonus material + for data in self._download_json( + f'{self._API_BASE_URL}/shows/{series_id}/seasons?order=asc&order_by', display_id)['data']: + idx = traverse_obj(data, ('attributes', 'number')) + if season_number and idx != season_number: + continue + season_url = 
update_url_query(urljoin(self._API_BASE, data['links']['episodes']), {'per_page': 1000}) + season = self._download_json(season_url, display_id, f'Downloading season {idx} JSON metadata')['data'] + for episode in season: + yield self.url_result( + f'https://www.roosterteeth.com{episode["canonical_links"]["self"]}', + RoosterTeethIE.ie_key(), + **self._extract_video_info(episode)) + + def _real_extract(self, url): + series_id = self._match_id(url) + season_number = traverse_obj(parse_qs(url), ('season', 0), expected_type=int_or_none) + + entries = LazyList(self._entries(series_id, season_number)) + return self.playlist_result( + entries, + join_nonempty(series_id, season_number), + join_nonempty(entries[0].get('series'), season_number, delim=' - Season ')) diff --git a/plugins/youtube_download/yt_dlp/extractor/rottentomatoes.py b/plugins/youtube_download/yt_dlp/extractor/rottentomatoes.py new file mode 100644 index 0000000..14c8e82 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/rottentomatoes.py @@ -0,0 +1,32 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from .internetvideoarchive import InternetVideoArchiveIE + + +class RottenTomatoesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', + 'info_dict': { + 'id': '11028566', + 'ext': 'mp4', + 'title': 'Toy Story 3', + 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + iva_id = self._search_regex(r'publishedid=(\d+)', webpage, 'internet video archive id') + + return { + '_type': 'url_transparent', + 'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?domain=www.videodetective.com&customerid=69249&playerid=641&publishedid=' + iva_id, + 'ie_key': InternetVideoArchiveIE.ie_key(), + 'id': video_id, + 'title': self._og_search_title(webpage), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/roxwel.py b/plugins/youtube_download/yt_dlp/extractor/roxwel.py new file mode 100644 index 0000000..84bb1aa --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/roxwel.py @@ -0,0 +1,52 @@ +from __future__ import unicode_literals + + +from .common import InfoExtractor +from ..utils import unified_strdate, determine_ext + + +class RoxwelIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)' + + _TEST = { + 'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html', + 'info_dict': { + 'id': 'passionpittakeawalklive', + 'ext': 'flv', + 'title': 'Take A Walk (live)', + 'uploader': 'Passion Pit', + 'uploader_id': 'passionpit', + 'upload_date': '20120928', + 'description': 'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. 
', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + } + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + filename = mobj.group('filename') + info_url = 'http://www.roxwel.com/api/videos/%s' % filename + info = self._download_json(info_url, filename) + + rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')]) + best_rate = rtmp_rates[-1] + url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate) + rtmp_url = self._download_webpage(url_page_url, filename, 'Downloading video url') + ext = determine_ext(rtmp_url) + if ext == 'f4v': + rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename) + + return { + 'id': filename, + 'title': info['title'], + 'url': rtmp_url, + 'ext': 'flv', + 'description': info['description'], + 'thumbnail': info.get('player_image_url') or info.get('image_url_large'), + 'uploader': info['artist'], + 'uploader_id': info['artistname'], + 'upload_date': unified_strdate(info['dbdate']), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/rozhlas.py b/plugins/youtube_download/yt_dlp/extractor/rozhlas.py new file mode 100644 index 0000000..fccf694 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/rozhlas.py @@ -0,0 +1,50 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + remove_start, +) + + +class RozhlasIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?prehravac\.rozhlas\.cz/audio/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://prehravac.rozhlas.cz/audio/3421320', + 'md5': '504c902dbc9e9a1fd50326eccf02a7e2', + 'info_dict': { + 'id': '3421320', + 'ext': 'mp3', + 'title': 'Echo Pavla Klusáka (30.06.2015 21:00)', + 'description': 'Osmdesátiny Terryho Rileyho jsou skvělou příležitostí proletět se elektronickými i akustickými díly zakladatatele minimalismu, který je aktivní už přes padesát let' + } + }, { + 'url': 'http://prehravac.rozhlas.cz/audio/3421320/embed', + 'only_matching': True, + }] + + def _real_extract(self, url): + audio_id = self._match_id(url) + + webpage = self._download_webpage( + 'http://prehravac.rozhlas.cz/audio/%s' % audio_id, audio_id) + + title = self._html_search_regex( + r'<h3>(.+?)</h3>\s*<p[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track', + webpage, 'title', default=None) or remove_start( + self._og_search_title(webpage), 'Radio Wave - ') + description = self._html_search_regex( + r'<p[^>]+title=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track', + webpage, 'description', fatal=False, group='url') + duration = int_or_none(self._search_regex( + r'data-duration=["\'](\d+)', webpage, 'duration', default=None)) + + return { + 'id': audio_id, + 'url': 'http://media.rozhlas.cz/_audio/%s.mp3' % audio_id, + 'title': title, + 'description': description, + 'duration': duration, + 'vcodec': 'none', + } diff --git a/plugins/youtube_download/yt_dlp/extractor/rtbf.py b/plugins/youtube_download/yt_dlp/extractor/rtbf.py new file mode 100644 index 0000000..4b61fdb --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/rtbf.py @@ -0,0 +1,159 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + strip_or_none, +) + + +class RTBFIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?://(?:www\.)?rtbf\.be/ + (?: + video/[^?]+\?.*\bid=| + ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=| + 
auvio/[^/]+\?.*\b(?P<live>l)?id= + )(?P<id>\d+)''' + _TESTS = [{ + 'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274', + 'md5': '8c876a1cceeb6cf31b476461ade72384', + 'info_dict': { + 'id': '1921274', + 'ext': 'mp4', + 'title': 'Les Diables au coeur (épisode 2)', + 'description': '(du 25/04/2014)', + 'duration': 3099.54, + 'upload_date': '20140425', + 'timestamp': 1398456300, + } + }, { + # geo restricted + 'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442', + 'only_matching': True, + }, { + 'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858', + 'only_matching': True, + }, { + 'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996', + 'only_matching': True, + }, { + # Live + 'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775', + 'only_matching': True, + }, { + # Audio + 'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811', + 'only_matching': True, + }, { + # With Subtitle + 'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588', + 'only_matching': True, + }] + _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be' + _PROVIDERS = { + 'YOUTUBE': 'Youtube', + 'DAILYMOTION': 'Dailymotion', + 'VIMEO': 'Vimeo', + } + _QUALITIES = [ + ('mobile', 'SD'), + ('web', 'MD'), + ('high', 'HD'), + ] + + def _real_extract(self, url): + live, media_id = self._match_valid_url(url).groups() + embed_page = self._download_webpage( + 'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'), + media_id, query={'id': media_id}) + data = self._parse_json(self._html_search_regex( + r'data-media="([^"]+)"', embed_page, 'media data'), media_id) + + error = data.get('error') + if error: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + + provider = data.get('provider') + if provider in self._PROVIDERS: + return self.url_result(data['url'], self._PROVIDERS[provider]) + + title = data['title'] + is_live = data.get('isLive') + height_re = r'-(\d+)p\.' + formats = [] + + m3u8_url = data.get('urlHlsAes128') or data.get('urlHls') + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)) + + fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x + http_url = data.get('url') + if formats and http_url and re.search(height_re, http_url): + http_url = fix_url(http_url) + for m3u8_f in formats[:]: + height = m3u8_f.get('height') + if not height: + continue + f = m3u8_f.copy() + del f['protocol'] + f.update({ + 'format_id': m3u8_f['format_id'].replace('hls-', 'http-'), + 'url': re.sub(height_re, '-%dp.' 
% height, http_url), + }) + formats.append(f) + else: + sources = data.get('sources') or {} + for key, format_id in self._QUALITIES: + format_url = sources.get(key) + if not format_url: + continue + height = int_or_none(self._search_regex( + height_re, format_url, 'height', default=None)) + formats.append({ + 'format_id': format_id, + 'url': fix_url(format_url), + 'height': height, + }) + + mpd_url = data.get('urlDash') + if mpd_url and (self.get_param('allow_unplayable_formats') or not data.get('drm')): + formats.extend(self._extract_mpd_formats( + mpd_url, media_id, mpd_id='dash', fatal=False)) + + audio_url = data.get('urlAudio') + if audio_url: + formats.append({ + 'format_id': 'audio', + 'url': audio_url, + 'vcodec': 'none', + }) + self._sort_formats(formats) + + subtitles = {} + for track in (data.get('tracks') or {}).values(): + sub_url = track.get('url') + if not sub_url: + continue + subtitles.setdefault(track.get('lang') or 'fr', []).append({ + 'url': sub_url, + }) + + return { + 'id': media_id, + 'formats': formats, + 'title': title, + 'description': strip_or_none(data.get('description')), + 'thumbnail': data.get('thumbnail'), + 'duration': float_or_none(data.get('realDuration')), + 'timestamp': int_or_none(data.get('liveFrom')), + 'series': data.get('programLabel'), + 'subtitles': subtitles, + 'is_live': is_live, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/rte.py b/plugins/youtube_download/yt_dlp/extractor/rte.py new file mode 100644 index 0000000..1fbc729 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/rte.py @@ -0,0 +1,167 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + float_or_none, + parse_iso8601, + str_or_none, + try_get, + unescapeHTML, + url_or_none, + ExtractorError, +) + + +class RteBaseIE(InfoExtractor): + def _real_extract(self, url): + item_id = self._match_id(url) + + info_dict = {} + formats = [] + + ENDPOINTS = ( + 'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=', + 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=', + ) + + for num, ep_url in enumerate(ENDPOINTS, start=1): + try: + data = self._download_json(ep_url + item_id, item_id) + except ExtractorError as ee: + if num < len(ENDPOINTS) or formats: + continue + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: + error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False) + if error_info: + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error_info['message']), + expected=True) + raise + + # NB the string values in the JSON are stored using XML escaping(!) 
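+            # e.g. an ampersand inside a show title arrives as '&amp;' and must
+            # be unescaped to '&'; hence the unescapeHTML() calls on the title
+            # and description below.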
+ show = try_get(data, lambda x: x['shows'][0], dict) + if not show: + continue + + if not info_dict: + title = unescapeHTML(show['title']) + description = unescapeHTML(show.get('description')) + thumbnail = show.get('thumbnail') + duration = float_or_none(show.get('duration'), 1000) + timestamp = parse_iso8601(show.get('published')) + info_dict = { + 'id': item_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + } + + mg = try_get(show, lambda x: x['media:group'][0], dict) + if not mg: + continue + + if mg.get('url'): + m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url']) + if m: + m = m.groupdict() + formats.append({ + 'url': m['url'] + '/' + m['app'], + 'app': m['app'], + 'play_path': m['playpath'], + 'player_url': url, + 'ext': 'flv', + 'format_id': 'rtmp', + }) + + if mg.get('hls_server') and mg.get('hls_url'): + formats.extend(self._extract_m3u8_formats( + mg['hls_server'] + mg['hls_url'], item_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + + if mg.get('hds_server') and mg.get('hds_url'): + formats.extend(self._extract_f4m_formats( + mg['hds_server'] + mg['hds_url'], item_id, + f4m_id='hds', fatal=False)) + + mg_rte_server = str_or_none(mg.get('rte:server')) + mg_url = str_or_none(mg.get('url')) + if mg_rte_server and mg_url: + hds_url = url_or_none(mg_rte_server + mg_url) + if hds_url: + formats.extend(self._extract_f4m_formats( + hds_url, item_id, f4m_id='hds', fatal=False)) + + self._sort_formats(formats) + + info_dict['formats'] = formats + return info_dict + + +class RteIE(RteBaseIE): + IE_NAME = 'rte' + IE_DESC = 'Raidió Teilifís Éireann TV' + _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/', + 'md5': '4a76eb3396d98f697e6e8110563d2604', + 'info_dict': { + 'id': '10478715', + 'ext': 'mp4', + 'title': 'iWitness', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'The spirit of Ireland, one voice and one minute at a time.', + 'duration': 60.046, + 'upload_date': '20151012', + 'timestamp': 1444694160, + }, + } + + +class RteRadioIE(RteBaseIE): + IE_NAME = 'rte:radio' + IE_DESC = 'Raidió Teilifís Éireann radio' + # Radioplayer URLs have two distinct specifier formats, + # the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>: + # the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_ + # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated. + # An <id> uniquely defines an individual recording, and is the only part we require. 
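+    # e.g. '#!rii=16:10507902:2414:27-12-2015:' (old style) and
+    # '#!rii=b16_3250678_8861_06-04-2012_' (new style); in both, the second
+    # field (10507902 / 3250678) is the recording <id> captured below.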
+ _VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)' + + _TESTS = [{ + # Old-style player URL; HLS and RTMPE formats + 'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:', + 'md5': 'c79ccb2c195998440065456b69760411', + 'info_dict': { + 'id': '10507902', + 'ext': 'mp4', + 'title': 'Gloria', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'md5:9ce124a7fb41559ec68f06387cabddf0', + 'timestamp': 1451203200, + 'upload_date': '20151227', + 'duration': 7230.0, + }, + }, { + # New-style player URL; RTMPE formats only + 'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_', + 'info_dict': { + 'id': '3250678', + 'ext': 'flv', + 'title': 'The Lyric Concert with Paul Herriott', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': '', + 'timestamp': 1333742400, + 'upload_date': '20120406', + 'duration': 7199.016, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }] diff --git a/plugins/youtube_download/yt_dlp/extractor/rtl2.py b/plugins/youtube_download/yt_dlp/extractor/rtl2.py new file mode 100644 index 0000000..e291714 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/rtl2.py @@ -0,0 +1,199 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 +from ..compat import ( + compat_b64decode, + compat_str, +) +from ..utils import ( + ExtractorError, + int_or_none, + strip_or_none, +) + + +class RTL2IE(InfoExtractor): + IE_NAME = 'rtl2' + _VALID_URL = r'https?://(?:www\.)?rtl2\.de/sendung/[^/]+/(?:video/(?P<vico_id>\d+)[^/]+/(?P<vivi_id>\d+)-|folge/)(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0', + 'info_dict': { + 'id': 'folge-203-0', + 'ext': 'f4v', + 'title': 'GRIP sucht den Sommerkönig', + 'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f' + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], + }, { + 'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/', + 'info_dict': { + 'id': 'anna-erwischt-alex', + 'ext': 'mp4', + 'title': 'Anna erwischt Alex!', + 'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.' 
+ }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], + }] + + def _real_extract(self, url): + vico_id, vivi_id, display_id = self._match_valid_url(url).groups() + if not vico_id: + webpage = self._download_webpage(url, display_id) + + mobj = re.search( + r'data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"', + webpage) + if mobj: + vico_id = mobj.group('vico_id') + vivi_id = mobj.group('vivi_id') + else: + vico_id = self._html_search_regex( + r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id') + vivi_id = self._html_search_regex( + r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id') + + info = self._download_json( + 'https://service.rtl2.de/api-player-vipo/video.php', + display_id, query={ + 'vico_id': vico_id, + 'vivi_id': vivi_id, + }) + video_info = info['video'] + title = video_info['titel'] + + formats = [] + + rtmp_url = video_info.get('streamurl') + if rtmp_url: + rtmp_url = rtmp_url.replace('\\', '') + stream_url = 'mp4:' + self._html_search_regex(r'/ondemand/(.+)', rtmp_url, 'stream URL') + rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0'] + + formats.append({ + 'format_id': 'rtmp', + 'url': rtmp_url, + 'play_path': stream_url, + 'player_url': 'https://www.rtl2.de/sites/default/modules/rtl2/jwplayer/jwplayer-7.6.0/jwplayer.flash.swf', + 'page_url': url, + 'flash_version': 'LNX 11,2,202,429', + 'rtmp_conn': rtmp_conn, + 'no_resume': True, + 'quality': 1, + }) + + m3u8_url = video_info.get('streamurl_hls') + if m3u8_url: + formats.extend(self._extract_akamai_formats(m3u8_url, display_id)) + + self._sort_formats(formats) + + return { + 'id': display_id, + 'title': title, + 'thumbnail': video_info.get('image'), + 'description': video_info.get('beschreibung'), + 'duration': int_or_none(video_info.get('duration')), + 'formats': formats, + } + + +class RTL2YouBaseIE(InfoExtractor): + _BACKWERK_BASE_URL = 'https://p-you-backwerk.rtl2apps.de/' + + +class RTL2YouIE(RTL2YouBaseIE): + IE_NAME = 'rtl2:you' + _VALID_URL = r'http?://you\.rtl2\.de/(?:video/\d+/|youplayer/index\.html\?.*?\bvid=)(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://you.rtl2.de/video/3002/15740/MJUNIK%20%E2%80%93%20Home%20of%20YOU/307-hirn-wo-bist-du', + 'info_dict': { + 'id': '15740', + 'ext': 'mp4', + 'title': 'MJUNIK – Home of YOU - #307 Hirn, wo bist du?!', + 'description': 'md5:ddaa95c61b372b12b66e115b2772fe01', + 'age_limit': 12, + }, + }, { + 'url': 'http://you.rtl2.de/youplayer/index.html?vid=15712', + 'only_matching': True, + }] + _AES_KEY = b'\xe9W\xe4.<*\xb8\x1a\xd2\xb6\x92\xf3C\xd3\xefL\x1b\x03*\xbbbH\xc0\x03\xffo\xc2\xf2(\xaa\xaa!' 
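+    # The API's streamUrl field is base64('<b64 ciphertext>:<b64 IV>');
+    # _real_extract() below splits it on ':' and decrypts the m3u8 URL with
+    # AES-CBC (PKCS#7 padded) using the key above.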
+ _GEO_COUNTRIES = ['DE'] + + def _real_extract(self, url): + video_id = self._match_id(url) + + stream_data = self._download_json( + self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id) + + data, iv = compat_b64decode(stream_data['streamUrl']).decode().split(':') + stream_url = unpad_pkcs7(aes_cbc_decrypt_bytes( + compat_b64decode(data), self._AES_KEY, compat_b64decode(iv))) + if b'rtl2_you_video_not_found' in stream_url: + raise ExtractorError('video not found', expected=True) + + formats = self._extract_m3u8_formats(stream_url.decode(), video_id, 'mp4', 'm3u8_native') + self._sort_formats(formats) + + video_data = self._download_json( + self._BACKWERK_BASE_URL + 'video/' + video_id, video_id) + + series = video_data.get('formatTitle') + title = episode = video_data.get('title') or series + if series and series != title: + title = '%s - %s' % (series, title) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': strip_or_none(video_data.get('description')), + 'thumbnail': video_data.get('image'), + 'duration': int_or_none(stream_data.get('duration') or video_data.get('duration'), 1000), + 'series': series, + 'episode': episode, + 'age_limit': int_or_none(video_data.get('minimumAge')), + } + + +class RTL2YouSeriesIE(RTL2YouBaseIE): + IE_NAME = 'rtl2:you:series' + _VALID_URL = r'http?://you\.rtl2\.de/videos/(?P<id>\d+)' + _TEST = { + 'url': 'http://you.rtl2.de/videos/115/dragon-ball', + 'info_dict': { + 'id': '115', + }, + 'playlist_mincount': 5, + } + + def _real_extract(self, url): + series_id = self._match_id(url) + stream_data = self._download_json( + self._BACKWERK_BASE_URL + 'videos', + series_id, query={ + 'formatId': series_id, + 'limit': 1000000000, + }) + + entries = [] + for video in stream_data.get('videos', []): + video_id = compat_str(video['videoId']) + if not video_id: + continue + entries.append(self.url_result( + 'http://you.rtl2.de/video/%s/%s' % (series_id, video_id), + 'RTL2You', video_id)) + return self.playlist_result(entries, series_id) diff --git a/plugins/youtube_download/yt_dlp/extractor/rtlnl.py b/plugins/youtube_download/yt_dlp/extractor/rtlnl.py new file mode 100644 index 0000000..9eaa06f --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/rtlnl.py @@ -0,0 +1,146 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, +) + + +class RtlNlIE(InfoExtractor): + IE_NAME = 'rtl.nl' + IE_DESC = 'rtl.nl and rtlxl.nl' + _VALID_URL = r'''(?x) + https?://(?:(?:www|static)\.)? 
+ (?: + rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/| + rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)| + embed\.rtl\.nl/\#uuid= + ) + (?P<id>[0-9a-f-]+)''' + + _TESTS = [{ + # new URL schema + 'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f', + 'md5': '490428f1187b60d714f34e1f2e3af0b6', + 'info_dict': { + 'id': '0bd1384d-d970-3086-98bb-5c104e10c26f', + 'ext': 'mp4', + 'title': 'RTL Nieuws', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'timestamp': 1593293400, + 'upload_date': '20200627', + 'duration': 661.08, + }, + }, { + # old URL schema + 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416', + 'md5': '473d1946c1fdd050b2c0161a4b13c373', + 'info_dict': { + 'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416', + 'ext': 'mp4', + 'title': 'RTL Nieuws', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'timestamp': 1461951000, + 'upload_date': '20160429', + 'duration': 1167.96, + }, + 'skip': '404', + }, { + # best format available a3t + 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false', + 'md5': 'dea7474214af1271d91ef332fb8be7ea', + 'info_dict': { + 'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed', + 'ext': 'mp4', + 'timestamp': 1424039400, + 'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag', + 'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$', + 'upload_date': '20150215', + 'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.', + } + }, { + # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275) + # best format available nettv + 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false', + 'info_dict': { + 'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a', + 'ext': 'mp4', + 'title': 'RTL Nieuws - Meer beelden van overval juwelier', + 'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$', + 'timestamp': 1437233400, + 'upload_date': '20150718', + 'duration': 30.474, + }, + 'params': { + 'skip_download': True, + }, + }, { + # encrypted m3u8 streams, georestricted + 'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7', + 'only_matching': True, + }, { + 'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0', + 'only_matching': True, + }, { + 'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f', + 'only_matching': True, + }, { + 'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/', + 'only_matching': True, + }, { + 'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl', + 'only_matching': True, + }, { + # new embed URL schema + 'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false', + 'only_matching': True, + }] + + def _real_extract(self, url): + uuid = self._match_id(url) + info = self._download_json( + 'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % 
uuid,
+            uuid)
+
+        material = info['material'][0]
+        title = info['abstracts'][0]['name']
+        subtitle = material.get('title')
+        if subtitle:
+            title += ' - %s' % subtitle
+        description = material.get('synopsis')
+
+        meta = info.get('meta', {})
+
+        videopath = material['videopath']
+        m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
+
+        formats = self._extract_m3u8_formats(
+            m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
+        self._sort_formats(formats)
+
+        thumbnails = []
+
+        for p in ('poster_base_url', 'thumb_base_url'):
+            if not meta.get(p):
+                continue
+
+            thumbnails.append({
+                'url': self._proto_relative_url(meta[p] + uuid),
+                'width': int_or_none(self._search_regex(
+                    r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)),
+                'height': int_or_none(self._search_regex(
+                    r'/sz=[0-9]+x([0-9]+)',
+                    meta[p], 'thumbnail height', fatal=False))
+            })
+
+        return {
+            'id': uuid,
+            'title': title,
+            'formats': formats,
+            'timestamp': material['original_date'],
+            'description': description,
+            'duration': parse_duration(material.get('duration')),
+            'thumbnails': thumbnails,
+        }
diff --git a/plugins/youtube_download/yt_dlp/extractor/rtnews.py b/plugins/youtube_download/yt_dlp/extractor/rtnews.py
new file mode 100644
index 0000000..68b6044
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/rtnews.py
@@ -0,0 +1,199 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class RTNewsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?rt\.com/[^/]+/(?:[^/]+/)?(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'https://www.rt.com/sport/546301-djokovic-arrives-belgrade-crowds/',
+        'playlist_mincount': 2,
+        'info_dict': {
+            'id': '546301',
+            'title': 'Crowds gather to greet deported Djokovic as he returns to Serbia (VIDEO)',
+            'description': 'md5:1d5bfe1a988d81fd74227cfdf93d314d',
+            'thumbnail': 'https://cdni.rt.com/files/2022.01/article/61e587a085f540102c3386c1.png'
+        },
+    }, {
+        'url': 'https://www.rt.com/shows/in-question/535980-plot-to-assassinate-julian-assange/',
+        'playlist_mincount': 1,
+        'info_dict': {
+            'id': '535980',
+            'title': 'The plot to assassinate Julian Assange',
+            'description': 'md5:55279ce5e4441dc1d16e2e4a730152cd',
+            'thumbnail': 'https://cdni.rt.com/files/2021.09/article/615226f42030274e8879b53d.png'
+        },
+        'playlist': [{
+            'info_dict': {
+                'id': '6152271d85f5400464496162',
+                'ext': 'mp4',
+                'title': '6152271d85f5400464496162',
+            },
+        }]
+    }]
+
+    def _entries(self, webpage):
+        video_urls = set(re.findall(r'https://cdnv\.rt\.com/.*[a-f0-9]+\.mp4', webpage))
+        for v_url in video_urls:
+            v_id = re.search(r'([a-f0-9]+)\.mp4', v_url).group(1)
+            if v_id:
+                yield {
+                    'id': v_id,
+                    'title': v_id,
+                    'url': v_url,
+                }
+
+    def _real_extract(self, url):
+        id = self._match_id(url)
+        webpage = self._download_webpage(url, id)
+
+        return {
+            '_type': 'playlist',
+            'id': id,
+            'entries': self._entries(webpage),
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
+
+
+class RTDocumentryIE(InfoExtractor):
+    _VALID_URL = r'https?://rtd\.rt\.com/(?:(?:series|shows)/[^/]+|films)/(?P<id>[^/?$&#]+)'
+
+    _TESTS = [{
+        'url': 'https://rtd.rt.com/films/escobars-hitman/',
+        'info_dict': {
+            'id': 'escobars-hitman',
+            'ext': 'mp4',
+            'title': "Escobar's Hitman. 
Former drug-gang killer, now loved and loathed in Colombia", + 'description': 'md5:647c76984b7cb9a8b52a567e87448d88', + 'thumbnail': 'https://cdni.rt.com/rtd-files/films/escobars-hitman/escobars-hitman_11.jpg', + 'average_rating': 8.53, + 'duration': 3134.0 + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://rtd.rt.com/shows/the-kalashnikova-show-military-secrets-anna-knishenko/iskander-tactical-system-natos-headache/', + 'info_dict': { + 'id': 'iskander-tactical-system-natos-headache', + 'ext': 'mp4', + 'title': "Iskander tactical system. NATO's headache | The Kalashnikova Show. Episode 10", + 'description': 'md5:da7c24a0aa67bc2bb88c86658508ca87', + 'thumbnail': 'md5:89de8ce38c710b7c501ff02d47e2aa89', + 'average_rating': 9.27, + 'duration': 274.0, + 'timestamp': 1605726000, + 'view_count': int, + 'upload_date': '20201118' + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://rtd.rt.com/series/i-am-hacked-trailer/introduction-to-safe-digital-life-ep2/', + 'info_dict': { + 'id': 'introduction-to-safe-digital-life-ep2', + 'ext': 'mp4', + 'title': 'How to Keep your Money away from Hackers | I am Hacked. Episode 2', + 'description': 'md5:c46fa9a5af86c0008c45a3940a8cce87', + 'thumbnail': 'md5:a5e81b9bf5aed8f5e23d9c053601b825', + 'average_rating': 10.0, + 'duration': 1524.0, + 'timestamp': 1636977600, + 'view_count': int, + 'upload_date': '20211115' + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + ld_json = self._search_json_ld(webpage, None, fatal=False) + if not ld_json: + self.raise_no_formats('No video/audio found at the provided url.', expected=True) + media_json = self._parse_json( + self._search_regex(r'(?s)\'Med\'\s*:\s*\[\s*({.+})\s*\]\s*};', webpage, 'media info'), + id, transform_source=js_to_json) + if 'title' not in ld_json and 'title' in media_json: + ld_json['title'] = media_json['title'] + formats = [{'url': src['file']} for src in media_json.get('sources') or [] if src.get('file')] + + return { + 'id': id, + 'thumbnail': media_json.get('image'), + 'formats': formats, + **ld_json + } + + +class RTDocumentryPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://rtd\.rt\.com/(?:series|shows)/(?P<id>[^/]+)/$' + + _TESTS = [{ + 'url': 'https://rtd.rt.com/series/i-am-hacked-trailer/', + 'playlist_mincount': 6, + 'info_dict': { + 'id': 'i-am-hacked-trailer', + }, + }, { + 'url': 'https://rtd.rt.com/shows/the-kalashnikova-show-military-secrets-anna-knishenko/', + 'playlist_mincount': 34, + 'info_dict': { + 'id': 'the-kalashnikova-show-military-secrets-anna-knishenko', + }, + }] + + def _entries(self, webpage, id): + video_urls = set(re.findall(r'list-2__link\s*"\s*href="([^"]+)"', webpage)) + for v_url in video_urls: + if id not in v_url: + continue + yield self.url_result( + 'https://rtd.rt.com%s' % v_url, + ie=RTDocumentryIE.ie_key()) + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + + return { + '_type': 'playlist', + 'id': id, + 'entries': self._entries(webpage, id), + } + + +class RuptlyIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ruptly\.tv/[a-z]{2}/videos/(?P<id>\d+-\d+)' + + _TESTS = [{ + 'url': 'https://www.ruptly.tv/en/videos/20220112-020-Japan-Double-trouble-Tokyo-zoo-presents-adorable-panda-twins', + 'info_dict': { + 'id': '20220112-020', + 'ext': 'mp4', + 'title': 'Japan: Double trouble! 
Tokyo zoo presents adorable panda twins | Video Ruptly', + 'description': 'md5:85a8da5fdb31486f0562daf4360ce75a', + 'thumbnail': 'https://storage.ruptly.tv/thumbnails/20220112-020/i6JQKnTNpYuqaXsR/i6JQKnTNpYuqaXsR.jpg' + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + m3u8_url = self._search_regex(r'preview_url"\s?:\s?"(https?://storage\.ruptly\.tv/video_projects/.+\.m3u8)"', webpage, 'm3u8 url', fatal=False) + if not m3u8_url: + self.raise_no_formats('No video/audio found at the provided url.', expected=True) + formats, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, id, ext='mp4') + return { + 'id': id, + 'formats': formats, + 'subtitles': subs, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + } diff --git a/plugins/youtube_download/yt_dlp/extractor/rtp.py b/plugins/youtube_download/yt_dlp/extractor/rtp.py new file mode 100644 index 0000000..c165ade --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/rtp.py @@ -0,0 +1,100 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import js_to_json +import re +import json +import urllib.parse +import base64 + + +class RTPIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?' + _TESTS = [{ + 'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas', + 'md5': 'e736ce0c665e459ddb818546220b4ef8', + 'info_dict': { + 'id': 'e174042', + 'ext': 'mp3', + 'title': 'Paixões Cruzadas', + 'description': 'As paixões musicais de António Cartaxo e António Macedo', + 'thumbnail': r're:^https?://.*\.jpg', + }, + }, { + 'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas', + 'only_matching': True, + }] + + _RX_OBFUSCATION = re.compile(r'''(?xs) + atob\s*\(\s*decodeURIComponent\s*\(\s* + (\[[0-9A-Za-z%,'"]*\]) + \s*\.\s*join\(\s*(?:""|'')\s*\)\s*\)\s*\) + ''') + + def __unobfuscate(self, data, *, video_id): + if data.startswith('{'): + data = self._RX_OBFUSCATION.sub( + lambda m: json.dumps( + base64.b64decode(urllib.parse.unquote( + ''.join(self._parse_json(m.group(1), video_id)) + )).decode('iso-8859-1')), + data) + return js_to_json(data) + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + title = self._html_search_meta( + 'twitter:title', webpage, display_name='title', fatal=True) + + f, config = self._search_regex( + r'''(?sx) + var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s* + var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/) + ''', webpage, + 'player config', group=('f', 'config')) + + f = self._parse_json( + f, video_id, + lambda data: self.__unobfuscate(data, video_id=video_id)) + config = self._parse_json( + config, video_id, + lambda data: self.__unobfuscate(data, video_id=video_id)) + + formats = [] + if isinstance(f, dict): + f_hls = f.get('hls') + if f_hls is not None: + formats.extend(self._extract_m3u8_formats( + f_hls, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')) + + f_dash = f.get('dash') + if f_dash is not None: + formats.extend(self._extract_mpd_formats(f_dash, video_id, mpd_id='dash')) + else: + formats.append({ + 'format_id': 'f', + 'url': f, + 'vcodec': 'none' if config.get('mediaType') == 'audio' else None, + }) + + subtitles = {} + + vtt = config.get('vtt') + if vtt is not None: + for lcode, lname, url 
in vtt: + subtitles.setdefault(lcode, []).append({ + 'name': lname, + 'url': url, + }) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': self._html_search_meta(['description', 'twitter:description'], webpage), + 'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage), + 'subtitles': subtitles, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/rtrfm.py b/plugins/youtube_download/yt_dlp/extractor/rtrfm.py new file mode 100644 index 0000000..93d51e8 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/rtrfm.py @@ -0,0 +1,67 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class RTRFMIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rtrfm\.com\.au/(?:shows|show-episode)/(?P<id>[^/?\#&]+)' + _TESTS = [ + { + 'url': 'https://rtrfm.com.au/shows/breakfast/', + 'md5': '46168394d3a5ce237cf47e85d0745413', + 'info_dict': { + 'id': 'breakfast-2021-11-16', + 'ext': 'mp3', + 'series': 'Breakfast with Taylah', + 'title': r're:^Breakfast with Taylah \d{4}-\d{2}-\d{2}$', + 'description': 'md5:0979c3ab1febfbec3f1ccb743633c611', + }, + 'skip': 'ID and md5 changes daily', + }, + { + 'url': 'https://rtrfm.com.au/show-episode/breakfast-2021-11-11/', + 'md5': '396bedf1e40f96c62b30d4999202a790', + 'info_dict': { + 'id': 'breakfast-2021-11-11', + 'ext': 'mp3', + 'series': 'Breakfast with Taylah', + 'title': 'Breakfast with Taylah 2021-11-11', + 'description': 'md5:0979c3ab1febfbec3f1ccb743633c611', + }, + }, + { + 'url': 'https://rtrfm.com.au/show-episode/breakfast-2020-06-01/', + 'md5': '594027f513ec36a24b15d65007a24dff', + 'info_dict': { + 'id': 'breakfast-2020-06-01', + 'ext': 'mp3', + 'series': 'Breakfast with Taylah', + 'title': 'Breakfast with Taylah 2020-06-01', + 'description': r're:^Breakfast with Taylah ', + }, + 'skip': 'This audio has expired', + }, + ] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + show, date, title = self._search_regex( + r'''\.playShow(?:From)?\(['"](?P<show>[^'"]+)['"],\s*['"](?P<date>[0-9]{4}-[0-9]{2}-[0-9]{2})['"],\s*['"](?P<title>[^'"]+)['"]''', + webpage, 'details', group=('show', 'date', 'title')) + url = self._download_json( + 'https://restreams.rtrfm.com.au/rzz', + show, 'Downloading MP3 URL', query={'n': show, 'd': date})['u'] + # This is the only indicator of an error until trying to download the URL and + # downloads of mp4 URLs always fail (403 for current episodes, 404 for missing). 
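+        # So an '.mp4' placeholder URL from the 'rzz' endpoint means the
+        # episode is unavailable; playable shows are served as .mp3.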
+        if '.mp4' in url:
+            url = None
+            self.raise_no_formats('Expired or no episode on this date', expected=True)
+        return {
+            'id': '%s-%s' % (show, date),
+            'title': '%s %s' % (title, date),
+            'series': title,
+            'url': url,
+            'release_date': date,
+            'description': self._og_search_description(webpage),
+        }
diff --git a/plugins/youtube_download/yt_dlp/extractor/rts.py b/plugins/youtube_download/yt_dlp/extractor/rts.py
new file mode 100644
index 0000000..865a730
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/rts.py
@@ -0,0 +1,235 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .srgssr import SRGSSRIE
+from ..compat import compat_str
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+    unescapeHTML,
+    urljoin,
+)
+
+
+class RTSIE(SRGSSRIE):
+    IE_DESC = 'RTS.ch'
+    _VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html'
+
+    _TESTS = [
+        {
+            'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
+            'md5': '753b877968ad8afaeddccc374d4256a5',
+            'info_dict': {
+                'id': '3449373',
+                'display_id': 'les-enfants-terribles',
+                'ext': 'mp4',
+                'duration': 1488,
+                'title': 'Les Enfants Terribles',
+                'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
+                'uploader': 'Divers',
+                'upload_date': '19680921',
+                'timestamp': -40280400,
+                'thumbnail': r're:^https?://.*\.image',
+                'view_count': int,
+            },
+            'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
+        },
+        {
+            'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
+            'info_dict': {
+                'id': '5624065',
+                'title': 'Passe-moi les jumelles',
+            },
+            'playlist_mincount': 4,
+        },
+        {
+            'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html',
+            'info_dict': {
+                'id': '5745975',
+                'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski',
+                'ext': 'mp4',
+                'duration': 48,
+                'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
+                'description': 'Hockey - Playoff',
+                'uploader': 'Hockey',
+                'upload_date': '20140403',
+                'timestamp': 1396556882,
+                'thumbnail': r're:^https?://.*\.image',
+                'view_count': int,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+            'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
+            'skip': 'Blocked outside Switzerland',
+        },
+        {
+            'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
+            'md5': '9bb06503773c07ce83d3cbd793cebb91',
+            'info_dict': {
+                'id': '5745356',
+                'display_id': 'londres-cachee-par-un-epais-smog',
+                'ext': 'mp4',
+                'duration': 33,
+                'title': 'Londres cachée par un épais smog',
+                'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.',
+                'uploader': 'L\'actu en vidéo',
+                'upload_date': '20140403',
+                'timestamp': 1396537322,
+                'thumbnail': r're:^https?://.*\.image',
+                'view_count': int,
+            },
+            'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
+        },
+        {
+            'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
+            'md5': 'dd8ef6a22dff163d063e2a52bc8adcae',
+            'info_dict': {
+                'id': '5706148',
+                'display_id': 'urban-hippie-de-damien-krisl-03-04-2014',
+                'ext': 'mp3',
+                'duration': 123,
+                'title': '"Urban Hippie", de Damien Krisl',
+                'description': 'Des Hippies super glam.',
+                'upload_date': '20140403',
+                'timestamp': 1396551600,
+            },
+        },
+        {
+            # article with videos on rhs
+            'url': 'http://www.rts.ch/sport/hockey/6693917-hockey-davos-decroche-son-31e-titre-de-champion-de-suisse.html',
+            'info_dict': {
+                'id': '6693917',
+                'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse',
+            },
+            'playlist_mincount': 5,
+        },
+        {
+            'url': 'http://pages.rts.ch/emissions/passe-moi-les-jumelles/5624065-entre-ciel-et-mer.html',
+            'only_matching': True,
+        }
+    ]
+
+    def _real_extract(self, url):
+        m = self._match_valid_url(url)
+        media_id = m.group('rts_id') or m.group('id')
+        display_id = m.group('display_id') or media_id
+
+        def download_json(internal_id):
+            return self._download_json(
+                'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
+                display_id)
+
+        all_info = download_json(media_id)
+
+        # media_id extracted out of URL is not always a real id
+        if 'video' not in all_info and 'audio' not in all_info:
+            entries = []
+
+            for item in all_info.get('items', []):
+                item_url = item.get('url')
+                if not item_url:
+                    continue
+                entries.append(self.url_result(item_url, 'RTS'))
+
+            if not entries:
+                page, urlh = self._download_webpage_handle(url, display_id)
+                if re.match(self._VALID_URL, urlh.geturl()).group('id') != media_id:
+                    return self.url_result(urlh.geturl(), 'RTS')
+
+                # article with videos on rhs
+                videos = re.findall(
+                    r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:([^"]+)"',
+                    page)
+                if not videos:
+                    videos = re.findall(
+                        r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"',
+                        page)
+                if videos:
+                    entries = [self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos]
+
+            if entries:
+                return self.playlist_result(entries, media_id, all_info.get('title'))
+
+            internal_id = self._html_search_regex(
+                r'<(?:video|audio) data-id="([0-9]+)"', page,
+                'internal video id')
+            all_info = download_json(internal_id)
+
+        media_type = 'video' if 'video' in all_info else 'audio'
+
+        # check for errors
+        self._get_media_data('rts', media_type, media_id)
+
+        info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
+
+        title = info['title']
+
+        def extract_bitrate(url):
+            return int_or_none(self._search_regex(
+                r'-([0-9]+)k\.', url, 'bitrate', default=None))
+
+        formats = []
+        streams = info.get('streams', {})
+        for format_id, format_url in streams.items():
+            if format_id == 'hds_sd' and 'hds' in streams:
+                continue
+            if format_id == 'hls_sd' and 'hls' in streams:
+                continue
+            ext = determine_ext(format_url)
+            if ext in ('m3u8', 'f4m'):
+                format_url = self._get_tokenized_src(format_url, media_id, format_id)
+                if ext == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        format_url + ('?' if '?' not in format_url else '&') + 'hdcore=3.4.0',
+                        media_id, f4m_id=format_id, fatal=False))
+                else:
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, media_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False))
+            else:
+                formats.append({
+                    'format_id': format_id,
+                    'url': format_url,
+                    'tbr': extract_bitrate(format_url),
+                })
+
+        download_base = 'http://rtsww%s-d.rts.ch/' % ('-a' if media_type == 'audio' else '')
+        for media in info.get('media', []):
+            media_url = media.get('url')
+            if not media_url or re.match(r'https?://', media_url):
+                continue
+            rate = media.get('rate')
+            ext = media.get('ext') or determine_ext(media_url, 'mp4')
+            format_id = ext
+            if rate:
+                format_id += '-%dk' % rate
+            formats.append({
+                'format_id': format_id,
+                'url': urljoin(download_base, media_url),
+                'tbr': rate or extract_bitrate(media_url),
+            })
+
+        self._check_formats(formats, media_id)
+        self._sort_formats(formats)
+
+        duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
+        if isinstance(duration, compat_str):
+            duration = parse_duration(duration)
+
+        return {
+            'id': media_id,
+            'display_id': display_id,
+            'formats': formats,
+            'title': title,
+            'description': info.get('intro'),
+            'duration': duration,
+            'view_count': int_or_none(info.get('plays')),
+            'uploader': info.get('programName'),
+            'timestamp': parse_iso8601(info.get('broadcast_date')),
+            'thumbnail': unescapeHTML(info.get('preview_image_url')),
+        }
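RTSIE above has to cope with URLs whose numeric id names an article rather than a media item: when the JSON probe finds neither 'video' nor 'audio', it falls back to scanning the article HTML for SRGSSR media URNs. A minimal standalone sketch of that fallback — find_media_urns is a hypothetical helper, but the two regexes are the ones used in the diff above:

    import re

    def find_media_urns(page):
        # Content items on the right-hand side carry a data-video-urn attribute.
        urns = re.findall(
            r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:([^"]+)"',
            page)
        if not urns:
            # Fall back to embedded srg-player iframes whose src ends in a URN.
            urns = re.findall(
                r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"',
                page)
        return ['srgssr:%s' % urn for urn in urns]
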
diff --git a/plugins/youtube_download/yt_dlp/extractor/rtve.py b/plugins/youtube_download/yt_dlp/extractor/rtve.py
new file mode 100644
index 0000000..af1bb94
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/rtve.py
@@ -0,0 +1,355 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import io
+import sys
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_b64decode,
+    compat_struct_unpack,
+)
+from ..utils import (
+    determine_ext,
+    ExtractorError,
+    float_or_none,
+    qualities,
+    remove_end,
+    remove_start,
+    std_headers,
+    try_get,
+)
+
+_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x))
+
+
+class RTVEALaCartaIE(InfoExtractor):
+    IE_NAME = 'rtve.es:alacarta'
+    IE_DESC = 'RTVE a la carta'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
+        'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
+        'info_dict': {
+            'id': '2491869',
+            'ext': 'mp4',
+            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
+            'duration': 5024.566,
+            'series': 'Balonmano',
+        },
+        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
+    }, {
+        'note': 'Live stream',
+        'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
+        'info_dict': {
+            'id': '1694255',
+            'ext': 'mp4',
+            'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': 'live stream',
+        },
+    }, {
+        'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
+        'md5': 'd850f3c8731ea53952ebab489cf81cbf',
+        'info_dict': {
+            'id': '4236788',
+            'ext': 'mp4',
+            'title': 'Servir y proteger - Capítulo 104',
+            'duration': 3222.0,
+        },
+        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
+    }, {
+        'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
+        'only_matching': True,
+    }]
+
+    def _real_initialize(self):
+        user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
+        self._manager = self._download_json(
+            'http://www.rtve.es/odin/loki/' + user_agent_b64,
+            None, 'Fetching manager info')['manager']
+
+    @staticmethod
+    def _decrypt_url(png):
+        encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
+        while True:
+            length = compat_struct_unpack('!I', encrypted_data.read(4))[0]
+            chunk_type = encrypted_data.read(4)
+            if chunk_type == b'IEND':
+                break
+            data = encrypted_data.read(length)
+            if chunk_type == b'tEXt':
+                alphabet_data, text = data.split(b'\0')
+                quality, url_data = text.split(b'%%')
+                alphabet = []
+                e = 0
+                d = 0
+                for l in _bytes_to_chr(alphabet_data):
+                    if d == 0:
+                        alphabet.append(l)
+                        d = e = (e + 1) % 4
+                    else:
+                        d -= 1
+                url = ''
+                f = 0
+                e = 3
+                b = 1
+                for letter in _bytes_to_chr(url_data):
+                    if f == 0:
+                        l = int(letter) * 10
+                        f = 1
+                    else:
+                        if e == 0:
+                            l += int(letter)
+                            url += alphabet[l]
+                            e = (b + 3) % 4
+                            f = 0
+                            b += 1
+                        else:
+                            e -= 1
+
+                yield quality.decode(), url
+            encrypted_data.read(4)  # CRC
+
+    def _extract_png_formats(self, video_id):
+        png = self._download_webpage(
+            'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id),
+            video_id, 'Downloading url information', query={'q': 'v2'})
+        q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
+        formats = []
+        for quality, video_url in self._decrypt_url(png):
+            ext = determine_ext(video_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+            elif ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    video_url, video_id, 'dash', fatal=False))
+            else:
+                formats.append({
+                    'format_id': quality,
+                    'quality': q(quality),
+                    'url': video_url,
+                })
+        self._sort_formats(formats)
+        return formats
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        info = self._download_json(
+            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
+            video_id)['page']['items'][0]
+        if info['state'] == 'DESPU':
+            raise ExtractorError('The video is no longer available', expected=True)
+        title = info['title'].strip()
+        formats = self._extract_png_formats(video_id)
+
+        subtitles = None
+        sbt_file = info.get('sbtFile')
+        if sbt_file:
+            subtitles = self.extract_subtitles(video_id, sbt_file)
+
+        is_live = info.get('live') is True
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': info.get('image'),
+            'subtitles': subtitles,
+            'duration': float_or_none(info.get('duration'), 1000),
+            'is_live': is_live,
+            'series': info.get('programTitle'),
+        }
+
+    def _get_subtitles(self, video_id, sub_file):
+        subs = self._download_json(
+            sub_file + '.json', video_id,
+            'Downloading subtitles info')['page']['items']
+        return dict(
+            (s['lang'], [{'ext': 'vtt', 'url': s['src']}])
+            for s in subs)
+
+
+class RTVEAudioIE(RTVEALaCartaIE):
+    IE_NAME = 'rtve.es:audio'
+    IE_DESC = 'RTVE audio'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)'
+
+    _TESTS = [{
+        'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/',
+        'md5': 'ae06d27bff945c4e87a50f89f6ce48ce',
+        'info_dict': {
+            'id': '5889192',
+            'ext': 'mp3',
+            'title': 'Códigos informáticos',
+            'thumbnail': r're:https?://.+/1598856591583.jpg',
+            'duration': 349.440,
+            'series': 'A hombros de gigantes',
+        },
+    }, {
+        'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/',
+        'md5': '072855ab89a9450e0ba314c717fa5ebc',
+        'info_dict': {
+            'id': '5791165',
+            'ext': 'mp3',
+            'title': 'Ignatius Farray',
+            'thumbnail': r're:https?://.+/1613243011863.jpg',
+            'duration': 3559.559,
+            'series': 'En Radio 3'
+        },
+    }, {
+        'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/',
+        'md5': '0eadab248cc8dd193fa5765712e84d5c',
+        'info_dict': {
+            'id': '6082623',
+            'ext': 'mp3',
+            'title': 'Capítulo 26 y último: La muerte de Victor',
+            'thumbnail': r're:https?://.+/1632147445707.jpg',
+            'duration': 3174.086,
+            'series': 'Frankenstein o el moderno Prometeo'
+        },
+    }]
+
+    def _extract_png_formats(self, audio_id):
+        """
+        This function retrieves the media-related PNG thumbnail, which
+        obfuscates valuable information about the media. This information
+        is decrypted via the base class's _decrypt_url function, yielding
+        the media quality and media URL.
+        """
+        png = self._download_webpage(
+            'http://www.rtve.es/ztnr/movil/thumbnail/%s/audios/%s.png' %
+            (self._manager, audio_id),
+            audio_id, 'Downloading url information', query={'q': 'v2'})
+        q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
+        formats = []
+        for quality, audio_url in self._decrypt_url(png):
+            ext = determine_ext(audio_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    audio_url, audio_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+            elif ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    audio_url, audio_id, 'dash', fatal=False))
+            else:
+                formats.append({
+                    'format_id': quality,
+                    'quality': q(quality),
+                    'url': audio_url,
+                })
+        self._sort_formats(formats)
+        return formats
+
+    def _real_extract(self, url):
+        audio_id = self._match_id(url)
+        info = self._download_json(
+            'https://www.rtve.es/api/audios/%s.json' % audio_id,
+            audio_id)['page']['items'][0]
+
+        return {
+            'id': audio_id,
+            'title': info['title'].strip(),
+            'thumbnail': info.get('thumbnail'),
+            'duration': float_or_none(info.get('duration'), 1000),
+            'series': try_get(info, lambda x: x['programInfo']['title']),
+            'formats': self._extract_png_formats(audio_id),
+        }
+
+
+class RTVEInfantilIE(RTVEALaCartaIE):
+    IE_NAME = 'rtve.es:infantil'
+    IE_DESC = 'RTVE infantil'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
+
+    _TESTS = [{
+        'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
+        'md5': '5747454717aedf9f9fdf212d1bcfc48d',
+        'info_dict': {
+            'id': '3040283',
+            'ext': 'mp4',
+            'title': 'Maneras de vivir',
+            'thumbnail': r're:https?://.+/1426182947956\.JPG',
+            'duration': 357.958,
+        },
+        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
+    }]
+
+
+class RTVELiveIE(RTVEALaCartaIE):
+    IE_NAME = 'rtve.es:live'
+    IE_DESC = 'RTVE.es live streams'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
+
+    _TESTS = [{
+        'url': 'http://www.rtve.es/directo/la-1/',
+        'info_dict': {
+            'id': 'la-1',
+            'ext': 'mp4',
+            'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+        },
+        'params': {
+            'skip_download': 'live stream',
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
+        title = remove_start(title, 'Estoy viendo ')
+
+        vidplayer_id = self._search_regex(
+            (r'playerId=player([0-9]+)',
+             r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
+             r'data-id=["\'](\d+)'),
+            webpage, 'internal video ID')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': self._extract_png_formats(vidplayer_id),
+            'is_live': True,
+        }
+
+
+class RTVETelevisionIE(InfoExtractor):
+    IE_NAME = 'rtve.es:television'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml'
+
+    _TEST = {
+        'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
+        'info_dict': {
+            'id': '3069778',
+            'ext': 'mp4',
+            'title': 'Documentos TV - La revolución del móvil',
+            'duration': 3496.948,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        page_id = self._match_id(url)
+        webpage = self._download_webpage(url, page_id)
+
+        alacarta_url = self._search_regex(
+            r'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&',
+            webpage, 'alacarta url', default=None)
+        if alacarta_url is None:
+            raise ExtractorError(
+                'The webpage doesn\'t contain any video', expected=True)
+
+        return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())
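The PNG trick in the RTVE extractors above hides stream URLs inside the tEXt chunks of a thumbnail image, which _decrypt_url walks chunk by chunk. A minimal standalone sketch of just that chunk walk, assuming plain bytes input — iter_png_text_chunks is a hypothetical helper, not part of the diff:

    import struct

    def iter_png_text_chunks(png_bytes):
        # After the 8-byte PNG signature, every chunk is: 4-byte big-endian
        # length, 4-byte type, payload, 4-byte CRC; stop at IEND.
        pos = 8
        while pos + 8 <= len(png_bytes):
            length = struct.unpack('!I', png_bytes[pos:pos + 4])[0]
            chunk_type = png_bytes[pos + 4:pos + 8]
            if chunk_type == b'IEND':
                break
            if chunk_type == b'tEXt':
                yield png_bytes[pos + 8:pos + 8 + length]
            pos += 8 + length + 4  # header + payload + CRC
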
diff --git a/plugins/youtube_download/yt_dlp/extractor/rtvnh.py b/plugins/youtube_download/yt_dlp/extractor/rtvnh.py
new file mode 100644
index 0000000..6a00f70
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/rtvnh.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class RTVNHIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.rtvnh.nl/video/131946',
+        'md5': 'cdbec9f44550763c8afc96050fa747dc',
+        'info_dict': {
+            'id': '131946',
+            'ext': 'mp4',
+            'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw',
+            'thumbnail': r're:^https?:.*\.jpg$'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        meta = self._parse_json(self._download_webpage(
+            'http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id)
+
+        status = meta.get('status')
+        if status != 200:
+            raise ExtractorError(
+                '%s returned error code %d' % (self.IE_NAME, status), expected=True)
+
+        formats = []
+        rtmp_formats = self._extract_smil_formats(
+            'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id)
+        formats.extend(rtmp_formats)
+
+        for rtmp_format in rtmp_formats:
+            rtmp_url = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
+            rtsp_format = rtmp_format.copy()
+            del rtsp_format['play_path']
+            del rtsp_format['ext']
+            rtsp_format.update({
+                'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
+                'url': rtmp_url.replace('rtmp://', 'rtsp://'),
+                'protocol': 'rtsp',
+            })
+            formats.append(rtsp_format)
+            http_base_url = rtmp_url.replace('rtmp://', 'http://')
+            formats.extend(self._extract_m3u8_formats(
+                http_base_url + '/playlist.m3u8', video_id, 'mp4',
+                'm3u8_native', m3u8_id='hls', fatal=False))
+            formats.extend(self._extract_f4m_formats(
+                http_base_url + '/manifest.f4m',
+                video_id, f4m_id='hds', fatal=False))
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': meta['title'].strip(),
+            'thumbnail': meta.get('image'),
+            'formats': formats
+        }
diff --git a/plugins/youtube_download/yt_dlp/extractor/rtvs.py b/plugins/youtube_download/yt_dlp/extractor/rtvs.py
new file mode 100644
index 0000000..6573b26
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/rtvs.py
@@ -0,0 +1,47 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class RTVSIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv/\d+/(?P<id>\d+)'
+    _TESTS = [{
+        # radio archive
+        'url': 'http://www.rtvs.sk/radio/archiv/11224/414872',
+        'md5': '134d5d6debdeddf8a5d761cbc9edacb8',
+        'info_dict': {
+            'id': '414872',
+            'ext': 'mp3',
+            'title': 'Ostrov pokladov 1 časť.mp3'
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        # tv archive
+        'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118',
+        'md5': '85e2c55cf988403b70cac24f5c086dc6',
+        'info_dict': {
+            'id': '63118',
+            'ext': 'mp4',
+            'title': 'Amaro Džives - Náš deň',
+            'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.'
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        playlist_url = self._search_regex(
+            r'playlist["\']?\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+            'playlist url', group='url')
+
+        data = self._download_json(
+            playlist_url, video_id, 'Downloading playlist')[0]
+        return self._parse_jwplayer_data(data, video_id=video_id)
diff --git a/plugins/youtube_download/yt_dlp/extractor/ruhd.py b/plugins/youtube_download/yt_dlp/extractor/ruhd.py
new file mode 100644
index 0000000..3c8053a
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/ruhd.py
@@ -0,0 +1,45 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class RUHDIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.ruhd.ru/play.php?vid=207',
+        'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83',
+        'info_dict': {
+            'id': '207',
+            'ext': 'divx',
+            'title': 'КОТ бааааам',
+            'description': 'классный кот)',
+            'thumbnail': r're:^http://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._html_search_regex(
+            r'<param name="src" value="([^"]+)"', webpage, 'video url')
+        title = self._html_search_regex(
+            r'<title>([^<]+)&nbsp;&nbsp;&nbsp;RUHD\.ru - Видео Высокого качества №1 в России!</title>',
+            webpage, 'title')
+        description = self._html_search_regex(
+            r'(?s)<div id="longdesc">(.+?)<span id="showlink">',
+            webpage, 'description', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'<param name="previewImage" value="([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+        if thumbnail:
+            thumbnail = 'http://www.ruhd.ru' + thumbnail
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
diff --git a/plugins/youtube_download/yt_dlp/extractor/rule34video.py b/plugins/youtube_download/yt_dlp/extractor/rule34video.py
new file mode 100644
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/rule34video.py
@@ -0,0 +1,66 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class Rule34VideoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?rule34video\.com/videos/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'https://rule34video.com/videos/3065157/shot-it-mmd-hmv/',
+            'md5': 'ffccac2c23799dabbd192621ae4d04f3',
+            'info_dict': {
+                'id': '3065157',
+                'ext': 'mp4',
+                'title': 'Shot It-(mmd hmv)',
+                'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg',
+                'duration': 347.0,
+                'age_limit': 18
+            }
+        },
+        {
+            'url': 'https://rule34video.com/videos/3065296/lara-in-trouble-ep-7-wildeerstudio/',
+            'md5': '6bb5169f9f6b38cd70882bf2e64f6b86',
+            'info_dict': {
+                'id': '3065296',
+                'ext': 'mp4',
+                'title': 'Lara in Trouble Ep. 7 [WildeerStudio]',
+                'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg',
+                'duration': 938.0,
+                'age_limit': 18
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        formats = []
+
+        for mobj in re.finditer(r'<a[^>]+href="(?P<url>[^"]+download=true[^"]+)".*>(?P<ext>[^\s]+) (?P<quality>[^<]+)p', webpage):
+            url, ext, quality = mobj.groups()
+            formats.append({
+                'url': url,
+                'ext': ext.lower(),
+                'quality': quality,
+            })
+
+        title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+        thumbnail = self._html_search_regex(r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None)
+        duration = self._html_search_regex(r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': parse_duration(duration),
+            'age_limit': 18
+        }
diff --git a/plugins/youtube_download/yt_dlp/extractor/rumble.py b/plugins/youtube_download/yt_dlp/extractor/rumble.py
new file mode 100644
index 0000000..49c1f44
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/rumble.py
@@ -0,0 +1,112 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str, compat_HTTPError
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    parse_iso8601,
+    try_get,
+    ExtractorError,
+)
+
+
+class RumbleEmbedIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
+    _TESTS = [{
+        'url': 'https://rumble.com/embed/v5pv5f',
+        'md5': '36a18a049856720189f30977ccbb2c34',
+        'info_dict': {
+            'id': 'v5pv5f',
+            'ext': 'mp4',
+            'title': 'WMAR 2 News Latest Headlines | October 20, 6pm',
+            'timestamp': 1571611968,
+            'upload_date': '20191020',
+        }
+    }, {
+        'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return [
+            mobj.group('url')
+            for mobj in re.finditer(
+                r'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>%s)' % RumbleEmbedIE._VALID_URL,
+                webpage)]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        video = self._download_json(
+            'https://rumble.com/embedJS/', video_id,
+            query={'request': 'video', 'v': video_id})
+        title = video['title']
+
+        formats = []
+        for height, ua in (video.get('ua') or {}).items():
+            for i in range(2):
+                f_url = try_get(ua, lambda x: x[i], compat_str)
+                if f_url:
+                    ext = determine_ext(f_url)
+                    f = {
+                        'ext': ext,
+                        'format_id': '%s-%sp' % (ext, height),
+                        'height': int_or_none(height),
+                        'url': f_url,
+                    }
+                    bitrate = try_get(ua, lambda x: x[i + 2]['bitrate'])
+                    if bitrate:
+                        f['tbr'] = int_or_none(bitrate)
+                    formats.append(f)
+        self._sort_formats(formats)
+
+        author = video.get('author') or {}
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': video.get('i'),
+            'timestamp': parse_iso8601(video.get('pubDate')),
+            'channel': author.get('name'),
+            'channel_url': author.get('url'),
+            'duration': int_or_none(video.get('duration')),
+        }
+
+
+class RumbleChannelIE(InfoExtractor):
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c|user)/(?P<id>[^&?#$/]+))'
+
+    _TESTS = [{
+        'url': 'https://rumble.com/c/Styxhexenhammer666',
+        'playlist_mincount': 1160,
+        'info_dict': {
+            'id': 'Styxhexenhammer666',
+        },
+    }, {
+        'url': 'https://rumble.com/user/goldenpoodleharleyeuna',
+        'playlist_count': 4,
+        'info_dict': {
+            'id': 'goldenpoodleharleyeuna',
+        },
+    }]
+
+    def entries(self, url, playlist_id):
+        for page in itertools.count(1):
+            try:
+                webpage = self._download_webpage(f'{url}?page={page}', playlist_id, note='Downloading page %d' % page)
+            except ExtractorError as e:
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+                    break
+                raise
+            for video_url in re.findall(r'class=video-item--a\s?href=([^>]+\.html)', webpage):
+                yield self.url_result('https://rumble.com' + video_url)
+
+    def _real_extract(self, url):
+        url, playlist_id = self._match_valid_url(url).groups()
+        return self.playlist_result(self.entries(url, playlist_id), playlist_id=playlist_id)
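RumbleChannelIE.entries above pages through /c/... listings until the site answers 404, treating that as the end of the listing rather than an error. The same idiom in isolation — a sketch using urllib instead of the extractor's _download_webpage:

    import itertools
    import urllib.error
    import urllib.request

    def iter_pages(base_url):
        for page in itertools.count(1):
            try:
                with urllib.request.urlopen('%s?page=%d' % (base_url, page)) as resp:
                    yield resp.read().decode('utf-8', 'replace')
            except urllib.error.HTTPError as e:
                if e.code == 404:  # no more pages
                    break
                raise
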
diff --git a/plugins/youtube_download/yt_dlp/extractor/rutube.py b/plugins/youtube_download/yt_dlp/extractor/rutube.py
new file mode 100644
index 0000000..2f753b4
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/rutube.py
@@ -0,0 +1,329 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import itertools
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+)
+from ..utils import (
+    determine_ext,
+    bool_or_none,
+    int_or_none,
+    parse_qs,
+    try_get,
+    unified_timestamp,
+    url_or_none,
+)
+
+
+class RutubeBaseIE(InfoExtractor):
+    def _download_api_info(self, video_id, query=None):
+        if not query:
+            query = {}
+        query['format'] = 'json'
+        return self._download_json(
+            'http://rutube.ru/api/video/%s/' % video_id,
+            video_id, 'Downloading video JSON',
+            'Unable to download video JSON', query=query)
+
+    @staticmethod
+    def _extract_info(video, video_id=None, require_title=True):
+        title = video['title'] if require_title else video.get('title')
+
+        age_limit = video.get('is_adult')
+        if age_limit is not None:
+            age_limit = 18 if age_limit is True else 0
+
+        uploader_id = try_get(video, lambda x: x['author']['id'])
+        category = try_get(video, lambda x: x['category']['name'])
+
+        return {
+            'id': video.get('id') or video_id if video_id else video['id'],
+            'title': title,
+            'description': video.get('description'),
+            'thumbnail': video.get('thumbnail_url'),
+            'duration': int_or_none(video.get('duration')),
+            'uploader': try_get(video, lambda x: x['author']['name']),
+            'uploader_id': compat_str(uploader_id) if uploader_id else None,
+            'timestamp': unified_timestamp(video.get('created_ts')),
+            'category': [category] if category else None,
+            'age_limit': age_limit,
+            'view_count': int_or_none(video.get('hits')),
+            'comment_count': int_or_none(video.get('comments_count')),
+            'is_live': bool_or_none(video.get('is_livestream')),
+        }
+
+    def _download_and_extract_info(self, video_id, query=None):
+        return self._extract_info(
+            self._download_api_info(video_id, query=query), video_id)
+
+    def _download_api_options(self, video_id, query=None):
+        if not query:
+            query = {}
+        query['format'] = 'json'
+        return self._download_json(
+            'http://rutube.ru/api/play/options/%s/' % video_id,
+            video_id, 'Downloading options JSON',
+            'Unable to download options JSON',
+            headers=self.geo_verification_headers(), query=query)
+
+    def _extract_formats(self, options, video_id):
+        formats = []
+        for format_id, format_url in options['video_balancer'].items():
+            ext = determine_ext(format_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
+            elif ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(
+                    format_url, video_id, f4m_id=format_id, fatal=False))
+            else:
+                formats.append({
+                    'url': format_url,
+                    'format_id': format_id,
+                })
+        self._sort_formats(formats)
+        return formats
+
+    def _download_and_extract_formats(self, video_id, query=None):
+        return self._extract_formats(
+            self._download_api_options(video_id, query=query), video_id)
+
+
+class RutubeIE(RutubeBaseIE):
+    IE_NAME = 'rutube'
+    IE_DESC = 'Rutube videos'
+    _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'
+
+    _TESTS = [{
+        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
+        'md5': '1d24f180fac7a02f3900712e5a5764d6',
+        'info_dict': {
+            'id': '3eac3b4561676c17df9132a9a1e62e3e',
+            'ext': 'mp4',
+            'title': 'Раненный кенгуру забежал в аптеку',
+            'description': 'http://www.ntdtv.ru ',
+            'duration': 81,
+            'uploader': 'NTDRussian',
+            'uploader_id': '29790',
+            'timestamp': 1381943602,
+            'upload_date': '20131016',
+            'age_limit': 0,
+        },
+    }, {
+        'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
+        'only_matching': True,
+    }, {
+        'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
+        'only_matching': True,
+    }, {
+        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
+        'only_matching': True,
+    }, {
+        'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url)
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return [mobj.group('url') for mobj in re.finditer(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1',
+            webpage)]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        info = self._download_and_extract_info(video_id)
+        info['formats'] = self._download_and_extract_formats(video_id)
+        return info
+
+
+class RutubeEmbedIE(RutubeBaseIE):
+    IE_NAME = 'rutube:embed'
+    IE_DESC = 'Rutube embedded videos'
+    _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
+
+    _TESTS = [{
+        'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
+        'info_dict': {
+            'id': 'a10e53b86e8f349080f718582ce4c661',
+            'ext': 'mp4',
+            'timestamp': 1387830582,
+            'upload_date': '20131223',
+            'uploader_id': '297833',
+            'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
+            'uploader': 'subziro89 ILya',
+            'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://rutube.ru/play/embed/8083783',
+        'only_matching': True,
+    }, {
+        # private video
+        'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        embed_id = self._match_id(url)
+        # Query may contain private videos token and should be passed to API
+        # requests (see #19163)
+        query = parse_qs(url)
+        options = self._download_api_options(embed_id, query)
+        video_id = options['effective_video']
+        formats = self._extract_formats(options, video_id)
+        info = self._download_and_extract_info(video_id, query)
+        info.update({
+            'extractor_key': 'Rutube',
+            'formats': formats,
+        })
+        return info
+
+
+class RutubePlaylistBaseIE(RutubeBaseIE):
+    def _next_page_url(self, page_num, playlist_id, *args, **kwargs):
+        return self._PAGE_TEMPLATE % (playlist_id, page_num)
+
+    def _entries(self, playlist_id, *args, **kwargs):
+        next_page_url = None
+        for pagenum in itertools.count(1):
+            page = self._download_json(
+                next_page_url or self._next_page_url(
+                    pagenum, playlist_id, *args, **kwargs),
+                playlist_id, 'Downloading page %s' % pagenum)
+
+            results = page.get('results')
+            if not results or not isinstance(results, list):
+                break
+
+            for result in results:
+                video_url = url_or_none(result.get('video_url'))
+                if not video_url:
+                    continue
+                entry = self._extract_info(result, require_title=False)
+                entry.update({
+                    '_type': 'url',
+                    'url': video_url,
+                    'ie_key': RutubeIE.ie_key(),
+                })
+                yield entry
+
+            next_page_url = page.get('next')
+            if not next_page_url or not page.get('has_next'):
+                break
+
+    def _extract_playlist(self, playlist_id, *args, **kwargs):
+        return self.playlist_result(
+            self._entries(playlist_id, *args, **kwargs),
+            playlist_id, kwargs.get('playlist_name'))
+
+    def _real_extract(self, url):
+        return self._extract_playlist(self._match_id(url))
+
+
+class RutubeTagsIE(RutubePlaylistBaseIE):
+    IE_NAME = 'rutube:tags'
+    IE_DESC = 'Rutube tags'
+    _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://rutube.ru/tags/video/1800/',
+        'info_dict': {
+            'id': '1800',
+        },
+        'playlist_mincount': 68,
+    }]
+
+    _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
+
+
+class RutubeMovieIE(RutubePlaylistBaseIE):
+    IE_NAME = 'rutube:movie'
+    IE_DESC = 'Rutube movies'
+    _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
+    _TESTS = []
+
+    _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
+    _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
+
+    def _real_extract(self, url):
+        movie_id = self._match_id(url)
+        movie = self._download_json(
+            self._MOVIE_TEMPLATE % movie_id, movie_id,
+            'Downloading movie JSON')
+        return self._extract_playlist(
+            movie_id, playlist_name=movie.get('name'))
+
+
+class RutubePersonIE(RutubePlaylistBaseIE):
+    IE_NAME = 'rutube:person'
+    IE_DESC = 'Rutube person videos'
+    _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://rutube.ru/video/person/313878/',
+        'info_dict': {
+            'id': '313878',
+        },
+        'playlist_mincount': 37,
+    }]
+
+    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
+
+
+class RutubePlaylistIE(RutubePlaylistBaseIE):
+    IE_NAME = 'rutube:playlist'
+    IE_DESC = 'Rutube playlists'
+    _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag',
+        'info_dict': {
+            'id': '3097',
+        },
+        'playlist_count': 27,
+    }, {
+        'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
+        'only_matching': True,
+    }]
+
+    _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json'
+
+    @classmethod
+    def suitable(cls, url):
+        from ..utils import int_or_none, parse_qs
+
+        if not super(RutubePlaylistIE, cls).suitable(url):
+            return False
+        params = parse_qs(url)
+        return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0])
+
+    def _next_page_url(self, page_num, playlist_id, item_kind):
+        return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num)
+
+    def _real_extract(self, url):
+        qs = parse_qs(url)
+        playlist_kind = qs['pl_type'][0]
+        playlist_id = qs['pl_id'][0]
+        return self._extract_playlist(playlist_id, item_kind=playlist_kind)
+
+
+class RutubeChannelIE(RutubePlaylistBaseIE):
+    IE_NAME = 'rutube:channel'
+    IE_DESC = 'Rutube channel'
+    _VALID_URL = r'https?://rutube\.ru/channel/(?P<id>\d+)/videos'
+    _TESTS = [{
+        'url': 'https://rutube.ru/channel/639184/videos/',
+        'info_dict': {
+            'id': '639184',
+        },
+        'playlist_mincount': 133,
+    }]
+
+    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
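As the comment in RutubeEmbedIE._real_extract above notes, a private video's access token arrives in the embed URL's query string (?p=...) and has to be replayed on the API requests. A small sketch of that pass-through using only the standard library — api_url_with_token is a hypothetical helper, not part of the diff:

    from urllib.parse import parse_qs, urlencode, urlparse

    def api_url_with_token(embed_url, api_url):
        # Re-encode whatever query the embed URL carried onto the API call.
        query = parse_qs(urlparse(embed_url).query)
        qs = urlencode(query, doseq=True)
        return api_url + ('?' + qs if qs else '')

    # api_url_with_token('https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ',
    #                    'http://rutube.ru/api/play/options/10631925/')
    # -> 'http://rutube.ru/api/play/options/10631925/?p=IbAigKqWd1do4mjaM5XLIQ'
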
diff --git a/plugins/youtube_download/yt_dlp/extractor/rutv.py b/plugins/youtube_download/yt_dlp/extractor/rutv.py
new file mode 100644
index 0000000..3de86b2
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/rutv.py
@@ -0,0 +1,211 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none
+)
+
+
+class RUTVIE(InfoExtractor):
+    IE_DESC = 'RUTV.RU'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:test)?player\.(?:rutv\.ru|vgtrk\.com)/
+                        (?P<path>
+                            flash\d+v/container\.swf\?id=|
+                            iframe/(?P<type>swf|video|live)/id/|
+                            index/iframe/cast_id/
+                        )
+                        (?P<id>\d+)
+                    '''
+
+    _TESTS = [
+        {
+            'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724',
+            'info_dict': {
+                'id': '774471',
+                'ext': 'mp4',
+                'title': 'Монологи на все времена',
+                'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
+                'duration': 2906,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638',
+            'info_dict': {
+                'id': '774016',
+                'ext': 'mp4',
+                'title': 'Чужой в семье Сталина',
+                'description': '',
+                'duration': 2539,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000',
+            'info_dict': {
+                'id': '766888',
+                'ext': 'mp4',
+                'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
+                'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
+                'duration': 279,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169',
+            'info_dict': {
+                'id': '771852',
+                'ext': 'mp4',
+                'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет',
+                'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8',
+                'duration': 3096,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014',
+            'info_dict': {
+                'id': '51499',
+                'ext': 'flv',
+                'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
+                'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
+            },
+            'skip': 'Translation has finished',
+        },
+        {
+            'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/',
+            'info_dict': {
+                'id': '21',
+                'ext': 'mp4',
+                'title': 're:^Россия 24. Прямой эфир [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+                'is_live': True,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/',
+            'only_matching': True,
+        },
+    ]
+
+    @classmethod
+    def _extract_url(cls, webpage):
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
+        if mobj:
+            return mobj.group('url')
+
+        mobj = re.search(
+            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
+            webpage)
+        if mobj:
+            return mobj.group('url')
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        video_id = mobj.group('id')
+        video_path = mobj.group('path')
+
+        if re.match(r'flash\d+v', video_path):
+            video_type = 'video'
+        elif video_path.startswith('iframe'):
+            video_type = mobj.group('type')
+            if video_type == 'swf':
+                video_type = 'video'
+        elif video_path.startswith('index/iframe/cast_id'):
+            video_type = 'live'
+
+        is_live = video_type == 'live'
+
+        json_data = self._download_json(
+            'http://player.vgtrk.com/iframe/data%s/id/%s' % ('live' if is_live else 'video', video_id),
+            video_id, 'Downloading JSON')
+
+        if json_data['errors']:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True)
+
+        playlist = json_data['data']['playlist']
+        medialist = playlist['medialist']
+        media = medialist[0]
+
+        if media['errors']:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True)
+
+        view_count = playlist.get('count_views')
+        priority_transport = playlist['priority_transport']
+
+        thumbnail = media['picture']
+        width = int_or_none(media['width'])
+        height = int_or_none(media['height'])
+        description = media['anons']
+        title = media['title']
+        duration = int_or_none(media.get('duration'))
+
+        formats = []
+
+        for transport, links in media['sources'].items():
+            for quality, url in links.items():
+                preference = -1 if priority_transport == transport else -2
+                if transport == 'rtmp':
+                    mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
+                    if not mobj:
+                        continue
+                    fmt = {
+                        'url': mobj.group('url'),
+                        'play_path': mobj.group('playpath'),
+                        'app': mobj.group('app'),
+                        'page_url': 'http://player.rutv.ru',
+                        'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22',
+                        'rtmp_live': True,
+                        'ext': 'flv',
+                        'vbr': int(quality),
+                        'quality': preference,
+                    }
+                elif transport == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        url, video_id, 'mp4', quality=preference, m3u8_id='hls'))
+                    continue
+                else:
+                    fmt = {
+                        'url': url
+                    }
+                fmt.update({
+                    'width': width,
+                    'height': height,
+                    'format_id': '%s-%s' % (transport, quality),
+                })
+                formats.append(fmt)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'view_count': view_count,
+            'duration': duration,
+            'formats': formats,
+            'is_live': is_live,
+        }
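The RTMP branch in RUTVIE._real_extract above splits one combined URL into the pieces an RTMP downloader expects (connection URL, app, play path). The same split in isolation — split_rtmp is a hypothetical helper using the regex from the diff:

    import re

    def split_rtmp(url):
        mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
        if not mobj:
            return None
        return {
            'url': mobj.group('url'),             # rtmp://host/app
            'app': mobj.group('app'),             # application name
            'play_path': mobj.group('playpath'),  # stream key
        }

    # split_rtmp('rtmp://example.com/live/stream1')
    # -> {'url': 'rtmp://example.com/live', 'app': 'live', 'play_path': 'stream1'}
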
diff --git a/plugins/youtube_download/yt_dlp/extractor/ruutu.py b/plugins/youtube_download/yt_dlp/extractor/ruutu.py
new file mode 100644
index 0000000..d9cf39d
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/ruutu.py
@@ -0,0 +1,227 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
+from ..utils import (
+    determine_ext,
+    ExtractorError,
+    find_xpath_attr,
+    int_or_none,
+    unified_strdate,
+    url_or_none,
+    xpath_attr,
+    xpath_text,
+)
+
+
+class RuutuIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla|audio)/|
+                            static\.nelonenmedia\.fi/player/misc/embed_player\.html\?.*?\bnid=
+                        )
+                        (?P<id>\d+)
+                    '''
+    _TESTS = [
+        {
+            'url': 'http://www.ruutu.fi/video/2058907',
+            'md5': 'ab2093f39be1ca8581963451b3c0234f',
+            'info_dict': {
+                'id': '2058907',
+                'ext': 'mp4',
+                'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
+                'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'duration': 114,
+                'age_limit': 0,
+            },
+        },
+        {
+            'url': 'http://www.ruutu.fi/video/2057306',
+            'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
+            'info_dict': {
+                'id': '2057306',
+                'ext': 'mp4',
+                'title': 'Superpesis: katso koko kausi Ruudussa',
+                'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23',
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'duration': 40,
+                'age_limit': 0,
+            },
+        },
+        {
+            'url': 'http://www.supla.fi/supla/2231370',
+            'md5': 'df14e782d49a2c0df03d3be2a54ef949',
+            'info_dict': {
+                'id': '2231370',
+                'ext': 'mp4',
+                'title': 'Osa 1: Mikael Jungner',
+                'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe',
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'age_limit': 0,
+            },
+        },
+        # Episode where <sourceFile> is "NOT-USED", but has other
+        # downloadable sources available.
+        {
+            'url': 'http://www.ruutu.fi/video/3193728',
+            'only_matching': True,
+        },
+        {
+            # audio podcast
+            'url': 'https://www.supla.fi/supla/3382410',
+            'md5': 'b9d7155fed37b2ebf6021d74c4b8e908',
+            'info_dict': {
+                'id': '3382410',
+                'ext': 'mp3',
+                'title': 'Mikä ihmeen poltergeist?',
+                'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52',
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'age_limit': 0,
+            },
+            'expected_warnings': [
+                'HTTP Error 502: Bad Gateway',
+                'Failed to download m3u8 information',
+            ],
+        },
+        {
+            'url': 'http://www.supla.fi/audio/2231370',
+            'only_matching': True,
+        },
+        {
+            'url': 'https://static.nelonenmedia.fi/player/misc/embed_player.html?nid=3618790',
+            'only_matching': True,
+        },
+        {
+            # episode
+            'url': 'https://www.ruutu.fi/video/3401964',
+            'info_dict': {
+                'id': '3401964',
+                'ext': 'mp4',
+                'title': 'Temptation Island Suomi - Kausi 5 - Jakso 17',
+                'description': 'md5:87cf01d5e1e88adf0c8a2937d2bd42ba',
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'duration': 2582,
+                'age_limit': 12,
+                'upload_date': '20190508',
+                'series': 'Temptation Island Suomi',
+                'season_number': 5,
+                'episode_number': 17,
+                'categories': ['Reality ja tositapahtumat', 'Kotimaiset suosikit', 'Romantiikka ja parisuhde'],
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            # premium
+            'url': 'https://www.ruutu.fi/video/3618715',
+            'only_matching': True,
+        },
+    ]
+    _API_BASE = 'https://gatling.nelonenmedia.fi'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        video_xml = self._download_xml(
+            '%s/media-xml-cache' % self._API_BASE, video_id,
+            query={'id': video_id})
+
+        formats = []
+        processed_urls = []
+
+        def extract_formats(node):
+            for child in node:
+                if child.tag.endswith('Files'):
+                    extract_formats(child)
+                elif child.tag.endswith('File'):
+                    video_url = child.text
+                    if (not video_url or video_url in processed_urls
+                            or any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
+                        continue
+                    processed_urls.append(video_url)
+                    ext = determine_ext(video_url)
+                    auth_video_url = url_or_none(self._download_webpage(
+                        '%s/auth/access/v2' % self._API_BASE, video_id,
+                        note='Downloading authenticated %s stream URL' % ext,
+                        fatal=False, query={'stream': video_url}))
+                    if auth_video_url:
+                        processed_urls.append(auth_video_url)
+                        video_url = auth_video_url
+                    if ext == 'm3u8':
+                        formats.extend(self._extract_m3u8_formats(
+                            video_url, video_id, 'mp4',
+                            entry_protocol='m3u8_native', m3u8_id='hls',
+                            fatal=False))
+                    elif ext == 'f4m':
+                        formats.extend(self._extract_f4m_formats(
+                            video_url, video_id, f4m_id='hds', fatal=False))
+                    elif ext == 'mpd':
+                        # video-only and audio-only streams are of different
+                        # duration resulting in out of sync issue
+                        continue
+                        formats.extend(self._extract_mpd_formats(
+                            video_url, video_id, mpd_id='dash', fatal=False))
+                    elif ext == 'mp3' or child.tag == 'AudioMediaFile':
+                        formats.append({
+                            'format_id': 'audio',
+                            'url': video_url,
+                            'vcodec': 'none',
+                        })
+                    else:
+                        proto = compat_urllib_parse_urlparse(video_url).scheme
+                        if not child.tag.startswith('HTTP') and proto != 'rtmp':
+                            continue
+                        preference = -1 if proto == 'rtmp' else 1
+                        label = child.get('label')
+                        tbr = int_or_none(child.get('bitrate'))
+                        format_id = '%s-%s' % (proto, label if label else tbr) if label or tbr else proto
+                        if not self._is_valid_url(video_url, video_id, format_id):
+                            continue
+                        width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]]
+                        formats.append({
+                            'format_id': format_id,
+                            'url': video_url,
+                            'width': width,
+                            'height': height,
+                            'tbr': tbr,
+                            'preference': preference,
+                        })
+
+        extract_formats(video_xml.find('./Clip'))
+
+        def pv(name):
+            node = find_xpath_attr(
+                video_xml, './Clip/PassthroughVariables/variable', 'name', name)
+            if node is not None:
+                return node.get('value')
+
+        if not formats:
+            if (not self.get_param('allow_unplayable_formats')
+                    and xpath_text(video_xml, './Clip/DRM', default=None)):
+                self.report_drm(video_id)
+            ns_st_cds = pv('ns_st_cds')
+            if ns_st_cds != 'free':
+                raise ExtractorError('This video is %s.' % ns_st_cds, expected=True)
+
+        self._sort_formats(formats)
+
+        themes = pv('themes')
+
+        return {
+            'id': video_id,
+            'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True),
+            'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'),
+            'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'),
+            'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')) or int_or_none(pv('runtime')),
+            'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
+            'upload_date': unified_strdate(pv('date_start')),
+            'series': pv('series_name'),
+            'season_number': int_or_none(pv('season_number')),
+            'episode_number': int_or_none(pv('episode_number')),
+            'categories': themes.split(',') if themes else [],
+            'formats': formats,
+        }
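extract_formats above walks Ruutu's media XML recursively: container tags end in "Files", leaf tags end in "File". The traversal on its own, as a standard-library-only sketch:

    import xml.etree.ElementTree as ET

    def collect_file_urls(node):
        urls = []
        for child in node:
            if child.tag.endswith('Files'):        # container: recurse
                urls.extend(collect_file_urls(child))
            elif child.tag.endswith('File') and child.text:
                urls.append(child.text.strip())    # leaf: collect the URL
        return urls

    # collect_file_urls(ET.fromstring(
    #     '<Clip><HTTPMediaFiles><HTTPMediaFile>http://a/v.m3u8</HTTPMediaFile></HTTPMediaFiles></Clip>'))
    # -> ['http://a/v.m3u8']
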
diff --git a/plugins/youtube_download/yt_dlp/extractor/ruv.py b/plugins/youtube_download/yt_dlp/extractor/ruv.py
new file mode 100644
index 0000000..8f3cc40
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/ruv.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    unified_timestamp,
+)
+
+
+class RuvIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)'
+    _TESTS = [{
+        # m3u8
+        'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516',
+        'md5': '66347652f4e13e71936817102acc1724',
+        'info_dict': {
+            'id': '1144499',
+            'display_id': 'fh-valur/20170516',
+            'ext': 'mp4',
+            'title': 'FH - Valur',
+            'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.',
+            'timestamp': 1494963600,
+            'upload_date': '20170516',
+        },
+    }, {
+        # mp3
+        'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619',
+        'md5': '395ea250c8a13e5fdb39d4670ef85378',
+        'info_dict': {
+            'id': '1153630',
+            'display_id': 'morgunutvarpid/20170619',
+            'ext': 'mp3',
+            'title': 'Morgunútvarpið',
+            'description': 'md5:a4cf1202c0a1645ca096b06525915418',
+            'timestamp': 1497855000,
+            'upload_date': '20170619',
+        },
+    }, {
+        'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.ruv.is/node/1151854',
+        'only_matching': True,
+    }, {
+        'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun',
+        'only_matching': True,
+    }, {
+        'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._og_search_title(webpage)
+
+        FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'
+
+        media_url = self._html_search_regex(
+            FIELD_RE % 'src', webpage, 'video URL', group='url')
+
+        video_id = self._search_regex(
+            r'<link[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)',
+            webpage, 'video id', default=display_id)
+
+        ext = determine_ext(media_url)
+
+        if ext == 'm3u8':
+            formats = self._extract_m3u8_formats(
+                media_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                m3u8_id='hls')
+        elif ext == 'mp3':
+            formats = [{
+                'format_id': 'mp3',
+                'url': media_url,
+                'vcodec': 'none',
+            }]
+        else:
+            formats = [{
+                'url': media_url,
+            }]
+
+        description = self._og_search_description(webpage, default=None)
+        thumbnail = self._og_search_thumbnail(
+            webpage, default=None) or self._search_regex(
+            FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False)
+        timestamp = unified_timestamp(self._html_search_meta(
+            'article:published_time', webpage, 'timestamp', fatal=False))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'formats': formats,
+        }
diff --git a/plugins/youtube_download/yt_dlp/extractor/safari.py b/plugins/youtube_download/yt_dlp/extractor/safari.py
new file mode 100644
index 0000000..cca4464
--- /dev/null
+++ b/plugins/youtube_download/yt_dlp/extractor/safari.py
@@ -0,0 +1,269 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+
+from ..compat import (
+    compat_parse_qs,
+    compat_urlparse,
+)
+from ..utils import (
+    ExtractorError,
+    update_url_query,
+)
+
+
+class SafariBaseIE(InfoExtractor):
+    _LOGIN_URL = 'https://learning.oreilly.com/accounts/login/'
+    _NETRC_MACHINE = 'safari'
+
+    _API_BASE = 'https://learning.oreilly.com/api/v1'
+    _API_FORMAT = 'json'
+
+    LOGGED_IN = False
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        _, urlh = self._download_webpage_handle(
+            'https://learning.oreilly.com/accounts/login-check/', None,
+            'Downloading login page')
+
+        def is_logged(urlh):
+            return 'learning.oreilly.com/home/' in urlh.geturl()
+
+        if is_logged(urlh):
+            self.LOGGED_IN = True
+            return
+
+        redirect_url = urlh.geturl()
+        parsed_url = compat_urlparse.urlparse(redirect_url)
+        qs = compat_parse_qs(parsed_url.query)
+        next_uri = compat_urlparse.urljoin(
+            'https://api.oreilly.com', qs['next'][0])
+
+        auth, urlh = self._download_json_handle(
+            'https://www.oreilly.com/member/auth/login/', None, 'Logging in',
+            data=json.dumps({
+                'email': username,
+                'password': password,
+                'redirect_uri': next_uri,
+            }).encode(), headers={
+                'Content-Type': 'application/json',
+                'Referer': redirect_url,
+            }, expected_status=400)
+
+        credentials = auth.get('credentials')
+        if (not auth.get('logged_in') and not auth.get('redirect_uri')
+                and credentials):
+            raise ExtractorError(
+                'Unable to login: %s' % credentials, expected=True)
+
+        # oreilly serves two same instances of the following cookies
+        # in Set-Cookie header and expects first one to be actually set
+        for cookie in ('groot_sessionid', 'orm-jwt', 'orm-rt'):
+            self._apply_first_set_cookie_header(urlh, cookie)
+
+        _, urlh = self._download_webpage_handle(
+            auth.get('redirect_uri') or next_uri, None, 'Completing login',)
+
+        if is_logged(urlh):
+            self.LOGGED_IN = True
+            return
+
+        raise ExtractorError('Unable to log in')
+
+
+class SafariIE(SafariBaseIE):
+    IE_NAME = 'safari'
+    IE_DESC = 'safaribooksonline.com online video'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
+                        (?:
+                            library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
+                            videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
+                        )
+                    '''
+
+    _TESTS = [{
+        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
+        'md5': 'dcc5a425e79f2564148652616af1f2a3',
+        'info_dict': {
+            'id': '0_qbqx90ic',
+            'ext': 'mp4',
+            'title': 'Introduction to Hadoop Fundamentals LiveLessons',
+            'timestamp': 1437758058,
+            'upload_date': '20150724',
+            'uploader_id': 'stork',
+        },
+    }, {
+        # non-digits in course id
+        'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
+        'only_matching': True,
+    }, {
+        'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html',
+        'only_matching': True,
+    }]
+
+    _PARTNER_ID = '1926081'
+    _UICONF_ID = '29375172'
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+
+        reference_id = mobj.group('reference_id')
+        if reference_id:
+            video_id = reference_id
+            partner_id = self._PARTNER_ID
+            ui_id = self._UICONF_ID
+        else:
+            video_id = '%s-%s' % (mobj.group('course_id'), mobj.group('part'))
+
+            webpage, urlh = self._download_webpage_handle(url, video_id)
+
+            mobj = re.match(self._VALID_URL, urlh.geturl())
+            reference_id = mobj.group('reference_id')
+            if not reference_id:
+                reference_id = self._search_regex(
+                    r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+                    webpage, 'kaltura reference id', group='id')
+            partner_id = self._search_regex(
+                r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+                webpage, 'kaltura widget id', default=self._PARTNER_ID,
+                group='id')
+            ui_id = self._search_regex(
+                r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+                webpage, 'kaltura uiconf id', default=self._UICONF_ID,
+                group='id')
+
+        query = {
+            'wid': '_%s' % partner_id,
+            'uiconf_id': ui_id,
+            'flashvars[referenceId]': reference_id,
+        }
+
+        if self.LOGGED_IN:
+            kaltura_session = self._download_json(
+                '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id),
+                video_id, 'Downloading kaltura session JSON',
+                'Unable to download kaltura session JSON', fatal=False,
+                headers={'Accept': 'application/json'})
+            if kaltura_session:
+                session = kaltura_session.get('session')
+                if session:
+                    query['flashvars[ks]'] = session
+
+        return self.url_result(update_url_query(
+            'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query),
+            'Kaltura')
+
+
+class SafariApiIE(SafariBaseIE):
+    IE_NAME = 'safari:api'
+    _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
+
+    _TESTS = [{
+        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        part = self._download_json(
+            url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')),
+            'Downloading part JSON')
+        web_url = part['web_url']
+        if 'library/view' in web_url:
+            web_url = web_url.replace('library/view', 'videos')
+            natural_keys = part['natural_key']
+            web_url = f'{web_url.rsplit("/", 1)[0]}/{natural_keys[0]}-{natural_keys[1][:-5]}'
+        return self.url_result(web_url, SafariIE.ie_key())
+
+
+class SafariCourseIE(SafariBaseIE):
+    IE_NAME = 'safari:course'
+    IE_DESC = 'safaribooksonline.com online courses'
+
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
+                            (?:
+                                library/view/[^/]+|
+                                api/v1/book|
+                                videos/[^/]+
+                            )|
+                            techbus\.safaribooksonline\.com
+                        )
+                        /(?P<id>[^/]+)
+                    '''
+
+    _TESTS = [{
+        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+        'info_dict': {
+            'id': '9780133392838',
+            'title': 'Hadoop Fundamentals LiveLessons',
+        },
+        'playlist_count': 22,
+        'skip': 'Requires safaribooksonline account credentials',
+    }, {
+        'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
+        'only_matching': True,
+    }, {
+        'url': 'http://techbus.safaribooksonline.com/9780134426365',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
+        'only_matching': True,
+    }, {
+        'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return (False if SafariIE.suitable(url) or SafariApiIE.suitable(url)
+                else super(SafariCourseIE, cls).suitable(url))
+
+    def _real_extract(self, url):
+        course_id = self._match_id(url)
+
+        course_json = self._download_json(
+            '%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
+            course_id, 'Downloading course JSON')
+
+        if 'chapters' not in course_json:
+            raise ExtractorError(
+                'No chapters found for course %s' % course_id, expected=True)
+
+        entries = [
+            self.url_result(chapter, SafariApiIE.ie_key())
+            for chapter in course_json['chapters']]
+
+        course_title = course_json['title']
+
+        return self.playlist_result(entries, course_id, course_title)
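SafariBaseIE._login above works around O'Reilly sending the same cookie twice in one response and expecting the first occurrence to win (hence _apply_first_set_cookie_header). The rule in isolation — first_cookie_values is a hypothetical helper operating on raw header values:

    def first_cookie_values(set_cookie_headers):
        # Keep the first value seen for each cookie name; ignore attributes.
        chosen = {}
        for header in set_cookie_headers:
            name, _, rest = header.partition('=')
            chosen.setdefault(name.strip(), rest.split(';', 1)[0])
        return chosen

    # first_cookie_values(['orm-jwt=fresh; Path=/', 'orm-jwt=stale; Path=/'])
    # -> {'orm-jwt': 'fresh'}
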
diff --git a/plugins/youtube_download/yt_dlp/extractor/saitosan.py b/plugins/youtube_download/yt_dlp/extractor/saitosan.py new file mode 100644 index 0000000..621335c --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/saitosan.py @@ -0,0 +1,78 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, try_get
+
+
+class SaitosanIE(InfoExtractor):
+    IE_NAME = 'Saitosan'
+    _VALID_URL = r'https?://(?:www\.)?saitosan\.net/bview.html\?id=(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://www.saitosan.net/bview.html?id=10031846',
+        'info_dict': {
+            'id': '10031846',
+            'ext': 'mp4',
+            'title': '井下原 和弥',
+            'uploader': '井下原 和弥',
+            'thumbnail': 'http://111.171.196.85:8088/921f916f-7f55-4c97-b92e-5d9d0fef8f5f/thumb',
+            'is_live': True,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'skip': 'Broadcasts are ephemeral',
+    },
+    {
+        'url': 'http://www.saitosan.net/bview.html?id=10031795',
+        'info_dict': {
+            'id': '10031795',
+            'ext': 'mp4',
+            'title': '橋本',
+            'uploader': '橋本',
+            'thumbnail': 'http://111.171.196.85:8088/1a3933e1-a01a-483b-8931-af15f37f8082/thumb',
+            'is_live': True,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'skip': 'Broadcasts are ephemeral',
+    }]
+
+    def _real_extract(self, url):
+        b_id = self._match_id(url)
+
+        base = 'http://hankachi.saitosan-api.net:8002/socket.io/?transport=polling&EIO=3'
+        sid = self._download_socket_json(base, b_id, note='Opening socket').get('sid')
+        base += '&sid=' + sid
+
+        self._download_webpage(base, b_id, note='Polling socket')
+        payload = '420["room_start_join",{"room_id":"%s"}]' % b_id
+        payload = '%s:%s' % (len(payload), payload)
+
+        self._download_webpage(base, b_id, data=payload, note='Polling socket with payload')
+        response = self._download_socket_json(base, b_id, note='Polling socket')
+        if not response.get('ok'):
+            err = response.get('error') or {}
+            raise ExtractorError(
+                '%s said: %s - %s' % (self.IE_NAME, err.get('code', '?'), err.get('msg', 'Unknown')) if err
+                else 'The socket reported that the broadcast could not be joined. Maybe it\'s offline or the URL is incorrect',
+                expected=True, video_id=b_id)
+
+        self._download_webpage(base, b_id, data='26:421["room_finish_join",{}]', note='Polling socket')
+        b_data = self._download_socket_json(base, b_id, note='Getting broadcast metadata from socket')
+        m3u8_url = b_data.get('url')
+
+        self._download_webpage(base, b_id, data='1:1', note='Closing socket', fatal=False)
+
+        return {
+            'id': b_id,
+            'title': b_data.get('name'),
+            'formats': self._extract_m3u8_formats(m3u8_url, b_id, 'mp4', live=True),
+            'thumbnail': m3u8_url.replace('av.m3u8', 'thumb'),
+            'uploader': try_get(b_data, lambda x: x['broadcast_user']['name']),  # same as title
+            'is_live': True
+        }
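Side note (editor's sketch, not part of the patch): SaitosanIE speaks the socket.io long-polling wire format by hand. Each request body is a length-prefixed packet: engine.io type 4 ('message') + socket.io type 2 ('event') + an ack id + a JSON [event, data] array. The helper below reproduces the exact payloads seen above.

    import json

    def frame_event(event, data, ack_id=0):
        # '4' (engine.io message) + '2' (socket.io event) + ack id + JSON body,
        # all prefixed with the packet's character length and a colon.
        packet = '42%d%s' % (ack_id, json.dumps([event, data], separators=(',', ':')))
        return '%s:%s' % (len(packet), packet)

    print(frame_event('room_start_join', {'room_id': '10031846'}))
    print(frame_event('room_finish_join', {}, ack_id=1))  # -> 26:421["room_finish_join",{}]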
diff --git a/plugins/youtube_download/yt_dlp/extractor/samplefocus.py b/plugins/youtube_download/yt_dlp/extractor/samplefocus.py new file mode 100644 index 0000000..806c3c3 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/samplefocus.py @@ -0,0 +1,100 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    extract_attributes,
+    get_element_by_attribute,
+    int_or_none,
+)
+
+
+class SampleFocusIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?samplefocus\.com/samples/(?P<id>[^/?&#]+)'
+    _TESTS = [{
+        'url': 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar',
+        'md5': '48c8d62d60be467293912e0e619a5120',
+        'info_dict': {
+            'id': '40316',
+            'display_id': 'lil-peep-sad-emo-guitar',
+            'ext': 'mp3',
+            'title': 'Lil Peep Sad Emo Guitar',
+            'thumbnail': r're:^https?://.+\.png',
+            'license': 'Standard License',
+            'uploader': 'CapsCtrl',
+            'uploader_id': 'capsctrl',
+            'like_count': int,
+            'comment_count': int,
+            'categories': ['Samples', 'Guitar', 'Electric guitar'],
+        },
+    }, {
+        'url': 'https://samplefocus.com/samples/dababy-style-bass-808',
+        'only_matching': True
+    }, {
+        'url': 'https://samplefocus.com/samples/young-chop-kick',
+        'only_matching': True
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        sample_id = self._search_regex(
+            r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)',
+            webpage, 'sample id', group='id')
+
+        title = self._og_search_title(webpage, fatal=False) or self._html_search_regex(
+            r'<h1>(.+?)</h1>', webpage, 'title')
+
+        mp3_url = self._search_regex(
+            r'<input[^>]+id=(["\'])sample_mp3\1[^>]+value=(["\'])(?P<url>(?:(?!\2).)+)',
+            webpage, 'mp3', fatal=False, group='url') or extract_attributes(self._search_regex(
+            r'<meta[^>]+itemprop=(["\'])contentUrl\1[^>]*>',
+            webpage, 'mp3 url', group=0))['content']
+
+        thumbnail = self._og_search_thumbnail(webpage) or self._html_search_regex(
+            r'<img[^>]+class=(?:["\'])waveform responsive-img[^>]+src=(["\'])(?P<url>(?:(?!\1).)+)',
+            webpage, 'mp3', fatal=False, group='url')
+
+        comments = []
+        for author_id, author, body in re.findall(r'(?s)<p[^>]+class="comment-author"><a[^>]+href="/users/([^"]+)">([^"]+)</a>.+?<div[^>]+class="comment-body">([^>]+)</div>', webpage):
+            comments.append({
+                'author': author,
+                'author_id': author_id,
+                'text': body,
+            })
+
+        uploader_id = uploader = None
+        mobj = re.search(r'>By <a[^>]+href="/users/([^"]+)"[^>]*>([^<]+)', webpage)
+        if mobj:
+            uploader_id, uploader = mobj.groups()
+
+        breadcrumb = get_element_by_attribute('typeof', 'BreadcrumbList', webpage)
+        categories = []
+        if breadcrumb:
+            for _, name in re.findall(r'<span[^>]+property=(["\'])name\1[^>]*>([^<]+)', breadcrumb):
+                categories.append(name)
+
+        def extract_count(klass):
+            return int_or_none(self._html_search_regex(
+                r'<span[^>]+class=(?:["\'])?%s-count[^>]*>(\d+)' % klass,
+                webpage, klass, fatal=False))
+
+        return {
+            'id': sample_id,
+            'title': title,
+            'url': mp3_url,
+            'display_id': display_id,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'license': self._html_search_regex(
+                r'<a[^>]+href=(["\'])/license\1[^>]*>(?P<license>[^<]+)<',
+                webpage, 'license', fatal=False, group='license'),
+            'uploader_id': uploader_id,
+            'like_count': extract_count('sample-%s-favorites' % sample_id),
+            'comment_count': extract_count('comments'),
+            'comments': comments,
+            'categories': categories,
+        }
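Side note (editor's sketch, not part of the patch): several patterns above capture the opening quote of an attribute and then use the tempered dot (?:(?!\N).)+ to consume everything up to the next occurrence of that same quote. A minimal demonstration:

    import re

    # Group 1 captures whichever quote opened the attribute; (?:(?!\1).)+ then
    # matches any run of characters that does not contain that quote.
    pattern = r'value=(["\'])(?P<url>(?:(?!\1).)+)\1'
    print(re.search(pattern, 'value="https://a/b.mp3"').group('url'))  # https://a/b.mp3
    print(re.search(pattern, "value='it\"s.mp3'").group('url'))        # it"s.mp3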
diff --git a/plugins/youtube_download/yt_dlp/extractor/sapo.py b/plugins/youtube_download/yt_dlp/extractor/sapo.py new file mode 100644 index 0000000..df202a3 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/sapo.py @@ -0,0 +1,119 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    unified_strdate,
+)
+
+
+class SapoIE(InfoExtractor):
+    IE_DESC = 'SAPO Vídeos'
+    _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})'
+
+    _TESTS = [
+        {
+            'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi',
+            'md5': '79ee523f6ecb9233ac25075dee0eda83',
+            'note': 'SD video',
+            'info_dict': {
+                'id': 'UBz95kOtiWYUMTA5Ghfi',
+                'ext': 'mp4',
+                'title': 'Benfica - Marcas na Hitória',
+                'description': 'md5:c9082000a128c3fd57bf0299e1367f22',
+                'duration': 264,
+                'uploader': 'tiago_1988',
+                'upload_date': '20080229',
+                'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'],
+            },
+        },
+        {
+            'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF',
+            'md5': '90a2f283cfb49193fe06e861613a72aa',
+            'note': 'HD video',
+            'info_dict': {
+                'id': 'IyusNAZ791ZdoCY5H5IF',
+                'ext': 'mp4',
+                'title': 'Codebits VII - Report',
+                'description': 'md5:6448d6fd81ce86feac05321f354dbdc8',
+                'duration': 144,
+                'uploader': 'codebits',
+                'upload_date': '20140427',
+                'categories': ['codebits', 'codebits2014'],
+            },
+        },
+        {
+            'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz',
+            'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac',
+            'note': 'v2 video',
+            'info_dict': {
+                'id': 'yLqjzPtbTimsn2wWBKHz',
+                'ext': 'mp4',
+                'title': 'Hipnose Condicionativa 4',
+                'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40',
+                'duration': 692,
+                'uploader': 'sapozen',
+                'upload_date': '20090609',
+                'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'],
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        video_id = mobj.group('id')
+
+        item = self._download_xml(
+            'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id).find('./channel/item')
+
+        title = item.find('./title').text
+        description = item.find('./{http://videos.sapo.pt/mrss/}synopse').text
+        thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url')
+        duration = parse_duration(item.find('./{http://videos.sapo.pt/mrss/}time').text)
+        uploader = item.find('./{http://videos.sapo.pt/mrss/}author').text
+        upload_date = unified_strdate(item.find('./pubDate').text)
+        view_count = int(item.find('./{http://videos.sapo.pt/mrss/}views').text)
+        comment_count = int(item.find('./{http://videos.sapo.pt/mrss/}comment_count').text)
+        tags = item.find('./{http://videos.sapo.pt/mrss/}tags').text
+        categories = tags.split() if tags else []
+        age_limit = 18 if item.find('./{http://videos.sapo.pt/mrss/}m18').text == 'true' else 0
+
+        video_url = item.find('./{http://videos.sapo.pt/mrss/}videoFile').text
+        video_size = item.find('./{http://videos.sapo.pt/mrss/}videoSize').text.split('x')
+
+        formats = [{
+            'url': video_url,
+            'ext': 'mp4',
+            'format_id': 'sd',
+            'width': int(video_size[0]),
+            'height': int(video_size[1]),
+        }]
+
+        if item.find('./{http://videos.sapo.pt/mrss/}HD').text == 'true':
+            formats.append({
+                'url': re.sub(r'/mov/1$', '/mov/39', video_url),
+                'ext': 'mp4',
+                'format_id': 'hd',
+                'width': 1280,
+                'height': 720,
+            })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'uploader': uploader,
+            'upload_date': upload_date,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'categories': categories,
+            'age_limit': age_limit,
+            'formats': formats,
+        }
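Side note (editor's sketch, not part of the patch): SapoIE reads the MRSS feed with plain ElementTree calls, so namespaced tags must be spelled in {namespace-uri}tag (Clark) notation. The sample XML below is made up.

    import xml.etree.ElementTree as ET

    MRSS = '{http://videos.sapo.pt/mrss/}'
    item = ET.fromstring(
        '<item xmlns:sapo="http://videos.sapo.pt/mrss/">'
        '<sapo:time>00:04:24</sapo:time></item>')
    print(item.find('./%stime' % MRSS).text)  # 00:04:24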
diff --git a/plugins/youtube_download/yt_dlp/extractor/savefrom.py b/plugins/youtube_download/yt_dlp/extractor/savefrom.py new file mode 100644 index 0000000..98efdc2 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/savefrom.py @@ -0,0 +1,33 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import os.path
+
+from .common import InfoExtractor
+
+
+class SaveFromIE(InfoExtractor):
+    IE_NAME = 'savefrom.net'
+    _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P<url>.*)$'
+
+    _TEST = {
+        'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com',
+        'info_dict': {
+            'id': 'UlVRAPW2WJY',
+            'ext': 'mp4',
+            'title': 'About Team Radical MMA | MMA Fighting',
+            'upload_date': '20120816',
+            'uploader': 'Howcast',
+            'uploader_id': 'Howcast',
+            'description': r're:(?s).* Hi, my name is Rene Dreifuss\. And I\'m here to show you some MMA.*',
+        },
+        'params': {
+            'skip_download': True
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        video_id = os.path.splitext(url.split('/')[-1])[0]
+
+        return self.url_result(mobj.group('url'), video_id=video_id)
diff --git a/plugins/youtube_download/yt_dlp/extractor/sbs.py b/plugins/youtube_download/yt_dlp/extractor/sbs.py new file mode 100644 index 0000000..4090f63 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/sbs.py @@ -0,0 +1,93 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    smuggle_url,
+    ExtractorError,
+)
+
+
+class SBSIE(InfoExtractor):
+    IE_DESC = 'sbs.com.au'
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?sbs\.com\.au/(?:
+            ondemand(?:
+                /video/(?:single/)?|
+                /movie/[^/]+/|
+                .*?\bplay=|/watch/
+            )|news/(?:embeds/)?video/
+        )(?P<id>[0-9]+)'''
+
+    _TESTS = [{
+        # Original URL is handled by the generic IE which finds the iframe:
+        # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
+        'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
+        'md5': '3150cf278965eeabb5b4cea1c963fe0a',
+        'info_dict': {
+            'id': '_rFBPRPO4pMR',
+            'ext': 'mp4',
+            'title': 'Dingo Conservation (The Feed)',
+            'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
+            'thumbnail': r're:http://.*\.jpg',
+            'duration': 308,
+            'timestamp': 1408613220,
+            'upload_date': '20140821',
+            'uploader': 'SBSC',
+        },
+    }, {
+        'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sbs.com.au/ondemand/?play=1836638787723',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sbs.com.au/ondemand/program/inside-windsor-castle?play=1283505731842',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sbs.com.au/news/embeds/video/1840778819866',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sbs.com.au/ondemand/watch/1698704451971',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sbs.com.au/ondemand/movie/coherence/1469404227931',
+        'only_matching': True,
+    }, {
+        'note': 'Live stream',
+        'url': 'https://www.sbs.com.au/ondemand/video/1726824003663/sbs-24x7-live-stream-nsw',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        player_params = self._download_json(
+            'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id)
+
+        error = player_params.get('error')
+        if error:
+            error_message = 'Sorry, The video you are looking for does not exist.'
+            video_data = error.get('results') or {}
+            error_code = error.get('errorCode')
+            if error_code == 'ComingSoon':
+                error_message = '%s is not yet available.' % video_data.get('title', '')
+            elif error_code in ('Forbidden', 'intranetAccessOnly'):
+                error_message = 'Sorry, This video cannot be accessed via this website'
+            elif error_code == 'Expired':
+                error_message = 'Sorry, %s is no longer available.' % video_data.get('title', '')
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
+
+        urls = player_params['releaseUrls']
+        theplatform_url = (urls.get('progressive') or urls.get('html')
+                           or urls.get('standard') or player_params['relatedItemsURL'])
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'ThePlatform',
+            'id': video_id,
+            'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}),
+            'is_live': player_params.get('streamType') == 'live',
+        }
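Side note (editor's sketch, not part of the patch): smuggle_url/unsmuggle_url from yt_dlp.utils piggyback extractor hints onto the URL passed to the next extractor, which is how the 'force_smil_url' flag above survives the hand-off to ThePlatform. The media URL below is invented.

    from yt_dlp.utils import smuggle_url, unsmuggle_url

    tagged = smuggle_url('https://link.theplatform.com/s/abc/media/xyz', {'force_smil_url': True})
    url, data = unsmuggle_url(tagged, default={})
    print(url)   # https://link.theplatform.com/s/abc/media/xyz
    print(data)  # {'force_smil_url': True}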
', + r'class="tabSeperator">>(.+?)<', + r'([^<]+)'], + webpage, 'title') + thumbnail = self._og_search_thumbnail(webpage) + description = self._og_search_description(webpage, default=None) + if description is None: + description = self._html_search_meta('description', webpage) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + } diff --git a/plugins/youtube_download/yt_dlp/extractor/screencastomatic.py b/plugins/youtube_download/yt_dlp/extractor/screencastomatic.py new file mode 100644 index 0000000..0afdc17 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/screencastomatic.py @@ -0,0 +1,51 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + get_element_by_class, + int_or_none, + remove_start, + strip_or_none, + unified_strdate, +) + + +class ScreencastOMaticIE(InfoExtractor): + _VALID_URL = r'https?://screencast-o-matic\.com/(?:(?:watch|player)/|embed\?.*?\bsc=)(?P[0-9a-zA-Z]+)' + _TESTS = [{ + 'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl', + 'md5': '483583cb80d92588f15ccbedd90f0c18', + 'info_dict': { + 'id': 'c2lD3BeOPl', + 'ext': 'mp4', + 'title': 'Welcome to 3-4 Philosophy @ DECV!', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.', + 'duration': 369, + 'upload_date': '20141216', + } + }, { + 'url': 'http://screencast-o-matic.com/player/c2lD3BeOPl', + 'only_matching': True, + }, { + 'url': 'http://screencast-o-matic.com/embed?ff=true&sc=cbV2r4Q5TL&fromPH=true&a=1', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'https://screencast-o-matic.com/player/' + video_id, video_id) + info = self._parse_html5_media_entries(url, webpage, video_id)[0] + info.update({ + 'id': video_id, + 'title': get_element_by_class('overlayTitle', webpage), + 'description': strip_or_none(get_element_by_class('overlayDescription', webpage)) or None, + 'duration': int_or_none(self._search_regex( + r'player\.duration\s*=\s*function\(\)\s*{\s*return\s+(\d+);\s*};', + webpage, 'duration', default=None)), + 'upload_date': unified_strdate(remove_start( + get_element_by_class('overlayPublished', webpage), 'Published: ')), + }) + return info diff --git a/plugins/youtube_download/yt_dlp/extractor/scrippsnetworks.py b/plugins/youtube_download/yt_dlp/extractor/scrippsnetworks.py new file mode 100644 index 0000000..84918b6 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/scrippsnetworks.py @@ -0,0 +1,151 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import hashlib + +from .aws import AWSIE +from .anvato import AnvatoIE +from .common import InfoExtractor +from ..utils import ( + smuggle_url, + urlencode_postdata, + xpath_text, +) + + +class ScrippsNetworksWatchIE(AWSIE): + IE_NAME = 'scrippsnetworks:watch' + _VALID_URL = r'''(?x) + https?:// + watch\. 
diff --git a/plugins/youtube_download/yt_dlp/extractor/scrippsnetworks.py b/plugins/youtube_download/yt_dlp/extractor/scrippsnetworks.py new file mode 100644 index 0000000..84918b6 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/scrippsnetworks.py @@ -0,0 +1,151 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import hashlib
+
+from .aws import AWSIE
+from .anvato import AnvatoIE
+from .common import InfoExtractor
+from ..utils import (
+    smuggle_url,
+    urlencode_postdata,
+    xpath_text,
+)
+
+
+class ScrippsNetworksWatchIE(AWSIE):
+    IE_NAME = 'scrippsnetworks:watch'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        watch\.
+                        (?P<site>geniuskitchen)\.com/
+                        (?:
+                            player\.[A-Z0-9]+\.html\#|
+                            show/(?:[^/]+/){2}|
+                            player/
+                        )
+                        (?P<id>\d+)
+                    '''
+    _TESTS = [{
+        'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
+        'info_dict': {
+            'id': '4194875',
+            'ext': 'mp4',
+            'title': 'Ample Hills Ice Cream Bike',
+            'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
+            'uploader': 'ANV',
+            'upload_date': '20171011',
+            'timestamp': 1507698000,
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': [AnvatoIE.ie_key()],
+    }]
+
+    _SNI_TABLE = {
+        'geniuskitchen': 'genius',
+    }
+
+    _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
+    _AWS_PROXY_HOST = 'web.api.video.snidigital.com'
+
+    _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        site_id, video_id = mobj.group('site', 'id')
+
+        aws_identity_id_json = json.dumps({
+            'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION
+        }).encode('utf-8')
+        token = self._download_json(
+            'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id,
+            data=aws_identity_id_json,
+            headers={
+                'Accept': '*/*',
+                'Content-Type': 'application/x-amz-json-1.1',
+                'Referer': url,
+                'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(),
+                'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
+                'X-Amz-User-Agent': self._AWS_USER_AGENT,
+            })['Token']
+
+        sts = self._download_xml(
+            'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({
+                'Action': 'AssumeRoleWithWebIdentity',
+                'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
+                'RoleSessionName': 'web-identity',
+                'Version': '2011-06-15',
+                'WebIdentityToken': token,
+            }), headers={
+                'Referer': url,
+                'X-Amz-User-Agent': self._AWS_USER_AGENT,
+                'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
+            })
+
+        def get(key):
+            return xpath_text(
+                sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
+                fatal=True)
+
+        mcp_id = self._aws_execute_api({
+            'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id),
+            'access_key': get('AccessKeyId'),
+            'secret_key': get('SecretAccessKey'),
+            'session_token': get('SessionToken'),
+        }, video_id)['results'][0]['mcpId']
+
+        return self.url_result(
+            smuggle_url(
+                'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
+                {'geo_countries': ['US']}),
+            AnvatoIE.ie_key(), video_id=mcp_id)
+
+
+class ScrippsNetworksIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>cookingchanneltv|discovery|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338',
+        'info_dict': {
+            'id': '0260338',
+            'ext': 'mp4',
+            'title': 'The Best of the Best',
+            'description': 'Catch a new episode of MasterChef Canada Tuedsay at 9/8c.',
+            'timestamp': 1475678834,
+            'upload_date': '20161005',
+            'uploader': 'SCNI-SCND',
+        },
+        'add_ie': ['ThePlatform'],
+    }, {
+        'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.foodnetwork.com/videos/chocolate-strawberry-cake-roll-7524591',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.hgtv.com/videos/cookie-decorating-101-0301929',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.discovery.com/videos/guardians-of-the-glades-cooking-with-tom-cobb-5578368',
+        'only_matching': True,
+    }]
+    _ACCOUNT_MAP = {
+        'cookingchanneltv': 2433005105,
+        'discovery': 2706091867,
+        'diynetwork': 2433004575,
+        'foodnetwork': 2433005105,
+        'hgtv': 2433004575,
+        'travelchannel': 2433005739,
+    }
+    _TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true'
+
+    def _real_extract(self, url):
+        site, guid = self._match_valid_url(url).groups()
+        return self.url_result(smuggle_url(
+            self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid),
+            {'force_smil_url': True}), 'ThePlatform', guid)
diff --git a/plugins/youtube_download/yt_dlp/extractor/scte.py b/plugins/youtube_download/yt_dlp/extractor/scte.py new file mode 100644 index 0000000..ca1de63 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/scte.py @@ -0,0 +1,144 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    decode_packed_codes,
+    ExtractorError,
+    urlencode_postdata,
+)
+
+
+class SCTEBaseIE(InfoExtractor):
+    _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
+    _NETRC_MACHINE = 'scte'
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        login_popup = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading login popup')
+
+        def is_logged(webpage):
+            return any(re.search(p, webpage) for p in (
+                r'class=["\']welcome\b', r'>Sign Out<'))
+
+        # already logged in
+        if is_logged(login_popup):
+            return
+
+        login_form = self._hidden_inputs(login_popup)
+
+        login_form.update({
+            'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username,
+            'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password,
+            'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on',
+        })
+
+        response = self._download_webpage(
+            self._LOGIN_URL, None, 'Logging in',
+            data=urlencode_postdata(login_form))
+
+        if '|pageRedirect|' not in response and not is_logged(response):
+            error = self._html_search_regex(
+                r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</div>',
+                response, 'error message', default=None)
+            if error:
+                raise ExtractorError('Unable to login: %s' % error, expected=True)
+            raise ExtractorError('Unable to log in')
+
+
+class SCTEIE(SCTEBaseIE):
+    _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
+        'info_dict': {
+            'title': 'Introduction to DOCSIS Engineering Professional',
+            'id': '31484',
+        },
+        'playlist_count': 5,
+        'skip': 'Requires account credentials',
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
+
+        context_id = self._search_regex(r'context-(\d+)', webpage, video_id)
+        content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id
+        context = decode_packed_codes(self._download_webpage(
+            '%smobile/data.js' % content_base, video_id))
+
+        data = self._parse_xml(
+            self._search_regex(
+                r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
+            video_id)
+
+        entries = []
+        for asset in data.findall('.//asset'):
+            asset_url = asset.get('url')
+            if not asset_url or not asset_url.endswith('.mp4'):
+                continue
+            asset_id = self._search_regex(
+                r'video_([^_]+)_', asset_url, 'asset id', default=None)
+            if not asset_id:
+                continue
+            entries.append({
+                'id': asset_id,
+                'title': title,
+                'url': content_base + asset_url,
+            })
+
+        return self.playlist_result(entries, video_id, title)
+
+
+class SCTECourseIE(SCTEBaseIE):
+    _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
+        'only_matching': True,
+    }, {
+        'url': 'https://learning.scte.org/course/view.php?id=3639',
+        'only_matching': True,
+    }, {
+        'url': 'https://learning.scte.org/course/view.php?id=3073',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        course_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, course_id)
+
+        title = self._search_regex(
+            r'<h1>(.+?)</h1>', webpage, 'title', default=None)
+
+        entries = []
+        for mobj in re.finditer(
+                r'''(?x)
+                    <a[^>]+
+                        href=(["\'])
+                        (?P<url>
+                            https?://learning\.scte\.org/mod/
+                            (?P<kind>scorm|subcourse)/view\.php?(?:(?!\1).)*?
+                            \bid=\d+
+                        )
+                    ''',
+                webpage):
+            item_url = mobj.group('url')
+            if item_url == url:
+                continue
+            ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
+                  else SCTECourseIE.ie_key())
+            entries.append(self.url_result(item_url, ie=ie))
+
+        return self.playlist_result(entries, course_id, title)
diff --git a/plugins/youtube_download/yt_dlp/extractor/seeker.py b/plugins/youtube_download/yt_dlp/extractor/seeker.py new file mode 100644 index 0000000..e5c18c7 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/seeker.py @@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    get_element_by_class,
+    strip_or_none,
+)
+
+
+class SeekerIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html'
+    _TESTS = [{
+        'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
+        'md5': '897d44bbe0d8986a2ead96de565a92db',
+        'info_dict': {
+            'id': 'Elrn3gnY',
+            'ext': 'mp4',
+            'title': 'Should Trump Be Required To Release His Tax Returns?',
+            'description': 'md5:41efa8cfa8d627841045eec7b018eb45',
+            'timestamp': 1490090165,
+            'upload_date': '20170321',
+        }
+    }, {
+        'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
+        'playlist': [
+            {
+                'md5': '0497b9f20495174be73ae136949707d2',
+                'info_dict': {
+                    'id': 'FihYQ8AE',
+                    'ext': 'mp4',
+                    'title': 'The Pros & Cons Of Zoos',
+                    'description': 'md5:d88f99a8ea8e7d25e6ff77f271b1271c',
+                    'timestamp': 1490039133,
+                    'upload_date': '20170320',
+                },
+            }
+        ],
+        'info_dict': {
+            'id': '1834116536',
+            'title': 'After Gorilla Killing, Changes Ahead for Zoos',
+            'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the shooting deaths of a gorilla and two lions.',
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id, article_id = self._match_valid_url(url).groups()
+        webpage = self._download_webpage(url, display_id)
+        entries = []
+        for jwp_id in re.findall(r'data-video-id="([a-zA-Z0-9]{8})"', webpage):
+            entries.append(self.url_result(
+                'jwplatform:' + jwp_id, 'JWPlatform', jwp_id))
+        return self.playlist_result(
+            entries, article_id,
+            self._og_search_title(webpage),
+            strip_or_none(get_element_by_class('subtitle__text', webpage)) or self._og_search_description(webpage))
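Side note (editor's sketch, not part of the patch): after decode_packed_codes() unpacks data.js, SCTEIE walks a small XML manifest and keeps only the .mp4 assets. The shape of that walk, on made-up sample data:

    import xml.etree.ElementTree as ET

    data = ET.fromstring(
        '<manifest><assets>'
        '<asset url="video_intro_1.mp4"/><asset url="notes.pdf"/>'
        '</assets></manifest>')
    mp4s = [a.get('url') for a in data.findall('.//asset')
            if (a.get('url') or '').endswith('.mp4')]
    print(mp4s)  # ['video_intro_1.mp4']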
diff --git a/plugins/youtube_download/yt_dlp/extractor/senategov.py b/plugins/youtube_download/yt_dlp/extractor/senategov.py new file mode 100644 index 0000000..6f42404 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/senategov.py @@ -0,0 +1,213 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_urlparse,
+)
+from ..utils import (
+    ExtractorError,
+    parse_qs,
+    unsmuggle_url,
+)
+
+_COMMITTEES = {
+    'ag': ('76440', 'http://ag-f.akamaihd.net'),
+    'aging': ('76442', 'http://aging-f.akamaihd.net'),
+    'approps': ('76441', 'http://approps-f.akamaihd.net'),
+    'arch': ('', 'http://ussenate-f.akamaihd.net'),
+    'armed': ('76445', 'http://armed-f.akamaihd.net'),
+    'banking': ('76446', 'http://banking-f.akamaihd.net'),
+    'budget': ('76447', 'http://budget-f.akamaihd.net'),
+    'cecc': ('76486', 'http://srs-f.akamaihd.net'),
+    'commerce': ('80177', 'http://commerce1-f.akamaihd.net'),
+    'csce': ('75229', 'http://srs-f.akamaihd.net'),
+    'dpc': ('76590', 'http://dpc-f.akamaihd.net'),
+    'energy': ('76448', 'http://energy-f.akamaihd.net'),
+    'epw': ('76478', 'http://epw-f.akamaihd.net'),
+    'ethics': ('76449', 'http://ethics-f.akamaihd.net'),
+    'finance': ('76450', 'http://finance-f.akamaihd.net'),
+    'foreign': ('76451', 'http://foreign-f.akamaihd.net'),
+    'govtaff': ('76453', 'http://govtaff-f.akamaihd.net'),
+    'help': ('76452', 'http://help-f.akamaihd.net'),
+    'indian': ('76455', 'http://indian-f.akamaihd.net'),
+    'intel': ('76456', 'http://intel-f.akamaihd.net'),
+    'intlnarc': ('76457', 'http://intlnarc-f.akamaihd.net'),
+    'jccic': ('85180', 'http://jccic-f.akamaihd.net'),
+    'jec': ('76458', 'http://jec-f.akamaihd.net'),
+    'judiciary': ('76459', 'http://judiciary-f.akamaihd.net'),
+    'rpc': ('76591', 'http://rpc-f.akamaihd.net'),
+    'rules': ('76460', 'http://rules-f.akamaihd.net'),
+    'saa': ('76489', 'http://srs-f.akamaihd.net'),
+    'smbiz': ('76461', 'http://smbiz-f.akamaihd.net'),
+    'srs': ('75229', 'http://srs-f.akamaihd.net'),
+    'uscc': ('76487', 'http://srs-f.akamaihd.net'),
+    'vetaff': ('76462', 'http://vetaff-f.akamaihd.net'),
+}
+
+
+class SenateISVPIE(InfoExtractor):
+    _IE_NAME = 'senate.gov:isvp'
+    _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
+
+    _TESTS = [{
+        'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
+        'info_dict': {
+            'id': 'judiciary031715',
+            'ext': 'mp4',
+            'title': 'Integrated Senate Video Player',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
+        'info_dict': {
+            'id': 'commerce011514',
+            'ext': 'mp4',
+            'title': 'Integrated Senate Video Player'
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
+        # checksum differs each time
+        'info_dict': {
+            'id': 'intel090613',
+            'ext': 'mp4',
+            'title': 'Integrated Senate Video Player'
+        }
+    }, {
+        # From http://www.c-span.org/video/?96791-1
+        'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _search_iframe_url(webpage):
+        mobj = re.search(
+            r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]",
+            webpage)
+        if mobj:
+            return mobj.group('url')
+
+    def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+
+        qs = compat_parse_qs(self._match_valid_url(url).group('qs'))
+        if not qs.get('filename') or not qs.get('type') or not qs.get('comm'):
+            raise ExtractorError('Invalid URL', expected=True)
+
+        video_id = re.sub(r'.mp4$', '', qs['filename'][0])
+
+        webpage = self._download_webpage(url, video_id)
+
+        if smuggled_data.get('force_title'):
+            title = smuggled_data['force_title']
+        else:
+            title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, video_id)
+        poster = qs.get('poster')
+        thumbnail = poster[0] if poster else None
+
+        video_type = qs['type'][0]
+        committee = video_type if video_type == 'arch' else qs['comm'][0]
+
+        stream_num, domain = _COMMITTEES[committee]
+
+        formats = []
+        if video_type == 'arch':
+            filename = video_id if '.' in video_id else video_id + '.mp4'
+            m3u8_url = compat_urlparse.urljoin(domain, 'i/' + filename + '/master.m3u8')
+            formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8')
+        else:
+            hdcore_sign = 'hdcore=3.1.0'
+            url_params = (domain, video_id, stream_num)
+            f4m_url = f'%s/z/%s_1@%s/manifest.f4m?{hdcore_sign}' % url_params
+            m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params
+            for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'):
+                # URLs without the extra param induce an 404 error
+                entry.update({'extra_param_to_segment_url': hdcore_sign})
+                formats.append(entry)
+            for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'):
+                mobj = re.search(r'(?P<tag>(?:-p|-b)).m3u8', entry['url'])
+                if mobj:
+                    entry['format_id'] += mobj.group('tag')
+                formats.append(entry)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+        }
+
+
+class SenateGovIE(InfoExtractor):
+    _IE_NAME = 'senate.gov'
+    _VALID_URL = r'https?:\/\/(?:www\.)?(help|appropriations|judiciary|banking|armed-services|finance)\.senate\.gov'
+    _TESTS = [{
+        'url': 'https://www.help.senate.gov/hearings/vaccines-saving-lives-ensuring-confidence-and-protecting-public-health',
+        'info_dict': {
+            'id': 'help090920',
+            'display_id': 'vaccines-saving-lives-ensuring-confidence-and-protecting-public-health',
+            'title': 'Vaccines: Saving Lives, Ensuring Confidence, and Protecting Public Health',
+            'description': 'The U.S. Senate Committee on Health, Education, Labor & Pensions',
+            'ext': 'mp4',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.appropriations.senate.gov/hearings/watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD',
+        'info_dict': {
+            'id': 'appropsA051518',
+            'display_id': 'watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD',
+            'title': 'Review of the FY2019 Budget Request for the U.S. Army',
+            'ext': 'mp4',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.banking.senate.gov/hearings/21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization',
+        'info_dict': {
+            'id': 'banking041521',
+            'display_id': '21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization',
+            'title': '21st Century Communities: Public Transportation Infrastructure Investment and FAST Act Reauthorization',
+            'description': 'The Official website of The United States Committee on Banking, Housing, and Urban Affairs',
+            'ext': 'mp4',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._generic_id(url)
+        webpage = self._download_webpage(url, display_id)
+        parse_info = parse_qs(self._search_regex(
+            r'