Compare commits
13 Commits
Author | SHA1 | Date | |
---|---|---|---|
2f47614eaf | |||
a5f864b802 | |||
5e9fe86cd6 | |||
da70244a54 | |||
c01e81af27 | |||
41f39ba8cc | |||
bebe0c7cba | |||
74d53690e2 | |||
061dbf19ad | |||
1798213bfc | |||
6bd4d97db2 | |||
7737e3ad6d | |||
3c914e64dd |
@ -8,7 +8,7 @@ Additionally, if not building a .deb then just move the contents of user_config
|
||||
Copy the share/solarfm folder to your user .config/ directory too.
|
||||
|
||||
`pyrightconfig.json`
|
||||
<p>The pyrightconfig file needs to stay on same level as the .git folders in order to have settings detected when using pyright with lsp functionality. "pyrightconfig.json" can prompt IDEs such as Zed on settings to use and where imports are located- look at venvPath and venv. "venvPath" is parent path of "venv" where "venv" is just the name of the folder under the parent path that is the python created venv.
|
||||
<p>The pyrightconfig file needs to stay on same level as the .git folders in order to have settings detected when using pyright with lsp functionality.</p>
|
||||
|
||||
<h6>Install Setup</h6>
|
||||
```
|
||||
|
13
cookies.txt
13
cookies.txt
@ -1,13 +0,0 @@
|
||||
# Netscape HTTP Cookie File
|
||||
# This file is generated by yt-dlp. Do not edit.
|
||||
|
||||
.youtube.com TRUE / FALSE 0 PREF hl=en&tz=UTC
|
||||
.youtube.com TRUE / TRUE 0 SOCS CAI
|
||||
.youtube.com TRUE / TRUE 1746228332 GPS 1
|
||||
.youtube.com TRUE / TRUE 0 YSC WmwjDInItf4
|
||||
.youtube.com TRUE / TRUE 1761778533 __Secure-ROLLOUT_TOKEN CO_R6PStoK3FQRCTgL7h8IWNAxi2uuTh8IWNAw%3D%3D
|
||||
.youtube.com TRUE / TRUE 1761778533 VISITOR_INFO1_LIVE 9VALBXb6AdM
|
||||
.youtube.com TRUE / TRUE 1761778533 VISITOR_PRIVACY_METADATA CgJVUxIEGgAgOQ%3D%3D
|
||||
.youtube.com TRUE / TRUE 1761778533 YT_DEVICE_MEASUREMENT_ID TWV9k30=
|
||||
.youtube.com TRUE / TRUE 1809298533 __Secure-YT_TVFAS t=485062&s=2
|
||||
.youtube.com TRUE / TRUE 1761778533 DEVICE_INFO ChxOelE1T1RrNE5UZzFNRGd5TURJM09UVTROdz09EOWa1cAGGOWa1cAG
|
@ -14,7 +14,6 @@ class Manifest:
|
||||
'ui_target': "plugin_control_list",
|
||||
'pass_fm_events': "true"
|
||||
}
|
||||
pre_launch: bool = False
|
||||
```
|
||||
|
||||
|
||||
|
@ -122,6 +122,7 @@ class Plugin(PluginBase):
|
||||
uri = state.uris[0]
|
||||
path = state.tab.get_current_directory()
|
||||
|
||||
|
||||
properties = self._set_ui_data(uri, path)
|
||||
response = self._properties_dialog.run()
|
||||
if response in [Gtk.ResponseType.CANCEL, Gtk.ResponseType.DELETE_EVENT]:
|
||||
@ -173,7 +174,7 @@ class Plugin(PluginBase):
|
||||
|
||||
is_symlink = file_info.get_attribute_as_string("standard::is-symlink")
|
||||
properties.file_uri = uri
|
||||
properties.file_target = file_info.get_attribute_as_string("standard::symlink-target") if is_symlink in [True, "TRUE"] else ""
|
||||
properties.file_target = file_info.get_attribute_as_string("standard::symlink-target") if is_symlink else ""
|
||||
properties.file_name = file_info.get_display_name()
|
||||
properties.file_location = path
|
||||
properties.mime_type = file_info.get_content_type()
|
||||
|
@ -48,7 +48,7 @@ class GrepPreviewWidget(Gtk.Box):
|
||||
return bytes(f"\n<span foreground='{color}'>{target}</span>", "utf-8").decode("utf-8")
|
||||
|
||||
def make_utf8_line_highlight(self, buffer, itr, i, color, target, query):
|
||||
parts = re.split(r"(?i)(" + query + ")", target.replace("\n", ""))
|
||||
parts = re.split(r"(" + query + ")(?i)", target.replace("\n", ""))
|
||||
for part in parts:
|
||||
itr = buffer.get_end_iter()
|
||||
|
||||
|
@ -8,7 +8,6 @@
|
||||
"ui_target": "plugin_control_list",
|
||||
"pass_fm_events": "true",
|
||||
"bind_keys": ["Example Plugin||send_message:<Control>f"]
|
||||
},
|
||||
"pre_launch": "false"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,3 +0,0 @@
|
||||
"""
|
||||
Pligin Module
|
||||
"""
|
@ -1,3 +0,0 @@
|
||||
"""
|
||||
Pligin Package
|
||||
"""
|
@ -1,73 +0,0 @@
|
||||
# Python imports
|
||||
import json
|
||||
import os
|
||||
from os import path
|
||||
|
||||
# Lib imports
|
||||
import gi
|
||||
gi.require_version('Gtk', '3.0')
|
||||
from gi.repository import Gtk
|
||||
|
||||
# Application imports
|
||||
from .icon import Icon
|
||||
|
||||
|
||||
|
||||
class IconController(Icon):
|
||||
def __init__(self):
|
||||
CURRENT_PATH = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
# NOTE: app_name should be defined using python 'builtins' and so too must be logger used in the various classes
|
||||
app_name_exists = False
|
||||
try:
|
||||
app_name
|
||||
app_name_exists = True
|
||||
except Exception as e:
|
||||
...
|
||||
|
||||
APP_CONTEXT = f"{app_name.lower()}" if app_name_exists else "shellfm"
|
||||
USR_APP_CONTEXT = f"/usr/share/{APP_CONTEXT}"
|
||||
USER_HOME = path.expanduser('~')
|
||||
CONFIG_PATH = f"{USER_HOME}/.config/{APP_CONTEXT}"
|
||||
self.DEFAULT_ICONS = f"{CONFIG_PATH}/icons"
|
||||
self.DEFAULT_ICON = f"{self.DEFAULT_ICONS}/text.png"
|
||||
self.FFMPG_THUMBNLR = f"{CONFIG_PATH}/ffmpegthumbnailer" # Thumbnail generator binary
|
||||
self.BLENDER_THUMBNLR = f"{CONFIG_PATH}/blender-thumbnailer" # Blender thumbnail generator binary
|
||||
|
||||
self.ICON_DIRS = ["/usr/share/icons", f"{USER_HOME}/.icons" "/usr/share/pixmaps"]
|
||||
self.BASE_THUMBS_PTH = f"{USER_HOME}/.thumbnails"
|
||||
self.ABS_THUMBS_PTH = f"{self.BASE_THUMBS_PTH}/normal"
|
||||
self.STEAM_ICONS_PTH = f"{self.BASE_THUMBS_PTH}/steam_icons"
|
||||
|
||||
if not path.isdir(self.BASE_THUMBS_PTH):
|
||||
os.mkdir(self.BASE_THUMBS_PTH)
|
||||
|
||||
if not path.isdir(self.ABS_THUMBS_PTH):
|
||||
os.mkdir(self.ABS_THUMBS_PTH)
|
||||
|
||||
if not path.isdir(self.STEAM_ICONS_PTH):
|
||||
os.mkdir(self.STEAM_ICONS_PTH)
|
||||
|
||||
if not os.path.exists(self.DEFAULT_ICONS):
|
||||
self.DEFAULT_ICONS = f"{USR_APP_CONTEXT}/icons"
|
||||
self.DEFAULT_ICON = f"{self.DEFAULT_ICONS}/text.png"
|
||||
|
||||
CONFIG_FILE = f"{CURRENT_PATH}/../settings.json"
|
||||
with open(CONFIG_FILE) as f:
|
||||
settings = json.load(f)
|
||||
config = settings["config"]
|
||||
|
||||
self.container_icon_wh = config["container_icon_wh"]
|
||||
self.video_icon_wh = config["video_icon_wh"]
|
||||
self.sys_icon_wh = config["sys_icon_wh"]
|
||||
|
||||
# Filters
|
||||
filters = settings["filters"]
|
||||
self.fmeshs = tuple(filters["meshs"])
|
||||
self.fcode = tuple(filters["code"])
|
||||
self.fvideos = tuple(filters["videos"])
|
||||
self.foffice = tuple(filters["office"])
|
||||
self.fimages = tuple(filters["images"])
|
||||
self.ftext = tuple(filters["text"])
|
||||
self.fmusic = tuple(filters["music"])
|
||||
self.fpdf = tuple(filters["pdf"])
|
@ -1,12 +0,0 @@
|
||||
{
|
||||
"manifest": {
|
||||
"name": "Thumbnailer",
|
||||
"author": "ITDominator",
|
||||
"version": "0.0.1",
|
||||
"support": "",
|
||||
"requests": {
|
||||
"pass_fm_events": "true"
|
||||
},
|
||||
"pre_launch": "true"
|
||||
}
|
||||
}
|
@ -1,59 +0,0 @@
|
||||
# Python imports
|
||||
import os
|
||||
|
||||
# Lib imports
|
||||
|
||||
# Application imports
|
||||
from plugins.plugin_base import PluginBase
|
||||
from .icons.controller import IconController
|
||||
|
||||
|
||||
|
||||
class Plugin(PluginBase):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
self.name = "Thumbnailer" # NOTE: Need to remove after establishing private bidirectional 1-1 message bus
|
||||
# where self.name should not be needed for message comms
|
||||
# self.path = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
||||
def run(self):
|
||||
self.icon_controller = IconController()
|
||||
self._event_system.subscribe("create-thumbnail", self.create_thumbnail)
|
||||
|
||||
def generate_reference_ui_element(self):
|
||||
...
|
||||
|
||||
def create_thumbnail(self, dir, file) -> str:
|
||||
return self.icon_controller.create_icon(dir, file)
|
||||
|
||||
def get_video_icons(self, dir) -> list:
|
||||
data = []
|
||||
|
||||
def get_video_icons(self) -> list:
|
||||
data = []
|
||||
fvideos = self.icon_controller.fvideos
|
||||
vids = [ file for file in os.path.list_dir(dir) if file.lower().endswith(fvideos) ]
|
||||
|
||||
for file in vids:
|
||||
img_hash, hash_img_path = self.create_video_thumbnail(full_path = f"{dir}/{file}", returnHashInstead = True)
|
||||
data.append([img_hash, hash_img_path])
|
||||
|
||||
return data
|
||||
|
||||
def get_pixbuf_icon_str_combo(self, dir) -> list:
|
||||
data = []
|
||||
for file in os.path.list_dir(dir):
|
||||
icon = self.icon_controller.create_icon(dir, file).get_pixbuf()
|
||||
data.append([icon, file])
|
||||
|
||||
return data
|
||||
|
||||
def get_gtk_icon_str_combo(self, dir) -> list:
|
||||
data = []
|
||||
for file in os.path.list_dir(dir):
|
||||
icon = self.icon_controller.create_icon(dir, file)
|
||||
data.append([icon, file[0]])
|
||||
|
||||
return data
|
@ -1,101 +0,0 @@
|
||||
{
|
||||
"config":{
|
||||
"thumbnailer_path":"ffmpegthumbnailer",
|
||||
"blender_thumbnailer_path":"",
|
||||
"container_icon_wh":[
|
||||
128,
|
||||
128
|
||||
],
|
||||
"video_icon_wh":[
|
||||
128,
|
||||
64
|
||||
],
|
||||
"sys_icon_wh":[
|
||||
56,
|
||||
56
|
||||
],
|
||||
"steam_cdn_url":"https://steamcdn-a.akamaihd.net/steam/apps/",
|
||||
"remux_folder_max_disk_usage":"8589934592"
|
||||
},
|
||||
"filters":{
|
||||
"meshs":[
|
||||
".dae",
|
||||
".fbx",
|
||||
".gltf",
|
||||
".obj",
|
||||
".stl"
|
||||
],
|
||||
"code":[
|
||||
".cpp",
|
||||
".css",
|
||||
".c",
|
||||
".go",
|
||||
".html",
|
||||
".htm",
|
||||
".java",
|
||||
".js",
|
||||
".json",
|
||||
".lua",
|
||||
".md",
|
||||
".py",
|
||||
".rs",
|
||||
".toml",
|
||||
".xml",
|
||||
".pom"
|
||||
],
|
||||
"videos":[
|
||||
".mkv",
|
||||
".mp4",
|
||||
".webm",
|
||||
".avi",
|
||||
".mov",
|
||||
".m4v",
|
||||
".mpg",
|
||||
".mpeg",
|
||||
".wmv",
|
||||
".flv"
|
||||
],
|
||||
"office":[
|
||||
".doc",
|
||||
".docx",
|
||||
".xls",
|
||||
".xlsx",
|
||||
".xlt",
|
||||
".xltx",
|
||||
".xlm",
|
||||
".ppt",
|
||||
".pptx",
|
||||
".pps",
|
||||
".ppsx",
|
||||
".odt",
|
||||
".rtf"
|
||||
],
|
||||
"images":[
|
||||
".png",
|
||||
".jpg",
|
||||
".jpeg",
|
||||
".gif",
|
||||
".ico",
|
||||
".tga",
|
||||
".webp"
|
||||
],
|
||||
"text":[
|
||||
".txt",
|
||||
".text",
|
||||
".sh",
|
||||
".cfg",
|
||||
".conf",
|
||||
".log"
|
||||
],
|
||||
"music":[
|
||||
".psf",
|
||||
".mp3",
|
||||
".ogg",
|
||||
".flac",
|
||||
".m4a"
|
||||
],
|
||||
"pdf":[
|
||||
".pdf"
|
||||
]
|
||||
}
|
||||
}
|
@ -184,8 +184,8 @@ class Plugin(PluginBase):
|
||||
response = requests.post(self.vqd_link, headers=self.vqd_headers, data=self.vqd_data, timeout=2)
|
||||
if response.status_code == 200:
|
||||
data = response.content
|
||||
vqd_start_index = data.index(b"vqd=\"") + 5
|
||||
vqd_end_index = data.index(b"\"", vqd_start_index)
|
||||
vqd_start_index = data.index(b"vqd='") + 5
|
||||
vqd_end_index = data.index(b"'", vqd_start_index)
|
||||
self._vqd_attrib = data[vqd_start_index:vqd_end_index].decode("utf-8")
|
||||
|
||||
print(f"Translation VQD: {self._vqd_attrib}")
|
||||
|
@ -111,8 +111,6 @@ class Plugin(PluginBase):
|
||||
for uri in state.uris:
|
||||
self.trashman.trash(uri, verbocity)
|
||||
|
||||
self.trashman.regenerate()
|
||||
|
||||
def restore_trash_files(self, widget = None, eve = None, verbocity = False):
|
||||
self._event_system.emit("get_current_state")
|
||||
state = self._fm_state
|
||||
|
@ -43,4 +43,4 @@ class Trash(object):
|
||||
|
||||
def restore(self, filename, verbose):
|
||||
"""Restore a file from trash."""
|
||||
raise NotImplementedError(_('Backend didn’t implement this functionality'))
|
||||
raise NotImplementedError(_('Backend didn’t \ implement this functionality'))
|
||||
|
@ -127,7 +127,7 @@ DeletionDate={}
|
||||
f.write(infofile)
|
||||
f.close()
|
||||
|
||||
# self.regenerate()
|
||||
self.regenerate()
|
||||
|
||||
if verbose:
|
||||
sys.stderr.write(_('trashed \'{}\'\n').format(filename))
|
||||
|
@ -8,29 +8,12 @@
|
||||
|
||||
|
||||
function main() {
|
||||
_STARGET="${1}"
|
||||
_SPATH="${HOME}/.config/solarfm/plugins/youtube_download"
|
||||
cd "$(dirname "")"
|
||||
echo "Working Dir: " $(pwd)
|
||||
LINK=`xclip -selection clipboard -o`
|
||||
|
||||
cd "${_SPATH}"
|
||||
echo "Working Dir: " $(pwd)
|
||||
|
||||
rm "${_SPATH}/../../cookies.txt"
|
||||
|
||||
# Note: Export cookies to file
|
||||
python "${_SPATH}/yt_dlp/__main__.py" \
|
||||
--cookies-from-browser firefox --cookies "${_SPATH}/../../cookies.txt"
|
||||
|
||||
# Note: Use cookies from browser directly
|
||||
# python "${_SPATH}/yt_dlp/__main__.py" \
|
||||
# --cookies-from-browser firefox --write-sub --embed-sub --sub-langs en \
|
||||
# -o "${_STARGET}/%(title)s.%(ext)s" "${LINK}"
|
||||
|
||||
# Note: Download video
|
||||
python "${_SPATH}/yt_dlp/__main__.py" \
|
||||
-f "bestvideo[height<=1080][ext=mp4][vcodec^=avc]+bestaudio[ext=m4a]/best[ext=mp4]/best" \
|
||||
--cookies "${_SPATH}/../../cookies.txt" --write-sub --embed-sub --sub-langs en \
|
||||
-o "${_STARGET}/%(title)s.%(ext)s" "${LINK}"
|
||||
|
||||
python "${HOME}/.config/solarfm/plugins/youtube_download/yt_dlp/__main__.py" \
|
||||
--write-sub --embed-sub --sub-langs en \
|
||||
-o "${1}/%(title)s.%(ext)s" "${LINK}"
|
||||
}
|
||||
main "$@";
|
File diff suppressed because it is too large
Load Diff
@ -1,10 +1,10 @@
|
||||
import sys
|
||||
try:
|
||||
import contextvars # noqa: F401
|
||||
except Exception:
|
||||
raise Exception(
|
||||
f'You are using an unsupported version of Python. Only Python versions 3.7 and above are supported by yt-dlp') # noqa: F541
|
||||
|
||||
if sys.version_info < (3, 9):
|
||||
raise ImportError(
|
||||
f'You are using an unsupported version of Python. Only Python versions 3.9 and above are supported by yt-dlp') # noqa: F541
|
||||
|
||||
__license__ = 'The Unlicense'
|
||||
__license__ = 'Public Domain'
|
||||
|
||||
import collections
|
||||
import getpass
|
||||
@ -12,16 +12,15 @@ import itertools
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError
|
||||
from .compat import compat_shlex_quote
|
||||
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
|
||||
from .downloader.external import get_external_downloader
|
||||
from .extractor import list_extractor_classes
|
||||
from .extractor.adobepass import MSO_INFO
|
||||
from .networking.impersonate import ImpersonateTarget
|
||||
from .globals import IN_CLI, plugin_dirs
|
||||
from .options import parseOpts
|
||||
from .plugins import load_all_plugins as _load_all_plugins
|
||||
from .postprocessor import (
|
||||
FFmpegExtractAudioPP,
|
||||
FFmpegMergerPP,
|
||||
@ -44,12 +43,12 @@ from .utils import (
|
||||
GeoUtils,
|
||||
PlaylistEntries,
|
||||
SameFileError,
|
||||
decodeOption,
|
||||
download_range_func,
|
||||
expand_path,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
match_filter_func,
|
||||
parse_bytes,
|
||||
parse_duration,
|
||||
@ -58,15 +57,15 @@ from .utils import (
|
||||
read_stdin,
|
||||
render_table,
|
||||
setproctitle,
|
||||
shell_quote,
|
||||
traverse_obj,
|
||||
variadic,
|
||||
write_string,
|
||||
)
|
||||
from .utils.networking import std_headers
|
||||
from .utils._utils import _UnsafeExtensionError
|
||||
from .YoutubeDL import YoutubeDL
|
||||
|
||||
_IN_CLI = False
|
||||
|
||||
|
||||
def _exit(status=0, *args):
|
||||
for msg in args:
|
||||
@ -75,16 +74,14 @@ def _exit(status=0, *args):
|
||||
|
||||
|
||||
def get_urls(urls, batchfile, verbose):
|
||||
"""
|
||||
@param verbose -1: quiet, 0: normal, 1: verbose
|
||||
"""
|
||||
# Batch file verification
|
||||
batch_urls = []
|
||||
if batchfile is not None:
|
||||
try:
|
||||
batch_urls = read_batch_urls(
|
||||
read_stdin(None if verbose == -1 else 'URLs') if batchfile == '-'
|
||||
read_stdin('URLs') if batchfile == '-'
|
||||
else open(expand_path(batchfile), encoding='utf-8', errors='ignore'))
|
||||
if verbose == 1:
|
||||
if verbose:
|
||||
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
|
||||
except OSError:
|
||||
_exit(f'ERROR: batch file {batchfile} could not be read')
|
||||
@ -115,9 +112,9 @@ def print_extractor_information(opts, urls):
|
||||
ie.description(markdown=False, search_examples=_SEARCHES)
|
||||
for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False)
|
||||
elif opts.ap_list_mso:
|
||||
out = 'Supported TV Providers:\n{}\n'.format(render_table(
|
||||
out = 'Supported TV Providers:\n%s\n' % render_table(
|
||||
['mso', 'mso name'],
|
||||
[[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]))
|
||||
[[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()])
|
||||
else:
|
||||
return False
|
||||
write_string(out, out=sys.stdout)
|
||||
@ -129,7 +126,7 @@ def set_compat_opts(opts):
|
||||
if name not in opts.compat_opts:
|
||||
return False
|
||||
opts.compat_opts.discard(name)
|
||||
opts.compat_opts.update([f'*{name}'])
|
||||
opts.compat_opts.update(['*%s' % name])
|
||||
return True
|
||||
|
||||
def set_default_compat(compat_name, opt_name, default=True, remove_compat=True):
|
||||
@ -156,9 +153,6 @@ def set_compat_opts(opts):
|
||||
opts.embed_infojson = False
|
||||
if 'format-sort' in opts.compat_opts:
|
||||
opts.format_sort.extend(FormatSorter.ytdl_default)
|
||||
elif 'prefer-vp9-sort' in opts.compat_opts:
|
||||
opts.format_sort.extend(FormatSorter._prefer_vp9_sort)
|
||||
|
||||
_video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
|
||||
_audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False)
|
||||
if _video_multistreams_set is False and _audio_multistreams_set is False:
|
||||
@ -225,7 +219,7 @@ def validate_options(opts):
|
||||
validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval')
|
||||
|
||||
if opts.wait_for_video is not None:
|
||||
min_wait, max_wait, *_ = map(parse_duration, [*opts.wait_for_video.split('-', 1), None])
|
||||
min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None])
|
||||
validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video),
|
||||
'time range to wait for video', opts.wait_for_video)
|
||||
validate_minmax(min_wait, max_wait, 'time range to wait for video')
|
||||
@ -236,11 +230,6 @@ def validate_options(opts):
|
||||
validate_regex('format sorting', f, FormatSorter.regex)
|
||||
|
||||
# Postprocessor formats
|
||||
if opts.convertsubtitles == 'none':
|
||||
opts.convertsubtitles = None
|
||||
if opts.convertthumbnails == 'none':
|
||||
opts.convertthumbnails = None
|
||||
|
||||
validate_regex('merge output format', opts.merge_output_format,
|
||||
r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS))))
|
||||
validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)
|
||||
@ -260,11 +249,9 @@ def validate_options(opts):
|
||||
elif value in ('inf', 'infinite'):
|
||||
return float('inf')
|
||||
try:
|
||||
int_value = int(value)
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
validate(False, f'{name} retry count', value)
|
||||
validate_positive(f'{name} retry count', int_value)
|
||||
return int_value
|
||||
|
||||
opts.retries = parse_retries('download', opts.retries)
|
||||
opts.fragment_retries = parse_retries('fragment', opts.fragment_retries)
|
||||
@ -274,9 +261,9 @@ def validate_options(opts):
|
||||
# Retry sleep function
|
||||
def parse_sleep_func(expr):
|
||||
NUMBER_RE = r'\d+(?:\.\d+)?'
|
||||
op, start, limit, step, *_ = (*tuple(re.fullmatch(
|
||||
op, start, limit, step, *_ = tuple(re.fullmatch(
|
||||
rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?',
|
||||
expr.strip()).groups()), None, None)
|
||||
expr.strip()).groups()) + (None, None)
|
||||
|
||||
if op == 'exp':
|
||||
return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf'))
|
||||
@ -294,20 +281,18 @@ def validate_options(opts):
|
||||
raise ValueError(f'invalid {key} retry sleep expression {expr!r}')
|
||||
|
||||
# Bytes
|
||||
def validate_bytes(name, value, strict_positive=False):
|
||||
def validate_bytes(name, value):
|
||||
if value is None:
|
||||
return None
|
||||
numeric_limit = parse_bytes(value)
|
||||
validate(numeric_limit is not None, name, value)
|
||||
if strict_positive:
|
||||
validate_positive(name, numeric_limit, True)
|
||||
validate(numeric_limit is not None, 'rate limit', value)
|
||||
return numeric_limit
|
||||
|
||||
opts.ratelimit = validate_bytes('rate limit', opts.ratelimit, True)
|
||||
opts.ratelimit = validate_bytes('rate limit', opts.ratelimit)
|
||||
opts.throttledratelimit = validate_bytes('throttled rate limit', opts.throttledratelimit)
|
||||
opts.min_filesize = validate_bytes('min filesize', opts.min_filesize)
|
||||
opts.max_filesize = validate_bytes('max filesize', opts.max_filesize)
|
||||
opts.buffersize = validate_bytes('buffer size', opts.buffersize, True)
|
||||
opts.buffersize = validate_bytes('buffer size', opts.buffersize)
|
||||
opts.http_chunk_size = validate_bytes('http chunk size', opts.http_chunk_size)
|
||||
|
||||
# Output templates
|
||||
@ -402,19 +387,16 @@ def validate_options(opts):
|
||||
f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}')
|
||||
opts.cookiesfrombrowser = (browser_name, profile, keyring, container)
|
||||
|
||||
if opts.impersonate is not None:
|
||||
opts.impersonate = ImpersonateTarget.from_str(opts.impersonate.lower())
|
||||
|
||||
# MetadataParser
|
||||
def metadataparser_actions(f):
|
||||
if isinstance(f, str):
|
||||
cmd = f'--parse-metadata {shell_quote(f)}'
|
||||
cmd = '--parse-metadata %s' % compat_shlex_quote(f)
|
||||
try:
|
||||
actions = [MetadataFromFieldPP.to_action(f)]
|
||||
except Exception as err:
|
||||
raise ValueError(f'{cmd} is invalid; {err}')
|
||||
else:
|
||||
cmd = f'--replace-in-metadata {shell_quote(f)}'
|
||||
cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f))
|
||||
actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(','))
|
||||
|
||||
for action in actions:
|
||||
@ -425,17 +407,13 @@ def validate_options(opts):
|
||||
yield action
|
||||
|
||||
if opts.metafromtitle is not None:
|
||||
opts.parse_metadata.setdefault('pre_process', []).append(f'title:{opts.metafromtitle}')
|
||||
opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle)
|
||||
opts.parse_metadata = {
|
||||
k: list(itertools.chain(*map(metadataparser_actions, v)))
|
||||
for k, v in opts.parse_metadata.items()
|
||||
}
|
||||
|
||||
# Other options
|
||||
opts.plugin_dirs = opts.plugin_dirs
|
||||
if opts.plugin_dirs is None:
|
||||
opts.plugin_dirs = ['default']
|
||||
|
||||
if opts.playlist_items is not None:
|
||||
try:
|
||||
tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items))
|
||||
@ -482,7 +460,7 @@ def validate_options(opts):
|
||||
default_downloader = ed.get_basename()
|
||||
|
||||
for policy in opts.color.values():
|
||||
if policy not in ('always', 'auto', 'auto-tty', 'no_color', 'no_color-tty', 'never'):
|
||||
if policy not in ('always', 'auto', 'no_color', 'never'):
|
||||
raise ValueError(f'"{policy}" is not a valid color policy')
|
||||
|
||||
warnings, deprecation_warnings = [], []
|
||||
@ -608,13 +586,6 @@ def validate_options(opts):
|
||||
if opts.ap_username is not None and opts.ap_password is None:
|
||||
opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ')
|
||||
|
||||
# compat option changes global state destructively; only allow from cli
|
||||
if 'allow-unsafe-ext' in opts.compat_opts:
|
||||
warnings.append(
|
||||
'Using allow-unsafe-ext opens you up to potential attacks. '
|
||||
'Use with great care!')
|
||||
_UnsafeExtensionError.sanitize_extension = lambda x, prepend=False: x
|
||||
|
||||
return warnings, deprecation_warnings
|
||||
|
||||
|
||||
@ -625,7 +596,7 @@ def get_postprocessors(opts):
|
||||
yield {
|
||||
'key': 'MetadataParser',
|
||||
'actions': actions,
|
||||
'when': when,
|
||||
'when': when
|
||||
}
|
||||
sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
|
||||
if sponsorblock_query:
|
||||
@ -633,19 +604,19 @@ def get_postprocessors(opts):
|
||||
'key': 'SponsorBlock',
|
||||
'categories': sponsorblock_query,
|
||||
'api': opts.sponsorblock_api,
|
||||
'when': 'after_filter',
|
||||
'when': 'after_filter'
|
||||
}
|
||||
if opts.convertsubtitles:
|
||||
yield {
|
||||
'key': 'FFmpegSubtitlesConvertor',
|
||||
'format': opts.convertsubtitles,
|
||||
'when': 'before_dl',
|
||||
'when': 'before_dl'
|
||||
}
|
||||
if opts.convertthumbnails:
|
||||
yield {
|
||||
'key': 'FFmpegThumbnailsConvertor',
|
||||
'format': opts.convertthumbnails,
|
||||
'when': 'before_dl',
|
||||
'when': 'before_dl'
|
||||
}
|
||||
if opts.extractaudio:
|
||||
yield {
|
||||
@ -670,7 +641,7 @@ def get_postprocessors(opts):
|
||||
yield {
|
||||
'key': 'FFmpegEmbedSubtitle',
|
||||
# already_have_subtitle = True prevents the file from being deleted after embedding
|
||||
'already_have_subtitle': opts.writesubtitles and keep_subs,
|
||||
'already_have_subtitle': opts.writesubtitles and keep_subs
|
||||
}
|
||||
if not opts.writeautomaticsub and keep_subs:
|
||||
opts.writesubtitles = True
|
||||
@ -683,7 +654,7 @@ def get_postprocessors(opts):
|
||||
'remove_sponsor_segments': opts.sponsorblock_remove,
|
||||
'remove_ranges': opts.remove_ranges,
|
||||
'sponsorblock_chapter_title': opts.sponsorblock_chapter_title,
|
||||
'force_keyframes': opts.force_keyframes_at_cuts,
|
||||
'force_keyframes': opts.force_keyframes_at_cuts
|
||||
}
|
||||
# FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
|
||||
# FFmpegExtractAudioPP as containers before conversion may not support
|
||||
@ -717,7 +688,7 @@ def get_postprocessors(opts):
|
||||
yield {
|
||||
'key': 'EmbedThumbnail',
|
||||
# already_have_thumbnail = True prevents the file from being deleted after embedding
|
||||
'already_have_thumbnail': opts.writethumbnail,
|
||||
'already_have_thumbnail': opts.writethumbnail
|
||||
}
|
||||
if not opts.writethumbnail:
|
||||
opts.writethumbnail = True
|
||||
@ -751,7 +722,7 @@ ParsedOptions = collections.namedtuple('ParsedOptions', ('parser', 'options', 'u
|
||||
def parse_options(argv=None):
|
||||
"""@returns ParsedOptions(parser, opts, urls, ydl_opts)"""
|
||||
parser, opts, urls = parseOpts(argv)
|
||||
urls = get_urls(urls, opts.batchfile, -1 if opts.quiet and not opts.verbose else opts.verbose)
|
||||
urls = get_urls(urls, opts.batchfile, opts.verbose)
|
||||
|
||||
set_compat_opts(opts)
|
||||
try:
|
||||
@ -764,7 +735,7 @@ def parse_options(argv=None):
|
||||
print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:])
|
||||
any_getting = any(getattr(opts, k) for k in (
|
||||
'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename',
|
||||
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl',
|
||||
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
|
||||
))
|
||||
if opts.quiet is None:
|
||||
opts.quiet = any_getting or opts.print_json or bool(opts.forceprint)
|
||||
@ -859,7 +830,6 @@ def parse_options(argv=None):
|
||||
'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress,
|
||||
'progress_with_newline': opts.progress_with_newline,
|
||||
'progress_template': opts.progress_template,
|
||||
'progress_delta': opts.progress_delta,
|
||||
'playliststart': opts.playliststart,
|
||||
'playlistend': opts.playlistend,
|
||||
'playlistreverse': opts.playlist_reverse,
|
||||
@ -888,8 +858,8 @@ def parse_options(argv=None):
|
||||
'listsubtitles': opts.listsubtitles,
|
||||
'subtitlesformat': opts.subtitlesformat,
|
||||
'subtitleslangs': opts.subtitleslangs,
|
||||
'matchtitle': opts.matchtitle,
|
||||
'rejecttitle': opts.rejecttitle,
|
||||
'matchtitle': decodeOption(opts.matchtitle),
|
||||
'rejecttitle': decodeOption(opts.rejecttitle),
|
||||
'max_downloads': opts.max_downloads,
|
||||
'prefer_free_formats': opts.prefer_free_formats,
|
||||
'trim_file_name': opts.trim_file_name,
|
||||
@ -940,7 +910,6 @@ def parse_options(argv=None):
|
||||
'postprocessors': postprocessors,
|
||||
'fixup': opts.fixup,
|
||||
'source_address': opts.source_address,
|
||||
'impersonate': opts.impersonate,
|
||||
'call_home': opts.call_home,
|
||||
'sleep_interval_requests': opts.sleep_interval_requests,
|
||||
'sleep_interval': opts.sleep_interval,
|
||||
@ -990,11 +959,6 @@ def _real_main(argv=None):
|
||||
if opts.ffmpeg_location:
|
||||
FFmpegPostProcessor._ffmpeg_location.set(opts.ffmpeg_location)
|
||||
|
||||
# load all plugins into the global lookup
|
||||
plugin_dirs.value = opts.plugin_dirs
|
||||
if plugin_dirs.value:
|
||||
_load_all_plugins()
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
pre_process = opts.update_self or opts.rm_cachedir
|
||||
actual_use = all_urls or opts.load_info_filename
|
||||
@ -1015,68 +979,11 @@ def _real_main(argv=None):
|
||||
traceback.print_exc()
|
||||
ydl._download_retcode = 100
|
||||
|
||||
if opts.list_impersonate_targets:
|
||||
|
||||
known_targets = [
|
||||
# List of simplified targets we know are supported,
|
||||
# to help users know what dependencies may be required.
|
||||
(ImpersonateTarget('chrome'), 'curl_cffi'),
|
||||
(ImpersonateTarget('safari'), 'curl_cffi'),
|
||||
(ImpersonateTarget('firefox'), 'curl_cffi>=0.10'),
|
||||
(ImpersonateTarget('edge'), 'curl_cffi'),
|
||||
]
|
||||
|
||||
available_targets = ydl._get_available_impersonate_targets()
|
||||
|
||||
def make_row(target, handler):
|
||||
return [
|
||||
join_nonempty(target.client.title(), target.version, delim='-') or '-',
|
||||
join_nonempty((target.os or '').title(), target.os_version, delim='-') or '-',
|
||||
handler,
|
||||
]
|
||||
|
||||
rows = [make_row(target, handler) for target, handler in available_targets]
|
||||
|
||||
for known_target, known_handler in known_targets:
|
||||
if not any(
|
||||
known_target in target and known_handler.startswith(handler)
|
||||
for target, handler in available_targets
|
||||
):
|
||||
rows.insert(0, [
|
||||
ydl._format_out(text, ydl.Styles.SUPPRESS)
|
||||
for text in make_row(known_target, f'{known_handler} (unavailable)')
|
||||
])
|
||||
|
||||
ydl.to_screen('[info] Available impersonate targets')
|
||||
ydl.to_stdout(render_table(['Client', 'OS', 'Source'], rows, extra_gap=2, delim='-'))
|
||||
return
|
||||
|
||||
if not actual_use:
|
||||
if pre_process:
|
||||
return ydl._download_retcode
|
||||
|
||||
args = sys.argv[1:] if argv is None else argv
|
||||
ydl.warn_if_short_id(args)
|
||||
|
||||
# Show a useful error message and wait for keypress if not launched from shell on Windows
|
||||
if not args and os.name == 'nt' and getattr(sys, 'frozen', False):
|
||||
import ctypes.wintypes
|
||||
import msvcrt
|
||||
|
||||
kernel32 = ctypes.WinDLL('Kernel32')
|
||||
|
||||
buffer = (1 * ctypes.wintypes.DWORD)()
|
||||
attached_processes = kernel32.GetConsoleProcessList(buffer, 1)
|
||||
# If we only have a single process attached, then the executable was double clicked
|
||||
# When using `pyinstaller` with `--onefile`, two processes get attached
|
||||
is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
|
||||
if attached_processes == 1 or (is_onefile and attached_processes == 2):
|
||||
print(parser._generate_error_message(
|
||||
'Do not double-click the executable, instead call it from a command line.\n'
|
||||
'Please read the README for further information on how to use yt-dlp: '
|
||||
'https://github.com/yt-dlp/yt-dlp#readme'))
|
||||
msvcrt.getch()
|
||||
_exit(2)
|
||||
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
|
||||
parser.error(
|
||||
'You must provide at least one URL.\n'
|
||||
'Type yt-dlp --help to see a list of all options.')
|
||||
@ -1095,10 +1002,11 @@ def _real_main(argv=None):
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
IN_CLI.value = True
|
||||
global _IN_CLI
|
||||
_IN_CLI = True
|
||||
try:
|
||||
_exit(*variadic(_real_main(argv)))
|
||||
except (CookieLoadError, DownloadError):
|
||||
except DownloadError:
|
||||
_exit(1)
|
||||
except SameFileError as e:
|
||||
_exit(f'ERROR: {e}')
|
||||
@ -1116,9 +1024,9 @@ def main(argv=None):
|
||||
from .extractor import gen_extractors, list_extractors
|
||||
|
||||
__all__ = [
|
||||
'main',
|
||||
'YoutubeDL',
|
||||
'parse_options',
|
||||
'gen_extractors',
|
||||
'list_extractors',
|
||||
'main',
|
||||
'parse_options',
|
||||
]
|
||||
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Execute with
|
||||
# $ python3 -m yt_dlp
|
||||
# $ python -m yt_dlp
|
||||
|
||||
import sys
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
import sys
|
||||
|
||||
from PyInstaller.utils.hooks import collect_submodules, collect_data_files
|
||||
from PyInstaller.utils.hooks import collect_submodules
|
||||
|
||||
|
||||
def pycryptodome_module():
|
||||
@ -10,7 +10,7 @@ def pycryptodome_module():
|
||||
try:
|
||||
import Crypto # noqa: F401
|
||||
print('WARNING: Using Crypto since Cryptodome is not available. '
|
||||
'Install with: python3 -m pip install pycryptodomex', file=sys.stderr)
|
||||
'Install with: pip install pycryptodomex', file=sys.stderr)
|
||||
return 'Crypto'
|
||||
except ImportError:
|
||||
pass
|
||||
@ -21,16 +21,12 @@ def get_hidden_imports():
|
||||
yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated')
|
||||
yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated')
|
||||
yield pycryptodome_module()
|
||||
# Only `websockets` is required, others are collected just in case
|
||||
for module in ('websockets', 'requests', 'urllib3'):
|
||||
yield from collect_submodules(module)
|
||||
yield from collect_submodules('websockets')
|
||||
# These are auto-detected, but explicitly add them just in case
|
||||
yield from ('mutagen', 'brotli', 'certifi', 'secretstorage', 'curl_cffi')
|
||||
yield from ('mutagen', 'brotli', 'certifi')
|
||||
|
||||
|
||||
hiddenimports = list(get_hidden_imports())
|
||||
print(f'Adding imports: {hiddenimports}')
|
||||
|
||||
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle']
|
||||
|
||||
datas = collect_data_files('curl_cffi', includes=['cacert.pem'])
|
||||
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts']
|
||||
|
@ -3,6 +3,7 @@ from math import ceil
|
||||
|
||||
from .compat import compat_ord
|
||||
from .dependencies import Cryptodome
|
||||
from .utils import bytes_to_intlist, intlist_to_bytes
|
||||
|
||||
if Cryptodome.AES:
|
||||
def aes_cbc_decrypt_bytes(data, key, iv):
|
||||
@ -16,15 +17,15 @@ if Cryptodome.AES:
|
||||
else:
|
||||
def aes_cbc_decrypt_bytes(data, key, iv):
|
||||
""" Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """
|
||||
return bytes(aes_cbc_decrypt(*map(list, (data, key, iv))))
|
||||
return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv))))
|
||||
|
||||
def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
|
||||
""" Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """
|
||||
return bytes(aes_gcm_decrypt_and_verify(*map(list, (data, key, tag, nonce))))
|
||||
return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce))))
|
||||
|
||||
|
||||
def aes_cbc_encrypt_bytes(data, key, iv, **kwargs):
|
||||
return bytes(aes_cbc_encrypt(*map(list, (data, key, iv)), **kwargs))
|
||||
return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs))
|
||||
|
||||
|
||||
BLOCK_SIZE_BYTES = 16
|
||||
@ -67,7 +68,7 @@ def pad_block(block, padding_mode):
|
||||
raise NotImplementedError(f'Padding mode {padding_mode} is not implemented')
|
||||
|
||||
if padding_mode == 'iso7816' and padding_size:
|
||||
block = [*block, 0x80] # NB: += mutates list
|
||||
block = block + [0x80] # NB: += mutates list
|
||||
padding_size -= 1
|
||||
|
||||
return block + [PADDING_BYTE[padding_mode]] * padding_size
|
||||
@ -83,7 +84,7 @@ def aes_ecb_encrypt(data, key, iv=None):
|
||||
@returns {int[]} encrypted data
|
||||
"""
|
||||
expanded_key = key_expansion(key)
|
||||
block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
|
||||
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
|
||||
|
||||
encrypted_data = []
|
||||
for i in range(block_count):
|
||||
@ -103,13 +104,15 @@ def aes_ecb_decrypt(data, key, iv=None):
|
||||
@returns {int[]} decrypted data
|
||||
"""
|
||||
expanded_key = key_expansion(key)
|
||||
block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
|
||||
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
|
||||
|
||||
encrypted_data = []
|
||||
for i in range(block_count):
|
||||
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
|
||||
encrypted_data += aes_decrypt(block, expanded_key)
|
||||
return encrypted_data[:len(data)]
|
||||
encrypted_data = encrypted_data[:len(data)]
|
||||
|
||||
return encrypted_data
|
||||
|
||||
|
||||
def aes_ctr_decrypt(data, key, iv):
|
||||
@ -134,7 +137,7 @@ def aes_ctr_encrypt(data, key, iv):
|
||||
@returns {int[]} encrypted data
|
||||
"""
|
||||
expanded_key = key_expansion(key)
|
||||
block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
|
||||
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
|
||||
counter = iter_vector(iv)
|
||||
|
||||
encrypted_data = []
|
||||
@ -145,7 +148,9 @@ def aes_ctr_encrypt(data, key, iv):
|
||||
|
||||
cipher_counter_block = aes_encrypt(counter_block, expanded_key)
|
||||
encrypted_data += xor(block, cipher_counter_block)
|
||||
return encrypted_data[:len(data)]
|
||||
encrypted_data = encrypted_data[:len(data)]
|
||||
|
||||
return encrypted_data
|
||||
|
||||
|
||||
def aes_cbc_decrypt(data, key, iv):
|
||||
@ -158,7 +163,7 @@ def aes_cbc_decrypt(data, key, iv):
|
||||
@returns {int[]} decrypted data
|
||||
"""
|
||||
expanded_key = key_expansion(key)
|
||||
block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
|
||||
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
|
||||
|
||||
decrypted_data = []
|
||||
previous_cipher_block = iv
|
||||
@ -169,7 +174,9 @@ def aes_cbc_decrypt(data, key, iv):
|
||||
decrypted_block = aes_decrypt(block, expanded_key)
|
||||
decrypted_data += xor(decrypted_block, previous_cipher_block)
|
||||
previous_cipher_block = block
|
||||
return decrypted_data[:len(data)]
|
||||
decrypted_data = decrypted_data[:len(data)]
|
||||
|
||||
return decrypted_data
|
||||
|
||||
|
||||
def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'):
|
||||
@ -183,7 +190,7 @@ def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'):
|
||||
@returns {int[]} encrypted data
|
||||
"""
|
||||
expanded_key = key_expansion(key)
|
||||
block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
|
||||
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
|
||||
|
||||
encrypted_data = []
|
||||
previous_cipher_block = iv
|
||||
@ -217,10 +224,10 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
|
||||
hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key))
|
||||
|
||||
if len(nonce) == 12:
|
||||
j0 = [*nonce, 0, 0, 0, 1]
|
||||
j0 = nonce + [0, 0, 0, 1]
|
||||
else:
|
||||
fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8
|
||||
ghash_in = nonce + [0] * fill + list((8 * len(nonce)).to_bytes(8, 'big'))
|
||||
ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big'))
|
||||
j0 = ghash(hash_subkey, ghash_in)
|
||||
|
||||
# TODO: add nonce support to aes_ctr_decrypt
|
||||
@ -229,17 +236,17 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
|
||||
iv_ctr = inc(j0)
|
||||
|
||||
decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr)))
|
||||
pad_len = (BLOCK_SIZE_BYTES - (len(data) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES
|
||||
pad_len = len(data) // 16 * 16
|
||||
s_tag = ghash(
|
||||
hash_subkey,
|
||||
data
|
||||
+ [0] * pad_len # pad
|
||||
+ list((0 * 8).to_bytes(8, 'big') # length of associated data
|
||||
+ ((len(data) * 8).to_bytes(8, 'big'))), # length of data
|
||||
+ [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad
|
||||
+ bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data
|
||||
+ ((len(data) * 8).to_bytes(8, 'big'))) # length of data
|
||||
)
|
||||
|
||||
if tag != aes_ctr_encrypt(s_tag, key, j0):
|
||||
raise ValueError('Mismatching authentication tag')
|
||||
raise ValueError("Mismatching authentication tag")
|
||||
|
||||
return decrypted_data
|
||||
|
||||
@ -281,7 +288,9 @@ def aes_decrypt(data, expanded_key):
|
||||
data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV))
|
||||
data = shift_rows_inv(data)
|
||||
data = sub_bytes_inv(data)
|
||||
return xor(data, expanded_key[:BLOCK_SIZE_BYTES])
|
||||
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def aes_decrypt_text(data, password, key_size_bytes):
|
||||
@ -299,8 +308,8 @@ def aes_decrypt_text(data, password, key_size_bytes):
|
||||
"""
|
||||
NONCE_LENGTH_BYTES = 8
|
||||
|
||||
data = list(base64.b64decode(data))
|
||||
password = list(password.encode())
|
||||
data = bytes_to_intlist(base64.b64decode(data))
|
||||
password = bytes_to_intlist(password.encode())
|
||||
|
||||
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
|
||||
key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
|
||||
@ -309,7 +318,9 @@ def aes_decrypt_text(data, password, key_size_bytes):
|
||||
cipher = data[NONCE_LENGTH_BYTES:]
|
||||
|
||||
decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES))
|
||||
return bytes(decrypted_data)
|
||||
plaintext = intlist_to_bytes(decrypted_data)
|
||||
|
||||
return plaintext
|
||||
|
||||
|
||||
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
|
||||
@ -417,7 +428,9 @@ def key_expansion(data):
|
||||
for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
|
||||
temp = data[-4:]
|
||||
data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
|
||||
return data[:expanded_key_size_bytes]
|
||||
data = data[:expanded_key_size_bytes]
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def iter_vector(iv):
|
||||
@ -498,7 +511,7 @@ def block_product(block_x, block_y):
|
||||
# NIST SP 800-38D, Algorithm 1
|
||||
|
||||
if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES:
|
||||
raise ValueError(f'Length of blocks need to be {BLOCK_SIZE_BYTES} bytes')
|
||||
raise ValueError("Length of blocks need to be %d bytes" % BLOCK_SIZE_BYTES)
|
||||
|
||||
block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1)
|
||||
block_v = block_y[:]
|
||||
@ -521,7 +534,7 @@ def ghash(subkey, data):
|
||||
# NIST SP 800-38D, Algorithm 2
|
||||
|
||||
if len(data) % BLOCK_SIZE_BYTES:
|
||||
raise ValueError(f'Length of data should be {BLOCK_SIZE_BYTES} bytes')
|
||||
raise ValueError("Length of data should be %d bytes" % BLOCK_SIZE_BYTES)
|
||||
|
||||
last_y = [0] * BLOCK_SIZE_BYTES
|
||||
for i in range(0, len(data), BLOCK_SIZE_BYTES):
|
||||
@ -534,17 +547,19 @@ def ghash(subkey, data):
|
||||
__all__ = [
|
||||
'aes_cbc_decrypt',
|
||||
'aes_cbc_decrypt_bytes',
|
||||
'aes_cbc_encrypt',
|
||||
'aes_cbc_encrypt_bytes',
|
||||
'aes_ctr_decrypt',
|
||||
'aes_ctr_encrypt',
|
||||
'aes_decrypt',
|
||||
'aes_decrypt_text',
|
||||
'aes_decrypt',
|
||||
'aes_ecb_decrypt',
|
||||
'aes_ecb_encrypt',
|
||||
'aes_encrypt',
|
||||
'aes_gcm_decrypt_and_verify',
|
||||
'aes_gcm_decrypt_and_verify_bytes',
|
||||
|
||||
'aes_cbc_encrypt',
|
||||
'aes_cbc_encrypt_bytes',
|
||||
'aes_ctr_encrypt',
|
||||
'aes_ecb_encrypt',
|
||||
'aes_encrypt',
|
||||
|
||||
'key_expansion',
|
||||
'pad_block',
|
||||
'pkcs7_padding',
|
||||
|
@ -81,10 +81,10 @@ class Cache:
|
||||
|
||||
cachedir = self._get_root_dir()
|
||||
if not any((term in cachedir) for term in ('cache', 'tmp')):
|
||||
raise Exception(f'Not removing directory {cachedir} - this does not look like a cache dir')
|
||||
raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
|
||||
|
||||
self._ydl.to_screen(
|
||||
f'Removing cache dir {cachedir} .', skip_eol=True)
|
||||
'Removing cache dir %s .' % cachedir, skip_eol=True)
|
||||
if os.path.exists(cachedir):
|
||||
self._ydl.to_screen('.', skip_eol=True)
|
||||
shutil.rmtree(cachedir)
|
||||
|
5
plugins/youtube_download/yt_dlp/casefold.py
Normal file
5
plugins/youtube_download/yt_dlp/casefold.py
Normal file
@ -0,0 +1,5 @@
|
||||
import warnings
|
||||
|
||||
warnings.warn(DeprecationWarning(f'{__name__} is deprecated'))
|
||||
|
||||
casefold = str.casefold
|
@ -1,4 +1,5 @@
|
||||
import os
|
||||
import sys
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .compat_utils import passthrough_module
|
||||
@ -23,14 +24,36 @@ def compat_etree_fromstring(text):
|
||||
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
|
||||
|
||||
|
||||
compat_os_name = os._name if os.name == 'java' else os.name
|
||||
|
||||
|
||||
if compat_os_name == 'nt':
|
||||
def compat_shlex_quote(s):
|
||||
import re
|
||||
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
|
||||
else:
|
||||
from shlex import quote as compat_shlex_quote # noqa: F401
|
||||
|
||||
|
||||
def compat_ord(c):
|
||||
return c if isinstance(c, int) else ord(c)
|
||||
|
||||
|
||||
if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
||||
# os.path.realpath on Windows does not follow symbolic links
|
||||
# prior to Python 3.8 (see https://bugs.python.org/issue9949)
|
||||
def compat_realpath(path):
|
||||
while os.path.islink(path):
|
||||
path = os.path.abspath(os.readlink(path))
|
||||
return os.path.realpath(path)
|
||||
else:
|
||||
compat_realpath = os.path.realpath
|
||||
|
||||
|
||||
# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/792
|
||||
# https://docs.python.org/3/library/os.path.html#os.path.expanduser
|
||||
if os.name in ('nt', 'ce'):
|
||||
if compat_os_name in ('nt', 'ce'):
|
||||
def compat_expanduser(path):
|
||||
HOME = os.environ.get('HOME')
|
||||
if not HOME:
|
||||
|
@ -8,14 +8,16 @@ passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn(
|
||||
DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
|
||||
del passthrough_module
|
||||
|
||||
import functools # noqa: F401
|
||||
import os
|
||||
import base64
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
compat_str = str
|
||||
|
||||
compat_os_name = os.name
|
||||
compat_realpath = os.path.realpath
|
||||
compat_b64decode = base64.b64decode
|
||||
|
||||
|
||||
def compat_shlex_quote(s):
|
||||
from ..utils import shell_quote
|
||||
return shell_quote(s)
|
||||
compat_urlparse = urllib.parse
|
||||
compat_parse_qs = urllib.parse.parse_qs
|
||||
compat_urllib_parse_unquote = urllib.parse.unquote
|
||||
compat_urllib_parse_urlencode = urllib.parse.urlencode
|
||||
compat_urllib_parse_urlparse = urllib.parse.urlparse
|
||||
|
@ -30,12 +30,11 @@ from asyncio import run as compat_asyncio_run # noqa: F401
|
||||
from re import Pattern as compat_Pattern # noqa: F401
|
||||
from re import match as compat_Match # noqa: F401
|
||||
|
||||
from . import compat_expanduser, compat_HTMLParseError
|
||||
from . import compat_expanduser, compat_HTMLParseError, compat_realpath
|
||||
from .compat_utils import passthrough_module
|
||||
from ..dependencies import brotli as compat_brotli # noqa: F401
|
||||
from ..dependencies import websockets as compat_websockets # noqa: F401
|
||||
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
|
||||
from ..networking.exceptions import HTTPError as compat_HTTPError
|
||||
|
||||
passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
|
||||
|
||||
@ -71,6 +70,7 @@ compat_html_parser_HTMLParseError = compat_HTMLParseError
|
||||
compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
|
||||
compat_http_client = http.client
|
||||
compat_http_server = http.server
|
||||
compat_HTTPError = urllib.error.HTTPError
|
||||
compat_input = input
|
||||
compat_integer_types = (int, )
|
||||
compat_itertools_count = itertools.count
|
||||
@ -78,7 +78,7 @@ compat_kwargs = lambda kwargs: kwargs
|
||||
compat_map = map
|
||||
compat_numeric_types = (int, float, complex)
|
||||
compat_os_path_expanduser = compat_expanduser
|
||||
compat_os_path_realpath = os.path.realpath
|
||||
compat_os_path_realpath = compat_realpath
|
||||
compat_print = print
|
||||
compat_shlex_split = shlex.split
|
||||
compat_socket_create_connection = socket.create_connection
|
||||
@ -88,7 +88,7 @@ compat_struct_unpack = struct.unpack
|
||||
compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
|
||||
compat_tokenize_tokenize = tokenize.tokenize
|
||||
compat_urllib_error = urllib.error
|
||||
compat_urllib_HTTPError = compat_HTTPError
|
||||
compat_urllib_HTTPError = urllib.error.HTTPError
|
||||
compat_urllib_parse = urllib.parse
|
||||
compat_urllib_parse_parse_qs = urllib.parse.parse_qs
|
||||
compat_urllib_parse_quote = urllib.parse.quote
|
||||
@ -104,12 +104,5 @@ compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseEr
|
||||
compat_xpath = lambda xpath: xpath
|
||||
compat_zip = zip
|
||||
workaround_optparse_bug9161 = lambda: None
|
||||
compat_str = str
|
||||
compat_b64decode = base64.b64decode
|
||||
compat_urlparse = urllib.parse
|
||||
compat_parse_qs = urllib.parse.parse_qs
|
||||
compat_urllib_parse_unquote = urllib.parse.unquote
|
||||
compat_urllib_parse_urlencode = urllib.parse.urlencode
|
||||
compat_urllib_parse_urlparse = urllib.parse.urlparse
|
||||
|
||||
legacy = []
|
||||
|
@ -15,7 +15,7 @@ def get_package_info(module):
|
||||
name=getattr(module, '_yt_dlp__identifier', module.__name__),
|
||||
version=str(next(filter(None, (
|
||||
getattr(module, attr, None)
|
||||
for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version')
|
||||
for attr in ('__version__', 'version_string', 'version')
|
||||
)), None)))
|
||||
|
||||
|
||||
@ -57,7 +57,7 @@ def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=la
|
||||
callback(attr)
|
||||
return ret
|
||||
|
||||
@functools.cache
|
||||
@functools.lru_cache(maxsize=None)
|
||||
def from_child(attr):
|
||||
nonlocal child
|
||||
if attr not in allowed_attributes:
|
||||
|
26
plugins/youtube_download/yt_dlp/compat/functools.py
Normal file
26
plugins/youtube_download/yt_dlp/compat/functools.py
Normal file
@ -0,0 +1,26 @@
|
||||
# flake8: noqa: F405
|
||||
from functools import * # noqa: F403
|
||||
|
||||
from .compat_utils import passthrough_module
|
||||
|
||||
passthrough_module(__name__, 'functools')
|
||||
del passthrough_module
|
||||
|
||||
try:
|
||||
cache # >= 3.9
|
||||
except NameError:
|
||||
cache = lru_cache(maxsize=None)
|
||||
|
||||
try:
|
||||
cached_property # >= 3.8
|
||||
except NameError:
|
||||
class cached_property:
|
||||
def __init__(self, func):
|
||||
update_wrapper(self, func)
|
||||
self.func = func
|
||||
|
||||
def __get__(self, instance, _):
|
||||
if instance is None:
|
||||
return self
|
||||
setattr(instance, self.func.__name__, self.func(instance))
|
||||
return getattr(instance, self.func.__name__)
|
@ -1,22 +1,16 @@
|
||||
tests = {
|
||||
'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP',
|
||||
'png': lambda h: h[:8] == b'\211PNG\r\n\032\n',
|
||||
'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'),
|
||||
'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'),
|
||||
}
|
||||
|
||||
|
||||
def what(file=None, h=None):
|
||||
"""Detect format of image (Currently supports jpeg, png, webp, gif only)
|
||||
Ref: https://github.com/python/cpython/blob/3.11/Lib/imghdr.py
|
||||
Ref: https://www.w3.org/Graphics/JPEG/itu-t81.pdf
|
||||
Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py
|
||||
"""
|
||||
if h is None:
|
||||
with open(file, 'rb') as f:
|
||||
h = f.read(12)
|
||||
|
||||
if h.startswith(b'RIFF') and h.startswith(b'WEBP', 8):
|
||||
return 'webp'
|
||||
|
||||
if h.startswith(b'\x89PNG'):
|
||||
return 'png'
|
||||
|
||||
if h.startswith(b'\xFF\xD8\xFF'):
|
||||
return 'jpeg'
|
||||
|
||||
if h.startswith(b'GIF'):
|
||||
return 'gif'
|
||||
|
||||
return None
|
||||
return next((type_ for type_, test in tests.items() if test(h)), None)
|
||||
|
@ -1,7 +1,7 @@
|
||||
# flake8: noqa: F405
|
||||
from urllib import * # noqa: F403
|
||||
|
||||
del request # noqa: F821
|
||||
del request
|
||||
from . import request # noqa: F401
|
||||
|
||||
from ..compat_utils import passthrough_module
|
||||
|
@ -7,13 +7,13 @@ passthrough_module(__name__, 'urllib.request')
|
||||
del passthrough_module
|
||||
|
||||
|
||||
import os
|
||||
from .. import compat_os_name
|
||||
|
||||
if os.name == 'nt':
|
||||
# On older Python versions, proxies are extracted from Windows registry erroneously. [1]
|
||||
if compat_os_name == 'nt':
|
||||
# On older python versions, proxies are extracted from Windows registry erroneously. [1]
|
||||
# If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2]
|
||||
# It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade
|
||||
# it to http on these older Python versions to avoid issues
|
||||
# it to http on these older python versions to avoid issues
|
||||
# This also applies for ftp proxy type, as ftp:// proxy scheme is not supported.
|
||||
# 1: https://github.com/python/cpython/issues/86793
|
||||
# 2: https://github.com/python/cpython/blob/51f1ae5ceb0673316c4e4b0175384e892e33cc6e/Lib/urllib/request.py#L2683-L2698
|
||||
@ -37,4 +37,4 @@ if os.name == 'nt':
|
||||
def getproxies():
|
||||
return getproxies_environment() or getproxies_registry_patched()
|
||||
|
||||
del os
|
||||
del compat_os_name
|
||||
|
@ -1,10 +1,6 @@
|
||||
import base64
|
||||
import collections
|
||||
import contextlib
|
||||
import datetime as dt
|
||||
import functools
|
||||
import glob
|
||||
import hashlib
|
||||
import http.cookiejar
|
||||
import http.cookies
|
||||
import io
|
||||
@ -18,13 +14,16 @@ import sys
|
||||
import tempfile
|
||||
import time
|
||||
import urllib.request
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from enum import Enum, auto
|
||||
from hashlib import pbkdf2_hmac
|
||||
|
||||
from .aes import (
|
||||
aes_cbc_decrypt_bytes,
|
||||
aes_gcm_decrypt_and_verify_bytes,
|
||||
unpad_pkcs7,
|
||||
)
|
||||
from .compat import functools
|
||||
from .dependencies import (
|
||||
_SECRETSTORAGE_UNAVAILABLE_REASON,
|
||||
secretstorage,
|
||||
@ -32,8 +31,6 @@ from .dependencies import (
|
||||
)
|
||||
from .minicurses import MultilinePrinter, QuietMultilinePrinter
|
||||
from .utils import (
|
||||
DownloadError,
|
||||
YoutubeDLError,
|
||||
Popen,
|
||||
error_to_str,
|
||||
expand_path,
|
||||
@ -46,7 +43,7 @@ from .utils import (
|
||||
from .utils._utils import _YDLLogger
|
||||
from .utils.networking import normalize_url
|
||||
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
||||
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
|
||||
|
||||
|
||||
@ -86,12 +83,7 @@ def _create_progress_bar(logger):
|
||||
return printer
|
||||
|
||||
|
||||
class CookieLoadError(YoutubeDLError):
|
||||
pass
|
||||
|
||||
|
||||
def load_cookies(cookie_file, browser_specification, ydl):
|
||||
try:
|
||||
cookie_jars = []
|
||||
if browser_specification is not None:
|
||||
browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
|
||||
@ -109,8 +101,6 @@ def load_cookies(cookie_file, browser_specification, ydl):
|
||||
cookie_jars.append(jar)
|
||||
|
||||
return _merge_cookie_jars(cookie_jars)
|
||||
except Exception:
|
||||
raise CookieLoadError('failed to load cookies')
|
||||
|
||||
|
||||
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
|
||||
@ -128,18 +118,17 @@ def _extract_firefox_cookies(profile, container, logger):
|
||||
logger.info('Extracting cookies from firefox')
|
||||
if not sqlite3:
|
||||
logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
|
||||
'Please use a Python interpreter compiled with sqlite3 support')
|
||||
'Please use a python interpreter compiled with sqlite3 support')
|
||||
return YoutubeDLCookieJar()
|
||||
|
||||
if profile is None:
|
||||
search_roots = list(_firefox_browser_dirs())
|
||||
search_root = _firefox_browser_dir()
|
||||
elif _is_path(profile):
|
||||
search_roots = [profile]
|
||||
search_root = profile
|
||||
else:
|
||||
search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
|
||||
search_root = ', '.join(map(repr, search_roots))
|
||||
search_root = os.path.join(_firefox_browser_dir(), profile)
|
||||
|
||||
cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
|
||||
cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
|
||||
if cookie_database_path is None:
|
||||
raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
|
||||
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
||||
@ -153,7 +142,7 @@ def _extract_firefox_cookies(profile, container, logger):
|
||||
identities = json.load(containers).get('identities', [])
|
||||
container_id = next((context.get('userContextId') for context in identities if container in (
|
||||
context.get('name'),
|
||||
try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()),
|
||||
try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group())
|
||||
)), None)
|
||||
if not isinstance(container_id, int):
|
||||
raise ValueError(f'could not find firefox container "{container}" in containers.json')
|
||||
@ -193,28 +182,12 @@ def _extract_firefox_cookies(profile, container, logger):
|
||||
cursor.connection.close()
|
||||
|
||||
|
||||
def _firefox_browser_dirs():
|
||||
def _firefox_browser_dir():
|
||||
if sys.platform in ('cygwin', 'win32'):
|
||||
yield from map(os.path.expandvars, (
|
||||
R'%APPDATA%\Mozilla\Firefox\Profiles',
|
||||
R'%LOCALAPPDATA%\Packages\Mozilla.Firefox_n80bbvh6b1yt2\LocalCache\Roaming\Mozilla\Firefox\Profiles',
|
||||
))
|
||||
|
||||
return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
|
||||
elif sys.platform == 'darwin':
|
||||
yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
|
||||
|
||||
else:
|
||||
yield from map(os.path.expanduser, (
|
||||
'~/.mozilla/firefox',
|
||||
'~/snap/firefox/common/.mozilla/firefox',
|
||||
'~/.var/app/org.mozilla.firefox/.mozilla/firefox',
|
||||
))
|
||||
|
||||
|
||||
def _firefox_cookie_dbs(roots):
|
||||
for root in map(os.path.abspath, roots):
|
||||
for pattern in ('', '*/', 'Profiles/*/'):
|
||||
yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite'))
|
||||
return os.path.expanduser('~/Library/Application Support/Firefox')
|
||||
return os.path.expanduser('~/.mozilla/firefox')
|
||||
|
||||
|
||||
def _get_chromium_based_browser_settings(browser_name):
|
||||
@ -229,7 +202,6 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
|
||||
'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
|
||||
'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
|
||||
'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'),
|
||||
}[browser_name]
|
||||
|
||||
elif sys.platform == 'darwin':
|
||||
@ -241,7 +213,6 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
'edge': os.path.join(appdata, 'Microsoft Edge'),
|
||||
'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
|
||||
'vivaldi': os.path.join(appdata, 'Vivaldi'),
|
||||
'whale': os.path.join(appdata, 'Naver/Whale'),
|
||||
}[browser_name]
|
||||
|
||||
else:
|
||||
@ -253,7 +224,6 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
'edge': os.path.join(config, 'microsoft-edge'),
|
||||
'opera': os.path.join(config, 'opera'),
|
||||
'vivaldi': os.path.join(config, 'vivaldi'),
|
||||
'whale': os.path.join(config, 'naver-whale'),
|
||||
}[browser_name]
|
||||
|
||||
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
|
||||
@ -265,7 +235,6 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
|
||||
'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
|
||||
'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
|
||||
'whale': 'Whale',
|
||||
}[browser_name]
|
||||
|
||||
browsers_without_profiles = {'opera'}
|
||||
@ -273,7 +242,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
return {
|
||||
'browser_dir': browser_dir,
|
||||
'keyring_name': keyring_name,
|
||||
'supports_profiles': browser_name not in browsers_without_profiles,
|
||||
'supports_profiles': browser_name not in browsers_without_profiles
|
||||
}
|
||||
|
||||
|
||||
@ -282,7 +251,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
|
||||
|
||||
if not sqlite3:
|
||||
logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
|
||||
'Please use a Python interpreter compiled with sqlite3 support')
|
||||
'Please use a python interpreter compiled with sqlite3 support')
|
||||
return YoutubeDLCookieJar()
|
||||
|
||||
config = _get_chromium_based_browser_settings(browser_name)
|
||||
@ -299,23 +268,17 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
|
||||
logger.error(f'{browser_name} does not support profiles')
|
||||
search_root = config['browser_dir']
|
||||
|
||||
cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
|
||||
cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
|
||||
if cookie_database_path is None:
|
||||
raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
|
||||
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
||||
|
||||
decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)
|
||||
|
||||
with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
|
||||
cursor = None
|
||||
try:
|
||||
cursor = _open_database_copy(cookie_database_path, tmpdir)
|
||||
|
||||
# meta_version is necessary to determine if we need to trim the hash prefix from the cookies
|
||||
# Ref: https://chromium.googlesource.com/chromium/src/+/b02dcebd7cafab92770734dc2bc317bd07f1d891/net/extras/sqlite/sqlite_persistent_cookie_store.cc#223
|
||||
meta_version = int(cursor.execute('SELECT value FROM meta WHERE key = "version"').fetchone()[0])
|
||||
decryptor = get_cookie_decryptor(
|
||||
config['browser_dir'], config['keyring_name'], logger,
|
||||
keyring=keyring, meta_version=meta_version)
|
||||
|
||||
cursor.connection.text_factory = bytes
|
||||
column_names = _get_column_names(cursor, 'cookies')
|
||||
secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
|
||||
@ -344,12 +307,6 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
|
||||
counts['unencrypted'] = unencrypted_cookies
|
||||
logger.debug(f'cookie version breakdown: {counts}')
|
||||
return jar
|
||||
except PermissionError as error:
|
||||
if os.name == 'nt' and error.errno == 13:
|
||||
message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
|
||||
logger.error(message)
|
||||
raise DownloadError(message) # force exit
|
||||
raise
|
||||
finally:
|
||||
if cursor is not None:
|
||||
cursor.connection.close()
|
||||
@ -367,11 +324,6 @@ def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, pa
|
||||
if value is None:
|
||||
return is_encrypted, None
|
||||
|
||||
# In chrome, session cookies have expires_utc set to 0
|
||||
# In our cookie-store, cookies that do not expire should have expires set to None
|
||||
if not expires_utc:
|
||||
expires_utc = None
|
||||
|
||||
return is_encrypted, http.cookiejar.Cookie(
|
||||
version=0, name=name, value=value, port=None, port_specified=False,
|
||||
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
|
||||
@ -413,23 +365,22 @@ class ChromeCookieDecryptor:
|
||||
raise NotImplementedError('Must be implemented by sub classes')
|
||||
|
||||
|
||||
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None, meta_version=None):
|
||||
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
|
||||
if sys.platform == 'darwin':
|
||||
return MacChromeCookieDecryptor(browser_keyring_name, logger, meta_version=meta_version)
|
||||
return MacChromeCookieDecryptor(browser_keyring_name, logger)
|
||||
elif sys.platform in ('win32', 'cygwin'):
|
||||
return WindowsChromeCookieDecryptor(browser_root, logger, meta_version=meta_version)
|
||||
return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring, meta_version=meta_version)
|
||||
return WindowsChromeCookieDecryptor(browser_root, logger)
|
||||
return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
|
||||
|
||||
|
||||
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
def __init__(self, browser_keyring_name, logger, *, keyring=None, meta_version=None):
|
||||
def __init__(self, browser_keyring_name, logger, *, keyring=None):
|
||||
self._logger = logger
|
||||
self._v10_key = self.derive_key(b'peanuts')
|
||||
self._empty_key = self.derive_key(b'')
|
||||
self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
|
||||
self._browser_keyring_name = browser_keyring_name
|
||||
self._keyring = keyring
|
||||
self._meta_version = meta_version or 0
|
||||
|
||||
@functools.cached_property
|
||||
def _v11_key(self):
|
||||
@ -458,18 +409,14 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
|
||||
if version == b'v10':
|
||||
self._cookie_counts['v10'] += 1
|
||||
return _decrypt_aes_cbc_multi(
|
||||
ciphertext, (self._v10_key, self._empty_key), self._logger,
|
||||
hash_prefix=self._meta_version >= 24)
|
||||
return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)
|
||||
|
||||
elif version == b'v11':
|
||||
self._cookie_counts['v11'] += 1
|
||||
if self._v11_key is None:
|
||||
self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
|
||||
return None
|
||||
return _decrypt_aes_cbc_multi(
|
||||
ciphertext, (self._v11_key, self._empty_key), self._logger,
|
||||
hash_prefix=self._meta_version >= 24)
|
||||
return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger)
|
||||
|
||||
else:
|
||||
self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
|
||||
@ -478,12 +425,11 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
|
||||
|
||||
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
def __init__(self, browser_keyring_name, logger, meta_version=None):
|
||||
def __init__(self, browser_keyring_name, logger):
|
||||
self._logger = logger
|
||||
password = _get_mac_keyring_password(browser_keyring_name, logger)
|
||||
self._v10_key = None if password is None else self.derive_key(password)
|
||||
self._cookie_counts = {'v10': 0, 'other': 0}
|
||||
self._meta_version = meta_version or 0
|
||||
|
||||
@staticmethod
|
||||
def derive_key(password):
|
||||
@ -501,8 +447,7 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
|
||||
return None
|
||||
|
||||
return _decrypt_aes_cbc_multi(
|
||||
ciphertext, (self._v10_key,), self._logger, hash_prefix=self._meta_version >= 24)
|
||||
return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger)
|
||||
|
||||
else:
|
||||
self._cookie_counts['other'] += 1
|
||||
@ -512,11 +457,10 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
|
||||
|
||||
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
def __init__(self, browser_root, logger, meta_version=None):
|
||||
def __init__(self, browser_root, logger):
|
||||
self._logger = logger
|
||||
self._v10_key = _get_windows_v10_key(browser_root, logger)
|
||||
self._cookie_counts = {'v10': 0, 'other': 0}
|
||||
self._meta_version = meta_version or 0
|
||||
|
||||
def decrypt(self, encrypted_value):
|
||||
version = encrypted_value[:3]
|
||||
@ -540,9 +484,7 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
|
||||
authentication_tag = raw_ciphertext[-authentication_tag_length:]
|
||||
|
||||
return _decrypt_aes_gcm(
|
||||
ciphertext, self._v10_key, nonce, authentication_tag, self._logger,
|
||||
hash_prefix=self._meta_version >= 24)
|
||||
return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
|
||||
|
||||
else:
|
||||
self._cookie_counts['other'] += 1
|
||||
@ -633,7 +575,7 @@ class DataParser:
|
||||
|
||||
|
||||
def _mac_absolute_time_to_posix(timestamp):
|
||||
return int((dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc) + dt.timedelta(seconds=timestamp)).timestamp())
|
||||
return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
|
||||
|
||||
|
||||
def _parse_safari_cookies_header(data, logger):
|
||||
@ -766,19 +708,20 @@ def _get_linux_desktop_environment(env, logger):
|
||||
xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
|
||||
desktop_session = env.get('DESKTOP_SESSION', None)
|
||||
if xdg_current_desktop is not None:
|
||||
for part in map(str.strip, xdg_current_desktop.split(':')):
|
||||
if part == 'Unity':
|
||||
xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()
|
||||
|
||||
if xdg_current_desktop == 'Unity':
|
||||
if desktop_session is not None and 'gnome-fallback' in desktop_session:
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.UNITY
|
||||
elif part == 'Deepin':
|
||||
elif xdg_current_desktop == 'Deepin':
|
||||
return _LinuxDesktopEnvironment.DEEPIN
|
||||
elif part == 'GNOME':
|
||||
elif xdg_current_desktop == 'GNOME':
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
elif part == 'X-Cinnamon':
|
||||
elif xdg_current_desktop == 'X-Cinnamon':
|
||||
return _LinuxDesktopEnvironment.CINNAMON
|
||||
elif part == 'KDE':
|
||||
elif xdg_current_desktop == 'KDE':
|
||||
kde_version = env.get('KDE_SESSION_VERSION', None)
|
||||
if kde_version == '5':
|
||||
return _LinuxDesktopEnvironment.KDE5
|
||||
@ -789,14 +732,15 @@ def _get_linux_desktop_environment(env, logger):
|
||||
else:
|
||||
logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
elif part == 'Pantheon':
|
||||
elif xdg_current_desktop == 'Pantheon':
|
||||
return _LinuxDesktopEnvironment.PANTHEON
|
||||
elif part == 'XFCE':
|
||||
elif xdg_current_desktop == 'XFCE':
|
||||
return _LinuxDesktopEnvironment.XFCE
|
||||
elif part == 'UKUI':
|
||||
elif xdg_current_desktop == 'UKUI':
|
||||
return _LinuxDesktopEnvironment.UKUI
|
||||
elif part == 'LXQt':
|
||||
elif xdg_current_desktop == 'LXQt':
|
||||
return _LinuxDesktopEnvironment.LXQT
|
||||
else:
|
||||
logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
|
||||
|
||||
elif desktop_session is not None:
|
||||
@ -850,7 +794,7 @@ def _choose_linux_keyring(logger):
|
||||
elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
|
||||
linux_keyring = _LinuxKeyring.KWALLET6
|
||||
elif desktop_environment in (
|
||||
_LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER,
|
||||
_LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
|
||||
):
|
||||
linux_keyring = _LinuxKeyring.BASICTEXT
|
||||
else:
|
||||
@ -885,7 +829,7 @@ def _get_kwallet_network_wallet(keyring, logger):
|
||||
'dbus-send', '--session', '--print-reply=literal',
|
||||
f'--dest={service_name}',
|
||||
wallet_path,
|
||||
'org.kde.KWallet.networkWallet',
|
||||
'org.kde.KWallet.networkWallet'
|
||||
], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
|
||||
|
||||
if returncode:
|
||||
@ -915,7 +859,7 @@ def _get_kwallet_password(browser_keyring_name, keyring, logger):
|
||||
'kwallet-query',
|
||||
'--read-password', f'{browser_keyring_name} Safe Storage',
|
||||
'--folder', f'{browser_keyring_name} Keys',
|
||||
network_wallet,
|
||||
network_wallet
|
||||
], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
|
||||
|
||||
if returncode:
|
||||
@ -955,6 +899,7 @@ def _get_gnome_keyring_password(browser_keyring_name, logger):
|
||||
for item in col.get_all_items():
|
||||
if item.get_label() == f'{browser_keyring_name} Safe Storage':
|
||||
return item.get_secret()
|
||||
else:
|
||||
logger.error('failed to read from keyring')
|
||||
return b''
|
||||
|
||||
@ -1002,7 +947,7 @@ def _get_windows_v10_key(browser_root, logger):
|
||||
References:
|
||||
- [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
|
||||
"""
|
||||
path = _newest(_find_files(browser_root, 'Local State', logger))
|
||||
path = _find_most_recently_used_file(browser_root, 'Local State', logger)
|
||||
if path is None:
|
||||
logger.error('could not find local state file')
|
||||
return None
|
||||
@ -1025,15 +970,13 @@ def _get_windows_v10_key(browser_root, logger):
|
||||
|
||||
|
||||
def pbkdf2_sha1(password, salt, iterations, key_length):
|
||||
return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length)
|
||||
return pbkdf2_hmac('sha1', password, salt, iterations, key_length)
|
||||
|
||||
|
||||
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16, hash_prefix=False):
|
||||
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
|
||||
for key in keys:
|
||||
plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
|
||||
try:
|
||||
if hash_prefix:
|
||||
return plaintext[32:].decode()
|
||||
return plaintext.decode()
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
@ -1041,7 +984,7 @@ def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' '
|
||||
return None
|
||||
|
||||
|
||||
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger, hash_prefix=False):
|
||||
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
|
||||
try:
|
||||
plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
|
||||
except ValueError:
|
||||
@ -1049,8 +992,6 @@ def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger, hash_pr
|
||||
return None
|
||||
|
||||
try:
|
||||
if hash_prefix:
|
||||
return plaintext[32:].decode()
|
||||
return plaintext.decode()
|
||||
except UnicodeDecodeError:
|
||||
logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
|
||||
@ -1080,12 +1021,11 @@ def _decrypt_windows_dpapi(ciphertext, logger):
|
||||
None, # pvReserved: must be NULL
|
||||
None, # pPromptStruct: information about prompts to display
|
||||
0, # dwFlags
|
||||
ctypes.byref(blob_out), # pDataOut
|
||||
ctypes.byref(blob_out) # pDataOut
|
||||
)
|
||||
if not ret:
|
||||
message = 'Failed to decrypt with DPAPI. See https://github.com/yt-dlp/yt-dlp/issues/10927 for more info'
|
||||
logger.error(message)
|
||||
raise DownloadError(message) # force exit
|
||||
logger.warning('failed to decrypt with DPAPI', only_once=True)
|
||||
return None
|
||||
|
||||
result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
|
||||
ctypes.windll.kernel32.LocalFree(blob_out.pbData)
|
||||
@ -1109,20 +1049,17 @@ def _get_column_names(cursor, table_name):
|
||||
return [row[1].decode() for row in table_info]
|
||||
|
||||
|
||||
def _newest(files):
|
||||
return max(files, key=lambda path: os.lstat(path).st_mtime, default=None)
|
||||
|
||||
|
||||
def _find_files(root, filename, logger):
|
||||
def _find_most_recently_used_file(root, filename, logger):
|
||||
# if there are multiple browser profiles, take the most recently used one
|
||||
i = 0
|
||||
i, paths = 0, []
|
||||
with _create_progress_bar(logger) as progress_bar:
|
||||
for curr_root, _, files in os.walk(root):
|
||||
for curr_root, dirs, files in os.walk(root):
|
||||
for file in files:
|
||||
i += 1
|
||||
progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
|
||||
if file == filename:
|
||||
yield os.path.join(curr_root, file)
|
||||
paths.append(os.path.join(curr_root, file))
|
||||
return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
|
||||
|
||||
|
||||
def _merge_cookie_jars(jars):
|
||||
@ -1136,7 +1073,7 @@ def _merge_cookie_jars(jars):
|
||||
|
||||
|
||||
def _is_path(value):
|
||||
return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
|
||||
return os.path.sep in value
|
||||
|
||||
|
||||
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
|
||||
@ -1157,24 +1094,24 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
_LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
|
||||
|
||||
_RESERVED = {
|
||||
'expires',
|
||||
'path',
|
||||
'comment',
|
||||
'domain',
|
||||
'max-age',
|
||||
'secure',
|
||||
'httponly',
|
||||
'version',
|
||||
'samesite',
|
||||
"expires",
|
||||
"path",
|
||||
"comment",
|
||||
"domain",
|
||||
"max-age",
|
||||
"secure",
|
||||
"httponly",
|
||||
"version",
|
||||
"samesite",
|
||||
}
|
||||
|
||||
_FLAGS = {'secure', 'httponly'}
|
||||
_FLAGS = {"secure", "httponly"}
|
||||
|
||||
# Added 'bad' group to catch the remaining value
|
||||
_COOKIE_PATTERN = re.compile(r'''
|
||||
_COOKIE_PATTERN = re.compile(r"""
|
||||
\s* # Optional whitespace at start of cookie
|
||||
(?P<key> # Start of group 'key'
|
||||
[''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter
|
||||
[""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
|
||||
) # End of group 'key'
|
||||
( # Optional group: there may not be a value.
|
||||
\s*=\s* # Equal Sign
|
||||
@ -1184,7 +1121,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
| # or
|
||||
\w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
|
||||
| # or
|
||||
[''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string
|
||||
[""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string
|
||||
) # End of group 'val'
|
||||
| # or
|
||||
(?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
|
||||
@ -1192,7 +1129,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
)? # End of optional value group
|
||||
\s* # Any number of spaces.
|
||||
(\s+|;|$) # Ending either at space, semicolon, or EOS.
|
||||
''', re.ASCII | re.VERBOSE)
|
||||
""", re.ASCII | re.VERBOSE)
|
||||
|
||||
def load(self, data):
|
||||
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
|
||||
@ -1279,8 +1216,8 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
|
||||
def _really_save(self, f, ignore_discard, ignore_expires):
|
||||
now = time.time()
|
||||
for cookie in self:
|
||||
if ((not ignore_discard and cookie.discard)
|
||||
or (not ignore_expires and cookie.is_expired(now))):
|
||||
if (not ignore_discard and cookie.discard
|
||||
or not ignore_expires and cookie.is_expired(now)):
|
||||
continue
|
||||
name, value = cookie.name, cookie.value
|
||||
if value is None:
|
||||
@ -1288,14 +1225,14 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
|
||||
# with no name, whereas http.cookiejar regards it as a
|
||||
# cookie with no value.
|
||||
name, value = '', name
|
||||
f.write('{}\n'.format('\t'.join((
|
||||
f.write('%s\n' % '\t'.join((
|
||||
cookie.domain,
|
||||
self._true_or_false(cookie.domain.startswith('.')),
|
||||
cookie.path,
|
||||
self._true_or_false(cookie.secure),
|
||||
str_or_none(cookie.expires, default=''),
|
||||
name, value,
|
||||
))))
|
||||
name, value
|
||||
)))
|
||||
|
||||
def save(self, filename=None, ignore_discard=True, ignore_expires=True):
|
||||
"""
|
||||
@ -1334,10 +1271,10 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
|
||||
return line
|
||||
cookie_list = line.split('\t')
|
||||
if len(cookie_list) != self._ENTRY_LEN:
|
||||
raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}')
|
||||
raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
|
||||
cookie = self._CookieFileEntry(*cookie_list)
|
||||
if cookie.expires_at and not cookie.expires_at.isdigit():
|
||||
raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}')
|
||||
raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
|
||||
return line
|
||||
|
||||
cf = io.StringIO()
|
||||
|
@ -24,7 +24,7 @@ try:
|
||||
from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401
|
||||
from Crypto.Hash import CMAC, SHA1 # noqa: F401
|
||||
from Crypto.PublicKey import RSA # noqa: F401
|
||||
except (ImportError, OSError):
|
||||
except ImportError:
|
||||
__version__ = f'broken {__version__}'.strip()
|
||||
|
||||
|
||||
|
@ -43,28 +43,19 @@ except Exception as _err:
|
||||
|
||||
try:
|
||||
import sqlite3
|
||||
# We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152
|
||||
sqlite3._yt_dlp__version = sqlite3.sqlite_version
|
||||
except ImportError:
|
||||
# although sqlite3 is part of the standard library, it is possible to compile Python without
|
||||
# although sqlite3 is part of the standard library, it is possible to compile python without
|
||||
# sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
|
||||
sqlite3 = None
|
||||
|
||||
|
||||
try:
|
||||
import websockets
|
||||
except ImportError:
|
||||
except (ImportError, SyntaxError):
|
||||
# websockets 3.10 on python 3.6 causes SyntaxError
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/2633
|
||||
websockets = None
|
||||
|
||||
try:
|
||||
import urllib3
|
||||
except ImportError:
|
||||
urllib3 = None
|
||||
|
||||
try:
|
||||
import requests
|
||||
except ImportError:
|
||||
requests = None
|
||||
|
||||
try:
|
||||
import xattr # xattr or pyxattr
|
||||
@ -74,10 +65,6 @@ else:
|
||||
if hasattr(xattr, 'set'): # pyxattr
|
||||
xattr._yt_dlp__identifier = 'pyxattr'
|
||||
|
||||
try:
|
||||
import curl_cffi
|
||||
except ImportError:
|
||||
curl_cffi = None
|
||||
|
||||
from . import Cryptodome
|
||||
|
||||
|
@ -30,12 +30,11 @@ from .hls import HlsFD
|
||||
from .http import HttpFD
|
||||
from .ism import IsmFD
|
||||
from .mhtml import MhtmlFD
|
||||
from .niconico import NiconicoLiveFD
|
||||
from .niconico import NiconicoDmcFD, NiconicoLiveFD
|
||||
from .rtmp import RtmpFD
|
||||
from .rtsp import RtspFD
|
||||
from .websocket import WebSocketFragmentFD
|
||||
from .youtube_live_chat import YoutubeLiveChatFD
|
||||
from .bunnycdn import BunnyCdnFD
|
||||
|
||||
PROTOCOL_MAP = {
|
||||
'rtmp': RtmpFD,
|
||||
@ -50,12 +49,12 @@ PROTOCOL_MAP = {
|
||||
'http_dash_segments_generator': DashSegmentsFD,
|
||||
'ism': IsmFD,
|
||||
'mhtml': MhtmlFD,
|
||||
'niconico_dmc': NiconicoDmcFD,
|
||||
'niconico_live': NiconicoLiveFD,
|
||||
'fc2_live': FC2LiveFD,
|
||||
'websocket_frag': WebSocketFragmentFD,
|
||||
'youtube_live_chat': YoutubeLiveChatFD,
|
||||
'youtube_live_chat_replay': YoutubeLiveChatFD,
|
||||
'bunnycdn': BunnyCdnFD,
|
||||
}
|
||||
|
||||
|
||||
@ -66,6 +65,7 @@ def shorten_protocol_name(proto, simplify=False):
|
||||
'rtmp_ffmpeg': 'rtmpF',
|
||||
'http_dash_segments': 'dash',
|
||||
'http_dash_segments_generator': 'dashG',
|
||||
'niconico_dmc': 'dmc',
|
||||
'websocket_frag': 'WSfrag',
|
||||
}
|
||||
if simplify:
|
||||
|
@ -1,50 +0,0 @@
|
||||
import hashlib
|
||||
import random
|
||||
import threading
|
||||
|
||||
from .common import FileDownloader
|
||||
from . import HlsFD
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import network_exceptions
|
||||
|
||||
|
||||
class BunnyCdnFD(FileDownloader):
|
||||
"""
|
||||
Downloads from BunnyCDN with required pings
|
||||
Note, this is not a part of public API, and will be removed without notice.
|
||||
DO NOT USE
|
||||
"""
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
self.to_screen(f'[{self.FD_NAME}] Downloading from BunnyCDN')
|
||||
|
||||
fd = HlsFD(self.ydl, self.params)
|
||||
|
||||
stop_event = threading.Event()
|
||||
ping_thread = threading.Thread(target=self.ping_thread, args=(stop_event,), kwargs=info_dict['_bunnycdn_ping_data'])
|
||||
ping_thread.start()
|
||||
|
||||
try:
|
||||
return fd.real_download(filename, info_dict)
|
||||
finally:
|
||||
stop_event.set()
|
||||
|
||||
def ping_thread(self, stop_event, url, headers, secret, context_id):
|
||||
# Site sends ping every 4 seconds, but this throttles the download. Pinging every 2 seconds seems to work.
|
||||
ping_interval = 2
|
||||
# Hard coded resolution as it doesn't seem to matter
|
||||
res = 1080
|
||||
paused = 'false'
|
||||
current_time = 0
|
||||
|
||||
while not stop_event.wait(ping_interval):
|
||||
current_time += ping_interval
|
||||
|
||||
time = current_time + round(random.random(), 6)
|
||||
md5_hash = hashlib.md5(f'{secret}_{context_id}_{time}_{paused}_{res}'.encode()).hexdigest()
|
||||
ping_url = f'{url}?hash={md5_hash}&time={time}&paused={paused}&resolution={res}'
|
||||
|
||||
try:
|
||||
self.ydl.urlopen(Request(ping_url, headers=headers)).read()
|
||||
except network_exceptions as e:
|
||||
self.to_screen(f'[{self.FD_NAME}] Ping failed: {e}')
|
@ -4,7 +4,6 @@ import functools
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
|
||||
from ..minicurses import (
|
||||
@ -20,7 +19,9 @@ from ..utils import (
|
||||
Namespace,
|
||||
RetryManager,
|
||||
classproperty,
|
||||
decodeArgument,
|
||||
deprecation_warning,
|
||||
encodeFilename,
|
||||
format_bytes,
|
||||
join_nonempty,
|
||||
parse_bytes,
|
||||
@ -31,7 +32,6 @@ from ..utils import (
|
||||
timetuple_from_msec,
|
||||
try_call,
|
||||
)
|
||||
from ..utils._utils import _ProgressState
|
||||
|
||||
|
||||
class FileDownloader:
|
||||
@ -63,7 +63,6 @@ class FileDownloader:
|
||||
min_filesize: Skip files smaller than this size
|
||||
max_filesize: Skip files larger than this size
|
||||
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
|
||||
progress_delta: The minimum time between progress output, in seconds
|
||||
external_downloader_args: A dictionary of downloader keys (in lower case)
|
||||
and a list of additional command-line arguments for the
|
||||
executable. Use 'default' as the name for arguments to be
|
||||
@ -89,9 +88,6 @@ class FileDownloader:
|
||||
self.params = params
|
||||
self._prepare_multiline_status()
|
||||
self.add_progress_hook(self.report_progress)
|
||||
if self.params.get('progress_delta'):
|
||||
self._progress_delta_lock = threading.Lock()
|
||||
self._progress_delta_time = time.monotonic()
|
||||
|
||||
def _set_ydl(self, ydl):
|
||||
self.ydl = ydl
|
||||
@ -218,7 +214,7 @@ class FileDownloader:
|
||||
def temp_name(self, filename):
|
||||
"""Returns a temporary filename for the given filename."""
|
||||
if self.params.get('nopart', False) or filename == '-' or \
|
||||
(os.path.exists(filename) and not os.path.isfile(filename)):
|
||||
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
|
||||
return filename
|
||||
return filename + '.part'
|
||||
|
||||
@ -272,7 +268,7 @@ class FileDownloader:
|
||||
"""Try to set the last-modified time of the given file."""
|
||||
if last_modified_hdr is None:
|
||||
return
|
||||
if not os.path.isfile(filename):
|
||||
if not os.path.isfile(encodeFilename(filename)):
|
||||
return
|
||||
timestr = last_modified_hdr
|
||||
if timestr is None:
|
||||
@ -334,7 +330,7 @@ class FileDownloader:
|
||||
progress_dict), s.get('progress_idx') or 0)
|
||||
self.to_console_title(self.ydl.evaluate_outtmpl(
|
||||
progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
|
||||
progress_dict), _ProgressState.from_dict(s), s.get('_percent'))
|
||||
progress_dict))
|
||||
|
||||
def _format_progress(self, *args, **kwargs):
|
||||
return self.ydl._format_text(
|
||||
@ -358,7 +354,6 @@ class FileDownloader:
|
||||
'_speed_str': self.format_speed(speed).strip(),
|
||||
'_total_bytes_str': _format_bytes('total_bytes'),
|
||||
'_elapsed_str': self.format_seconds(s.get('elapsed')),
|
||||
'_percent': 100.0,
|
||||
'_percent_str': self.format_percent(100),
|
||||
})
|
||||
self._report_progress_status(s, join_nonempty(
|
||||
@ -371,21 +366,13 @@ class FileDownloader:
|
||||
if s['status'] != 'downloading':
|
||||
return
|
||||
|
||||
if update_delta := self.params.get('progress_delta'):
|
||||
with self._progress_delta_lock:
|
||||
if time.monotonic() < self._progress_delta_time:
|
||||
return
|
||||
self._progress_delta_time += update_delta
|
||||
|
||||
progress = try_call(
|
||||
lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
|
||||
lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
|
||||
lambda: s['downloaded_bytes'] == 0 and 0)
|
||||
s.update({
|
||||
'_eta_str': self.format_eta(s.get('eta')).strip(),
|
||||
'_speed_str': self.format_speed(s.get('speed')),
|
||||
'_percent': progress,
|
||||
'_percent_str': self.format_percent(progress),
|
||||
'_percent_str': self.format_percent(try_call(
|
||||
lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
|
||||
lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
|
||||
lambda: s['downloaded_bytes'] == 0 and 0)),
|
||||
'_total_bytes_str': _format_bytes('total_bytes'),
|
||||
'_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
|
||||
'_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
|
||||
@ -406,7 +393,7 @@ class FileDownloader:
|
||||
|
||||
def report_resuming_byte(self, resume_len):
|
||||
"""Report attempt to resume at given byte."""
|
||||
self.to_screen(f'[download] Resuming download at byte {resume_len}')
|
||||
self.to_screen('[download] Resuming download at byte %s' % resume_len)
|
||||
|
||||
def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
|
||||
"""Report retry"""
|
||||
@ -434,13 +421,13 @@ class FileDownloader:
|
||||
"""
|
||||
nooverwrites_and_exists = (
|
||||
not self.params.get('overwrites', True)
|
||||
and os.path.exists(filename)
|
||||
and os.path.exists(encodeFilename(filename))
|
||||
)
|
||||
|
||||
if not hasattr(filename, 'write'):
|
||||
continuedl_and_exists = (
|
||||
self.params.get('continuedl', True)
|
||||
and os.path.isfile(filename)
|
||||
and os.path.isfile(encodeFilename(filename))
|
||||
and not self.params.get('nopart', False)
|
||||
)
|
||||
|
||||
@ -450,7 +437,7 @@ class FileDownloader:
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'total_bytes': os.path.getsize(filename),
|
||||
'total_bytes': os.path.getsize(encodeFilename(filename)),
|
||||
}, info_dict)
|
||||
self._finish_multiline_status()
|
||||
return True, False
|
||||
@ -491,7 +478,9 @@ class FileDownloader:
|
||||
if not self.params.get('verbose', False):
|
||||
return
|
||||
|
||||
if exe is None:
|
||||
exe = os.path.basename(args[0])
|
||||
str_args = [decodeArgument(a) for a in args]
|
||||
|
||||
self.write_debug(f'{exe} command line: {shell_quote(args)}')
|
||||
if exe is None:
|
||||
exe = os.path.basename(str_args[0])
|
||||
|
||||
self.write_debug(f'{exe} command line: {shell_quote(str_args)}')
|
||||
|
@ -15,15 +15,12 @@ class DashSegmentsFD(FragmentFD):
|
||||
FD_NAME = 'dashsegments'
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
if 'http_dash_segments_generator' in info_dict['protocol'].split('+'):
|
||||
real_downloader = None # No external FD can support --live-from-start
|
||||
else:
|
||||
if info_dict.get('is_live'):
|
||||
if info_dict.get('is_live') and set(info_dict['protocol'].split('+')) != {'http_dash_segments_generator'}:
|
||||
self.report_error('Live DASH videos are not supported')
|
||||
real_downloader = get_suitable_downloader(
|
||||
info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-'))
|
||||
|
||||
real_start = time.time()
|
||||
real_downloader = get_suitable_downloader(
|
||||
info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-'))
|
||||
|
||||
requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])]
|
||||
args = []
|
||||
|
@ -1,5 +1,4 @@
|
||||
import enum
|
||||
import functools
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@ -10,6 +9,7 @@ import time
|
||||
import uuid
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import functools
|
||||
from ..networking import Request
|
||||
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
|
||||
from ..utils import (
|
||||
@ -23,6 +23,7 @@ from ..utils import (
|
||||
cli_valueless_option,
|
||||
determine_ext,
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
find_available_port,
|
||||
remove_end,
|
||||
traverse_obj,
|
||||
@ -54,7 +55,7 @@ class ExternalFD(FragmentFD):
|
||||
# correct and expected termination thus all postprocessing
|
||||
# should take place
|
||||
retval = 0
|
||||
self.to_screen(f'[{self.get_basename()}] Interrupted by user')
|
||||
self.to_screen('[%s] Interrupted by user' % self.get_basename())
|
||||
finally:
|
||||
if self._cookies_tempfile:
|
||||
self.try_remove(self._cookies_tempfile)
|
||||
@ -66,7 +67,7 @@ class ExternalFD(FragmentFD):
|
||||
'elapsed': time.time() - started,
|
||||
}
|
||||
if filename != '-':
|
||||
fsize = os.path.getsize(tmpfilename)
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
status.update({
|
||||
'downloaded_bytes': fsize,
|
||||
@ -107,7 +108,7 @@ class ExternalFD(FragmentFD):
|
||||
return all((
|
||||
not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES,
|
||||
'+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES,
|
||||
not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url', 'extra_param_to_key_url'),
|
||||
not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'),
|
||||
all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')),
|
||||
))
|
||||
|
||||
@ -171,7 +172,7 @@ class ExternalFD(FragmentFD):
|
||||
decrypt_fragment = self.decrypter(info_dict)
|
||||
dest, _ = self.sanitize_open(tmpfilename, 'wb')
|
||||
for frag_index, fragment in enumerate(info_dict['fragments']):
|
||||
fragment_filename = f'{tmpfilename}-Frag{frag_index}'
|
||||
fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
|
||||
try:
|
||||
src, _ = self.sanitize_open(fragment_filename, 'rb')
|
||||
except OSError as err:
|
||||
@ -183,9 +184,9 @@ class ExternalFD(FragmentFD):
|
||||
dest.write(decrypt_fragment(fragment, src.read()))
|
||||
src.close()
|
||||
if not self.params.get('keep_fragments', False):
|
||||
self.try_remove(fragment_filename)
|
||||
self.try_remove(encodeFilename(fragment_filename))
|
||||
dest.close()
|
||||
self.try_remove(f'{tmpfilename}.frag.urls')
|
||||
self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename))
|
||||
return 0
|
||||
|
||||
def _call_process(self, cmd, info_dict):
|
||||
@ -334,12 +335,12 @@ class Aria2cFD(ExternalFD):
|
||||
cmd += ['--auto-file-renaming=false']
|
||||
|
||||
if 'fragments' in info_dict:
|
||||
cmd += ['--uri-selector=inorder']
|
||||
url_list_file = f'{tmpfilename}.frag.urls'
|
||||
cmd += ['--file-allocation=none', '--uri-selector=inorder']
|
||||
url_list_file = '%s.frag.urls' % tmpfilename
|
||||
url_list = []
|
||||
for frag_index, fragment in enumerate(info_dict['fragments']):
|
||||
fragment_filename = f'{os.path.basename(tmpfilename)}-Frag{frag_index}'
|
||||
url_list.append('{}\n\tout={}'.format(fragment['url'], self._aria2c_filename(fragment_filename)))
|
||||
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
|
||||
url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
|
||||
stream, _ = self.sanitize_open(url_list_file, 'wb')
|
||||
stream.write('\n'.join(url_list).encode())
|
||||
stream.close()
|
||||
@ -356,7 +357,7 @@ class Aria2cFD(ExternalFD):
|
||||
'id': sanitycheck,
|
||||
'method': method,
|
||||
'params': [f'token:{rpc_secret}', *params],
|
||||
}).encode()
|
||||
}).encode('utf-8')
|
||||
request = Request(
|
||||
f'http://localhost:{rpc_port}/jsonrpc',
|
||||
data=d, headers={
|
||||
@ -415,7 +416,7 @@ class Aria2cFD(ExternalFD):
|
||||
'total_bytes_estimate': total,
|
||||
'eta': (total - downloaded) / (speed or 1),
|
||||
'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None,
|
||||
'elapsed': time.time() - started,
|
||||
'elapsed': time.time() - started
|
||||
})
|
||||
self._hook_progress(status, info_dict)
|
||||
|
||||
@ -457,6 +458,8 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
@classmethod
|
||||
def available(cls, path=None):
|
||||
# TODO: Fix path for ffmpeg
|
||||
# Fixme: This may be wrong when --ffmpeg-location is used
|
||||
return FFmpegPostProcessor().available
|
||||
|
||||
def on_process_started(self, proc, stdin):
|
||||
@ -488,7 +491,7 @@ class FFmpegFD(ExternalFD):
|
||||
if not self.params.get('verbose'):
|
||||
args += ['-hide_banner']
|
||||
|
||||
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args', ...))
|
||||
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[])
|
||||
|
||||
# These exists only for compatibility. Extractors should use
|
||||
# info_dict['downloader_options']['ffmpeg_args'] instead
|
||||
@ -505,13 +508,13 @@ class FFmpegFD(ExternalFD):
|
||||
env = None
|
||||
proxy = self.params.get('proxy')
|
||||
if proxy:
|
||||
if not re.match(r'[\da-zA-Z]+://', proxy):
|
||||
proxy = f'http://{proxy}'
|
||||
if not re.match(r'^[\da-zA-Z]+://', proxy):
|
||||
proxy = 'http://%s' % proxy
|
||||
|
||||
if proxy.startswith('socks'):
|
||||
self.report_warning(
|
||||
f'{self.get_basename()} does not support SOCKS proxies. Downloading is likely to fail. '
|
||||
'Consider adding --hls-prefer-native to your command.')
|
||||
'%s does not support SOCKS proxies. Downloading is likely to fail. '
|
||||
'Consider adding --hls-prefer-native to your command.' % self.get_basename())
|
||||
|
||||
# Since December 2015 ffmpeg supports -http_proxy option (see
|
||||
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
|
||||
@ -556,7 +559,7 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
selected_formats = info_dict.get('requested_formats') or [info_dict]
|
||||
for i, fmt in enumerate(selected_formats):
|
||||
is_http = re.match(r'https?://', fmt['url'])
|
||||
is_http = re.match(r'^https?://', fmt['url'])
|
||||
cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
|
||||
if cookies:
|
||||
args.extend(['-cookies', ''.join(
|
||||
@ -572,7 +575,7 @@ class FFmpegFD(ExternalFD):
|
||||
if end_time:
|
||||
args += ['-t', str(end_time - start_time)]
|
||||
|
||||
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']]
|
||||
args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']]
|
||||
|
||||
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
|
||||
args += ['-c', 'copy']
|
||||
@ -612,12 +615,10 @@ class FFmpegFD(ExternalFD):
|
||||
else:
|
||||
args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
|
||||
|
||||
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args_out', ...))
|
||||
|
||||
args += self._configuration_args(('_o1', '_o', ''))
|
||||
|
||||
args = [encodeArgument(opt) for opt in args]
|
||||
args.append(ffpp._ffmpeg_filename_argument(tmpfilename))
|
||||
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
||||
self._debug_cmd(args)
|
||||
|
||||
piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats)
|
||||
|
@ -67,12 +67,12 @@ class FlvReader(io.BytesIO):
|
||||
self.read_bytes(3)
|
||||
quality_entry_count = self.read_unsigned_char()
|
||||
# QualityEntryCount
|
||||
for _ in range(quality_entry_count):
|
||||
for i in range(quality_entry_count):
|
||||
self.read_string()
|
||||
|
||||
segment_run_count = self.read_unsigned_int()
|
||||
segments = []
|
||||
for _ in range(segment_run_count):
|
||||
for i in range(segment_run_count):
|
||||
first_segment = self.read_unsigned_int()
|
||||
fragments_per_segment = self.read_unsigned_int()
|
||||
segments.append((first_segment, fragments_per_segment))
|
||||
@ -91,12 +91,12 @@ class FlvReader(io.BytesIO):
|
||||
|
||||
quality_entry_count = self.read_unsigned_char()
|
||||
# QualitySegmentUrlModifiers
|
||||
for _ in range(quality_entry_count):
|
||||
for i in range(quality_entry_count):
|
||||
self.read_string()
|
||||
|
||||
fragments_count = self.read_unsigned_int()
|
||||
fragments = []
|
||||
for _ in range(fragments_count):
|
||||
for i in range(fragments_count):
|
||||
first = self.read_unsigned_int()
|
||||
first_ts = self.read_unsigned_long_long()
|
||||
duration = self.read_unsigned_int()
|
||||
@ -135,11 +135,11 @@ class FlvReader(io.BytesIO):
|
||||
self.read_string() # MovieIdentifier
|
||||
server_count = self.read_unsigned_char()
|
||||
# ServerEntryTable
|
||||
for _ in range(server_count):
|
||||
for i in range(server_count):
|
||||
self.read_string()
|
||||
quality_count = self.read_unsigned_char()
|
||||
# QualityEntryTable
|
||||
for _ in range(quality_count):
|
||||
for i in range(quality_count):
|
||||
self.read_string()
|
||||
# DrmData
|
||||
self.read_string()
|
||||
@ -148,14 +148,14 @@ class FlvReader(io.BytesIO):
|
||||
|
||||
segments_count = self.read_unsigned_char()
|
||||
segments = []
|
||||
for _ in range(segments_count):
|
||||
for i in range(segments_count):
|
||||
box_size, box_type, box_data = self.read_box_info()
|
||||
assert box_type == b'asrt'
|
||||
segment = FlvReader(box_data).read_asrt()
|
||||
segments.append(segment)
|
||||
fragments_run_count = self.read_unsigned_char()
|
||||
fragments = []
|
||||
for _ in range(fragments_run_count):
|
||||
for i in range(fragments_run_count):
|
||||
box_size, box_type, box_data = self.read_box_info()
|
||||
assert box_type == b'afrt'
|
||||
fragments.append(FlvReader(box_data).read_afrt())
|
||||
@ -309,7 +309,7 @@ class F4mFD(FragmentFD):
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
requested_bitrate = info_dict.get('tbr')
|
||||
self.to_screen(f'[{self.FD_NAME}] Downloading f4m manifest')
|
||||
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
|
||||
|
||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||
man_url = urlh.url
|
||||
@ -326,8 +326,8 @@ class F4mFD(FragmentFD):
|
||||
formats = sorted(formats, key=lambda f: f[0])
|
||||
rate, media = formats[-1]
|
||||
else:
|
||||
rate, media = next(filter(
|
||||
lambda f: int(f[0]) == requested_bitrate, formats))
|
||||
rate, media = list(filter(
|
||||
lambda f: int(f[0]) == requested_bitrate, formats))[0]
|
||||
|
||||
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
|
||||
man_base_url = get_base_url(doc) or man_url
|
||||
|
@ -9,11 +9,11 @@ import time
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..compat import compat_os_name
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import HTTPError, IncompleteRead
|
||||
from ..utils import DownloadError, RetryManager, traverse_obj
|
||||
from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj
|
||||
from ..utils.networking import HTTPHeaderDict
|
||||
from ..utils.progress import ProgressCalculator
|
||||
|
||||
|
||||
class HttpQuietDownloader(HttpFD):
|
||||
@ -151,7 +151,7 @@ class FragmentFD(FileDownloader):
|
||||
if self.__do_ytdl_file(ctx):
|
||||
self._write_ytdl_file(ctx)
|
||||
if not self.params.get('keep_fragments', False):
|
||||
self.try_remove(ctx['fragment_filename_sanitized'])
|
||||
self.try_remove(encodeFilename(ctx['fragment_filename_sanitized']))
|
||||
del ctx['fragment_filename_sanitized']
|
||||
|
||||
def _prepare_frag_download(self, ctx):
|
||||
@ -187,7 +187,7 @@ class FragmentFD(FileDownloader):
|
||||
})
|
||||
|
||||
if self.__do_ytdl_file(ctx):
|
||||
ytdl_file_exists = os.path.isfile(self.ytdl_filename(ctx['filename']))
|
||||
ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
|
||||
continuedl = self.params.get('continuedl', True)
|
||||
if continuedl and ytdl_file_exists:
|
||||
self._read_ytdl_file(ctx)
|
||||
@ -198,7 +198,7 @@ class FragmentFD(FileDownloader):
|
||||
'.ytdl file is corrupt' if is_corrupt else
|
||||
'Inconsistent state of incomplete fragment download')
|
||||
self.report_warning(
|
||||
f'{message}. Restarting from the beginning ...')
|
||||
'%s. Restarting from the beginning ...' % message)
|
||||
ctx['fragment_index'] = resume_len = 0
|
||||
if 'ytdl_corrupt' in ctx:
|
||||
del ctx['ytdl_corrupt']
|
||||
@ -226,7 +226,8 @@ class FragmentFD(FileDownloader):
|
||||
resume_len = ctx['complete_frags_downloaded_bytes']
|
||||
total_frags = ctx['total_frags']
|
||||
ctx_id = ctx.get('ctx_id')
|
||||
# Stores the download progress, updated by the progress hook
|
||||
# This dict stores the download progress, it's updated by the progress
|
||||
# hook
|
||||
state = {
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': resume_len,
|
||||
@ -236,8 +237,14 @@ class FragmentFD(FileDownloader):
|
||||
'tmpfilename': ctx['tmpfilename'],
|
||||
}
|
||||
|
||||
ctx['started'] = time.time()
|
||||
progress = ProgressCalculator(resume_len)
|
||||
start = time.time()
|
||||
ctx.update({
|
||||
'started': start,
|
||||
'fragment_started': start,
|
||||
# Amount of fragment's bytes downloaded by the time of the previous
|
||||
# frag progress hook invocation
|
||||
'prev_frag_downloaded_bytes': 0,
|
||||
})
|
||||
|
||||
def frag_progress_hook(s):
|
||||
if s['status'] not in ('downloading', 'finished'):
|
||||
@ -252,35 +259,38 @@ class FragmentFD(FileDownloader):
|
||||
state['max_progress'] = ctx.get('max_progress')
|
||||
state['progress_idx'] = ctx.get('progress_idx')
|
||||
|
||||
state['elapsed'] = progress.elapsed
|
||||
time_now = time.time()
|
||||
state['elapsed'] = time_now - start
|
||||
frag_total_bytes = s.get('total_bytes') or 0
|
||||
s['fragment_info_dict'] = s.pop('info_dict', {})
|
||||
|
||||
# XXX: Fragment resume is not accounted for here
|
||||
if not ctx['live']:
|
||||
estimated_size = (
|
||||
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
|
||||
/ (state['fragment_index'] + 1) * total_frags)
|
||||
progress.total = estimated_size
|
||||
progress.update(s.get('downloaded_bytes'))
|
||||
state['total_bytes_estimate'] = progress.total
|
||||
else:
|
||||
progress.update(s.get('downloaded_bytes'))
|
||||
state['total_bytes_estimate'] = estimated_size
|
||||
|
||||
if s['status'] == 'finished':
|
||||
state['fragment_index'] += 1
|
||||
ctx['fragment_index'] = state['fragment_index']
|
||||
progress.thread_reset()
|
||||
|
||||
state['downloaded_bytes'] = ctx['complete_frags_downloaded_bytes'] = progress.downloaded
|
||||
state['speed'] = ctx['speed'] = progress.speed.smooth
|
||||
state['eta'] = progress.eta.smooth
|
||||
|
||||
state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
|
||||
ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
|
||||
ctx['speed'] = state['speed'] = self.calc_speed(
|
||||
ctx['fragment_started'], time_now, frag_total_bytes)
|
||||
ctx['fragment_started'] = time.time()
|
||||
ctx['prev_frag_downloaded_bytes'] = 0
|
||||
else:
|
||||
frag_downloaded_bytes = s['downloaded_bytes']
|
||||
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
||||
ctx['speed'] = state['speed'] = self.calc_speed(
|
||||
ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx.get('frag_resume_len', 0))
|
||||
if not ctx['live']:
|
||||
state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
|
||||
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
||||
self._hook_progress(state, info_dict)
|
||||
|
||||
ctx['dl'].add_progress_hook(frag_progress_hook)
|
||||
|
||||
return ctx['started']
|
||||
return start
|
||||
|
||||
def _finish_frag_download(self, ctx, info_dict):
|
||||
ctx['dest_stream'].close()
|
||||
@ -365,10 +375,10 @@ class FragmentFD(FileDownloader):
|
||||
return decrypt_fragment
|
||||
|
||||
def download_and_append_fragments_multiple(self, *args, **kwargs):
|
||||
"""
|
||||
'''
|
||||
@params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ...
|
||||
all args must be either tuple or list
|
||||
"""
|
||||
'''
|
||||
interrupt_trigger = [True]
|
||||
max_progress = len(args)
|
||||
if max_progress == 1:
|
||||
@ -389,7 +399,7 @@ class FragmentFD(FileDownloader):
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
pass
|
||||
|
||||
if os.name == 'nt':
|
||||
if compat_os_name == 'nt':
|
||||
def future_result(future):
|
||||
while True:
|
||||
try:
|
||||
@ -423,7 +433,7 @@ class FragmentFD(FileDownloader):
|
||||
finally:
|
||||
tpe.shutdown(wait=True)
|
||||
if not interrupt_trigger[0] and not is_live:
|
||||
raise KeyboardInterrupt
|
||||
raise KeyboardInterrupt()
|
||||
# we expect the user wants to stop and DO WANT the preceding postprocessors to run;
|
||||
# so returning a intermediate result here instead of KeyboardInterrupt on live
|
||||
return result
|
||||
@ -490,6 +500,7 @@ class FragmentFD(FileDownloader):
|
||||
download_fragment(fragment, ctx_copy)
|
||||
return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')
|
||||
|
||||
self.report_warning('The download speed shown is only of one thread. This is a known issue')
|
||||
with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
|
||||
try:
|
||||
for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
|
||||
|
@ -16,7 +16,6 @@ from ..utils import (
|
||||
update_url_query,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils._utils import _request_dump_filename
|
||||
|
||||
|
||||
class HlsFD(FragmentFD):
|
||||
@ -73,23 +72,11 @@ class HlsFD(FragmentFD):
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
|
||||
|
||||
s = info_dict.get('hls_media_playlist_data')
|
||||
if s:
|
||||
self.to_screen(f'[{self.FD_NAME}] Using m3u8 manifest from extracted info')
|
||||
else:
|
||||
self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest')
|
||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||
man_url = urlh.url
|
||||
s_bytes = urlh.read()
|
||||
if self.params.get('write_pages'):
|
||||
dump_filename = _request_dump_filename(
|
||||
man_url, info_dict['id'], None,
|
||||
trim_length=self.params.get('trim_file_name'))
|
||||
self.to_screen(f'[{self.FD_NAME}] Saving request to {dump_filename}')
|
||||
with open(dump_filename, 'wb') as outf:
|
||||
outf.write(s_bytes)
|
||||
s = s_bytes.decode('utf-8', 'ignore')
|
||||
s = urlh.read().decode('utf-8', 'ignore')
|
||||
|
||||
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
|
||||
if can_download:
|
||||
@ -132,12 +119,12 @@ class HlsFD(FragmentFD):
|
||||
self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
|
||||
|
||||
def is_ad_fragment_start(s):
|
||||
return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s)
|
||||
or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')))
|
||||
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
|
||||
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
|
||||
|
||||
def is_ad_fragment_end(s):
|
||||
return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s)
|
||||
or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')))
|
||||
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
|
||||
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
|
||||
|
||||
fragments = []
|
||||
|
||||
@ -173,12 +160,10 @@ class HlsFD(FragmentFD):
|
||||
extra_state = ctx.setdefault('extra_state', {})
|
||||
|
||||
format_index = info_dict.get('format_index')
|
||||
extra_segment_query = None
|
||||
if extra_param_to_segment_url := info_dict.get('extra_param_to_segment_url'):
|
||||
extra_segment_query = urllib.parse.parse_qs(extra_param_to_segment_url)
|
||||
extra_key_query = None
|
||||
if extra_param_to_key_url := info_dict.get('extra_param_to_key_url'):
|
||||
extra_key_query = urllib.parse.parse_qs(extra_param_to_key_url)
|
||||
extra_query = None
|
||||
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
|
||||
if extra_param_to_segment_url:
|
||||
extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
|
||||
i = 0
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
@ -190,7 +175,6 @@ class HlsFD(FragmentFD):
|
||||
if external_aes_iv:
|
||||
external_aes_iv = binascii.unhexlify(remove_start(external_aes_iv, '0x').zfill(32))
|
||||
byte_range = {}
|
||||
byte_range_offset = 0
|
||||
discontinuity_count = 0
|
||||
frag_index = 0
|
||||
ad_frag_next = False
|
||||
@ -206,8 +190,8 @@ class HlsFD(FragmentFD):
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
frag_url = urljoin(man_url, line)
|
||||
if extra_segment_query:
|
||||
frag_url = update_url_query(frag_url, extra_segment_query)
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
|
||||
fragments.append({
|
||||
'frag_index': frag_index,
|
||||
@ -218,11 +202,6 @@ class HlsFD(FragmentFD):
|
||||
})
|
||||
media_sequence += 1
|
||||
|
||||
# If the byte_range is truthy, reset it after appending a fragment that uses it
|
||||
if byte_range:
|
||||
byte_range_offset = byte_range['end']
|
||||
byte_range = {}
|
||||
|
||||
elif line.startswith('#EXT-X-MAP'):
|
||||
if format_index and discontinuity_count != format_index:
|
||||
continue
|
||||
@ -233,15 +212,13 @@ class HlsFD(FragmentFD):
|
||||
frag_index += 1
|
||||
map_info = parse_m3u8_attributes(line[11:])
|
||||
frag_url = urljoin(man_url, map_info.get('URI'))
|
||||
if extra_segment_query:
|
||||
frag_url = update_url_query(frag_url, extra_segment_query)
|
||||
|
||||
map_byte_range = {}
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
|
||||
if map_info.get('BYTERANGE'):
|
||||
splitted_byte_range = map_info.get('BYTERANGE').split('@')
|
||||
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else 0
|
||||
map_byte_range = {
|
||||
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
|
||||
byte_range = {
|
||||
'start': sub_range_start,
|
||||
'end': sub_range_start + int(splitted_byte_range[0]),
|
||||
}
|
||||
@ -250,8 +227,8 @@ class HlsFD(FragmentFD):
|
||||
'frag_index': frag_index,
|
||||
'url': frag_url,
|
||||
'decrypt_info': decrypt_info,
|
||||
'byte_range': map_byte_range,
|
||||
'media_sequence': media_sequence,
|
||||
'byte_range': byte_range,
|
||||
'media_sequence': media_sequence
|
||||
})
|
||||
media_sequence += 1
|
||||
|
||||
@ -267,10 +244,8 @@ class HlsFD(FragmentFD):
|
||||
decrypt_info['KEY'] = external_aes_key
|
||||
else:
|
||||
decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI'])
|
||||
if extra_key_query or extra_segment_query:
|
||||
# Fall back to extra_segment_query to key for backwards compat
|
||||
decrypt_info['URI'] = update_url_query(
|
||||
decrypt_info['URI'], extra_key_query or extra_segment_query)
|
||||
if extra_query:
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
|
||||
if decrypt_url != decrypt_info['URI']:
|
||||
decrypt_info['KEY'] = None
|
||||
|
||||
@ -278,7 +253,7 @@ class HlsFD(FragmentFD):
|
||||
media_sequence = int(line[22:])
|
||||
elif line.startswith('#EXT-X-BYTERANGE'):
|
||||
splitted_byte_range = line[17:].split('@')
|
||||
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range_offset
|
||||
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
|
||||
byte_range = {
|
||||
'start': sub_range_start,
|
||||
'end': sub_range_start + int(splitted_byte_range[0]),
|
||||
@ -375,8 +350,9 @@ class HlsFD(FragmentFD):
|
||||
# XXX: this should probably be silent as well
|
||||
# or verify that all segments contain the same data
|
||||
self.report_warning(bug_reports_message(
|
||||
f'Discarding a {type(block).__name__} block found in the middle of the stream; '
|
||||
'if the subtitles display incorrectly,'))
|
||||
'Discarding a %s block found in the middle of the stream; '
|
||||
'if the subtitles display incorrectly,'
|
||||
% (type(block).__name__)))
|
||||
continue
|
||||
block.write_into(output)
|
||||
|
||||
@ -393,9 +369,6 @@ class HlsFD(FragmentFD):
|
||||
|
||||
return output.getvalue().encode()
|
||||
|
||||
if len(fragments) == 1:
|
||||
self.download_and_append_fragments(ctx, fragments, info_dict)
|
||||
else:
|
||||
self.download_and_append_fragments(
|
||||
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
|
||||
else:
|
||||
|
@ -15,6 +15,7 @@ from ..utils import (
|
||||
ThrottledDownload,
|
||||
XAttrMetadataError,
|
||||
XAttrUnavailableError,
|
||||
encodeFilename,
|
||||
int_or_none,
|
||||
parse_http_range,
|
||||
try_call,
|
||||
@ -57,8 +58,9 @@ class HttpFD(FileDownloader):
|
||||
|
||||
if self.params.get('continuedl', True):
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(ctx.tmpfilename):
|
||||
ctx.resume_len = os.path.getsize(ctx.tmpfilename)
|
||||
if os.path.isfile(encodeFilename(ctx.tmpfilename)):
|
||||
ctx.resume_len = os.path.getsize(
|
||||
encodeFilename(ctx.tmpfilename))
|
||||
|
||||
ctx.is_resume = ctx.resume_len > 0
|
||||
|
||||
@ -174,7 +176,7 @@ class HttpFD(FileDownloader):
|
||||
'downloaded_bytes': ctx.resume_len,
|
||||
'total_bytes': ctx.resume_len,
|
||||
}, info_dict)
|
||||
raise SucceedDownload
|
||||
raise SucceedDownload()
|
||||
else:
|
||||
# The length does not match, we start the download over
|
||||
self.report_unable_to_resume()
|
||||
@ -192,7 +194,7 @@ class HttpFD(FileDownloader):
|
||||
|
||||
def close_stream():
|
||||
if ctx.stream is not None:
|
||||
if ctx.tmpfilename != '-':
|
||||
if not ctx.tmpfilename == '-':
|
||||
ctx.stream.close()
|
||||
ctx.stream = None
|
||||
|
||||
@ -235,13 +237,8 @@ class HttpFD(FileDownloader):
|
||||
|
||||
def retry(e):
|
||||
close_stream()
|
||||
if ctx.tmpfilename == '-':
|
||||
ctx.resume_len = byte_counter
|
||||
else:
|
||||
try:
|
||||
ctx.resume_len = os.path.getsize(ctx.tmpfilename)
|
||||
except FileNotFoundError:
|
||||
ctx.resume_len = 0
|
||||
ctx.resume_len = (byte_counter if ctx.tmpfilename == '-'
|
||||
else os.path.getsize(encodeFilename(ctx.tmpfilename)))
|
||||
raise RetryDownload(e)
|
||||
|
||||
while True:
|
||||
@ -266,20 +263,20 @@ class HttpFD(FileDownloader):
|
||||
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
|
||||
self.report_destination(ctx.filename)
|
||||
except OSError as err:
|
||||
self.report_error(f'unable to open for writing: {err}')
|
||||
self.report_error('unable to open for writing: %s' % str(err))
|
||||
return False
|
||||
|
||||
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
||||
try:
|
||||
write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode())
|
||||
except (XAttrUnavailableError, XAttrMetadataError) as err:
|
||||
self.report_error(f'unable to set filesize xattr: {err}')
|
||||
self.report_error('unable to set filesize xattr: %s' % str(err))
|
||||
|
||||
try:
|
||||
ctx.stream.write(data_block)
|
||||
except OSError as err:
|
||||
self.to_stderr('\n')
|
||||
self.report_error(f'unable to write data: {err}')
|
||||
self.report_error('unable to write data: %s' % str(err))
|
||||
return False
|
||||
|
||||
# Apply rate limit
|
||||
@ -325,7 +322,7 @@ class HttpFD(FileDownloader):
|
||||
elif now - ctx.throttle_start > 3:
|
||||
if ctx.stream is not None and ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
raise ThrottledDownload
|
||||
raise ThrottledDownload()
|
||||
elif speed:
|
||||
ctx.throttle_start = None
|
||||
|
||||
@ -336,7 +333,7 @@ class HttpFD(FileDownloader):
|
||||
|
||||
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
|
||||
ctx.resume_len = byte_counter
|
||||
raise NextFragment
|
||||
raise NextFragment()
|
||||
|
||||
if ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
|
@ -251,7 +251,7 @@ class IsmFD(FragmentFD):
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
frag_index = 0
|
||||
for segment in segments:
|
||||
for i, segment in enumerate(segments):
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
|
@ -10,7 +10,7 @@ from ..version import __version__ as YT_DLP_VERSION
|
||||
|
||||
|
||||
class MhtmlFD(FragmentFD):
|
||||
_STYLESHEET = '''\
|
||||
_STYLESHEET = """\
|
||||
html, body {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
@ -45,7 +45,7 @@ body > figure > img {
|
||||
max-width: 100%;
|
||||
max-height: calc(100vh - 5em);
|
||||
}
|
||||
'''
|
||||
"""
|
||||
_STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET)
|
||||
_STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET)
|
||||
|
||||
@ -57,19 +57,24 @@ body > figure > img {
|
||||
)).decode('us-ascii') + '?='
|
||||
|
||||
def _gen_cid(self, i, fragment, frag_boundary):
|
||||
return f'{i}.{frag_boundary}@yt-dlp.github.io.invalid'
|
||||
return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary)
|
||||
|
||||
def _gen_stub(self, *, fragments, frag_boundary, title):
|
||||
output = io.StringIO()
|
||||
|
||||
output.write(
|
||||
output.write((
|
||||
'<!DOCTYPE html>'
|
||||
'<html>'
|
||||
'<head>'
|
||||
f'<meta name="generator" content="yt-dlp {escapeHTML(YT_DLP_VERSION)}">'
|
||||
f'<title>{escapeHTML(title)}</title>'
|
||||
f'<style>{self._STYLESHEET}</style>'
|
||||
'<body>')
|
||||
'' '<meta name="generator" content="yt-dlp {version}">'
|
||||
'' '<title>{title}</title>'
|
||||
'' '<style>{styles}</style>'
|
||||
'<body>'
|
||||
).format(
|
||||
version=escapeHTML(YT_DLP_VERSION),
|
||||
styles=self._STYLESHEET,
|
||||
title=escapeHTML(title)
|
||||
))
|
||||
|
||||
t0 = 0
|
||||
for i, frag in enumerate(fragments):
|
||||
@ -82,12 +87,15 @@ body > figure > img {
|
||||
num=i + 1,
|
||||
t0=srt_subtitles_timecode(t0),
|
||||
t1=srt_subtitles_timecode(t1),
|
||||
duration=formatSeconds(frag['duration'], msec=True),
|
||||
duration=formatSeconds(frag['duration'], msec=True)
|
||||
))
|
||||
except (KeyError, ValueError, TypeError):
|
||||
t1 = None
|
||||
output.write(f'<figcaption>Slide #{i + 1}</figcaption>')
|
||||
output.write(f'<img src="cid:{self._gen_cid(i, frag, frag_boundary)}">')
|
||||
output.write((
|
||||
'<figcaption>Slide #{num}</figcaption>'
|
||||
).format(num=i + 1))
|
||||
output.write('<img src="cid:{cid}">'.format(
|
||||
cid=self._gen_cid(i, frag, frag_boundary)))
|
||||
output.write('</figure>')
|
||||
t0 = t1
|
||||
|
||||
@ -118,24 +126,31 @@ body > figure > img {
|
||||
stub = self._gen_stub(
|
||||
fragments=fragments,
|
||||
frag_boundary=frag_boundary,
|
||||
title=title,
|
||||
title=title
|
||||
)
|
||||
|
||||
ctx['dest_stream'].write((
|
||||
'MIME-Version: 1.0\r\n'
|
||||
'From: <nowhere@yt-dlp.github.io.invalid>\r\n'
|
||||
'To: <nowhere@yt-dlp.github.io.invalid>\r\n'
|
||||
f'Subject: {self._escape_mime(title)}\r\n'
|
||||
'Subject: {title}\r\n'
|
||||
'Content-type: multipart/related; '
|
||||
f'boundary="{frag_boundary}"; '
|
||||
'type="text/html"\r\n'
|
||||
f'X.yt-dlp.Origin: {origin}\r\n'
|
||||
'' 'boundary="{boundary}"; '
|
||||
'' 'type="text/html"\r\n'
|
||||
'X.yt-dlp.Origin: {origin}\r\n'
|
||||
'\r\n'
|
||||
f'--{frag_boundary}\r\n'
|
||||
'--{boundary}\r\n'
|
||||
'Content-Type: text/html; charset=utf-8\r\n'
|
||||
f'Content-Length: {len(stub)}\r\n'
|
||||
'Content-Length: {length}\r\n'
|
||||
'\r\n'
|
||||
f'{stub}\r\n').encode())
|
||||
'{stub}\r\n'
|
||||
).format(
|
||||
origin=origin,
|
||||
boundary=frag_boundary,
|
||||
length=len(stub),
|
||||
title=self._escape_mime(title),
|
||||
stub=stub
|
||||
).encode())
|
||||
extra_state['header_written'] = True
|
||||
|
||||
for i, fragment in enumerate(fragments):
|
||||
|
@ -2,10 +2,58 @@ import json
|
||||
import threading
|
||||
import time
|
||||
|
||||
from . import get_suitable_downloader
|
||||
from .common import FileDownloader
|
||||
from .external import FFmpegFD
|
||||
from ..networking import Request
|
||||
from ..utils import DownloadError, str_or_none, try_get
|
||||
from ..utils import DownloadError, WebSocketsWrapper, str_or_none, try_get
|
||||
|
||||
|
||||
class NiconicoDmcFD(FileDownloader):
|
||||
""" Downloading niconico douga from DMC with heartbeat """
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
from ..extractor.niconico import NiconicoIE
|
||||
|
||||
self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
|
||||
ie = NiconicoIE(self.ydl)
|
||||
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
|
||||
|
||||
fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params)
|
||||
|
||||
success = download_complete = False
|
||||
timer = [None]
|
||||
heartbeat_lock = threading.Lock()
|
||||
heartbeat_url = heartbeat_info_dict['url']
|
||||
heartbeat_data = heartbeat_info_dict['data'].encode()
|
||||
heartbeat_interval = heartbeat_info_dict.get('interval', 30)
|
||||
|
||||
request = Request(heartbeat_url, heartbeat_data)
|
||||
|
||||
def heartbeat():
|
||||
try:
|
||||
self.ydl.urlopen(request).read()
|
||||
except Exception:
|
||||
self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
|
||||
|
||||
with heartbeat_lock:
|
||||
if not download_complete:
|
||||
timer[0] = threading.Timer(heartbeat_interval, heartbeat)
|
||||
timer[0].start()
|
||||
|
||||
heartbeat_info_dict['ping']()
|
||||
self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
|
||||
try:
|
||||
heartbeat()
|
||||
if type(fd).__name__ == 'HlsFD':
|
||||
info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
|
||||
success = fd.real_download(filename, info_dict)
|
||||
finally:
|
||||
if heartbeat_lock:
|
||||
with heartbeat_lock:
|
||||
timer[0].cancel()
|
||||
download_complete = True
|
||||
return success
|
||||
|
||||
|
||||
class NiconicoLiveFD(FileDownloader):
|
||||
@ -16,6 +64,7 @@ class NiconicoLiveFD(FileDownloader):
|
||||
ws_url = info_dict['url']
|
||||
ws_extractor = info_dict['ws']
|
||||
ws_origin_host = info_dict['origin']
|
||||
cookies = info_dict.get('cookies')
|
||||
live_quality = info_dict.get('live_quality', 'high')
|
||||
live_latency = info_dict.get('live_latency', 'high')
|
||||
dl = FFmpegFD(self.ydl, self.params or {})
|
||||
@ -27,7 +76,12 @@ class NiconicoLiveFD(FileDownloader):
|
||||
|
||||
def communicate_ws(reconnect):
|
||||
if reconnect:
|
||||
ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'}))
|
||||
ws = WebSocketsWrapper(ws_url, {
|
||||
'Cookies': str_or_none(cookies) or '',
|
||||
'Origin': f'https://{ws_origin_host}',
|
||||
'Accept': '*/*',
|
||||
'User-Agent': self.params['http_headers']['User-Agent'],
|
||||
})
|
||||
if self.ydl.params.get('verbose', False):
|
||||
self.to_screen('[debug] Sending startWatching request')
|
||||
ws.send(json.dumps({
|
||||
@ -37,15 +91,14 @@ class NiconicoLiveFD(FileDownloader):
|
||||
'quality': live_quality,
|
||||
'protocol': 'hls+fmp4',
|
||||
'latency': live_latency,
|
||||
'accessRightMethod': 'single_cookie',
|
||||
'chasePlay': False,
|
||||
'chasePlay': False
|
||||
},
|
||||
'room': {
|
||||
'protocol': 'webSocket',
|
||||
'commentable': True,
|
||||
'commentable': True
|
||||
},
|
||||
'reconnect': True,
|
||||
},
|
||||
}
|
||||
}))
|
||||
else:
|
||||
ws = ws_extractor
|
||||
@ -71,7 +124,7 @@ class NiconicoLiveFD(FileDownloader):
|
||||
elif self.ydl.params.get('verbose', False):
|
||||
if len(recv) > 100:
|
||||
recv = recv[:100] + '...'
|
||||
self.to_screen(f'[debug] Server said: {recv}')
|
||||
self.to_screen('[debug] Server said: %s' % recv)
|
||||
|
||||
def ws_main():
|
||||
reconnect = False
|
||||
@ -81,7 +134,7 @@ class NiconicoLiveFD(FileDownloader):
|
||||
if ret is True:
|
||||
return
|
||||
except BaseException as e:
|
||||
self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e)))
|
||||
self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e)))
|
||||
time.sleep(10)
|
||||
continue
|
||||
finally:
|
||||
|
@ -8,6 +8,7 @@ from ..utils import (
|
||||
Popen,
|
||||
check_executable,
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
get_exe_version,
|
||||
)
|
||||
|
||||
@ -178,15 +179,15 @@ class RtmpFD(FileDownloader):
|
||||
return False
|
||||
|
||||
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
|
||||
prevsize = os.path.getsize(tmpfilename)
|
||||
self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes')
|
||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
|
||||
time.sleep(5.0) # This seems to be needed
|
||||
args = [*basic_args, '--resume']
|
||||
args = basic_args + ['--resume']
|
||||
if retval == RD_FAILED:
|
||||
args += ['--skip', '1']
|
||||
args = [encodeArgument(a) for a in args]
|
||||
retval = run_rtmpdump(args)
|
||||
cursize = os.path.getsize(tmpfilename)
|
||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
if prevsize == cursize and retval == RD_FAILED:
|
||||
break
|
||||
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
||||
@ -195,8 +196,8 @@ class RtmpFD(FileDownloader):
|
||||
retval = RD_SUCCESS
|
||||
break
|
||||
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
|
||||
fsize = os.path.getsize(tmpfilename)
|
||||
self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes')
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
|
@ -2,7 +2,7 @@ import os
|
||||
import subprocess
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import check_executable
|
||||
from ..utils import check_executable, encodeFilename
|
||||
|
||||
|
||||
class RtspFD(FileDownloader):
|
||||
@ -26,7 +26,7 @@ class RtspFD(FileDownloader):
|
||||
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(tmpfilename)
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(f'\r[{args[0]}] {fsize} bytes')
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
|
@ -18,7 +18,7 @@ class YoutubeLiveChatFD(FragmentFD):
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
video_id = info_dict['video_id']
|
||||
self.to_screen(f'[{self.FD_NAME}] Downloading live chat')
|
||||
self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
|
||||
if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
|
||||
self.report_warning('Live chat download runs until the livestream ends. '
|
||||
'If you wish to download the video simultaneously, run a separate yt-dlp instance')
|
||||
@ -123,8 +123,8 @@ class YoutubeLiveChatFD(FragmentFD):
|
||||
data,
|
||||
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
|
||||
|
||||
func = ((info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live)
|
||||
or (frag_index == 1 and try_refresh_replay_beginning)
|
||||
func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live
|
||||
or frag_index == 1 and try_refresh_replay_beginning
|
||||
or parse_actions_replay)
|
||||
return (True, *func(live_chat_continuation))
|
||||
except HTTPError as err:
|
||||
|
@ -1,25 +1,16 @@
|
||||
from ..compat.compat_utils import passthrough_module
|
||||
from ..globals import extractors as _extractors_context
|
||||
from ..globals import plugin_ies as _plugin_ies_context
|
||||
from ..plugins import PluginSpec, register_plugin_spec
|
||||
|
||||
passthrough_module(__name__, '.extractors')
|
||||
del passthrough_module
|
||||
|
||||
register_plugin_spec(PluginSpec(
|
||||
module_name='extractor',
|
||||
suffix='IE',
|
||||
destination=_extractors_context,
|
||||
plugin_destination=_plugin_ies_context,
|
||||
))
|
||||
|
||||
|
||||
def gen_extractor_classes():
|
||||
""" Return a list of supported extractors.
|
||||
The order does matter; the first extractor matched is the one handling the URL.
|
||||
"""
|
||||
import_extractors()
|
||||
return list(_extractors_context.value.values())
|
||||
from .extractors import _ALL_CLASSES
|
||||
|
||||
return _ALL_CLASSES
|
||||
|
||||
|
||||
def gen_extractors():
|
||||
@ -46,9 +37,6 @@ def list_extractors(age_limit=None):
|
||||
|
||||
def get_info_extractor(ie_name):
|
||||
"""Returns the info extractor class with the given ie_name"""
|
||||
import_extractors()
|
||||
return _extractors_context.value[f'{ie_name}IE']
|
||||
from . import extractors
|
||||
|
||||
|
||||
def import_extractors():
|
||||
from . import extractors # noqa: F401
|
||||
return getattr(extractors, f'{ie_name}IE')
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -4,18 +4,18 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -66,7 +66,7 @@ class ABCIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'WWI Centenary',
|
||||
'description': 'md5:c2379ec0ca84072e86b446e536954546',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074',
|
||||
'info_dict': {
|
||||
@ -74,7 +74,7 @@ class ABCIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia',
|
||||
'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476',
|
||||
'info_dict': {
|
||||
@ -85,7 +85,7 @@ class ABCIE(InfoExtractor):
|
||||
'upload_date': '20200813',
|
||||
'uploader': 'Behind the News',
|
||||
'uploader_id': 'behindthenews',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
|
||||
'info_dict': {
|
||||
@ -94,7 +94,7 @@ class ABCIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
|
||||
'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -125,7 +125,7 @@ class ABCIE(InfoExtractor):
|
||||
if mobj is None:
|
||||
expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
|
||||
if expired:
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {expired}', expected=True)
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
|
||||
raise ExtractorError('Unable to extract video urls')
|
||||
|
||||
urls_info = self._parse_json(
|
||||
@ -163,7 +163,7 @@ class ABCIE(InfoExtractor):
|
||||
'height': height,
|
||||
'tbr': bitrate,
|
||||
'filesize': int_or_none(url_info.get('filesize')),
|
||||
'format_id': format_id,
|
||||
'format_id': format_id
|
||||
})
|
||||
|
||||
return {
|
||||
@ -180,100 +180,20 @@ class ABCIViewIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
|
||||
_GEO_COUNTRIES = ['AU']
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'https://iview.abc.net.au/show/utopia/series/1/video/CO1211V001S00',
|
||||
'md5': '52a942bfd7a0b79a6bfe9b4ce6c9d0ed',
|
||||
'info_dict': {
|
||||
'id': 'CO1211V001S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 1 Ep 1 Wood For The Trees',
|
||||
'series': 'Utopia',
|
||||
'description': 'md5:0cfb2c183c1b952d1548fd65c8a95c00',
|
||||
'upload_date': '20230726',
|
||||
'uploader_id': 'abc1',
|
||||
'series_id': 'CO1211V',
|
||||
'episode_id': 'CO1211V001S00',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 1,
|
||||
'episode': 'Wood For The Trees',
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/co/CO1211V001S00_5ad8353f4df09_1280.jpg',
|
||||
'timestamp': 1690403700,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'note': 'No episode name',
|
||||
'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
|
||||
'md5': '67715ce3c78426b11ba167d875ac6abf',
|
||||
'info_dict': {
|
||||
'id': 'LE1927H001S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 11 Ep 1',
|
||||
'series': 'Gruen',
|
||||
'title': "Series 11 Ep 1",
|
||||
'series': "Gruen",
|
||||
'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
|
||||
'upload_date': '20190925',
|
||||
'uploader_id': 'abc1',
|
||||
'series_id': 'LE1927H',
|
||||
'episode_id': 'LE1927H001S00',
|
||||
'season_number': 11,
|
||||
'season': 'Season 11',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/le/LE1927H001S00_5d954fbd79e25_1280.jpg',
|
||||
'timestamp': 1569445289,
|
||||
},
|
||||
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'note': 'No episode number',
|
||||
'url': 'https://iview.abc.net.au/show/four-corners/series/2022/video/NC2203H039S00',
|
||||
'md5': '77cb7d8434440e3b28fbebe331c2456a',
|
||||
'info_dict': {
|
||||
'id': 'NC2203H039S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 2022 Locking Up Kids',
|
||||
'series': 'Four Corners',
|
||||
'description': 'md5:54829ca108846d1a70e1fcce2853e720',
|
||||
'upload_date': '20221114',
|
||||
'uploader_id': 'abc1',
|
||||
'series_id': 'NC2203H',
|
||||
'episode_id': 'NC2203H039S00',
|
||||
'season_number': 2022,
|
||||
'season': 'Season 2022',
|
||||
'episode': 'Locking Up Kids',
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg',
|
||||
'timestamp': 1668460497,
|
||||
|
||||
},
|
||||
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'note': 'No episode name or number',
|
||||
'url': 'https://iview.abc.net.au/show/landline/series/2021/video/RF2004Q043S00',
|
||||
'md5': '2e17dec06b13cc81dc119d2565289396',
|
||||
'info_dict': {
|
||||
'id': 'RF2004Q043S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 2021',
|
||||
'series': 'Landline',
|
||||
'description': 'md5:c9f30d9c0c914a7fd23842f6240be014',
|
||||
'upload_date': '20211205',
|
||||
'uploader_id': 'abc1',
|
||||
'series_id': 'RF2004Q',
|
||||
'episode_id': 'RF2004Q043S00',
|
||||
'season_number': 2021,
|
||||
'season': 'Season 2021',
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg',
|
||||
'timestamp': 1638710705,
|
||||
|
||||
},
|
||||
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
@ -287,12 +207,13 @@ class ABCIViewIE(InfoExtractor):
|
||||
stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
|
||||
|
||||
house_number = video_params.get('episodeHouseNumber') or video_id
|
||||
path = f'/auth/hls/sign?ts={int(time.time())}&hn={house_number}&d=android-tablet'
|
||||
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
|
||||
int(time.time()), house_number)
|
||||
sig = hmac.new(
|
||||
b'android.content.res.Resources',
|
||||
path.encode(), hashlib.sha256).hexdigest()
|
||||
path.encode('utf-8'), hashlib.sha256).hexdigest()
|
||||
token = self._download_webpage(
|
||||
f'http://iview.abc.net.au{path}&sig={sig}', video_id)
|
||||
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
|
||||
|
||||
def tokenize_url(url, token):
|
||||
return update_url_query(url, {
|
||||
@ -301,7 +222,7 @@ class ABCIViewIE(InfoExtractor):
|
||||
|
||||
for sd in ('1080', '720', 'sd', 'sd-low'):
|
||||
sd_url = try_get(
|
||||
stream, lambda x: x['streams']['hls'][sd], str)
|
||||
stream, lambda x: x['streams']['hls'][sd], compat_str)
|
||||
if not sd_url:
|
||||
continue
|
||||
formats = self._extract_m3u8_formats(
|
||||
@ -334,8 +255,6 @@ class ABCIViewIE(InfoExtractor):
|
||||
'episode_number': int_or_none(self._search_regex(
|
||||
r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
|
||||
'episode_id': house_number,
|
||||
'episode': self._search_regex(
|
||||
r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title, 'episode', default='') or None,
|
||||
'uploader_id': video_params.get('channel'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
@ -356,7 +275,7 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
'description': 'md5:93119346c24a7c322d446d8eece430ff',
|
||||
'series': 'Upper Middle Bogan',
|
||||
'season': 'Series 1',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}, {
|
||||
@ -375,39 +294,17 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
'noplaylist': True,
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
# 'videoEpisodes' is a dict with `items` key
|
||||
'url': 'https://iview.abc.net.au/show/7-30-mark-humphries-satire',
|
||||
'info_dict': {
|
||||
'id': '178458-0',
|
||||
'title': 'Episodes',
|
||||
'description': 'Satirist Mark Humphries brings his unique perspective on current political events for 7.30.',
|
||||
'series': '7.30 Mark Humphries Satire',
|
||||
'season': 'Episodes',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
|
||||
},
|
||||
'playlist_count': 15,
|
||||
'skip': 'This program is not currently available in ABC iview',
|
||||
}, {
|
||||
'url': 'https://iview.abc.net.au/show/inbestigators',
|
||||
'info_dict': {
|
||||
'id': '175343-1',
|
||||
'title': 'Series 1',
|
||||
'description': 'md5:b9976935a6450e5b78ce2a940a755685',
|
||||
'series': 'The Inbestigators',
|
||||
'season': 'Series 1',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.+\.jpg',
|
||||
},
|
||||
'playlist_count': 17,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
video_data = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"]', webpage, 'initial state', show_id,
|
||||
transform_source=lambda x: x.encode().decode('unicode_escape'),
|
||||
end_pattern=r'[\'"]\s*;')['route']['pageData']['_embedded']
|
||||
webpage_data = self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
|
||||
webpage, 'initial state')
|
||||
video_data = self._parse_json(
|
||||
unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id)
|
||||
video_data = video_data['route']['pageData']['_embedded']
|
||||
|
||||
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
|
||||
if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):
|
||||
@ -416,14 +313,12 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
series = video_data['selectedSeries']
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': [self.url_result(episode_url, ABCIViewIE)
|
||||
for episode_url in traverse_obj(series, (
|
||||
'_embedded', 'videoEpisodes', (None, 'items'), ..., 'shareUrl', {url_or_none}))],
|
||||
'entries': [self.url_result(episode['shareUrl'])
|
||||
for episode in series['_embedded']['videoEpisodes']],
|
||||
'id': series.get('id'),
|
||||
'title': dict_get(series, ('title', 'displaySubtitle')),
|
||||
'description': series.get('description'),
|
||||
'series': dict_get(series, ('showTitle', 'displayTitle')),
|
||||
'season': dict_get(series, ('title', 'displaySubtitle')),
|
||||
'thumbnail': traverse_obj(
|
||||
series, 'thumbnail', ('images', lambda _, v: v['name'] == 'seriesThumbnail', 'url'), get_all=False),
|
||||
'thumbnail': series.get('thumbnail'),
|
||||
}
|
||||
|
@ -58,7 +58,7 @@ class AbcNewsVideoIE(AMPIE):
|
||||
display_id = mobj.group('display_id')
|
||||
video_id = mobj.group('id')
|
||||
info_dict = self._extract_feed_info(
|
||||
f'http://abcnews.go.com/video/itemfeed?id={video_id}')
|
||||
'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
|
@ -1,4 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
int_or_none,
|
||||
@ -56,11 +57,11 @@ class ABCOTVSIE(InfoExtractor):
|
||||
data = self._download_json(
|
||||
'https://api.abcotvs.com/v2/content', display_id, query={
|
||||
'id': video_id,
|
||||
'key': f'otv.web.{station}.story',
|
||||
'key': 'otv.web.%s.story' % station,
|
||||
'station': station,
|
||||
})['data']
|
||||
video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
|
||||
video_id = str(dict_get(video, ('id', 'publishedKey'), video_id))
|
||||
video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id))
|
||||
title = video.get('title') or video['linkText']
|
||||
|
||||
formats = []
|
||||
|
@ -6,54 +6,53 @@ import hmac
|
||||
import io
|
||||
import json
|
||||
import re
|
||||
import struct
|
||||
import time
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import urllib.response
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_decrypt
|
||||
from ..networking import RequestHandler, Response
|
||||
from ..networking.exceptions import TransportError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
bytes_to_intlist,
|
||||
decode_base_n,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
OnDemandPagedList,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
update_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class AbemaLicenseRH(RequestHandler):
|
||||
_SUPPORTED_URL_SCHEMES = ('abematv-license',)
|
||||
_SUPPORTED_PROXY_SCHEMES = None
|
||||
_SUPPORTED_FEATURES = None
|
||||
RH_NAME = 'abematv_license'
|
||||
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
|
||||
"""Add a handler for opening URLs, like _download_webpage"""
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
|
||||
rh = ydl._request_director.handlers['Urllib']
|
||||
if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
|
||||
return
|
||||
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies)
|
||||
assert isinstance(opener, urllib.request.OpenerDirector)
|
||||
opener.add_handler(handler)
|
||||
rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
|
||||
|
||||
_STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
||||
_HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
|
||||
|
||||
def __init__(self, *, ie: 'AbemaTVIE', **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
handler_order = 499
|
||||
STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
||||
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
|
||||
|
||||
def __init__(self, ie: 'AbemaTVIE'):
|
||||
# the protocol that this should really handle is 'abematv-license://'
|
||||
# abematv_license_open is just a placeholder for development purposes
|
||||
# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
|
||||
setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open'))
|
||||
self.ie = ie
|
||||
|
||||
def _send(self, request):
|
||||
url = request.url
|
||||
ticket = urllib.parse.urlparse(url).netloc
|
||||
|
||||
try:
|
||||
response_data = self._get_videokey_from_ticket(ticket)
|
||||
except ExtractorError as e:
|
||||
raise TransportError(cause=e.cause) from e
|
||||
except (IndexError, KeyError, TypeError) as e:
|
||||
raise TransportError(cause=repr(e)) from e
|
||||
|
||||
return Response(
|
||||
io.BytesIO(response_data), url,
|
||||
headers={'Content-Length': str(len(response_data))})
|
||||
|
||||
def _get_videokey_from_ticket(self, ticket):
|
||||
to_show = self.ie.get_param('verbose', False)
|
||||
media_token = self.ie._get_media_token(to_show=to_show)
|
||||
@ -63,27 +62,33 @@ class AbemaLicenseRH(RequestHandler):
|
||||
query={'t': media_token},
|
||||
data=json.dumps({
|
||||
'kv': 'a',
|
||||
'lt': ticket,
|
||||
}).encode(),
|
||||
'lt': ticket
|
||||
}).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
res = decode_base_n(license_response['k'], table=self._STRTABLE)
|
||||
encvideokey = list(res.to_bytes(16, 'big'))
|
||||
res = decode_base_n(license_response['k'], table=self.STRTABLE)
|
||||
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
|
||||
|
||||
h = hmac.new(
|
||||
binascii.unhexlify(self._HKEY),
|
||||
(license_response['cid'] + self.ie._DEVICE_ID).encode(),
|
||||
binascii.unhexlify(self.HKEY),
|
||||
(license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
|
||||
digestmod=hashlib.sha256)
|
||||
enckey = list(h.digest())
|
||||
enckey = bytes_to_intlist(h.digest())
|
||||
|
||||
return bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
|
||||
def abematv_license_open(self, url):
|
||||
url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
|
||||
ticket = urllib.parse.urlparse(url).netloc
|
||||
response_data = self._get_videokey_from_ticket(ticket)
|
||||
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
|
||||
'Content-Length': str(len(response_data)),
|
||||
}, url=url, code=200)
|
||||
|
||||
|
||||
class AbemaTVBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
|
||||
_USERTOKEN = None
|
||||
_DEVICE_ID = None
|
||||
_MEDIATOKEN = None
|
||||
@ -92,11 +97,11 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
|
||||
@classmethod
|
||||
def _generate_aks(cls, deviceid):
|
||||
deviceid = deviceid.encode()
|
||||
deviceid = deviceid.encode('utf-8')
|
||||
# add 1 hour and then drop minute and secs
|
||||
ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
|
||||
time_struct = time.gmtime(ts_1hour)
|
||||
ts_1hour_str = str(ts_1hour).encode()
|
||||
ts_1hour_str = str(ts_1hour).encode('utf-8')
|
||||
|
||||
tmp = None
|
||||
|
||||
@ -108,7 +113,7 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
|
||||
def mix_tmp(count):
|
||||
nonlocal tmp
|
||||
for _ in range(count):
|
||||
for i in range(count):
|
||||
mix_once(tmp)
|
||||
|
||||
def mix_twist(nonce):
|
||||
@ -128,15 +133,11 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
if self._USERTOKEN:
|
||||
return self._USERTOKEN
|
||||
|
||||
self._downloader._request_director.add_handler(AbemaLicenseRH(ie=self, logger=None))
|
||||
|
||||
username, _ = self._get_login_info()
|
||||
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
|
||||
AbemaTVBaseIE._USERTOKEN = auth_cache and auth_cache.get('usertoken')
|
||||
AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
|
||||
if AbemaTVBaseIE._USERTOKEN:
|
||||
# try authentication with locally stored token
|
||||
try:
|
||||
AbemaTVBaseIE._DEVICE_ID = auth_cache.get('device_id')
|
||||
self._get_media_token(True)
|
||||
return
|
||||
except ExtractorError as e:
|
||||
@ -149,12 +150,13 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
data=json.dumps({
|
||||
'deviceId': self._DEVICE_ID,
|
||||
'applicationKeySecret': aks,
|
||||
}).encode(),
|
||||
}).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
AbemaTVBaseIE._USERTOKEN = user_data['token']
|
||||
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
return self._USERTOKEN
|
||||
|
||||
def _get_media_token(self, invalidate=False, to_show=True):
|
||||
@ -169,44 +171,13 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
'osLang': 'ja_JP',
|
||||
'osTimezone': 'Asia/Tokyo',
|
||||
'appId': 'tv.abema',
|
||||
'appVersion': '3.27.1',
|
||||
'appVersion': '3.27.1'
|
||||
}, headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
})['token']
|
||||
|
||||
return self._MEDIATOKEN
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._get_device_token()
|
||||
if self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19') and self._get_media_token():
|
||||
self.write_debug('Skipping logging in')
|
||||
return
|
||||
|
||||
if '@' in username: # don't strictly check if it's email address or not
|
||||
ep, method = 'user/email', 'email'
|
||||
else:
|
||||
ep, method = 'oneTimePassword', 'userId'
|
||||
|
||||
login_response = self._download_json(
|
||||
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
|
||||
data=json.dumps({
|
||||
method: username,
|
||||
'password': password,
|
||||
}).encode(), headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
'Origin': 'https://abema.tv',
|
||||
'Referer': 'https://abema.tv/',
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
AbemaTVBaseIE._USERTOKEN = login_response['token']
|
||||
self._get_media_token(True)
|
||||
auth_cache = {
|
||||
'device_id': AbemaTVBaseIE._DEVICE_ID,
|
||||
'usertoken': AbemaTVBaseIE._USERTOKEN,
|
||||
}
|
||||
self.cache.store(self._NETRC_MACHINE, username, auth_cache)
|
||||
|
||||
def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
|
||||
return self._download_json(
|
||||
f'https://api.abema.io/{endpoint}', video_id, query=query or {},
|
||||
@ -230,14 +201,14 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
|
||||
class AbemaTVIE(AbemaTVBaseIE):
|
||||
_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
_TESTS = [{
|
||||
'url': 'https://abema.tv/video/episode/194-25_s2_p1',
|
||||
'info_dict': {
|
||||
'id': '194-25_s2_p1',
|
||||
'title': '第1話 「チーズケーキ」 「モーニング再び」',
|
||||
'series': '異世界食堂2',
|
||||
'season': 'シーズン2',
|
||||
'season_number': 2,
|
||||
'series_number': 2,
|
||||
'episode': '第1話 「チーズケーキ」 「モーニング再び」',
|
||||
'episode_number': 1,
|
||||
},
|
||||
@ -249,7 +220,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
|
||||
'series': 'ゆるキャン△ SEASON2',
|
||||
'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
|
||||
'season_number': 2,
|
||||
'series_number': 2,
|
||||
'episode_number': 1,
|
||||
'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
|
||||
},
|
||||
@ -278,6 +249,33 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
}]
|
||||
_TIMETABLE = None
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._get_device_token()
|
||||
if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
|
||||
self.write_debug('Skipping logging in')
|
||||
return
|
||||
|
||||
if '@' in username: # don't strictly check if it's email address or not
|
||||
ep, method = 'user/email', 'email'
|
||||
else:
|
||||
ep, method = 'oneTimePassword', 'userId'
|
||||
|
||||
login_response = self._download_json(
|
||||
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
|
||||
data=json.dumps({
|
||||
method: username,
|
||||
'password': password
|
||||
}).encode('utf-8'), headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
'Origin': 'https://abema.tv',
|
||||
'Referer': 'https://abema.tv/',
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
AbemaTVBaseIE._USERTOKEN = login_response['token']
|
||||
self._get_media_token(True)
|
||||
self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN)
|
||||
|
||||
def _real_extract(self, url):
|
||||
# starting download using infojson from this extractor is undefined behavior,
|
||||
# and never be fixed in the future; you must trigger downloads by directly specifying URL.
|
||||
@ -333,7 +331,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
|
||||
description = self._html_search_regex(
|
||||
(r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
|
||||
r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div'),
|
||||
r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
|
||||
webpage, 'description', default=None, group=1)
|
||||
if not description:
|
||||
og_desc = self._html_search_meta(
|
||||
@ -346,18 +344,17 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
)?
|
||||
''', r'\1', og_desc)
|
||||
|
||||
# canonical URL may contain season and episode number
|
||||
# canonical URL may contain series and episode number
|
||||
mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
|
||||
if mobj:
|
||||
seri = int_or_none(mobj.group(1), default=float('inf'))
|
||||
epis = int_or_none(mobj.group(2), default=float('inf'))
|
||||
info['season_number'] = seri if seri < 100 else None
|
||||
info['series_number'] = seri if seri < 100 else None
|
||||
# some anime like Detective Conan (though not available in AbemaTV)
|
||||
# has more than 1000 episodes (1026 as of 2021/11/15)
|
||||
info['episode_number'] = epis if epis < 2000 else None
|
||||
|
||||
is_live, m3u8_url = False, None
|
||||
availability = 'public'
|
||||
if video_type == 'now-on-air':
|
||||
is_live = True
|
||||
channel_url = 'https://api.abema.io/v1/channels'
|
||||
@ -375,13 +372,13 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
|
||||
note='Checking playability',
|
||||
headers=headers)
|
||||
if not traverse_obj(api_response, ('label', 'free', {bool})):
|
||||
ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
|
||||
if 3 not in ondemand_types:
|
||||
# cannot acquire decryption key for these streams
|
||||
self.report_warning('This is a premium-only stream')
|
||||
availability = 'premium_only'
|
||||
info.update(traverse_obj(api_response, {
|
||||
'series': ('series', 'title'),
|
||||
'season': ('season', 'name'),
|
||||
'season': ('season', 'title'),
|
||||
'season_number': ('season', 'sequence'),
|
||||
'episode_number': ('episode', 'number'),
|
||||
}))
|
||||
@ -398,7 +395,6 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
headers=headers)
|
||||
if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
|
||||
self.report_warning('This is a premium-only stream')
|
||||
availability = 'premium_only'
|
||||
|
||||
m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
|
||||
else:
|
||||
@ -416,25 +412,19 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
'availability': availability,
|
||||
})
|
||||
|
||||
if thumbnail := update_url(self._og_search_thumbnail(webpage, default=''), query=None):
|
||||
info['thumbnails'] = [{'url': thumbnail}]
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class AbemaTVTitleIE(AbemaTVBaseIE):
|
||||
_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/#]+)/?(?:\?(?:[^#]+&)?s=(?P<season>[^&#]+))?'
|
||||
_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
|
||||
_PAGE_SIZE = 25
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://abema.tv/video/title/90-1887',
|
||||
'url': 'https://abema.tv/video/title/90-1597',
|
||||
'info_dict': {
|
||||
'id': '90-1887',
|
||||
'id': '90-1597',
|
||||
'title': 'シャッフルアイランド',
|
||||
'description': 'md5:61b2425308f41a5282a926edda66f178',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}, {
|
||||
@ -442,54 +432,41 @@ class AbemaTVTitleIE(AbemaTVBaseIE):
|
||||
'info_dict': {
|
||||
'id': '193-132',
|
||||
'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
|
||||
'description': 'md5:9b59493d1f3a792bafbc7319258e7af8',
|
||||
},
|
||||
'playlist_mincount': 16,
|
||||
}, {
|
||||
'url': 'https://abema.tv/video/title/25-1nzan-whrxe',
|
||||
'url': 'https://abema.tv/video/title/25-102',
|
||||
'info_dict': {
|
||||
'id': '25-1nzan-whrxe',
|
||||
'title': 'ソードアート・オンライン',
|
||||
'description': 'md5:c094904052322e6978495532bdbf06e6',
|
||||
'id': '25-102',
|
||||
'title': 'ソードアート・オンライン アリシゼーション',
|
||||
},
|
||||
'playlist_mincount': 25,
|
||||
}, {
|
||||
'url': 'https://abema.tv/video/title/26-2mzbynr-cph?s=26-2mzbynr-cph_s40',
|
||||
'info_dict': {
|
||||
'title': '〈物語〉シリーズ',
|
||||
'id': '26-2mzbynr-cph',
|
||||
'description': 'md5:e67873de1c88f360af1f0a4b84847a52',
|
||||
},
|
||||
'playlist_count': 59,
|
||||
'playlist_mincount': 24,
|
||||
}]
|
||||
|
||||
def _fetch_page(self, playlist_id, series_version, season_id, page):
|
||||
def _fetch_page(self, playlist_id, series_version, page):
|
||||
programs = self._call_api(
|
||||
f'v1/video/series/{playlist_id}/programs', playlist_id,
|
||||
note=f'Downloading page {page + 1}',
|
||||
query={
|
||||
'seriesVersion': series_version,
|
||||
'offset': str(page * self._PAGE_SIZE),
|
||||
'order': 'seq',
|
||||
'limit': str(self._PAGE_SIZE),
|
||||
}
|
||||
if season_id:
|
||||
query['seasonId'] = season_id
|
||||
programs = self._call_api(
|
||||
f'v1/video/series/{playlist_id}/programs', playlist_id,
|
||||
note=f'Downloading page {page + 1}',
|
||||
query=query)
|
||||
})
|
||||
yield from (
|
||||
self.url_result(f'https://abema.tv/video/episode/{x}')
|
||||
for x in traverse_obj(programs, ('programs', ..., 'id')))
|
||||
|
||||
def _entries(self, playlist_id, series_version, season_id):
|
||||
def _entries(self, playlist_id, series_version):
|
||||
return OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, playlist_id, series_version, season_id),
|
||||
functools.partial(self._fetch_page, playlist_id, series_version),
|
||||
self._PAGE_SIZE)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, season_id = self._match_valid_url(url).group('id', 'season')
|
||||
playlist_id = self._match_id(url)
|
||||
series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(playlist_id, series_info['version'], season_id), playlist_id=playlist_id,
|
||||
self._entries(playlist_id, series_info['version']), playlist_id=playlist_id,
|
||||
playlist_title=series_info.get('title'),
|
||||
playlist_description=series_info.get('content'))
|
||||
|
@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class AcademicEarthCourseIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
IE_NAME = 'AcademicEarth:Course'
|
||||
_TEST = {
|
||||
'url': 'http://academicearth.org/playlists/laws-of-nature/',
|
||||
|
@ -43,14 +43,14 @@ class ACastIE(ACastBaseIE):
|
||||
_VALID_URL = r'''(?x:
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:embed|www|shows)\.)?acast\.com/|
|
||||
(?:(?:embed|www)\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<channel>[^/?#]+)/(?:episodes/)?(?P<id>[^/#?"]+)
|
||||
(?P<channel>[^/]+)/(?P<id>[^/#?"]+)
|
||||
)'''
|
||||
_EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://shows.acast.com/sparpodcast/episodes/2.raggarmordet-rosterurdetforflutna',
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'info_dict': {
|
||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'ext': 'mp3',
|
||||
@ -59,7 +59,7 @@ class ACastIE(ACastBaseIE):
|
||||
'timestamp': 1477346700,
|
||||
'upload_date': '20161024',
|
||||
'duration': 2766,
|
||||
'creators': ['Third Ear Studio'],
|
||||
'creator': 'Third Ear Studio',
|
||||
'series': 'Spår',
|
||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||
'thumbnail': 'https://assets.pippa.io/shows/616ebe1886d7b1398620b943/616ebe33c7e6e70013cae7da.jpg',
|
||||
@ -67,16 +67,13 @@ class ACastIE(ACastBaseIE):
|
||||
'display_id': '2.raggarmordet-rosterurdetforflutna',
|
||||
'season_number': 4,
|
||||
'season': 'Season 4',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'only_matching': True,
|
||||
@ -96,13 +93,13 @@ class ACastIE(ACastBaseIE):
|
||||
'series': 'Democracy Sausage with Mark Kenny',
|
||||
'timestamp': 1684826362,
|
||||
'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = self._match_valid_url(url).groups()
|
||||
episode = self._call_api(
|
||||
f'{channel}/episodes/{display_id}',
|
||||
'%s/episodes/%s' % (channel, display_id),
|
||||
display_id, {'showInfo': 'true'})
|
||||
return self._extract_episode(
|
||||
episode, self._extract_show_info(episode.get('show') or {}))
|
||||
@ -113,7 +110,7 @@ class ACastChannelIE(ACastBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:www|shows)\.)?acast\.com/|
|
||||
(?:www\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<id>[^/#?]+)
|
||||
@ -123,20 +120,17 @@ class ACastChannelIE(ACastBaseIE):
|
||||
'info_dict': {
|
||||
'id': '4efc5294-5385-4847-98bd-519799ce5786',
|
||||
'title': 'Today in Focus',
|
||||
'description': 'md5:feca253de9947634605080cd9eeea2bf',
|
||||
'description': 'md5:c09ce28c91002ce4ffce71d6504abaae',
|
||||
},
|
||||
'playlist_mincount': 200,
|
||||
}, {
|
||||
'url': 'http://play.acast.com/s/ft-banking-weekly',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://shows.acast.com/sparpodcast',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ACastIE.suitable(url) else super().suitable(url)
|
||||
return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_slug = self._match_id(url)
|
||||
|
@ -3,10 +3,9 @@ from ..utils import (
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
parse_codecs,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
@ -25,7 +24,7 @@ class AcFunVideoBaseIE(InfoExtractor):
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'tbr': float_or_none(video.get('avgBitrate')),
|
||||
**parse_codecs(video.get('codecs', '')),
|
||||
**parse_codecs(video.get('codecs', ''))
|
||||
})
|
||||
|
||||
return {
|
||||
@ -77,7 +76,7 @@ class AcFunVideoIE(AcFunVideoBaseIE):
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
|
||||
'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -130,7 +129,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
|
||||
'title': '红孩儿之趴趴蛙寻石记 第5话 ',
|
||||
'duration': 760.0,
|
||||
'season': '红孩儿之趴趴蛙寻石记',
|
||||
'season_id': '5023171',
|
||||
'season_id': 5023171,
|
||||
'season_number': 1, # series has only 1 season
|
||||
'episode': 'Episode 5',
|
||||
'episode_number': 5,
|
||||
@ -147,7 +146,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
|
||||
'title': '叽歪老表(第二季) 第5话 坚不可摧',
|
||||
'season': '叽歪老表(第二季)',
|
||||
'season_number': 2,
|
||||
'season_id': '6065485',
|
||||
'season_id': 6065485,
|
||||
'episode': '坚不可摧',
|
||||
'episode_number': 5,
|
||||
'upload_date': '20220324',
|
||||
@ -192,7 +191,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
|
||||
'title': json_bangumi_data.get('showTitle'),
|
||||
'thumbnail': json_bangumi_data.get('image'),
|
||||
'season': json_bangumi_data.get('bangumiTitle'),
|
||||
'season_id': str_or_none(season_id),
|
||||
'season_id': season_id,
|
||||
'season_number': season_number,
|
||||
'episode': json_bangumi_data.get('title'),
|
||||
'episode_number': episode_number,
|
||||
|
@ -3,53 +3,33 @@ import binascii
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..compat import compat_b64decode
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
ass_subtitles_timecode,
|
||||
bytes_to_intlist,
|
||||
bytes_to_long,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
intlist_to_bytes,
|
||||
long_to_bytes,
|
||||
parse_iso8601,
|
||||
pkcs1pad,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ADNBaseIE(InfoExtractor):
|
||||
class ADNIE(InfoExtractor):
|
||||
IE_DESC = 'Animation Digital Network'
|
||||
_NETRC_MACHINE = 'animationdigitalnetwork'
|
||||
_BASE = 'animationdigitalnetwork.fr'
|
||||
_API_BASE_URL = f'https://gw.api.{_BASE}/'
|
||||
_PLAYER_BASE_URL = f'{_API_BASE_URL}player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
'end': 3,
|
||||
}
|
||||
_LINE_ALIGN_MAP = {
|
||||
'middle': 8,
|
||||
'end': 4,
|
||||
}
|
||||
|
||||
|
||||
class ADNIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.com/video/558-fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
||||
'info_dict': {
|
||||
'id': '9841',
|
||||
@ -64,32 +44,29 @@ class ADNIE(ADNBaseIE):
|
||||
'season_number': 1,
|
||||
'episode': 'À ce soir !',
|
||||
'episode_number': 1,
|
||||
'thumbnail': str,
|
||||
'season': 'Season 1',
|
||||
},
|
||||
'skip': 'Only available in French and German speaking Europe',
|
||||
'skip': 'Only available in region (FR, ...)',
|
||||
}, {
|
||||
'url': 'https://animationdigitalnetwork.com/de/video/973-the-eminence-in-shadow/23550-folge-1',
|
||||
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
|
||||
'info_dict': {
|
||||
'id': '23550',
|
||||
'ext': 'mp4',
|
||||
'episode_number': 1,
|
||||
'duration': 1417,
|
||||
'release_date': '20231004',
|
||||
'series': 'The Eminence in Shadow',
|
||||
'season_number': 2,
|
||||
'episode': str,
|
||||
'title': str,
|
||||
'thumbnail': str,
|
||||
'season': 'Season 2',
|
||||
'comment_count': int,
|
||||
'average_rating': float,
|
||||
'description': str,
|
||||
},
|
||||
# 'skip': 'Only available in French and German speaking Europe',
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_NETRC_MACHINE = 'animationdigitalnetwork'
|
||||
_BASE = 'animationdigitalnetwork.fr'
|
||||
_API_BASE_URL = 'https://gw.api.' + _BASE + '/'
|
||||
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
'end': 3,
|
||||
}
|
||||
_LINE_ALIGN_MAP = {
|
||||
'middle': 8,
|
||||
'end': 4,
|
||||
}
|
||||
|
||||
def _get_subtitles(self, sub_url, video_id):
|
||||
if not sub_url:
|
||||
return None
|
||||
@ -106,9 +83,9 @@ class ADNIE(ADNBaseIE):
|
||||
|
||||
# http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
|
||||
base64.b64decode(enc_subtitles[24:]),
|
||||
compat_b64decode(enc_subtitles[24:]),
|
||||
binascii.unhexlify(self._K + '7fac1178830cfe0c'),
|
||||
base64.b64decode(enc_subtitles[:24])))
|
||||
compat_b64decode(enc_subtitles[:24])))
|
||||
subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False)
|
||||
if not subtitles_json:
|
||||
return None
|
||||
@ -131,7 +108,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
if start is None or end is None or text is None:
|
||||
continue
|
||||
alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0)
|
||||
ssa += os.linesep + 'Dialogue: Marked=0,{},{},Default,,0,0,0,,{}{}'.format(
|
||||
ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % (
|
||||
ass_subtitles_timecode(start),
|
||||
ass_subtitles_timecode(end),
|
||||
'{\\a%d}' % alignment if alignment != 2 else '',
|
||||
@ -139,8 +116,6 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
if sub_lang == 'vostf':
|
||||
sub_lang = 'fr'
|
||||
elif sub_lang == 'vostde':
|
||||
sub_lang = 'de'
|
||||
subtitles.setdefault(sub_lang, []).extend([{
|
||||
'ext': 'json',
|
||||
'data': json.dumps(sub),
|
||||
@ -162,7 +137,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
'username': username,
|
||||
})) or {}).get('accessToken')
|
||||
if access_token:
|
||||
self._HEADERS['Authorization'] = f'Bearer {access_token}'
|
||||
self._HEADERS = {'authorization': 'Bearer ' + access_token}
|
||||
except ExtractorError as e:
|
||||
message = None
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
@ -172,9 +147,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_id = self._match_valid_url(url).group('lang', 'id')
|
||||
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
|
||||
video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/'
|
||||
video_id = self._match_id(url)
|
||||
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
|
||||
player = self._download_json(
|
||||
video_base_url + 'configuration', video_id,
|
||||
'Downloading player config JSON metadata',
|
||||
@ -183,29 +157,26 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
user = options['user']
|
||||
if not user.get('hasAccess'):
|
||||
start_date = traverse_obj(options, ('video', 'startDate', {str}))
|
||||
if (parse_iso8601(start_date) or 0) > time.time():
|
||||
raise ExtractorError(f'This video is not available yet. Release date: {start_date}', expected=True)
|
||||
self.raise_login_required('This video requires a subscription', method='password')
|
||||
self.raise_login_required()
|
||||
|
||||
token = self._download_json(
|
||||
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
|
||||
video_id, 'Downloading access token', headers={
|
||||
'X-Player-Refresh-Token': user['refreshToken'],
|
||||
'x-player-refresh-token': user['refreshToken']
|
||||
}, data=b'')['token']
|
||||
|
||||
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
|
||||
self._K = ''.join(random.choices('0123456789abcdef', k=16))
|
||||
message = list(json.dumps({
|
||||
message = bytes_to_intlist(json.dumps({
|
||||
'k': self._K,
|
||||
't': token,
|
||||
}).encode())
|
||||
}))
|
||||
|
||||
# Sometimes authentication fails for no good reason, retry with
|
||||
# a different random padding
|
||||
links_data = None
|
||||
for _ in range(3):
|
||||
padded_message = bytes(pkcs1pad(message, 128))
|
||||
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
|
||||
n, e = self._RSA_KEY
|
||||
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
|
||||
authorization = base64.b64encode(encrypted_message).decode()
|
||||
@ -213,13 +184,12 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
try:
|
||||
links_data = self._download_json(
|
||||
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||
'X-Player-Token': authorization,
|
||||
**self._HEADERS,
|
||||
'X-Player-Token': authorization
|
||||
}, query={
|
||||
'freeWithAds': 'true',
|
||||
'adaptive': 'false',
|
||||
'withMetadata': 'true',
|
||||
'source': 'Web',
|
||||
'source': 'Web'
|
||||
})
|
||||
break
|
||||
except ExtractorError as e:
|
||||
@ -232,7 +202,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
error = self._parse_json(e.cause.response.read(), video_id)
|
||||
message = error.get('message')
|
||||
if e.cause.status == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
raise ExtractorError(message)
|
||||
else:
|
||||
@ -251,8 +221,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
for quality, load_balancer_url in qualities.items():
|
||||
load_balancer_data = self._download_json(
|
||||
load_balancer_url, video_id,
|
||||
f'Downloading {format_id} {quality} JSON metadata',
|
||||
headers=self._HEADERS,
|
||||
'Downloading %s %s JSON metadata' % (format_id, quality),
|
||||
fatal=False) or {}
|
||||
m3u8_url = load_balancer_data.get('location')
|
||||
if not m3u8_url:
|
||||
@ -263,17 +232,11 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
if format_id == 'vf':
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'fr'
|
||||
elif format_id == 'vde':
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'de'
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
if not formats:
|
||||
self.raise_login_required('This video requires a subscription', method='password')
|
||||
|
||||
video = (self._download_json(
|
||||
self._API_BASE_URL + f'video/{video_id}', video_id,
|
||||
'Downloading additional video metadata', fatal=False, headers=self._HEADERS) or {}).get('video') or {}
|
||||
self._API_BASE_URL + 'video/%s' % video_id, video_id,
|
||||
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||
show = video.get('show') or {}
|
||||
|
||||
return {
|
||||
@ -292,38 +255,3 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
|
||||
'comment_count': int_or_none(video.get('commentsCount')),
|
||||
}
|
||||
|
||||
|
||||
class ADNSeasonIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>\d+)[^/?#]*/?(?:$|[#?])'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.com/video/911-tokyo-mew-mew-new',
|
||||
'playlist_count': 12,
|
||||
'info_dict': {
|
||||
'id': '911',
|
||||
'title': 'Tokyo Mew Mew New',
|
||||
},
|
||||
# 'skip': 'Only available in French end German speaking Europe',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_show_slug = self._match_valid_url(url).group('lang', 'id')
|
||||
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
|
||||
show = self._download_json(
|
||||
f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug,
|
||||
'Downloading show JSON metadata', headers=self._HEADERS)['show']
|
||||
show_id = str(show['id'])
|
||||
episodes = self._download_json(
|
||||
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
|
||||
'Downloading episode list', headers=self._HEADERS, query={
|
||||
'order': 'asc',
|
||||
'limit': '-1',
|
||||
})
|
||||
|
||||
def entries():
|
||||
for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})):
|
||||
yield self.url_result(join_nonempty(
|
||||
'https://animationdigitalnetwork.com', lang, 'video',
|
||||
video_show_slug, episode_id, delim='/'), ADNIE, episode_id)
|
||||
|
||||
return self.playlist_result(entries(), show_id, show.get('title'))
|
||||
|
@ -1,6 +1,8 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class AdobeConnectIE(InfoExtractor):
|
||||
@ -10,13 +12,13 @@ class AdobeConnectIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_extract_title(webpage)
|
||||
qs = urllib.parse.parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
|
||||
qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
|
||||
is_live = qs.get('isLive', ['false'])[0] == 'true'
|
||||
formats = []
|
||||
for con_string in qs['conStrings'][0].split(','):
|
||||
formats.append({
|
||||
'format_id': con_string.split('://')[0],
|
||||
'app': urllib.parse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
|
||||
'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
|
||||
'ext': 'flv',
|
||||
'play_path': 'mp4:' + qs['streamName'][0],
|
||||
'rtmp_conn': 'S:' + qs['ticket'][0],
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2,12 +2,13 @@ import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ISO639Utils,
|
||||
OnDemandPagedList,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
ISO639Utils,
|
||||
join_nonempty,
|
||||
OnDemandPagedList,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
@ -35,7 +36,7 @@ class AdobeTVBaseIE(InfoExtractor):
|
||||
return subtitles
|
||||
|
||||
def _parse_video_data(self, video_data):
|
||||
video_id = str(video_data['id'])
|
||||
video_id = compat_str(video_data['id'])
|
||||
title = video_data['title']
|
||||
|
||||
s3_extracted = False
|
||||
@ -150,7 +151,7 @@ class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
|
||||
page += 1
|
||||
query['page'] = page
|
||||
for element_data in self._call_api(
|
||||
self._RESOURCE, display_id, query, f'Download Page {page}'):
|
||||
self._RESOURCE, display_id, query, 'Download Page %d' % page):
|
||||
yield self._process_data(element_data)
|
||||
|
||||
def _extract_playlist_entries(self, display_id, query):
|
||||
|
@ -91,7 +91,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
getShowBySlug(slug:"%s") {
|
||||
%%s
|
||||
}
|
||||
}''' % show_path # noqa: UP031
|
||||
}''' % show_path
|
||||
if episode_path:
|
||||
query = query % '''title
|
||||
getVideoBySlug(slug:"%s") {
|
||||
@ -107,6 +107,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
title
|
||||
tvRating
|
||||
}''' % episode_path
|
||||
['getVideoBySlug']
|
||||
else:
|
||||
query = query % '''metaDescription
|
||||
title
|
||||
@ -128,7 +129,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
episode_title = title = video_data['title']
|
||||
series = show_data.get('title')
|
||||
if series:
|
||||
title = f'{series} - {title}'
|
||||
title = '%s - %s' % (series, title)
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@ -191,7 +192,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
if not slug:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
f'http://adultswim.com/videos/{show_path}/{slug}',
|
||||
'http://adultswim.com/videos/%s/%s' % (show_path, slug),
|
||||
'AdultSwim', video.get('_id')))
|
||||
return self.playlist_result(
|
||||
entries, show_path, show_data.get('title'),
|
||||
|
@ -73,8 +73,8 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
def _extract_aetn_info(self, domain, filter_key, filter_value, url):
|
||||
requestor_id, brand = self._DOMAIN_MAP[domain]
|
||||
result = self._download_json(
|
||||
f'https://feeds.video.aetnd.com/api/v2/{brand}/videos',
|
||||
filter_value, query={f'filter[{filter_key}]': filter_value})
|
||||
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
|
||||
filter_value, query={'filter[%s]' % filter_key: filter_value})
|
||||
result = traverse_obj(
|
||||
result, ('results',
|
||||
lambda k, v: k == 0 and v[filter_key] == filter_value),
|
||||
@ -93,7 +93,7 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
traverse_obj(theplatform_metadata, ('ratings', 0, 'rating')))
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||
@ -121,28 +121,18 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'info_dict': {
|
||||
'id': '22253814',
|
||||
'ext': 'mp4',
|
||||
'title': 'Winter Is Coming',
|
||||
'description': 'md5:a40e370925074260b1c8a633c632c63a',
|
||||
'title': 'Winter is Coming',
|
||||
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
|
||||
'timestamp': 1338306241,
|
||||
'upload_date': '20120529',
|
||||
'uploader': 'AENE-NEW',
|
||||
'duration': 2592.0,
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'chapters': 'count:5',
|
||||
'tags': 'count:14',
|
||||
'categories': ['Mountain Men'],
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'series': 'Mountain Men',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': 'Geo-restricted - This content is not available in your location.',
|
||||
'skip': 'Geo-restricted - This content is not available in your location.'
|
||||
}, {
|
||||
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
||||
'info_dict': {
|
||||
@ -153,15 +143,6 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'timestamp': 1452634428,
|
||||
'upload_date': '20160112',
|
||||
'uploader': 'AENE-NEW',
|
||||
'duration': 1277.695,
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'chapters': 'count:4',
|
||||
'tags': 'count:23',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'season': 'Season 9',
|
||||
'season_number': 9,
|
||||
'series': 'Duck Dynasty',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@ -171,28 +152,28 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'skip': 'This video is only available for users of participating TV providers.',
|
||||
}, {
|
||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.history.com/videos/history-of-valentines-day',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -209,14 +190,14 @@ class AENetworksListBaseIE(AENetworksBaseIE):
|
||||
%s(slug: "%s") {
|
||||
%s
|
||||
}
|
||||
}''' % (resource, slug, fields), # noqa: UP031
|
||||
}''' % (resource, slug, fields),
|
||||
}))['data'][resource]
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, slug = self._match_valid_url(url).groups()
|
||||
_, brand = self._DOMAIN_MAP[domain]
|
||||
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
|
||||
base_url = f'http://watch.{domain}'
|
||||
base_url = 'http://watch.%s' % domain
|
||||
|
||||
entries = []
|
||||
for item in (playlist.get(self._ITEMS_KEY) or []):
|
||||
@ -248,10 +229,10 @@ class AENetworksCollectionIE(AENetworksListBaseIE):
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.historyvault.com/collections/mysteryquest',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}]
|
||||
_RESOURCE = 'list'
|
||||
_ITEMS_KEY = 'items'
|
||||
@ -309,7 +290,7 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||
'info_dict': {
|
||||
'id': '40700995724',
|
||||
'ext': 'mp4',
|
||||
'title': 'History of Valentine’s Day',
|
||||
'title': "History of Valentine’s Day",
|
||||
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||
'timestamp': 1375819729,
|
||||
'upload_date': '20130806',
|
||||
@ -357,13 +338,12 @@ class BiographyIE(AENetworksBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': '404 Not Found',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
player_url = self._search_regex(
|
||||
rf'<phoenix-iframe[^>]+src="({HistoryPlayerIE._VALID_URL})',
|
||||
r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
|
||||
webpage, 'player URL')
|
||||
return self.url_result(player_url, HistoryPlayerIE.ie_key())
|
||||
|
@ -16,8 +16,8 @@ class AeonCoIE(InfoExtractor):
|
||||
'uploader': 'Semiconductor',
|
||||
'uploader_id': 'semiconductor',
|
||||
'uploader_url': 'https://vimeo.com/semiconductor',
|
||||
'duration': 348,
|
||||
},
|
||||
'duration': 348
|
||||
}
|
||||
}, {
|
||||
'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
|
||||
'md5': '03582d795382e49f2fd0b427b55de409',
|
||||
@ -29,8 +29,8 @@ class AeonCoIE(InfoExtractor):
|
||||
'uploader': 'Aeon Video',
|
||||
'uploader_id': 'aeonvideo',
|
||||
'uploader_url': 'https://vimeo.com/aeonvideo',
|
||||
'duration': 1344,
|
||||
},
|
||||
'duration': 1344
|
||||
}
|
||||
}, {
|
||||
'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out',
|
||||
'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b',
|
||||
|
@ -1,26 +1,142 @@
|
||||
import datetime as dt
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import Request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
UserNotLive,
|
||||
date_from_str,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
xpath_text,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AfreecaTVBaseIE(InfoExtractor):
|
||||
class AfreecaTVIE(InfoExtractor):
|
||||
IE_NAME = 'afreecatv'
|
||||
IE_DESC = 'afreecatv.com'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
|
||||
(?:
|
||||
/app/(?:index|read_ucc_bbs)\.cgi|
|
||||
/player/[Pp]layer\.(?:swf|html)
|
||||
)\?.*?\bnTitleNo=|
|
||||
vod\.afreecatv\.com/(PLAYER/STATION|player)/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_NETRC_MACHINE = 'afreecatv'
|
||||
_TESTS = [{
|
||||
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
|
||||
'info_dict': {
|
||||
'id': '36164052',
|
||||
'ext': 'mp4',
|
||||
'title': '데일리 에이프릴 요정들의 시상식!',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'upload_date': '20160503',
|
||||
},
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
|
||||
'info_dict': {
|
||||
'id': '36153164',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': 'd8b7c174568da61d774ef0203159bf97',
|
||||
'info_dict': {
|
||||
'id': '36153164_1',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}, {
|
||||
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
|
||||
'info_dict': {
|
||||
'id': '36153164_2',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}],
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
# non standard key
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
|
||||
'info_dict': {
|
||||
'id': '20170411_BE689A0E_190960999_1_2_h',
|
||||
'ext': 'mp4',
|
||||
'title': '혼자사는여자집',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': '♥이슬이',
|
||||
'uploader_id': 'dasl8121',
|
||||
'upload_date': '20170411',
|
||||
'duration': 213,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# adult content
|
||||
'url': 'https://vod.afreecatv.com/player/97267690',
|
||||
'info_dict': {
|
||||
'id': '20180327_27901457_202289533_1',
|
||||
'ext': 'mp4',
|
||||
'title': '[생]빨개요♥ (part 1)',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': '[SA]서아',
|
||||
'uploader_id': 'bjdyrksu',
|
||||
'upload_date': '20180327',
|
||||
'duration': 3601,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'The VOD does not exist',
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vod.afreecatv.com/player/96753363',
|
||||
'info_dict': {
|
||||
'id': '20230108_9FF5BEE1_244432674_1',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'rlantnghks',
|
||||
'uploader': '페이즈으',
|
||||
'duration': 10840,
|
||||
'thumbnail': 'http://videoimg.afreecatv.com/php/SnapshotLoad.php?rowKey=20230108_9FF5BEE1_244432674_1_r',
|
||||
'upload_date': '20230108',
|
||||
'title': '젠지 페이즈',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def parse_video_key(key):
|
||||
video_key = {}
|
||||
m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
|
||||
if m:
|
||||
video_key['upload_date'] = m.group('upload_date')
|
||||
video_key['part'] = int(m.group('part'))
|
||||
return video_key
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_form = {
|
||||
@ -34,21 +150,21 @@ class AfreecaTVBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
response = self._download_json(
|
||||
'https://login.sooplive.co.kr/app/LoginAction.php', None,
|
||||
'https://login.afreecatv.com/app/LoginAction.php', None,
|
||||
'Logging in', data=urlencode_postdata(login_form))
|
||||
|
||||
_ERRORS = {
|
||||
-4: 'Your account has been suspended due to a violation of our terms and policies.',
|
||||
-5: 'https://member.sooplive.co.kr/app/user_delete_progress.php',
|
||||
-6: 'https://login.sooplive.co.kr/membership/changeMember.php',
|
||||
-8: "Hello! Soop here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
|
||||
-9: 'https://member.sooplive.co.kr/app/pop_login_block.php',
|
||||
-11: 'https://login.sooplive.co.kr/afreeca/second_login.php',
|
||||
-12: 'https://member.sooplive.co.kr/app/user_security.php',
|
||||
-5: 'https://member.afreecatv.com/app/user_delete_progress.php',
|
||||
-6: 'https://login.afreecatv.com/membership/changeMember.php',
|
||||
-8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
|
||||
-9: 'https://member.afreecatv.com/app/pop_login_block.php',
|
||||
-11: 'https://login.afreecatv.com/afreeca/second_login.php',
|
||||
-12: 'https://member.afreecatv.com/app/user_security.php',
|
||||
0: 'The username does not exist or you have entered the wrong password.',
|
||||
-1: 'The username does not exist or you have entered the wrong password.',
|
||||
-3: 'You have entered your username/password incorrectly.',
|
||||
-7: 'You cannot use your Global Soop account to access Korean Soop.',
|
||||
-7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
|
||||
-10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
|
||||
-32008: 'You have failed to log in. Please contact our Help Center.',
|
||||
}
|
||||
@ -57,206 +173,169 @@ class AfreecaTVBaseIE(InfoExtractor):
|
||||
if result != 1:
|
||||
error = _ERRORS.get(result, 'You have failed to log in.')
|
||||
raise ExtractorError(
|
||||
f'Unable to login: {self.IE_NAME} said: {error}',
|
||||
'Unable to login: %s said: %s' % (self.IE_NAME, error),
|
||||
expected=True)
|
||||
|
||||
def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
|
||||
return self._download_json(Request(
|
||||
f'https://api.m.sooplive.co.kr/{endpoint}',
|
||||
data=data, headers=headers, query=query,
|
||||
extensions={'legacy_ssl': True}), display_id,
|
||||
'Downloading API JSON', 'Unable to download API JSON')
|
||||
|
||||
@staticmethod
|
||||
def _fixup_thumb(thumb_url):
|
||||
if not url_or_none(thumb_url):
|
||||
return None
|
||||
# Core would determine_ext as 'php' from the url, so we need to provide the real ext
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/11537
|
||||
return [{'url': thumb_url, 'ext': 'jpg'}]
|
||||
|
||||
|
||||
class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop'
|
||||
IE_DESC = 'sooplive.co.kr'
|
||||
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/(?:PLAYER/STATION|player)/(?P<id>\d+)/?(?:$|[?#&])'
|
||||
_TESTS = [{
|
||||
'url': 'https://vod.sooplive.co.kr/player/96753363',
|
||||
'info_dict': {
|
||||
'id': '20230108_9FF5BEE1_244432674_1',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'rlantnghks',
|
||||
'uploader': '페이즈으',
|
||||
'duration': 10840,
|
||||
'thumbnail': r're:https?://videoimg\.(?:sooplive\.co\.kr|afreecatv\.com)/.+',
|
||||
'upload_date': '20230108',
|
||||
'timestamp': 1673186405,
|
||||
'title': '젠지 페이즈',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# non standard key
|
||||
'url': 'http://vod.sooplive.co.kr/PLAYER/STATION/20515605',
|
||||
'info_dict': {
|
||||
'id': '20170411_BE689A0E_190960999_1_2_h',
|
||||
'ext': 'mp4',
|
||||
'title': '혼자사는여자집',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.(?:sooplive\.co\.kr|afreecatv\.com)/.+',
|
||||
'uploader': '♥이슬이',
|
||||
'uploader_id': 'dasl8121',
|
||||
'upload_date': '20170411',
|
||||
'timestamp': 1491929865,
|
||||
'duration': 213,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# adult content
|
||||
'url': 'https://vod.sooplive.co.kr/player/97267690',
|
||||
'info_dict': {
|
||||
'id': '20180327_27901457_202289533_1',
|
||||
'ext': 'mp4',
|
||||
'title': '[생]빨개요♥ (part 1)',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.(?:sooplive\.co\.kr|afreecatv\.com)/.+',
|
||||
'uploader': '[SA]서아',
|
||||
'uploader_id': 'bjdyrksu',
|
||||
'upload_date': '20180327',
|
||||
'duration': 3601,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'The VOD does not exist',
|
||||
}, {
|
||||
# adult content
|
||||
'url': 'https://vod.sooplive.co.kr/player/70395877',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# subscribers only
|
||||
'url': 'https://vod.sooplive.co.kr/player/104647403',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# private
|
||||
'url': 'https://vod.sooplive.co.kr/player/81669846',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._call_api(
|
||||
'station/video/a/view', video_id, headers={'Referer': url},
|
||||
data=urlencode_postdata({
|
||||
|
||||
partial_view = False
|
||||
adult_view = False
|
||||
for _ in range(2):
|
||||
data = self._download_json(
|
||||
'https://api.m.afreecatv.com/station/video/a/view',
|
||||
video_id, headers={'Referer': url}, data=urlencode_postdata({
|
||||
'nTitleNo': video_id,
|
||||
'nApiLevel': 10,
|
||||
}))['data']
|
||||
|
||||
error_code = traverse_obj(data, ('code', {int}))
|
||||
if error_code == -6221:
|
||||
if traverse_obj(data, ('code', {int})) == -6221:
|
||||
raise ExtractorError('The VOD does not exist', expected=True)
|
||||
elif error_code == -6205:
|
||||
raise ExtractorError('This VOD is private', expected=True)
|
||||
query = {
|
||||
'nTitleNo': video_id,
|
||||
'nStationNo': data['station_no'],
|
||||
'nBbsNo': data['bbs_no'],
|
||||
}
|
||||
if partial_view:
|
||||
query['partialView'] = 'SKIP_ADULT'
|
||||
if adult_view:
|
||||
query['adultView'] = 'ADULT_VIEW'
|
||||
video_xml = self._download_xml(
|
||||
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
|
||||
video_id, 'Downloading video info XML%s'
|
||||
% (' (skipping adult)' if partial_view else ''),
|
||||
video_id, headers={
|
||||
'Referer': url,
|
||||
}, query=query)
|
||||
|
||||
common_info = traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('writer_nick', {str}),
|
||||
'uploader_id': ('bj_id', {str}),
|
||||
'duration': ('total_file_duration', {int_or_none(scale=1000)}),
|
||||
'thumbnails': ('thumb', {self._fixup_thumb}),
|
||||
flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
|
||||
if flag and flag == 'SUCCEED':
|
||||
break
|
||||
if flag == 'PARTIAL_ADULT':
|
||||
self.report_warning(
|
||||
'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
|
||||
'Only content suitable for all ages will be downloaded. '
|
||||
'Provide account credentials if you wish to download restricted content.')
|
||||
partial_view = True
|
||||
continue
|
||||
elif flag == 'ADULT':
|
||||
if not adult_view:
|
||||
adult_view = True
|
||||
continue
|
||||
error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
|
||||
else:
|
||||
error = flag
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to download video info')
|
||||
|
||||
video_element = video_xml.findall('./track/video')[-1]
|
||||
if video_element is None or video_element.text is None:
|
||||
raise ExtractorError(
|
||||
'Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
video_url = video_element.text.strip()
|
||||
|
||||
title = xpath_text(video_xml, './track/title', 'title', fatal=True)
|
||||
|
||||
uploader = xpath_text(video_xml, './track/nickname', 'uploader')
|
||||
uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
|
||||
duration = int_or_none(xpath_text(
|
||||
video_xml, './track/duration', 'duration'))
|
||||
thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
|
||||
|
||||
common_entry = {
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
info = common_entry.copy()
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
})
|
||||
|
||||
if not video_url:
|
||||
entries = []
|
||||
for file_num, file_element in enumerate(
|
||||
traverse_obj(data, ('files', lambda _, v: url_or_none(v['file']))), start=1):
|
||||
file_url = file_element['file']
|
||||
file_elements = video_element.findall('./file')
|
||||
one = len(file_elements) == 1
|
||||
for file_num, file_element in enumerate(file_elements, start=1):
|
||||
file_url = url_or_none(file_element.text)
|
||||
if not file_url:
|
||||
continue
|
||||
key = file_element.get('key', '')
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'^(\d{8})_', key, 'upload date', default=None))
|
||||
if upload_date is not None:
|
||||
# sometimes the upload date isn't included in the file name
|
||||
# instead, another random ID is, which may parse as a valid
|
||||
# date but be wildly out of a reasonable range
|
||||
parsed_date = date_from_str(upload_date)
|
||||
if parsed_date.year < 2000 or parsed_date.year >= 2100:
|
||||
upload_date = None
|
||||
file_duration = int_or_none(file_element.get('duration'))
|
||||
format_id = key if key else '%s_%s' % (video_id, file_num)
|
||||
if determine_ext(file_url) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4', m3u8_id='hls',
|
||||
note=f'Downloading part {file_num} m3u8 information')
|
||||
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls',
|
||||
note='Downloading part %d m3u8 information' % file_num)
|
||||
else:
|
||||
formats = [{
|
||||
'url': file_url,
|
||||
'format_id': 'http',
|
||||
}]
|
||||
|
||||
entries.append({
|
||||
**common_info,
|
||||
'id': file_element.get('file_info_key') or f'{video_id}_{file_num}',
|
||||
'title': f'{common_info.get("title") or "Untitled"} (part {file_num})',
|
||||
if not formats and not self.get_param('ignore_no_formats'):
|
||||
continue
|
||||
file_info = common_entry.copy()
|
||||
file_info.update({
|
||||
'id': format_id,
|
||||
'title': title if one else '%s (part %d)' % (title, file_num),
|
||||
'upload_date': upload_date,
|
||||
'duration': file_duration,
|
||||
'formats': formats,
|
||||
**traverse_obj(file_element, {
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('file_start', {parse_iso8601(delimiter=' ', timezone=dt.timedelta(hours=9))}),
|
||||
}),
|
||||
})
|
||||
entries.append(file_info)
|
||||
entries_info = info.copy()
|
||||
entries_info.update({
|
||||
'_type': 'multi_video',
|
||||
'entries': entries,
|
||||
})
|
||||
return entries_info
|
||||
|
||||
if traverse_obj(data, ('adult_status', {str})) == 'notLogin':
|
||||
if not entries:
|
||||
self.raise_login_required(
|
||||
'Only users older than 19 are able to watch this video', method='password')
|
||||
self.report_warning(
|
||||
'In accordance with local laws and regulations, underage users are '
|
||||
'restricted from watching adult content. Only content suitable for all '
|
||||
f'ages will be downloaded. {self._login_hint("password")}')
|
||||
|
||||
if not entries and traverse_obj(data, ('sub_upload_type', {str})):
|
||||
self.raise_login_required('This VOD is for subscribers only', method='password')
|
||||
|
||||
if len(entries) == 1:
|
||||
return {
|
||||
**entries[0],
|
||||
'title': common_info.get('title'),
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
common_info['timestamp'] = traverse_obj(entries, (..., 'timestamp'), get_all=False)
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
info['formats'] = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
else:
|
||||
app, playpath = video_url.split('mp4:')
|
||||
info.update({
|
||||
'url': app,
|
||||
'ext': 'flv',
|
||||
'play_path': 'mp4:' + playpath,
|
||||
'rtmp_live': True, # downloading won't end without this
|
||||
})
|
||||
|
||||
return self.playlist_result(entries, video_id, multi_video=True, **common_info)
|
||||
return info
|
||||
|
||||
|
||||
class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop:catchstory'
|
||||
IE_DESC = 'sooplive.co.kr catch story'
|
||||
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/player/(?P<id>\d+)/catchstory'
|
||||
class AfreecaTVLiveIE(AfreecaTVIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
IE_NAME = 'afreecatv:live'
|
||||
_VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://vod.sooplive.co.kr/player/103247/catchstory',
|
||||
'info_dict': {
|
||||
'id': '103247',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._call_api(
|
||||
'catchstory/a/view', video_id, headers={'Referer': url},
|
||||
query={'aStoryListIdx': '', 'nStoryIdx': video_id})
|
||||
|
||||
return self.playlist_result(self._entries(data), video_id)
|
||||
|
||||
def _entries(self, data):
|
||||
# 'files' is always a list with 1 element
|
||||
yield from traverse_obj(data, (
|
||||
'data', lambda _, v: v['story_type'] == 'catch',
|
||||
'catch_list', lambda _, v: v['files'][0]['file'], {
|
||||
'id': ('files', 0, 'file_info_key', {str}),
|
||||
'url': ('files', 0, 'file', {url_or_none}),
|
||||
'duration': ('files', 0, 'duration', {int_or_none(scale=1000)}),
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('writer_nick', {str}),
|
||||
'uploader_id': ('writer_id', {str}),
|
||||
'thumbnails': ('thumb', {self._fixup_thumb}),
|
||||
'timestamp': ('write_timestamp', {int_or_none}),
|
||||
}))
|
||||
|
||||
|
||||
class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop:live'
|
||||
IE_DESC = 'sooplive.co.kr livestreams'
|
||||
_VALID_URL = r'https?://play\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)(?:/(?P<bno>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
|
||||
'url': 'https://play.afreecatv.com/pyh3646/237852185',
|
||||
'info_dict': {
|
||||
'id': '237852185',
|
||||
'ext': 'mp4',
|
||||
@ -268,121 +347,94 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
},
|
||||
'skip': 'Livestream has ended',
|
||||
}, {
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
|
||||
'url': 'http://play.afreeca.com/pyh3646/237852185',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646',
|
||||
'url': 'http://play.afreeca.com/pyh3646',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_LIVE_API_URL = 'https://live.sooplive.co.kr/afreeca/player_live_api.php'
|
||||
_WORKING_CDNS = [
|
||||
'gcp_cdn', # live-global-cdn-v02.sooplive.co.kr
|
||||
'gs_cdn_pc_app', # pc-app.stream.sooplive.co.kr
|
||||
'gs_cdn_mobile_web', # mobile-web.stream.sooplive.co.kr
|
||||
'gs_cdn_pc_web', # pc-web.stream.sooplive.co.kr
|
||||
]
|
||||
_BAD_CDNS = [
|
||||
'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve)
|
||||
'gs_cdn_chromecast', # chromecast.stream.sooplive.co.kr (HTTP Error 400)
|
||||
'azure_cdn', # live-global-cdn-v01.sooplive.co.kr (cannot resolve)
|
||||
'aws_cf', # live-global-cdn-v03.sooplive.co.kr (cannot resolve)
|
||||
'kt_cdn', # kt.stream.sooplive.co.kr (HTTP Error 400)
|
||||
]
|
||||
_LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'
|
||||
|
||||
def _extract_formats(self, channel_info, broadcast_no, aid):
|
||||
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.co.kr'
|
||||
|
||||
# If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
|
||||
default_cdn_ids = orderedSet([
|
||||
*traverse_obj(channel_info, ('CDN', {str}, all, lambda _, v: v not in self._BAD_CDNS)),
|
||||
*self._WORKING_CDNS,
|
||||
])
|
||||
cdn_ids = self._configuration_arg('cdn', default_cdn_ids)
|
||||
|
||||
for attempt, cdn_id in enumerate(cdn_ids, start=1):
|
||||
m3u8_url = traverse_obj(self._download_json(
|
||||
urljoin(stream_base_url, 'broad_stream_assign.html'), broadcast_no,
|
||||
f'Downloading {cdn_id} stream info', f'Unable to download {cdn_id} stream info',
|
||||
fatal=False, query={
|
||||
'return_type': cdn_id,
|
||||
'broad_key': f'{broadcast_no}-common-master-hls',
|
||||
}), ('view_url', {url_or_none}))
|
||||
try:
|
||||
return self._extract_m3u8_formats(
|
||||
m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
|
||||
headers={'Referer': 'https://play.sooplive.co.kr/'})
|
||||
except ExtractorError as e:
|
||||
if attempt == len(cdn_ids):
|
||||
raise
|
||||
self.report_warning(
|
||||
f'{e.cause or e.msg}. Retrying... (attempt {attempt} of {len(cdn_ids)})')
|
||||
_QUALITIES = ('sd', 'hd', 'hd2k', 'original')
|
||||
|
||||
def _real_extract(self, url):
|
||||
broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno')
|
||||
channel_info = traverse_obj(self._download_json(
|
||||
self._LIVE_API_URL, broadcaster_id, data=urlencode_postdata({'bid': broadcaster_id})),
|
||||
('CHANNEL', {dict})) or {}
|
||||
password = self.get_param('videopassword')
|
||||
|
||||
info = self._download_json(self._LIVE_API_URL, broadcaster_id, fatal=False,
|
||||
data=urlencode_postdata({'bid': broadcaster_id})) or {}
|
||||
channel_info = info.get('CHANNEL') or {}
|
||||
broadcaster_id = channel_info.get('BJID') or broadcaster_id
|
||||
broadcast_no = channel_info.get('BNO') or broadcast_no
|
||||
password_protected = channel_info.get('BPWD')
|
||||
if not broadcast_no:
|
||||
result = channel_info.get('RESULT')
|
||||
if result == 0:
|
||||
raise UserNotLive(video_id=broadcaster_id)
|
||||
elif result == -6:
|
||||
self.raise_login_required(
|
||||
'This channel is streaming for subscribers only', method='password')
|
||||
raise ExtractorError('Unable to extract broadcast number')
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
if channel_info.get('BPWD') == 'Y' and password is None:
|
||||
raise ExtractorError(f'Unable to extract broadcast number ({broadcaster_id} may not be live)', expected=True)
|
||||
if password_protected == 'Y' and password is None:
|
||||
raise ExtractorError(
|
||||
'This livestream is protected by a password, use the --video-password option',
|
||||
expected=True)
|
||||
|
||||
token_info = traverse_obj(self._download_json(
|
||||
self._LIVE_API_URL, broadcast_no, 'Downloading access token for stream',
|
||||
'Unable to download access token for stream', data=urlencode_postdata(filter_dict({
|
||||
formats = []
|
||||
quality_key = qualities(self._QUALITIES)
|
||||
for quality_str in self._QUALITIES:
|
||||
params = {
|
||||
'bno': broadcast_no,
|
||||
'stream_type': 'common',
|
||||
'type': 'aid',
|
||||
'quality': 'master',
|
||||
'pwd': password,
|
||||
}))), ('CHANNEL', {dict})) or {}
|
||||
aid = token_info.get('AID')
|
||||
'quality': quality_str,
|
||||
}
|
||||
if password is not None:
|
||||
params['pwd'] = password
|
||||
aid_response = self._download_json(
|
||||
self._LIVE_API_URL, broadcast_no, fatal=False,
|
||||
data=urlencode_postdata(params),
|
||||
note=f'Downloading access token for {quality_str} stream',
|
||||
errnote=f'Unable to download access token for {quality_str} stream')
|
||||
aid = traverse_obj(aid_response, ('CHANNEL', 'AID'))
|
||||
if not aid:
|
||||
result = token_info.get('RESULT')
|
||||
if result == 0:
|
||||
raise ExtractorError('This livestream has ended', expected=True)
|
||||
elif result == -6:
|
||||
self.raise_login_required('This livestream is for subscribers only', method='password')
|
||||
raise ExtractorError('Unable to extract access token')
|
||||
continue
|
||||
|
||||
formats = self._extract_formats(channel_info, broadcast_no, aid)
|
||||
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
|
||||
stream_info = self._download_json(
|
||||
f'{stream_base_url}/broad_stream_assign.html', broadcast_no, fatal=False,
|
||||
query={
|
||||
'return_type': channel_info.get('CDN', 'gcp_cdn'),
|
||||
'broad_key': f'{broadcast_no}-common-{quality_str}-hls',
|
||||
},
|
||||
note=f'Downloading metadata for {quality_str} stream',
|
||||
errnote=f'Unable to download metadata for {quality_str} stream') or {}
|
||||
|
||||
station_info = traverse_obj(self._download_json(
|
||||
'https://st.sooplive.co.kr/api/get_station_status.php', broadcast_no,
|
||||
'Downloading channel metadata', 'Unable to download channel metadata',
|
||||
query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {}
|
||||
if stream_info.get('view_url'):
|
||||
formats.append({
|
||||
'format_id': quality_str,
|
||||
'url': update_url_query(stream_info['view_url'], {'aid': aid}),
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8',
|
||||
'quality': quality_key(quality_str),
|
||||
})
|
||||
|
||||
station_info = self._download_json(
|
||||
'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
|
||||
query={'szBjId': broadcaster_id}, fatal=False,
|
||||
note='Downloading channel metadata', errnote='Unable to download channel metadata') or {}
|
||||
|
||||
return {
|
||||
'id': broadcast_no,
|
||||
'title': channel_info.get('TITLE') or station_info.get('station_title'),
|
||||
'uploader': channel_info.get('BJNICK') or station_info.get('station_name'),
|
||||
'uploader_id': broadcaster_id,
|
||||
'timestamp': parse_iso8601(station_info.get('broad_start'), delimiter=' ', timezone=dt.timedelta(hours=9)),
|
||||
'timestamp': unified_timestamp(station_info.get('broad_start')),
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
'http_headers': {'Referer': url},
|
||||
}
|
||||
|
||||
|
||||
class AfreecaTVUserIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop:user'
|
||||
_VALID_URL = r'https?://ch\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)/vods/?(?P<slug_type>[^/?#]+)?'
|
||||
class AfreecaTVUserIE(InfoExtractor):
|
||||
IE_NAME = 'afreecatv:user'
|
||||
_VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'
|
||||
_TESTS = [{
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/review',
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@ -390,7 +442,7 @@ class AfreecaTVUserIE(AfreecaTVBaseIE):
|
||||
},
|
||||
'playlist_count': 218,
|
||||
}, {
|
||||
'url': 'https://ch.sooplive.co.kr/parang1995/vods/highlight',
|
||||
'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'parang1995',
|
||||
@ -398,7 +450,7 @@ class AfreecaTVUserIE(AfreecaTVBaseIE):
|
||||
},
|
||||
'playlist_count': 997,
|
||||
}, {
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods',
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@ -406,7 +458,7 @@ class AfreecaTVUserIE(AfreecaTVBaseIE):
|
||||
},
|
||||
'playlist_count': 221,
|
||||
}, {
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/balloonclip',
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@ -418,12 +470,12 @@ class AfreecaTVUserIE(AfreecaTVBaseIE):
|
||||
|
||||
def _fetch_page(self, user_id, user_type, page):
|
||||
page += 1
|
||||
info = self._download_json(f'https://chapi.sooplive.co.kr/api/{user_id}/vods/{user_type}', user_id,
|
||||
info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id,
|
||||
query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
|
||||
note=f'Downloading {user_type} video page {page}')
|
||||
for item in info['data']:
|
||||
yield self.url_result(
|
||||
f'https://vod.sooplive.co.kr/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
|
||||
f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id, user_type = self._match_valid_url(url).group('id', 'slug_type')
|
||||
|
@ -146,7 +146,7 @@ class TokFMPodcastIE(InfoExtractor):
|
||||
'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
|
||||
'info_dict': {
|
||||
'id': '91275',
|
||||
'ext': 'mp3',
|
||||
'ext': 'aac',
|
||||
'title': 'md5:a9b15488009065556900169fb8061cce',
|
||||
'episode': 'md5:a9b15488009065556900169fb8061cce',
|
||||
'series': 'Analizy',
|
||||
@ -164,20 +164,23 @@ class TokFMPodcastIE(InfoExtractor):
|
||||
raise ExtractorError('No such podcast', expected=True)
|
||||
metadata = metadata[0]
|
||||
|
||||
mp3_url = self._download_json(
|
||||
'https://api.podcast.radioagora.pl/api4/getSongUrl',
|
||||
media_id, 'Downloading podcast mp3 URL', query={
|
||||
'podcast_id': media_id,
|
||||
'device_id': str(uuid.uuid4()),
|
||||
'ppre': 'false',
|
||||
'audio': 'mp3',
|
||||
})['link_ssl']
|
||||
formats = []
|
||||
for ext in ('aac', 'mp3'):
|
||||
url_data = self._download_json(
|
||||
f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}',
|
||||
media_id, 'Downloading podcast %s URL' % ext)
|
||||
# prevents inserting the mp3 (default) multiple times
|
||||
if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']:
|
||||
formats.append({
|
||||
'url': url_data['link_ssl'],
|
||||
'ext': ext,
|
||||
'vcodec': 'none',
|
||||
'acodec': ext,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'url': mp3_url,
|
||||
'vcodec': 'none',
|
||||
'ext': 'mp3',
|
||||
'formats': formats,
|
||||
'title': metadata.get('podcast_name'),
|
||||
'series': metadata.get('series_name'),
|
||||
'episode': metadata.get('podcast_name'),
|
||||
@ -203,8 +206,8 @@ class TokFMAuditionIE(InfoExtractor):
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _create_url(video_id):
|
||||
return f'https://audycje.tokfm.pl/audycja/{video_id}'
|
||||
def _create_url(id):
|
||||
return f'https://audycje.tokfm.pl/audycja/{id}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
audition_id = self._match_id(url)
|
||||
|
63
plugins/youtube_download/yt_dlp/extractor/airmozilla.py
Normal file
63
plugins/youtube_download/yt_dlp/extractor/airmozilla.py
Normal file
@ -0,0 +1,63 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class AirMozillaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
|
||||
_TEST = {
|
||||
'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
|
||||
'md5': '8d02f53ee39cf006009180e21df1f3ba',
|
||||
'info_dict': {
|
||||
'id': '6x4q2w',
|
||||
'ext': 'mp4',
|
||||
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
|
||||
'thumbnail': r're:https?://.*/poster\.jpg',
|
||||
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
|
||||
'timestamp': 1422487800,
|
||||
'upload_date': '20150128',
|
||||
'location': 'SFO Commons',
|
||||
'duration': 3780,
|
||||
'view_count': int,
|
||||
'categories': ['Main', 'Privacy'],
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')
|
||||
|
||||
embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
|
||||
jwconfig = self._parse_json(self._search_regex(
|
||||
r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config']
|
||||
|
||||
info_dict = self._parse_jwplayer_data(jwconfig, video_id)
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'Views since archived: ([0-9]+)',
|
||||
webpage, 'view count', fatal=False))
|
||||
timestamp = parse_iso8601(self._html_search_regex(
|
||||
r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'url': self._og_search_url(webpage),
|
||||
'display_id': display_id,
|
||||
'description': self._og_search_description(webpage),
|
||||
'timestamp': timestamp,
|
||||
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
|
||||
})
|
||||
|
||||
return info_dict
|
@ -5,7 +5,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
traverse_obj
|
||||
)
|
||||
|
||||
|
||||
@ -26,7 +26,7 @@ class AirTVIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
|
||||
'timestamp': 1664792603,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# with youtube_id
|
||||
'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
|
||||
@ -54,7 +54,7 @@ class AirTVIE(InfoExtractor):
|
||||
'channel': 'Newsflare',
|
||||
'duration': 37,
|
||||
'upload_date': '20180511',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
def _get_formats_and_subtitle(self, json_data, video_id):
|
||||
|
@ -22,7 +22,7 @@ class AitubeKZVideoIE(InfoExtractor):
|
||||
'timestamp': 1667370519,
|
||||
'title': 'Ангел хранитель 1 серия',
|
||||
'channel_follower_count': int,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# embed url
|
||||
'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c',
|
||||
|
@ -1,4 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
try_get,
|
||||
@ -43,7 +44,7 @@ class AliExpressLiveIE(InfoExtractor):
|
||||
'title': title,
|
||||
'thumbnail': data.get('coverUrl'),
|
||||
'uploader': try_get(
|
||||
data, lambda x: x['followBar']['name'], str),
|
||||
data, lambda x: x['followBar']['name'], compat_str),
|
||||
'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ class AlJazeeraIE(InfoExtractor):
|
||||
'timestamp': 1636219149,
|
||||
'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.',
|
||||
'upload_date': '20211106',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu',
|
||||
'info_dict': {
|
||||
@ -33,7 +33,7 @@ class AlJazeeraIE(InfoExtractor):
|
||||
BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P<account>\d+)/(?P<player_id>[a-zA-Z0-9]+)_(?P<embed>[^/]+)/index.html\?videoId=(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
base, post_type, display_id = self._match_valid_url(url).groups()
|
||||
base, post_type, id = self._match_valid_url(url).groups()
|
||||
wp = {
|
||||
'balkans.aljazeera.net': 'ajb',
|
||||
'chinese.aljazeera.net': 'chinese',
|
||||
@ -47,11 +47,11 @@ class AlJazeeraIE(InfoExtractor):
|
||||
'news': 'news',
|
||||
}[post_type.split('/')[0]]
|
||||
video = self._download_json(
|
||||
f'https://{base}/graphql', display_id, query={
|
||||
f'https://{base}/graphql', id, query={
|
||||
'wp-site': wp,
|
||||
'operationName': 'ArchipelagoSingleArticleQuery',
|
||||
'variables': json.dumps({
|
||||
'name': display_id,
|
||||
'name': id,
|
||||
'postType': post_type,
|
||||
}),
|
||||
}, headers={
|
||||
@ -64,7 +64,7 @@ class AlJazeeraIE(InfoExtractor):
|
||||
embed = 'default'
|
||||
|
||||
if video_id is None:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
webpage = self._download_webpage(url, id)
|
||||
|
||||
account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id',
|
||||
group=(1, 2, 3, 4), default=(None, None, None, None))
|
||||
@ -73,11 +73,11 @@ class AlJazeeraIE(InfoExtractor):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': url,
|
||||
'ie_key': 'Generic',
|
||||
'ie_key': 'Generic'
|
||||
}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}',
|
||||
'ie_key': 'BrightcoveNew',
|
||||
'ie_key': 'BrightcoveNew'
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
@ -94,11 +95,11 @@ class AllocineIE(InfoExtractor):
|
||||
duration = int_or_none(video.get('duration'))
|
||||
view_count = int_or_none(video.get('view_count'))
|
||||
timestamp = unified_timestamp(try_get(
|
||||
video, lambda x: x['added_at']['date'], str))
|
||||
video, lambda x: x['added_at']['date'], compat_str))
|
||||
else:
|
||||
video_id = display_id
|
||||
media_data = self._download_json(
|
||||
f'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media={video_id}', display_id)
|
||||
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
|
||||
title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné'))
|
||||
for key, value in media_data['video'].items():
|
||||
if not key.endswith('Path'):
|
||||
|
@ -1,252 +0,0 @@
|
||||
import functools
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_qs,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
_FIELDS = '''
|
||||
_id
|
||||
clipImageSource
|
||||
clipImageThumb
|
||||
clipLink
|
||||
clipTitle
|
||||
createdDate
|
||||
shareId
|
||||
user { _id }
|
||||
username
|
||||
views'''
|
||||
|
||||
_EXTRA_FIELDS = '''
|
||||
clipLength
|
||||
clipSizeBytes'''
|
||||
|
||||
_QUERIES = {
|
||||
'clip': '''query ($id: String!) {
|
||||
video: getClip(clipIdentifier: $id) {
|
||||
%s %s
|
||||
}
|
||||
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
|
||||
'montage': '''query ($id: String!) {
|
||||
video: getMontage(clipIdentifier: $id) {
|
||||
%s
|
||||
}
|
||||
}''' % _FIELDS, # noqa: UP031
|
||||
'Clips': '''query ($page: Int!, $user: String!, $game: Int) {
|
||||
videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) {
|
||||
data { %s %s }
|
||||
}
|
||||
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
|
||||
'Montages': '''query ($page: Int!, $user: String!) {
|
||||
videos: montages(search: createdDate, page: $page, user: $user) {
|
||||
data { %s }
|
||||
}
|
||||
}''' % _FIELDS, # noqa: UP031
|
||||
'Mobile Clips': '''query ($page: Int!, $user: String!) {
|
||||
videos: clips(search: createdDate, page: $page, user: $user, mobile: true) {
|
||||
data { %s %s }
|
||||
}
|
||||
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
|
||||
}
|
||||
|
||||
|
||||
class AllstarBaseIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _parse_video_data(video_data):
|
||||
def media_url_or_none(path):
|
||||
return urljoin('https://media.allstar.gg/', path)
|
||||
|
||||
info = traverse_obj(video_data, {
|
||||
'id': ('_id', {str}),
|
||||
'display_id': ('shareId', {str}),
|
||||
'title': ('clipTitle', {str}),
|
||||
'url': ('clipLink', {media_url_or_none}),
|
||||
'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}),
|
||||
'duration': ('clipLength', {int_or_none}),
|
||||
'filesize': ('clipSizeBytes', {int_or_none}),
|
||||
'timestamp': ('createdDate', {int_or_none(scale=1000)}),
|
||||
'uploader': ('username', {str}),
|
||||
'uploader_id': ('user', '_id', {str}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
})
|
||||
|
||||
if info.get('id') and info.get('url'):
|
||||
basename = 'clip' if '/clips/' in info['url'] else 'montage'
|
||||
info['webpage_url'] = f'https://allstar.gg/{basename}?{basename}={info["id"]}'
|
||||
|
||||
info.update({
|
||||
'extractor_key': AllstarIE.ie_key(),
|
||||
'extractor': AllstarIE.IE_NAME,
|
||||
'uploader_url': urljoin('https://allstar.gg/u/', info.get('uploader_id')),
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
def _call_api(self, query, variables, path, video_id=None, note=None):
|
||||
response = self._download_json(
|
||||
'https://a1.allstar.gg/graphql', video_id, note=note,
|
||||
headers={'content-type': 'application/json'},
|
||||
data=json.dumps({'variables': variables, 'query': query}).encode())
|
||||
|
||||
errors = traverse_obj(response, ('errors', ..., 'message', {str}))
|
||||
if errors:
|
||||
raise ExtractorError('; '.join(errors))
|
||||
|
||||
return traverse_obj(response, path)
|
||||
|
||||
|
||||
class AllstarIE(AllstarBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?P<type>(?:clip|montage))\?(?P=type)=(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://allstar.gg/clip?clip=64482c2da9eec30008a67d1b',
|
||||
'info_dict': {
|
||||
'id': '64482c2da9eec30008a67d1b',
|
||||
'title': '4K on Inferno',
|
||||
'url': 'md5:66befb5381eef0c9456026386c25fa55',
|
||||
'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
|
||||
'uploader': 'chrk.',
|
||||
'ext': 'mp4',
|
||||
'duration': 20,
|
||||
'filesize': 21199257,
|
||||
'timestamp': 1682451501,
|
||||
'uploader_id': '62b8bdfc9021052f7905882d',
|
||||
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
|
||||
'upload_date': '20230425',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://allstar.gg/clip?clip=8LJLY4JKB',
|
||||
'info_dict': {
|
||||
'id': '64a1ec6b887f4c0008dc50b8',
|
||||
'display_id': '8LJLY4JKB',
|
||||
'title': 'AK-47 3K on Mirage',
|
||||
'url': 'md5:dde224fd12f035c0e2529a4ae34c4283',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
|
||||
'duration': 16,
|
||||
'filesize': 30175859,
|
||||
'timestamp': 1688333419,
|
||||
'uploader': 'cherokee',
|
||||
'uploader_id': '62b8bdfc9021052f7905882d',
|
||||
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
|
||||
'upload_date': '20230702',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c',
|
||||
'info_dict': {
|
||||
'id': '643e64089da7e9363e1fa66c',
|
||||
'display_id': 'APQLGM2IMXW',
|
||||
'title': 'cherokee Rapid Fire Snipers Montage',
|
||||
'url': 'md5:a3ee356022115db2b27c81321d195945',
|
||||
'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1681810448,
|
||||
'uploader': 'cherokee',
|
||||
'uploader_id': '62b8bdfc9021052f7905882d',
|
||||
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
|
||||
'upload_date': '20230418',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://allstar.gg/montage?montage=RILJMH6QOS',
|
||||
'info_dict': {
|
||||
'id': '64a2697372ce3703de29e868',
|
||||
'display_id': 'RILJMH6QOS',
|
||||
'title': 'cherokee Rapid Fire Snipers Montage',
|
||||
'url': 'md5:d5672e6f88579730c2310a80fdbc4030',
|
||||
'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1688365434,
|
||||
'uploader': 'cherokee',
|
||||
'uploader_id': '62b8bdfc9021052f7905882d',
|
||||
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
|
||||
'upload_date': '20230703',
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
query_id, video_id = self._match_valid_url(url).group('type', 'id')
|
||||
|
||||
return self._parse_video_data(
|
||||
self._call_api(
|
||||
_QUERIES.get(query_id), {'id': video_id}, ('data', 'video'), video_id))
|
||||
|
||||
|
||||
class AllstarProfileIE(AllstarBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?:profile\?user=|u/)(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://allstar.gg/profile?user=62b8bdfc9021052f7905882d',
|
||||
'info_dict': {
|
||||
'id': '62b8bdfc9021052f7905882d-clips',
|
||||
'title': 'cherokee - Clips',
|
||||
},
|
||||
'playlist_mincount': 15,
|
||||
}, {
|
||||
'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips',
|
||||
'info_dict': {
|
||||
'id': '62b8bdfc9021052f7905882d-clips-730',
|
||||
'title': 'cherokee - Clips - 730',
|
||||
},
|
||||
'playlist_mincount': 15,
|
||||
}, {
|
||||
'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages',
|
||||
'info_dict': {
|
||||
'id': '62b8bdfc9021052f7905882d-montages',
|
||||
'title': 'cherokee - Montages',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips',
|
||||
'info_dict': {
|
||||
'id': '62b8bdfc9021052f7905882d-mobile',
|
||||
'title': 'cherokee - Mobile Clips',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}]
|
||||
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
def _get_page(self, user_id, display_id, game, query, page_num):
|
||||
page_num += 1
|
||||
|
||||
for video_data in self._call_api(
|
||||
query, {
|
||||
'user': user_id,
|
||||
'page': page_num,
|
||||
'game': game,
|
||||
}, ('data', 'videos', 'data'), display_id, f'Downloading page {page_num}'):
|
||||
yield self._parse_video_data(video_data)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
profile_data = self._download_json(
|
||||
urljoin('https://api.allstar.gg/v1/users/profile/', display_id), display_id)
|
||||
user_id = traverse_obj(profile_data, ('data', ('_id'), {str}))
|
||||
if not user_id:
|
||||
raise ExtractorError('Unable to extract the user id')
|
||||
|
||||
username = traverse_obj(profile_data, ('data', 'profile', ('username'), {str}))
|
||||
url_query = parse_qs(url)
|
||||
game = traverse_obj(url_query, ('game', 0, {int_or_none}))
|
||||
query_id = traverse_obj(url_query, ('view', 0), default='Clips')
|
||||
|
||||
if query_id not in ('Clips', 'Montages', 'Mobile Clips'):
|
||||
raise ExtractorError(f'Unsupported playlist URL type {query_id!r}')
|
||||
|
||||
return self.playlist_result(
|
||||
OnDemandPagedList(
|
||||
functools.partial(
|
||||
self._get_page, user_id, display_id, game, _QUERIES.get(query_id)), self._PAGE_SIZE),
|
||||
playlist_id=join_nonempty(user_id, query_id.lower().split()[0], game),
|
||||
playlist_title=join_nonempty((username or display_id), query_id, game, delim=' - '))
|
@ -1,9 +1,9 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
parse_iso8601,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -25,7 +25,7 @@ class AlphaPornoIE(InfoExtractor):
|
||||
'tbr': 1145,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
class Alsace20TVBaseIE(InfoExtractor):
|
||||
def _extract_video(self, video_id, url=None):
|
||||
info = self._download_json(
|
||||
f'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key={video_id}&habillage=0&mode=html',
|
||||
'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
|
||||
video_id) or {}
|
||||
title = info.get('titre')
|
||||
|
||||
@ -24,9 +24,9 @@ class Alsace20TVBaseIE(InfoExtractor):
|
||||
else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
|
||||
|
||||
webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
|
||||
thumbnail = url_or_none(dict_get(info, ('image', 'preview')) or self._og_search_thumbnail(webpage))
|
||||
thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
|
||||
upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
|
||||
upload_date = unified_strdate(f'20{upload_date[:2]}-{upload_date[2:4]}-{upload_date[4:]}') if upload_date else None
|
||||
upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
|
@ -1,104 +0,0 @@
|
||||
import re
|
||||
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
InAdvancePagedList,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
str_to_int,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class AltCensoredIE(InfoExtractor):
|
||||
IE_NAME = 'altcensored'
|
||||
_VALID_URL = r'https?://(?:www\.)?altcensored\.com/(?:watch\?v=|embed/)(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.altcensored.com/watch?v=k0srjLSkga8',
|
||||
'info_dict': {
|
||||
'id': 'youtube-k0srjLSkga8',
|
||||
'ext': 'webm',
|
||||
'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
|
||||
'display_id': 'k0srjLSkga8.webm',
|
||||
'release_date': '20180403',
|
||||
'creators': ['Virginie Vota'],
|
||||
'release_year': 2018,
|
||||
'upload_date': '20230318',
|
||||
'uploader': 'admin@altcensored.com',
|
||||
'description': 'md5:0b38a8fc04103579d5c1db10a247dc30',
|
||||
'timestamp': 1679161343,
|
||||
'track': 'k0srjLSkga8',
|
||||
'duration': 926.09,
|
||||
'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
|
||||
'view_count': int,
|
||||
'categories': ['News & Politics'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
category = clean_html(self._html_search_regex(
|
||||
r'<a href="/category/\d+">([^<]+)</a>', webpage, 'category', default=None))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': f'https://archive.org/details/youtube-{video_id}',
|
||||
'ie_key': ArchiveOrgIE.ie_key(),
|
||||
'view_count': str_to_int(self._html_search_regex(
|
||||
r'YouTube Views:(?:\s| )*([\d,]+)', webpage, 'view count', default=None)),
|
||||
'categories': [category] if category else None,
|
||||
}
|
||||
|
||||
|
||||
class AltCensoredChannelIE(InfoExtractor):
|
||||
IE_NAME = 'altcensored:channel'
|
||||
_VALID_URL = r'https?://(?:www\.)?altcensored\.com/channel/(?!page|table)(?P<id>[^/?#]+)'
|
||||
_PAGE_SIZE = 24
|
||||
_TESTS = [{
|
||||
'url': 'https://www.altcensored.com/channel/UCFPTO55xxHqFqkzRZHu4kcw',
|
||||
'info_dict': {
|
||||
'title': 'Virginie Vota',
|
||||
'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
|
||||
},
|
||||
'playlist_count': 85,
|
||||
}, {
|
||||
'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
|
||||
'info_dict': {
|
||||
'title': 'yukikaze775',
|
||||
'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}, {
|
||||
'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw',
|
||||
'info_dict': {
|
||||
'title': 'Mister Metokur',
|
||||
'id': 'UCfYbb7nga6-icsFWWgS-kWw',
|
||||
},
|
||||
'playlist_count': 121,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
|
||||
title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False)
|
||||
page_count = int_or_none(self._html_search_regex(
|
||||
r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>',
|
||||
webpage, 'page count', default='1'))
|
||||
|
||||
def page_func(page_num):
|
||||
page_num += 1
|
||||
webpage = self._download_webpage(
|
||||
f'https://altcensored.com/channel/{channel_id}/page/{page_num}',
|
||||
channel_id, note=f'Downloading page {page_num}')
|
||||
|
||||
items = re.findall(r'<a[^>]+href="(/watch\?v=[^"]+)', webpage)
|
||||
return [self.url_result(urljoin('https://www.altcensored.com', path), AltCensoredIE)
|
||||
for path in orderedSet(items)]
|
||||
|
||||
return self.playlist_result(
|
||||
InAdvancePagedList(page_func, page_count, self._PAGE_SIZE),
|
||||
playlist_id=channel_id, playlist_title=title)
|
@ -1,13 +1,17 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
int_or_none,
|
||||
clean_html,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
@ -21,7 +25,7 @@ class AluraIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '60095',
|
||||
'ext': 'mp4',
|
||||
'title': 'Referências, ref-set e alter',
|
||||
'title': 'Referências, ref-set e alter'
|
||||
},
|
||||
'skip': 'Requires alura account credentials'},
|
||||
{
|
||||
@ -30,12 +34,12 @@ class AluraIE(InfoExtractor):
|
||||
'only_matching': True},
|
||||
{
|
||||
'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219',
|
||||
'only_matching': True},
|
||||
'only_matching': True}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
course, video_id = self._match_valid_url(url).group('course_name', 'id')
|
||||
course, video_id = self._match_valid_url(url)
|
||||
video_url = self._VIDEO_URL % (course, video_id)
|
||||
|
||||
video_dict = self._download_json(video_url, video_id, 'Searching for videos')
|
||||
@ -48,7 +52,7 @@ class AluraIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for video_obj in video_dict:
|
||||
video_url_m3u8 = video_obj.get('mp4')
|
||||
video_url_m3u8 = video_obj.get('link')
|
||||
video_format = self._extract_m3u8_formats(
|
||||
video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
@ -62,7 +66,7 @@ class AluraIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
"formats": formats
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
@ -91,7 +95,7 @@ class AluraIE(InfoExtractor):
|
||||
'post url', default=self._LOGIN_URL, group='url')
|
||||
|
||||
if not post_url.startswith('http'):
|
||||
post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url)
|
||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
response = self._download_webpage(
|
||||
post_url, None, 'Logging in',
|
||||
@ -103,7 +107,7 @@ class AluraIE(InfoExtractor):
|
||||
r'(?s)<p[^>]+class="alert-message[^"]*">(.+?)</p>',
|
||||
response, 'error message', default=None)
|
||||
if error:
|
||||
raise ExtractorError(f'Unable to login: {error}', expected=True)
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
@ -119,7 +123,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if AluraIE.suitable(url) else super().suitable(url)
|
||||
return False if AluraIE.suitable(url) else super(AluraCourseIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@ -157,7 +161,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
|
||||
'url': video_url,
|
||||
'id_key': self.ie_key(),
|
||||
'chapter': chapter,
|
||||
'chapter_number': chapter_number,
|
||||
'chapter_number': chapter_number
|
||||
}
|
||||
entries.append(entry)
|
||||
return self.playlist_result(entries, course_path, course_title)
|
||||
|
@ -1,77 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AmadeusTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?amadeus\.tv/library/(?P<id>[\da-f]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.amadeus.tv/library/65091a87ff85af59d9fc54c3',
|
||||
'info_dict': {
|
||||
'id': '5576678021301411311',
|
||||
'ext': 'mp4',
|
||||
'title': 'Jieon Park - 第五届珠海莫扎特国际青少年音乐周小提琴C组第三轮',
|
||||
'thumbnail': 'http://1253584441.vod2.myqcloud.com/a0046a27vodtransbj1253584441/7db4af535576678021301411311/coverBySnapshot_10_0.jpg',
|
||||
'duration': 1264.8,
|
||||
'upload_date': '20230918',
|
||||
'timestamp': 1695034800,
|
||||
'display_id': '65091a87ff85af59d9fc54c3',
|
||||
'view_count': int,
|
||||
'description': 'md5:a0357b9c215489e2067cbae0b777bb95',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
nuxt_data = self._search_nuxt_data(webpage, display_id, traverse=('fetch', '0'))
|
||||
video_id = traverse_obj(nuxt_data, ('item', 'video', {str}))
|
||||
|
||||
if not video_id:
|
||||
raise ExtractorError('Unable to extract actual video ID')
|
||||
|
||||
video_data = self._download_json(
|
||||
f'http://playvideo.qcloud.com/getplayinfo/v2/1253584441/{video_id}',
|
||||
video_id, headers={'Referer': 'http://www.amadeus.tv/'})
|
||||
|
||||
formats = []
|
||||
for video in traverse_obj(video_data, ('videoInfo', ('sourceVideo', ('transcodeList', ...)), {dict})):
|
||||
if not url_or_none(video.get('url')):
|
||||
continue
|
||||
formats.append({
|
||||
**traverse_obj(video, {
|
||||
'url': 'url',
|
||||
'format_id': ('definition', {lambda x: f'http-{x or "0"}'}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
'filesize': (('totalSize', 'size'), {int_or_none}),
|
||||
'vcodec': ('videoStreamList', 0, 'codec'),
|
||||
'acodec': ('audioStreamList', 0, 'codec'),
|
||||
'fps': ('videoStreamList', 0, 'fps', {float_or_none}),
|
||||
}, get_all=False),
|
||||
'http_headers': {'Referer': 'http://www.amadeus.tv/'},
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(video_data, {
|
||||
'title': ('videoInfo', 'basicInfo', 'name', {str}),
|
||||
'thumbnail': ('coverInfo', 'coverUrl', {url_or_none}),
|
||||
'duration': ('videoInfo', 'sourceVideo', ('floatDuration', 'duration'), {float_or_none}),
|
||||
}, get_all=False),
|
||||
**traverse_obj(nuxt_data, ('item', {
|
||||
'title': (('title', 'title_en', 'title_cn'), {str}),
|
||||
'description': (('description', 'description_en', 'description_cn'), {str}),
|
||||
'timestamp': ('date', {parse_iso8601}),
|
||||
'view_count': ('view', {int_or_none}),
|
||||
}), get_all=False),
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from .youtube import YoutubeIE
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
@ -25,7 +25,7 @@ class AmaraIE(InfoExtractor):
|
||||
'uploader': 'PBS NewsHour',
|
||||
'uploader_id': 'PBSNewsHour',
|
||||
'timestamp': 1549639570,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# Vimeo
|
||||
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
|
||||
@ -40,8 +40,8 @@ class AmaraIE(InfoExtractor):
|
||||
'timestamp': 1294763658,
|
||||
'upload_date': '20110111',
|
||||
'uploader': 'Sam Morrill',
|
||||
'uploader_id': 'sammorrill',
|
||||
},
|
||||
'uploader_id': 'sammorrill'
|
||||
}
|
||||
}, {
|
||||
# Direct Link
|
||||
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
|
||||
@ -55,13 +55,13 @@ class AmaraIE(InfoExtractor):
|
||||
'subtitles': dict,
|
||||
'upload_date': '20091007',
|
||||
'timestamp': 1254942511,
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
meta = self._download_json(
|
||||
f'https://amara.org/api/videos/{video_id}/',
|
||||
'https://amara.org/api/videos/%s/' % video_id,
|
||||
video_id, query={'format': 'json'})
|
||||
title = meta['title']
|
||||
video_url = meta['all_urls'][0]
|
||||
|
@ -61,13 +61,13 @@ class AmazonStoreIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
id = self._match_id(url)
|
||||
|
||||
for retry in self.RetryManager():
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
webpage = self._download_webpage(url, id)
|
||||
try:
|
||||
data_json = self._search_json(
|
||||
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', playlist_id,
|
||||
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id,
|
||||
transform_source=js_to_json)
|
||||
except ExtractorError as e:
|
||||
retry.error = e
|
||||
@ -81,7 +81,7 @@ class AmazonStoreIE(InfoExtractor):
|
||||
'height': int_or_none(video.get('videoHeight')),
|
||||
'width': int_or_none(video.get('videoWidth')),
|
||||
} for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')]
|
||||
return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=data_json.get('title'))
|
||||
return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title'))
|
||||
|
||||
|
||||
class AmazonReviewsIE(InfoExtractor):
|
||||
|
@ -22,11 +22,8 @@ class AmazonMiniTVBaseIE(InfoExtractor):
|
||||
|
||||
resp = self._download_json(
|
||||
f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
|
||||
asin, note=note, headers={
|
||||
'Content-Type': 'application/json',
|
||||
'currentpageurl': '/',
|
||||
'currentplatform': 'dWeb',
|
||||
}, data=json.dumps(data).encode() if data else None,
|
||||
asin, note=note, headers={'Content-Type': 'application/json'},
|
||||
data=json.dumps(data).encode() if data else None,
|
||||
query=None if data else {
|
||||
'deviceType': 'A1WMMUXPCUJL4N',
|
||||
'contentId': asin,
|
||||
@ -49,7 +46,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'May I Kiss You?',
|
||||
'language': 'Hindi',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'description': 'md5:a549bfc747973e04feb707833474e59d',
|
||||
'release_timestamp': 1644710400,
|
||||
'release_date': '20220213',
|
||||
@ -71,7 +68,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Jahaan',
|
||||
'language': 'Hindi',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'description': 'md5:05eb765a77bf703f322f120ec6867339',
|
||||
'release_timestamp': 1647475200,
|
||||
'release_date': '20220317',
|
||||
|
@ -26,7 +26,6 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
||||
'only_matching': True,
|
||||
@ -64,8 +63,8 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
site, display_id = self._match_valid_url(url).groups()
|
||||
requestor_id = self._REQUESTOR_ID_MAP[site]
|
||||
page_data = self._download_json(
|
||||
f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}',
|
||||
display_id)['data']
|
||||
'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s'
|
||||
% (requestor_id.lower(), display_id), display_id)['data']
|
||||
properties = page_data.get('properties') or {}
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
@ -76,15 +75,15 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
try:
|
||||
for v in page_data['children']:
|
||||
if v.get('type') == 'video-player':
|
||||
release_pid = v['properties']['currentVideo']['meta']['releasePid']
|
||||
tp_path = 'M_UwQC/' + release_pid
|
||||
releasePid = v['properties']['currentVideo']['meta']['releasePid']
|
||||
tp_path = 'M_UwQC/' + releasePid
|
||||
media_url = 'https://link.theplatform.com/s/' + tp_path
|
||||
video_player_count += 1
|
||||
except KeyError:
|
||||
pass
|
||||
if video_player_count > 1:
|
||||
self.report_warning(
|
||||
f'The JSON data has {video_player_count} video players. Only one will be extracted')
|
||||
'The JSON data has %d video players. Only one will be extracted' % video_player_count)
|
||||
|
||||
# Fall back to videoPid if releasePid not found.
|
||||
# TODO: Fall back to videoPid if releasePid manifest uses DRM.
|
||||
@ -131,7 +130,7 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
})
|
||||
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
|
||||
if ns_keys:
|
||||
ns = next(iter(ns_keys))
|
||||
ns = list(ns_keys)[0]
|
||||
episode = theplatform_metadata.get(ns + '$episodeTitle') or None
|
||||
episode_number = int_or_none(
|
||||
theplatform_metadata.get(ns + '$episode'))
|
||||
|
@ -87,13 +87,13 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
resource_type = 'episodes'
|
||||
|
||||
resource = self._download_json(
|
||||
f'https://www.americastestkitchen.com/api/v6/{resource_type}/{video_id}', video_id)
|
||||
'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id)
|
||||
video = resource['video'] if is_episode else resource
|
||||
episode = resource if is_episode else resource.get('episode') or {}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://player.zype.com/embed/{}.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ'.format(video['zypeId']),
|
||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
|
||||
'ie_key': 'Zype',
|
||||
'description': clean_html(video.get('description')),
|
||||
'timestamp': unified_timestamp(video.get('publishDate')),
|
||||
@ -174,22 +174,22 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
]
|
||||
|
||||
if season_number:
|
||||
playlist_id = f'season_{season_number}'
|
||||
playlist_title = f'Season {season_number}'
|
||||
playlist_id = 'season_%d' % season_number
|
||||
playlist_title = 'Season %d' % season_number
|
||||
facet_filters.append('search_season_list:' + playlist_title)
|
||||
else:
|
||||
playlist_id = show
|
||||
playlist_title = title
|
||||
|
||||
season_search = self._download_json(
|
||||
f'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_{slug}_season_desc_production',
|
||||
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
|
||||
playlist_id, headers={
|
||||
'Origin': 'https://www.americastestkitchen.com',
|
||||
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
|
||||
'X-Algolia-Application-Id': 'Y1FNZXUI30',
|
||||
}, query={
|
||||
'facetFilters': json.dumps(facet_filters),
|
||||
'attributesToRetrieve': f'description,search_{slug}_episode_number,search_document_date,search_url,title,search_atk_episode_season',
|
||||
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
|
||||
'attributesToHighlight': '',
|
||||
'hitsPerPage': 1000,
|
||||
})
|
||||
@ -207,7 +207,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
'description': episode.get('description'),
|
||||
'timestamp': unified_timestamp(episode.get('search_document_date')),
|
||||
'season_number': season_number,
|
||||
'episode_number': int_or_none(episode.get(f'search_{slug}_episode_number')),
|
||||
'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
|
||||
'ie_key': AmericasTestKitchenIE.ie_key(),
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
@ -19,12 +19,12 @@ class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
'Unable to download Akamai AMP feed', transform_source=strip_jsonp)
|
||||
item = feed.get('channel', {}).get('item')
|
||||
if not item:
|
||||
raise ExtractorError('{} said: {}'.format(self.IE_NAME, feed['error']))
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
|
||||
|
||||
video_id = item['guid']
|
||||
|
||||
def get_media_node(name, default=None):
|
||||
media_name = f'media-{name}'
|
||||
media_name = 'media-%s' % name
|
||||
media_group = item.get('media-group') or item
|
||||
return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
|
||||
|
||||
|
@ -5,7 +5,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
unified_timestamp
|
||||
)
|
||||
|
||||
|
||||
@ -29,7 +29,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
|
||||
'release_date': '20230121',
|
||||
'release_timestamp': 1674285179,
|
||||
'episode_id': 'e1tpt3d',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# embed url
|
||||
'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd',
|
||||
@ -50,7 +50,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
|
||||
'season': 'Season 2',
|
||||
'season_number': 2,
|
||||
'episode_id': 'e1shjqd',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
_WEBPAGE_TESTS = [{
|
||||
@ -72,7 +72,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
|
||||
'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
|
||||
'uploader': 'Podcast Tempo',
|
||||
'channel': 'apakatatempo',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -1,7 +1,7 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import merge_dicts, url_or_none
|
||||
from ..utils import url_or_none, merge_dicts
|
||||
|
||||
|
||||
class AngelIE(InfoExtractor):
|
||||
@ -15,8 +15,8 @@ class AngelIE(InfoExtractor):
|
||||
'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons',
|
||||
'description': 'md5:73b704897c20ab59c433a9c0a8202d5e',
|
||||
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
|
||||
'duration': 1359.0,
|
||||
},
|
||||
'duration': 1359.0
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name',
|
||||
'md5': 'e4774bad0a5f0ad2e90d175cafdb797d',
|
||||
@ -26,8 +26,8 @@ class AngelIE(InfoExtractor):
|
||||
'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name',
|
||||
'description': 'md5:aadfb4827a94415de5ff6426e6dee3be',
|
||||
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
|
||||
'duration': 3276.0,
|
||||
},
|
||||
'duration': 3276.0
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -44,7 +44,7 @@ class AngelIE(InfoExtractor):
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'subtitles': subtitles
|
||||
}
|
||||
|
||||
# Angel uses cloudinary in the background and supports image transformations.
|
||||
|
@ -5,26 +5,22 @@ from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
make_archive_id,
|
||||
scale_thumbnails_to_max_format_width,
|
||||
)
|
||||
|
||||
|
||||
class AntennaBaseIE(InfoExtractor):
|
||||
class Ant1NewsGrBaseIE(InfoExtractor):
|
||||
def _download_and_extract_api_data(self, video_id, netloc, cid=None):
|
||||
info = self._download_json(f'{self.http_scheme()}//{netloc}{self._API_PATH}',
|
||||
video_id, query={'cid': cid or video_id})
|
||||
if not info.get('url'):
|
||||
raise ExtractorError(f'No source found for {video_id}')
|
||||
|
||||
ext = determine_ext(info['url'])
|
||||
if ext == 'm3u8':
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(info['url'], video_id, 'mp4')
|
||||
else:
|
||||
formats, subs = [{'url': info['url'], 'format_id': ext}], {}
|
||||
|
||||
url = f'{self.http_scheme()}//{netloc}{self._API_PATH}'
|
||||
info = self._download_json(url, video_id, query={'cid': cid or video_id})
|
||||
try:
|
||||
source = info['url']
|
||||
except KeyError:
|
||||
raise ExtractorError('no source found for %s' % video_id)
|
||||
formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4')
|
||||
if determine_ext(source) == 'm3u8' else ([{'url': source}], {}))
|
||||
thumbnails = scale_thumbnails_to_max_format_width(
|
||||
formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+') if info.get('thumb') else []
|
||||
formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+')
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.get('title'),
|
||||
@ -34,31 +30,21 @@ class AntennaBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class AntennaGrWatchIE(AntennaBaseIE):
|
||||
IE_NAME = 'antenna:watch'
|
||||
IE_DESC = 'antenna.gr and ant1news.gr videos'
|
||||
_VALID_URL = r'https?://(?P<netloc>(?:www\.)?(?:antenna|ant1news)\.gr)/watch/(?P<id>\d+)/'
|
||||
class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE):
|
||||
IE_NAME = 'ant1newsgr:watch'
|
||||
IE_DESC = 'ant1news.gr videos'
|
||||
_VALID_URL = r'https?://(?P<netloc>(?:www\.)?ant1news\.gr)/watch/(?P<id>\d+)/'
|
||||
_API_PATH = '/templates/data/player'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ant1news.gr/watch/1506168/ant1-news-09112021-stis-18-45',
|
||||
'md5': 'c472d9dd7cd233c63aff2ea42201cda6',
|
||||
'md5': '95925e6b32106754235f2417e0d2dfab',
|
||||
'info_dict': {
|
||||
'id': '1506168',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:0ad00fa66ecf8aa233d26ab0dba7514a',
|
||||
'description': 'md5:18665af715a6dcfeac1d6153a44f16b0',
|
||||
'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/26d46bf6-8158-4f02-b197-7096c714b2de\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.antenna.gr/watch/1643812/oi-prodotes-epeisodio-01',
|
||||
'md5': '8f6f7dd3b1dba4d835ba990e25f31243',
|
||||
'info_dict': {
|
||||
'id': '1643812',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'mp4',
|
||||
'title': 'ΟΙ ΠΡΟΔΟΤΕΣ – ΕΠΕΙΣΟΔΙΟ 01',
|
||||
'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/b3d63096-e72d-43c4-87a0-00d4363d242f\.jpg',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/26d46bf6-8158-4f02-b197-7096c714b2de.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
@ -66,26 +52,25 @@ class AntennaGrWatchIE(AntennaBaseIE):
|
||||
video_id, netloc = self._match_valid_url(url).group('id', 'netloc')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
info = self._download_and_extract_api_data(video_id, netloc)
|
||||
info['description'] = self._og_search_description(webpage, default=None)
|
||||
info['_old_archive_ids'] = [make_archive_id('Ant1NewsGrWatch', video_id)]
|
||||
info['description'] = self._og_search_description(webpage)
|
||||
return info
|
||||
|
||||
|
||||
class Ant1NewsGrArticleIE(AntennaBaseIE):
|
||||
class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
|
||||
IE_NAME = 'ant1newsgr:article'
|
||||
IE_DESC = 'ant1news.gr articles'
|
||||
_VALID_URL = r'https?://(?:www\.)?ant1news\.gr/[^/]+/article/(?P<id>\d+)/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron',
|
||||
'md5': '57eb8d12181f0fa2b14b0b138e1de9b6',
|
||||
'md5': '294f18331bb516539d72d85a82887dcc',
|
||||
'info_dict': {
|
||||
'id': '_xvg/m_cmbatw=',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411',
|
||||
'timestamp': 1666166520,
|
||||
'upload_date': '20221019',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/1920/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
|
||||
'timestamp': 1603092840,
|
||||
'upload_date': '20201019',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn',
|
||||
@ -105,19 +90,19 @@ class Ant1NewsGrArticleIE(AntennaBaseIE):
|
||||
info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle')
|
||||
embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage))
|
||||
if not embed_urls:
|
||||
raise ExtractorError(f'no videos found for {video_id}', expected=True)
|
||||
raise ExtractorError('no videos found for %s' % video_id, expected=True)
|
||||
return self.playlist_from_matches(
|
||||
embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(),
|
||||
video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})
|
||||
|
||||
|
||||
class Ant1NewsGrEmbedIE(AntennaBaseIE):
|
||||
class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
|
||||
IE_NAME = 'ant1newsgr:embed'
|
||||
IE_DESC = 'ant1news.gr embedded videos'
|
||||
_BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
|
||||
_VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
|
||||
_API_PATH = '/templates/data/jsonPlayer'
|
||||
_API_PATH = '/news/templates/data/jsonPlayer'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377',
|
@ -8,8 +8,10 @@ import time
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_encrypt
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
join_nonempty,
|
||||
smuggle_url,
|
||||
strip_jsonp,
|
||||
@ -31,6 +33,24 @@ class AnvatoIE(InfoExtractor):
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
|
||||
|
||||
_TESTS = [{
|
||||
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
|
||||
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
|
||||
'md5': '921919dab3cd0b849ff3d624831ae3e2',
|
||||
'info_dict': {
|
||||
'id': '899441',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
|
||||
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
||||
'upload_date': '20201215',
|
||||
'timestamp': 1608009755,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': 'NFL',
|
||||
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
|
||||
'Player Highlights', 'Cleveland Browns', 'league'],
|
||||
'duration': 157,
|
||||
'categories': ['Entertainment', 'Game', 'Highlights'],
|
||||
},
|
||||
}, {
|
||||
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
|
||||
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
|
||||
'md5': '837718bcfb3a7778d022f857f7a9b19e',
|
||||
@ -218,7 +238,32 @@ class AnvatoIE(InfoExtractor):
|
||||
'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
|
||||
'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
|
||||
'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
|
||||
}
|
||||
|
||||
def _generate_nfl_token(self, anvack, mcp_id):
|
||||
reroute = self._download_json(
|
||||
'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
|
||||
headers={'X-Domain-Id': 100}, note='Fetching token info')
|
||||
token_type = reroute.get('token_type') or 'Bearer'
|
||||
auth_token = f'{token_type} {reroute["access_token"]}'
|
||||
response = self._download_json(
|
||||
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
|
||||
'query': '''{
|
||||
viewer {
|
||||
mediaToken(anvack: "%s", id: %s) {
|
||||
token
|
||||
}
|
||||
}
|
||||
}''' % (anvack, mcp_id),
|
||||
}).encode(), headers={
|
||||
'Authorization': auth_token,
|
||||
'Content-Type': 'application/json',
|
||||
}, note='Fetching NFL API token')
|
||||
return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
|
||||
|
||||
_TOKEN_GENERATORS = {
|
||||
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
|
||||
}
|
||||
|
||||
def _server_time(self, access_key, video_id):
|
||||
@ -232,8 +277,8 @@ class AnvatoIE(InfoExtractor):
|
||||
server_time = self._server_time(access_key, video_id)
|
||||
input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}'
|
||||
|
||||
auth_secret = bytes(aes_encrypt(
|
||||
list(input_data[:64].encode()), list(self._AUTH_KEY)))
|
||||
auth_secret = intlist_to_bytes(aes_encrypt(
|
||||
bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))
|
||||
query = {
|
||||
'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'),
|
||||
'rtyp': 'fp',
|
||||
@ -245,6 +290,8 @@ class AnvatoIE(InfoExtractor):
|
||||
}
|
||||
if extracted_token is not None:
|
||||
api['anvstk2'] = extracted_token
|
||||
elif self._TOKEN_GENERATORS.get(access_key) is not None:
|
||||
api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
|
||||
elif self._ANVACK_TABLE.get(access_key) is not None:
|
||||
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
|
||||
else:
|
||||
@ -252,7 +299,7 @@ class AnvatoIE(InfoExtractor):
|
||||
|
||||
return self._download_json(
|
||||
video_data_url, video_id, transform_source=strip_jsonp, query=query,
|
||||
data=json.dumps({'api': api}, separators=(',', ':')).encode())
|
||||
data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8'))
|
||||
|
||||
def _get_anvato_videos(self, access_key, video_id, token):
|
||||
video_data = self._get_video_json(access_key, video_id, token)
|
||||
@ -311,7 +358,7 @@ class AnvatoIE(InfoExtractor):
|
||||
for caption in video_data.get('captions', []):
|
||||
a_caption = {
|
||||
'url': caption['url'],
|
||||
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None,
|
||||
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
|
||||
}
|
||||
subtitles.setdefault(caption['language'], []).append(a_caption)
|
||||
subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs)
|
||||
|
@ -10,7 +10,6 @@ from ..utils import (
|
||||
|
||||
|
||||
class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
|
||||
_WORKING = False
|
||||
IE_NAME = 'aol.com'
|
||||
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
|
||||
|
||||
@ -30,7 +29,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# video with vidible ID
|
||||
'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
|
||||
@ -46,7 +45,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/',
|
||||
'only_matching': True,
|
||||
@ -83,10 +82,10 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
|
||||
return self._extract_yahoo_video(video_id, 'us')
|
||||
|
||||
response = self._download_json(
|
||||
f'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/{video_id}/details',
|
||||
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
|
||||
video_id)['response']
|
||||
if response['statusText'] != 'Ok':
|
||||
raise ExtractorError('{} said: {}'.format(self.IE_NAME, response['statusText']), expected=True)
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True)
|
||||
|
||||
video_data = response['data']
|
||||
formats = []
|
||||
|
@ -34,7 +34,7 @@ class APAIE(InfoExtractor):
|
||||
video_id, base_url = mobj.group('id', 'base_url')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
f'{base_url}/player/{video_id}', video_id)
|
||||
'%s/player/%s' % (base_url, video_id), video_id)
|
||||
|
||||
jwplatform_id = self._search_regex(
|
||||
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
|
||||
@ -47,7 +47,7 @@ class APAIE(InfoExtractor):
|
||||
|
||||
def extract(field, name=None):
|
||||
return self._search_regex(
|
||||
rf'\b{field}["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||
r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
|
||||
webpage, name or field, default=None, group='value')
|
||||
|
||||
title = extract('title') or video_id
|
||||
|
@ -1,5 +1,8 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, str_to_int
|
||||
from ..utils import (
|
||||
str_to_int,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
class AppleConnectIE(InfoExtractor):
|
||||
|
@ -1,45 +1,30 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ApplePodcastsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654',
|
||||
'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172',
|
||||
'info_dict': {
|
||||
'id': '1000665010654',
|
||||
'ext': 'mp3',
|
||||
'title': 'Ferreck Dawn - To The Break of Dawn 117',
|
||||
'episode': 'Ferreck Dawn - To The Break of Dawn 117',
|
||||
'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc',
|
||||
'upload_date': '20240812',
|
||||
'timestamp': 1723449600,
|
||||
'duration': 3596,
|
||||
'series': 'Ferreck Dawn - To The Break of Dawn',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'md5': 'baf8a6b8b8aa6062dbb4639ed73d0052',
|
||||
'md5': '41dc31cd650143e530d9423b6b5a344f',
|
||||
'info_dict': {
|
||||
'id': '1000482637777',
|
||||
'ext': 'mp3',
|
||||
'title': '207 - Whitney Webb Returns',
|
||||
'episode': '207 - Whitney Webb Returns',
|
||||
'episode_number': 207,
|
||||
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
|
||||
'upload_date': '20200705',
|
||||
'timestamp': 1593932400,
|
||||
'duration': 5369,
|
||||
'duration': 6454,
|
||||
'series': 'The Tim Dillon Show',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'only_matching': True,
|
||||
@ -54,24 +39,47 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
server_data = self._search_json(
|
||||
r'<script [^>]*\bid=["\']serialized-server-data["\'][^>]*>', webpage,
|
||||
'server data', episode_id, contains_pattern=r'\[{(?s:.+)}\]')[0]['data']
|
||||
model_data = traverse_obj(server_data, (
|
||||
'headerButtonItems', lambda _, v: v['$kind'] == 'bookmark' and v['modelType'] == 'EpisodeOffer',
|
||||
'model', {dict}, any))
|
||||
episode_data = {}
|
||||
ember_data = {}
|
||||
# new page type 2021-11
|
||||
amp_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
|
||||
amp_data = try_get(amp_data,
|
||||
lambda a: self._parse_json(
|
||||
next(a[x] for x in iter(a) if episode_id in x),
|
||||
episode_id),
|
||||
dict) or {}
|
||||
amp_data = amp_data.get('d') or []
|
||||
episode_data = try_get(
|
||||
amp_data,
|
||||
lambda a: next(x for x in a
|
||||
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
|
||||
dict)
|
||||
if not episode_data:
|
||||
# try pre 2021-11 page type: TODO: consider deleting if no longer used
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id) or {}
|
||||
ember_data = ember_data.get(episode_id) or ember_data
|
||||
episode_data = try_get(ember_data, lambda x: x['data'], dict)
|
||||
episode = episode_data['attributes']
|
||||
description = episode.get('description') or {}
|
||||
|
||||
series = None
|
||||
for inc in (amp_data or ember_data.get('included') or []):
|
||||
if inc.get('type') == 'media/podcast':
|
||||
series = try_get(inc, lambda x: x['attributes']['name'])
|
||||
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
**self._json_ld(
|
||||
traverse_obj(server_data, ('seoData', 'schemaContent', {dict}))
|
||||
or self._yield_json_ld(webpage, episode_id, fatal=False), episode_id, fatal=False),
|
||||
**traverse_obj(model_data, {
|
||||
'title': ('title', {str}),
|
||||
'url': ('streamUrl', {clean_podcast_url}),
|
||||
'timestamp': ('releaseDate', {parse_iso8601}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
}),
|
||||
'title': episode.get('name'),
|
||||
'url': clean_podcast_url(episode['assetUrl']),
|
||||
'description': description.get('standard') or description.get('short'),
|
||||
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
||||
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
||||
'series': series,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'vcodec': 'none',
|
||||
}
|
||||
|
@ -1,8 +1,8 @@
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
@ -64,7 +64,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
'uploader_id': 'wb',
|
||||
},
|
||||
},
|
||||
],
|
||||
]
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
|
||||
'info_dict': {
|
||||
@ -99,7 +99,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, movie)
|
||||
film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
|
||||
film_data = self._download_json(
|
||||
f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json',
|
||||
'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
|
||||
film_id, fatal=False)
|
||||
|
||||
if film_data:
|
||||
@ -114,7 +114,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
if not src:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': f'{version}-{size}',
|
||||
'format_id': '%s-%s' % (version, size),
|
||||
'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
|
||||
'width': int_or_none(size_data.get('width')),
|
||||
'height': int_or_none(size_data.get('height')),
|
||||
@ -134,7 +134,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
page_data = film_data.get('page', {})
|
||||
return self.playlist_result(entries, film_id, page_data.get('movie_title'))
|
||||
|
||||
playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||
|
||||
def fix_html(s):
|
||||
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
|
||||
@ -143,9 +143,10 @@ class AppleTrailersIE(InfoExtractor):
|
||||
# like: http://trailers.apple.com/trailers/wb/gravity/
|
||||
|
||||
def _clean_json(m):
|
||||
return 'iTunes.playURL({});'.format(m.group(1).replace('\'', '''))
|
||||
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''')
|
||||
s = re.sub(self._JSON_RE, _clean_json, s)
|
||||
return f'<html>{s}</html>'
|
||||
s = '<html>%s</html>' % s
|
||||
return s
|
||||
doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
|
||||
|
||||
playlist = []
|
||||
@ -169,18 +170,18 @@ class AppleTrailersIE(InfoExtractor):
|
||||
duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
|
||||
|
||||
trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
|
||||
settings_json_url = urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json')
|
||||
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
|
||||
settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
|
||||
|
||||
formats = []
|
||||
for fmt in settings['metadata']['sizes']:
|
||||
for format in settings['metadata']['sizes']:
|
||||
# The src is a file pointing to the real video file
|
||||
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src'])
|
||||
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format': fmt['type'],
|
||||
'width': int_or_none(fmt['width']),
|
||||
'height': int_or_none(fmt['height']),
|
||||
'format': format['type'],
|
||||
'width': int_or_none(format['width']),
|
||||
'height': int_or_none(format['height']),
|
||||
})
|
||||
|
||||
playlist.append({
|
||||
@ -228,7 +229,7 @@ class AppleTrailersSectionIE(InfoExtractor):
|
||||
'title': 'Movie Studios',
|
||||
},
|
||||
}
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>{})'.format('|'.join(_SECTIONS))
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
|
||||
_TESTS = [{
|
||||
'url': 'http://trailers.apple.com/#section=justadded',
|
||||
'info_dict': {
|
||||
@ -269,7 +270,7 @@ class AppleTrailersSectionIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
section = self._match_id(url)
|
||||
section_data = self._download_json(
|
||||
'http://trailers.apple.com/trailers/home/feeds/{}.json'.format(self._SECTIONS[section]['feed_path']),
|
||||
'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
|
||||
section)
|
||||
entries = [
|
||||
self.url_result('http://trailers.apple.com' + e['location'])
|
||||
|
@ -1,11 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .naver import NaverBaseIE
|
||||
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
@ -32,7 +32,6 @@ from ..utils import (
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
@ -51,9 +50,10 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'release_date': '19681210',
|
||||
'timestamp': 1268695290,
|
||||
'upload_date': '20100315',
|
||||
'creators': ['SRI International'],
|
||||
'creator': 'SRI International',
|
||||
'uploader': 'laura@archive.org',
|
||||
'thumbnail': r're:https://archive\.org/download/.*\.jpg',
|
||||
'release_year': 1968,
|
||||
'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr',
|
||||
'track': 'XD300-23 68HighlightsAResearchCntAugHumanIntellect',
|
||||
|
||||
@ -111,7 +111,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'title': 'Turning',
|
||||
'ext': 'flac',
|
||||
'track': 'Turning',
|
||||
'creators': ['Grateful Dead'],
|
||||
'creator': 'Grateful Dead',
|
||||
'display_id': 'gd1977-05-08d01t01.flac',
|
||||
'track_number': 1,
|
||||
'album': '1977-05-08 - Barton Hall - Cornell University',
|
||||
@ -131,10 +131,11 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'location': 'Barton Hall - Cornell University',
|
||||
'duration': 438.68,
|
||||
'track': 'Deal',
|
||||
'creators': ['Grateful Dead'],
|
||||
'creator': 'Grateful Dead',
|
||||
'album': '1977-05-08 - Barton Hall - Cornell University',
|
||||
'release_date': '19770508',
|
||||
'display_id': 'gd1977-05-08d01t07.flac',
|
||||
'release_year': 1977,
|
||||
'track_number': 7,
|
||||
},
|
||||
}, {
|
||||
@ -146,7 +147,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'title': 'Bells Of Rostov',
|
||||
'ext': 'mp3',
|
||||
},
|
||||
'skip': 'restricted',
|
||||
'skip': 'restricted'
|
||||
}, {
|
||||
'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3',
|
||||
'md5': '1d0aabe03edca83ca58d9ed3b493a3c3',
|
||||
@ -159,7 +160,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'description': 'md5:012b2d668ae753be36896f343d12a236',
|
||||
'upload_date': '20190928',
|
||||
},
|
||||
'skip': 'restricted',
|
||||
'skip': 'restricted'
|
||||
}, {
|
||||
# Original formats are private
|
||||
'url': 'https://archive.org/details/irelandthemakingofarepublic',
|
||||
@ -169,7 +170,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'upload_date': '20160610',
|
||||
'description': 'md5:f70956a156645a658a0dc9513d9e78b7',
|
||||
'uploader': 'dimitrios@archive.org',
|
||||
'creators': ['British Broadcasting Corporation', 'Time-Life Films'],
|
||||
'creator': ['British Broadcasting Corporation', 'Time-Life Films'],
|
||||
'timestamp': 1465594947,
|
||||
},
|
||||
'playlist': [
|
||||
@ -203,28 +204,8 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg',
|
||||
'display_id': 'irelandthemakingofarepublicreel2.mov',
|
||||
},
|
||||
},
|
||||
],
|
||||
}, {
|
||||
# The reviewbody is None for one of the reviews; just need to extract data without crashing
|
||||
'url': 'https://archive.org/details/gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
|
||||
'info_dict': {
|
||||
'id': 'gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
|
||||
'ext': 'mp3',
|
||||
'title': 'Stuck Inside of Mobile with the Memphis Blues Again',
|
||||
'creators': ['Grateful Dead'],
|
||||
'duration': 338.31,
|
||||
'track': 'Stuck Inside of Mobile with the Memphis Blues Again',
|
||||
'description': 'md5:764348a470b986f1217ffd38d6ac7b72',
|
||||
'display_id': 'gd95-04-02d1t04.shn',
|
||||
'location': 'Pyramid Arena',
|
||||
'uploader': 'jon@archive.org',
|
||||
'album': '1995-04-02 - Pyramid Arena',
|
||||
'upload_date': '20040519',
|
||||
'track_number': 4,
|
||||
'release_date': '19950402',
|
||||
'timestamp': 1084927901,
|
||||
},
|
||||
}
|
||||
]
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@ -241,7 +222,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = urllib.parse.unquote_plus(self._match_id(url))
|
||||
identifier, _, entry_id = video_id.partition('/')
|
||||
identifier, entry_id = (video_id.split('/', 1) + [None])[:2]
|
||||
|
||||
# Archive.org metadata API doesn't clearly demarcate playlist entries
|
||||
# or subtitle tracks, so we get them from the embeddable player.
|
||||
@ -267,7 +248,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
if track['kind'] != 'subtitles':
|
||||
continue
|
||||
entries[p['orig']][track['label']] = {
|
||||
'url': 'https://archive.org/' + track['file'].lstrip('/'),
|
||||
'url': 'https://archive.org/' + track['file'].lstrip('/')
|
||||
}
|
||||
|
||||
metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier)
|
||||
@ -279,7 +260,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'title': m['title'],
|
||||
'description': clean_html(m.get('description')),
|
||||
'uploader': dict_get(m, ['uploader', 'adder']),
|
||||
'creators': traverse_obj(m, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
|
||||
'creator': m.get('creator'),
|
||||
'license': m.get('licenseurl'),
|
||||
'release_date': unified_strdate(m.get('date')),
|
||||
'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])),
|
||||
@ -294,7 +275,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'title': f.get('title') or f['name'],
|
||||
'display_id': f['name'],
|
||||
'description': clean_html(f.get('description')),
|
||||
'creators': traverse_obj(f, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
|
||||
'creator': f.get('creator'),
|
||||
'duration': parse_duration(f.get('length')),
|
||||
'track_number': int_or_none(f.get('track')),
|
||||
'album': f.get('album'),
|
||||
@ -314,9 +295,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'height': int_or_none(f.get('width')),
|
||||
'filesize': int_or_none(f.get('size'))})
|
||||
|
||||
_, has_ext, extension = f['name'].rpartition('.')
|
||||
if not has_ext:
|
||||
extension = None
|
||||
extension = (f['name'].rsplit('.', 1) + [None])[1]
|
||||
|
||||
# We don't want to skip private formats if the user has access to them,
|
||||
# however without access to an account with such privileges we can't implement/test this.
|
||||
@ -324,14 +303,14 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
|
||||
if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
|
||||
entry['formats'].append({
|
||||
'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
|
||||
'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
|
||||
'format': f.get('format'),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'filesize': int_or_none(f.get('size')),
|
||||
'protocol': 'https',
|
||||
'source_preference': 0 if f.get('source') == 'original' else -1,
|
||||
'format_note': f.get('source'),
|
||||
'format_note': f.get('source')
|
||||
})
|
||||
|
||||
for entry in entries.values():
|
||||
@ -355,7 +334,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
info['comments'].append({
|
||||
'id': review.get('review_id'),
|
||||
'author': review.get('reviewer'),
|
||||
'text': join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n'),
|
||||
'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + review.get('reviewbody'),
|
||||
'timestamp': unified_timestamp(review.get('createdate')),
|
||||
'parent': 'root'})
|
||||
|
||||
@ -394,7 +373,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/user/Zeurel',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# Internal link
|
||||
'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0',
|
||||
@ -411,7 +390,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/user/1veritasium',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description.
|
||||
# Should use the date in the link. Title ends with '- Youtube'. Capture has description in eow-description
|
||||
@ -426,8 +405,8 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_id': 'machinima',
|
||||
'uploader_url': 'https://www.youtube.com/user/machinima',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'machinima',
|
||||
},
|
||||
'uploader': 'machinima'
|
||||
}
|
||||
}, {
|
||||
# FLV video. Video file URL does not provide itag information
|
||||
'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw',
|
||||
@ -444,7 +423,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'jawed',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
|
||||
'info_dict': {
|
||||
@ -460,7 +439,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/user/itsmadeon',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# First capture is of dead video, second is the oldest from CDX response.
|
||||
'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E',
|
||||
@ -477,7 +456,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'ETC News',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# First capture of dead video, capture date in link links to dead capture.
|
||||
'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E',
|
||||
@ -496,15 +475,15 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader': 'ETC News',
|
||||
},
|
||||
'expected_warnings': [
|
||||
r'unable to download capture webpage \(it may not be archived\)',
|
||||
],
|
||||
r'unable to download capture webpage \(it may not be archived\)'
|
||||
]
|
||||
}, { # Very old YouTube page, has - YouTube in title.
|
||||
'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg',
|
||||
'info_dict': {
|
||||
'id': '-06-KB9XTzg',
|
||||
'ext': 'flv',
|
||||
'title': 'New Coin Hack!! 100% Safe!!',
|
||||
},
|
||||
'title': 'New Coin Hack!! 100% Safe!!'
|
||||
}
|
||||
}, {
|
||||
'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8',
|
||||
'info_dict': {
|
||||
@ -518,7 +497,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'DankPods',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093
|
||||
'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4',
|
||||
@ -535,7 +514,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_id': 'PewDiePie',
|
||||
'uploader_url': 'https://www.youtube.com/user/PewDiePie',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# ~June 2010 Capture. swfconfig
|
||||
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y',
|
||||
@ -550,7 +529,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks',
|
||||
'upload_date': '20090520',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# Jan 2011: watch-video-date/eow-date surrounded by whitespace
|
||||
'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc',
|
||||
@ -565,7 +544,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'duration': 132,
|
||||
'uploader_url': 'https://www.youtube.com/user/claybutlermusic',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# ~May 2009 swfArgs. ytcfg is spread out over various vars
|
||||
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY',
|
||||
@ -580,7 +559,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'description': 'md5:4ca77d79538064e41e4cc464e93f44f0',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'duration': 754,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# ~June 2012. Upload date is in another lang so cannot extract.
|
||||
'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA',
|
||||
@ -594,7 +573,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader': 'BlackNerdComedy',
|
||||
'duration': 182,
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# ~July 2013
|
||||
'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM',
|
||||
@ -610,7 +589,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ',
|
||||
'upload_date': '20060428',
|
||||
'uploader': 'punkybird',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# April 2020: Player response in player config
|
||||
'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en',
|
||||
@ -627,7 +606,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'description': 'md5:c625bb3c02c4f5fb4205971e468fa341',
|
||||
'uploader_url': 'https://www.youtube.com/user/GameGrumps',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# watch7-user-header with yt-user-info
|
||||
'url': 'ytarchive:kbh4T_b4Ixw:20160307085057',
|
||||
@ -642,7 +621,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'upload_date': '20150503',
|
||||
'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# April 2012
|
||||
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU',
|
||||
@ -657,35 +636,35 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'duration': 200,
|
||||
'upload_date': '20120407',
|
||||
'uploader_id': 'thecomputernerd01',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}, {
|
||||
# Video not archived, only capture is unavailable video page
|
||||
'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}, { # Encoded url
|
||||
'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&search=soccer',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'ytarchive:BaW_jenozKc:20050214000000',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'ytarchive:BaW_jenozKc',
|
||||
'only_matching': True,
|
||||
'only_matching': True
|
||||
},
|
||||
]
|
||||
_YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
|
||||
@ -696,13 +675,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
|
||||
_YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers
|
||||
_YT_ALL_THUMB_SERVERS = orderedSet(
|
||||
[*_YT_DEFAULT_THUMB_SERVERS, 'img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(5), 9)]])
|
||||
_YT_DEFAULT_THUMB_SERVERS + ['img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(0, 5), 9)]])
|
||||
|
||||
_WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/'
|
||||
_OLDEST_CAPTURE_DATE = 20050214000000
|
||||
_NEWEST_CAPTURE_DATE = 20500101000000
|
||||
|
||||
def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False):
|
||||
def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False):
|
||||
# CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
|
||||
query = {
|
||||
'url': url,
|
||||
@ -711,14 +690,14 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'limit': 500,
|
||||
'filter': ['statuscode:200'] + (filters or []),
|
||||
'collapse': collapse or [],
|
||||
**(query or {}),
|
||||
**(query or {})
|
||||
}
|
||||
res = self._download_json(
|
||||
'https://web.archive.org/cdx/search/cdx', item_id,
|
||||
note or 'Downloading CDX API JSON', query=query, fatal=fatal)
|
||||
if isinstance(res, list) and len(res) >= 2:
|
||||
# format response to make it easier to use
|
||||
return [dict(zip(res[0], v)) for v in res[1:]]
|
||||
return list(dict(zip(res[0], v)) for v in res[1:])
|
||||
elif not isinstance(res, list) or len(res) != 0:
|
||||
self.report_warning('Error while parsing CDX API response' + bug_reports_message())
|
||||
|
||||
@ -875,7 +854,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
{
|
||||
'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'),
|
||||
'filesize': int_or_none(thumbnail_dict.get('length')),
|
||||
'preference': int_or_none(thumbnail_dict.get('length')),
|
||||
'preference': int_or_none(thumbnail_dict.get('length'))
|
||||
} for thumbnail_dict in response)
|
||||
if not try_all:
|
||||
break
|
||||
@ -916,7 +895,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
for retry in retry_manager:
|
||||
try:
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest(f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}'),
|
||||
HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id),
|
||||
video_id, note='Fetching archived video file url', expected_status=True)
|
||||
except ExtractorError as e:
|
||||
# HTTP Error 404 is expected if the video is not saved.
|
||||
@ -947,24 +926,258 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
info['thumbnails'] = self._extract_thumbnails(video_id)
|
||||
|
||||
if urlh:
|
||||
url = urllib.parse.unquote(urlh.url)
|
||||
url = compat_urllib_parse_unquote(urlh.url)
|
||||
video_file_url_qs = parse_qs(url)
|
||||
# Attempt to recover any ext & format info from playback url & response headers
|
||||
fmt = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
|
||||
format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
|
||||
itag = try_get(video_file_url_qs, lambda x: x['itag'][0])
|
||||
if itag and itag in YoutubeIE._formats:
|
||||
fmt.update(YoutubeIE._formats[itag])
|
||||
fmt.update({'format_id': itag})
|
||||
format.update(YoutubeIE._formats[itag])
|
||||
format.update({'format_id': itag})
|
||||
else:
|
||||
mime = try_get(video_file_url_qs, lambda x: x['mime'][0])
|
||||
ext = (mimetype2ext(mime)
|
||||
or urlhandle_detect_ext(urlh)
|
||||
or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type')))
|
||||
fmt.update({'ext': ext})
|
||||
info['formats'] = [fmt]
|
||||
format.update({'ext': ext})
|
||||
info['formats'] = [format]
|
||||
if not info.get('duration'):
|
||||
info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0]))
|
||||
|
||||
if not info.get('title'):
|
||||
info['title'] = video_id
|
||||
return info
|
||||
|
||||
|
||||
class VLiveWebArchiveIE(InfoExtractor):
|
||||
IE_NAME = 'web.archive:vlive'
|
||||
IE_DESC = 'web.archive.org saved vlive videos'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://)?web\.archive\.org/
|
||||
(?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)? # /web and the version index is optional
|
||||
(?:https?(?::|%3[Aa])//)?(?:
|
||||
(?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P<id>[0-9]+) # VLive URL
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326',
|
||||
'md5': 'cc7314812855ce56de70a06a27314983',
|
||||
'info_dict': {
|
||||
'id': '1326',
|
||||
'ext': 'mp4',
|
||||
'title': "Girl's Day's Broadcast",
|
||||
'creator': "Girl's Day",
|
||||
'view_count': int,
|
||||
'uploader_id': 'muploader_a',
|
||||
'uploader_url': None,
|
||||
'uploader': None,
|
||||
'upload_date': '20150817',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': 1439816449,
|
||||
'like_count': int,
|
||||
'channel': 'Girl\'s Day',
|
||||
'channel_id': 'FDF27',
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1439818140,
|
||||
'release_date': '20150817',
|
||||
'duration': 1014,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937',
|
||||
'info_dict': {
|
||||
'id': '16937',
|
||||
'ext': 'mp4',
|
||||
'title': '첸백시 걍방',
|
||||
'creator': 'EXO',
|
||||
'view_count': int,
|
||||
'subtitles': 'mincount:12',
|
||||
'uploader_id': 'muploader_j',
|
||||
'uploader_url': 'http://vlive.tv',
|
||||
'uploader': None,
|
||||
'upload_date': '20161112',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': 1478923074,
|
||||
'like_count': int,
|
||||
'channel': 'EXO',
|
||||
'channel_id': 'F94BD',
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1478924280,
|
||||
'release_date': '20161112',
|
||||
'duration': 906,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870',
|
||||
'info_dict': {
|
||||
'id': '101870',
|
||||
'ext': 'mp4',
|
||||
'title': '[ⓓ xV] “레벨이들 매력에 반해? 안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)',
|
||||
'creator': 'Dispatch',
|
||||
'view_count': int,
|
||||
'subtitles': 'mincount:6',
|
||||
'uploader_id': 'V__FRA08071',
|
||||
'uploader_url': 'http://vlive.tv',
|
||||
'uploader': None,
|
||||
'upload_date': '20181130',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': 1543601327,
|
||||
'like_count': int,
|
||||
'channel': 'Dispatch',
|
||||
'channel_id': 'C796F3',
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1543601040,
|
||||
'release_date': '20181130',
|
||||
'duration': 279,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
# The wayback machine has special timestamp and "mode" values:
|
||||
# timestamp:
|
||||
# 1 = the first capture
|
||||
# 2 = the last capture
|
||||
# mode:
|
||||
# id_ = Identity - perform no alterations of the original resource, return it as it was archived.
|
||||
_WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'
|
||||
|
||||
def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs):
|
||||
for retry in self.RetryManager():
|
||||
try:
|
||||
return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
|
||||
raise ExtractorError('Page was not archived', expected=True)
|
||||
retry.error = e
|
||||
continue
|
||||
|
||||
def _download_archived_json(self, url, video_id, **kwargs):
|
||||
page = self._download_archived_page(url, video_id, **kwargs)
|
||||
if not page:
|
||||
raise ExtractorError('Page was not archived', expected=True)
|
||||
else:
|
||||
return self._parse_json(page, video_id)
|
||||
|
||||
def _extract_formats_from_m3u8(self, m3u8_url, params, video_id):
|
||||
m3u8_doc = self._download_archived_page(m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False)
|
||||
if not m3u8_doc:
|
||||
return
|
||||
|
||||
# M3U8 document should be changed to archive domain
|
||||
m3u8_doc = m3u8_doc.splitlines()
|
||||
url_base = m3u8_url.rsplit('/', 1)[0]
|
||||
first_segment = None
|
||||
for i, line in enumerate(m3u8_doc):
|
||||
if not line.startswith('#'):
|
||||
m3u8_doc[i] = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{urllib.parse.urlencode(params)}'
|
||||
first_segment = first_segment or m3u8_doc[i]
|
||||
|
||||
# Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870
|
||||
urlh = self._request_webpage(HEADRequest(first_segment), video_id, errnote=False,
|
||||
fatal=False, note='Check first segment availablity')
|
||||
if urlh:
|
||||
formats, subtitles = self._parse_m3u8_formats_and_subtitles('\n'.join(m3u8_doc), ext='mp4', video_id=video_id)
|
||||
if subtitles:
|
||||
self._report_ignoring_subs('m3u8')
|
||||
return formats
|
||||
|
||||
# Closely follows the logic of the ArchiveTeam grab script
|
||||
# See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua
|
||||
def _real_extract(self, url):
|
||||
video_id, url_date = self._match_valid_url(url).group('id', 'date')
|
||||
|
||||
webpage = self._download_archived_page(f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date)
|
||||
|
||||
player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id)
|
||||
user_country = traverse_obj(player_info, ('common', 'userCountry'))
|
||||
|
||||
main_script_url = self._search_regex(r'<script\s+src="([^"]+/js/main\.[^"]+\.js)"', webpage, 'main script url')
|
||||
main_script = self._download_archived_page(main_script_url, video_id, note='Downloading main script')
|
||||
app_id = self._search_regex(r'appId\s*=\s*"([^"]+)"', main_script, 'app id')
|
||||
|
||||
inkey = self._download_archived_json(
|
||||
f'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/{video_id}/inkey', video_id, note='Fetching inkey', query={
|
||||
'appId': app_id,
|
||||
'platformType': 'PC',
|
||||
'gcc': user_country,
|
||||
'locale': 'en_US',
|
||||
}, fatal=False)
|
||||
|
||||
vod_id = traverse_obj(player_info, ('postDetail', 'post', 'officialVideo', 'vodId'))
|
||||
|
||||
vod_data = self._download_archived_json(
|
||||
f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{vod_id}', video_id, note='Fetching vod data', query={
|
||||
'key': inkey.get('inkey'),
|
||||
'pid': 'rmcPlayer_16692457559726800', # partially unix time and partially random. Fixed value used by archiveteam project
|
||||
'sid': '2024',
|
||||
'ver': '2.0',
|
||||
'devt': 'html5_pc',
|
||||
'doct': 'json',
|
||||
'ptc': 'https',
|
||||
'sptc': 'https',
|
||||
'cpt': 'vtt',
|
||||
'ctls': '%7B%22visible%22%3A%7B%22fullscreen%22%3Atrue%2C%22logo%22%3Afalse%2C%22playbackRate%22%3Afalse%2C%22scrap%22%3Afalse%2C%22playCount%22%3Atrue%2C%22commentCount%22%3Atrue%2C%22title%22%3Atrue%2C%22writer%22%3Atrue%2C%22expand%22%3Afalse%2C%22subtitles%22%3Atrue%2C%22thumbnails%22%3Atrue%2C%22quality%22%3Atrue%2C%22setting%22%3Atrue%2C%22script%22%3Afalse%2C%22logoDimmed%22%3Atrue%2C%22badge%22%3Atrue%2C%22seekingTime%22%3Atrue%2C%22muted%22%3Atrue%2C%22muteButton%22%3Afalse%2C%22viewerNotice%22%3Afalse%2C%22linkCount%22%3Afalse%2C%22createTime%22%3Afalse%2C%22thumbnail%22%3Atrue%7D%2C%22clicked%22%3A%7B%22expand%22%3Afalse%2C%22subtitles%22%3Afalse%7D%7D',
|
||||
'pv': '4.26.9',
|
||||
'dr': '1920x1080',
|
||||
'cpl': 'en_US',
|
||||
'lc': 'en_US',
|
||||
'adi': '%5B%7B%22type%22%3A%22pre%22%2C%22exposure%22%3Afalse%2C%22replayExposure%22%3Afalse%7D%5D',
|
||||
'adu': '%2F',
|
||||
'videoId': vod_id,
|
||||
'cc': user_country,
|
||||
})
|
||||
|
||||
formats = []
|
||||
|
||||
streams = traverse_obj(vod_data, ('streams', ...))
|
||||
if len(streams) > 1:
|
||||
self.report_warning('Multiple streams found. Only the first stream will be downloaded.')
|
||||
stream = streams[0]
|
||||
|
||||
max_stream = max(
|
||||
stream.get('videos') or [],
|
||||
key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
|
||||
if max_stream is not None:
|
||||
params = {arg.get('name'): arg.get('value') for arg in stream.get('keys', []) if arg.get('type') == 'param'}
|
||||
formats = self._extract_formats_from_m3u8(max_stream.get('source'), params, video_id) or []
|
||||
|
||||
# For parts of the project MP4 files were archived
|
||||
max_video = max(
|
||||
traverse_obj(vod_data, ('videos', 'list', ...)),
|
||||
key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
|
||||
if max_video is not None:
|
||||
video_url = self._WAYBACK_BASE_URL + max_video.get('source')
|
||||
urlh = self._request_webpage(HEADRequest(video_url), video_id, errnote=False,
|
||||
fatal=False, note='Check video availablity')
|
||||
if urlh:
|
||||
formats.append({'url': video_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(player_info, ('postDetail', 'post', {
|
||||
'title': ('officialVideo', 'title', {str}),
|
||||
'creator': ('author', 'nickname', {str}),
|
||||
'channel': ('channel', 'channelName', {str}),
|
||||
'channel_id': ('channel', 'channelCode', {str}),
|
||||
'duration': ('officialVideo', 'playTime', {int_or_none}),
|
||||
'view_count': ('officialVideo', 'playCount', {int_or_none}),
|
||||
'like_count': ('officialVideo', 'likeCount', {int_or_none}),
|
||||
'comment_count': ('officialVideo', 'commentCount', {int_or_none}),
|
||||
'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}),
|
||||
'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}),
|
||||
})),
|
||||
**traverse_obj(vod_data, ('meta', {
|
||||
'uploader_id': ('user', 'id', {str}),
|
||||
'uploader': ('user', 'name', {str}),
|
||||
'uploader_url': ('user', 'url', {url_or_none}),
|
||||
'thumbnail': ('cover', 'source', {url_or_none}),
|
||||
}), expected_type=lambda x: x or None),
|
||||
**NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]),
|
||||
}
|
||||
|
@ -4,7 +4,6 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
@ -12,7 +11,7 @@ from ..utils import (
|
||||
|
||||
class ArcPublishingIE(InfoExtractor):
|
||||
_UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
|
||||
_VALID_URL = rf'arcpublishing:(?P<org>[a-z]+):(?P<id>{_UUID_REGEX})'
|
||||
_VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX
|
||||
_TESTS = [{
|
||||
# https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
|
||||
'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
|
||||
@ -75,12 +74,12 @@ class ArcPublishingIE(InfoExtractor):
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
entries = []
|
||||
# https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
|
||||
for powa_el in re.findall(rf'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="{ArcPublishingIE._UUID_REGEX}"[^>]*>)', webpage):
|
||||
for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
|
||||
powa = extract_attributes(powa_el) or {}
|
||||
org = powa.get('data-org')
|
||||
uuid = powa.get('data-uuid')
|
||||
if org and uuid:
|
||||
entries.append(f'arcpublishing:{org}:{uuid}')
|
||||
entries.append('arcpublishing:%s:%s' % (org, uuid))
|
||||
return entries
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -123,7 +122,7 @@ class ArcPublishingIE(InfoExtractor):
|
||||
elif stream_type in ('ts', 'hls'):
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
|
||||
if all(f.get('acodec') == 'none' for f in m3u8_formats):
|
||||
if all([f.get('acodec') == 'none' for f in m3u8_formats]):
|
||||
continue
|
||||
for f in m3u8_formats:
|
||||
height = f.get('height')
|
||||
@ -137,7 +136,7 @@ class ArcPublishingIE(InfoExtractor):
|
||||
else:
|
||||
vbr = int_or_none(s.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': join_nonempty(stream_type, vbr),
|
||||
'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type,
|
||||
'vbr': vbr,
|
||||
'width': int_or_none(s.get('width')),
|
||||
'height': int_or_none(s.get('height')),
|
||||
|
@ -1,25 +1,24 @@
|
||||
import functools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .generic import GenericIE
|
||||
from ..utils import (
|
||||
OnDemandPagedList,
|
||||
bug_reports_message,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
jwt_decode_hs256,
|
||||
make_archive_id,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
qualities,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
|
||||
class ARDMediathekBaseIE(InfoExtractor):
|
||||
@ -62,6 +61,45 @@ class ARDMediathekBaseIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _ARD_extract_episode_info(self, title):
|
||||
"""Try to extract season/episode data from the title."""
|
||||
res = {}
|
||||
if not title:
|
||||
return res
|
||||
|
||||
for pattern in [
|
||||
# Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
|
||||
# from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
|
||||
r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*',
|
||||
# E.g.: title="Fritjof aus Norwegen (2) (AD)"
|
||||
# from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
|
||||
r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*',
|
||||
r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*',
|
||||
# E.g.: title="Folge 25/42: Symmetrie"
|
||||
# from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
|
||||
# E.g.: title="Folge 1063 - Vertrauen"
|
||||
# from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
|
||||
r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:| -|) ).*',
|
||||
]:
|
||||
m = re.match(pattern, title)
|
||||
if m:
|
||||
groupdict = m.groupdict()
|
||||
res['season_number'] = int_or_none(groupdict.get('season_number'))
|
||||
res['episode_number'] = int_or_none(groupdict.get('episode_number'))
|
||||
res['episode'] = str_or_none(groupdict.get('episode'))
|
||||
# Build the episode title by removing numeric episode information:
|
||||
if groupdict.get('ep_info') and not res['episode']:
|
||||
res['episode'] = str_or_none(
|
||||
title.replace(groupdict.get('ep_info'), ''))
|
||||
if res['episode']:
|
||||
res['episode'] = res['episode'].strip()
|
||||
break
|
||||
|
||||
# As a fallback use the whole title as the episode name:
|
||||
if not res.get('episode'):
|
||||
res['episode'] = title.strip()
|
||||
return res
|
||||
|
||||
def _extract_formats(self, media_info, video_id):
|
||||
type_ = media_info.get('_type')
|
||||
media_array = media_info.get('_mediaArray', [])
|
||||
@ -85,7 +123,7 @@ class ARDMediathekBaseIE(InfoExtractor):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(stream_url, {
|
||||
'hdcore': '3.1.1',
|
||||
'plugin': 'aasp-3.1.1.69.124',
|
||||
'plugin': 'aasp-3.1.1.69.124'
|
||||
}), video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
@ -96,12 +134,12 @@ class ARDMediathekBaseIE(InfoExtractor):
|
||||
f = {
|
||||
'url': server,
|
||||
'play_path': stream_url,
|
||||
'format_id': f'a{num}-rtmp-{quality}',
|
||||
'format_id': 'a%s-rtmp-%s' % (num, quality),
|
||||
}
|
||||
else:
|
||||
f = {
|
||||
'url': stream_url,
|
||||
'format_id': f'a{num}-{ext}-{quality}',
|
||||
'format_id': 'a%s-%s-%s' % (num, ext, quality)
|
||||
}
|
||||
m = re.search(
|
||||
r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',
|
||||
@ -117,12 +155,144 @@ class ARDMediathekBaseIE(InfoExtractor):
|
||||
return formats
|
||||
|
||||
|
||||
class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
IE_NAME = 'ARD:mediathek'
|
||||
_VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
# available till 26.07.2022
|
||||
'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
|
||||
'info_dict': {
|
||||
'id': '44726822',
|
||||
'ext': 'mp4',
|
||||
'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
|
||||
'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
|
||||
'duration': 1740,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
# determine video id from url
|
||||
m = self._match_valid_url(url)
|
||||
|
||||
document_id = None
|
||||
|
||||
numid = re.search(r'documentId=([0-9]+)', url)
|
||||
if numid:
|
||||
document_id = video_id = numid.group(1)
|
||||
else:
|
||||
video_id = m.group('video_id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
ERRORS = (
|
||||
('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
|
||||
('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
|
||||
'Video %s is no longer available'),
|
||||
)
|
||||
|
||||
for pattern, message in ERRORS:
|
||||
if pattern in webpage:
|
||||
raise ExtractorError(message % video_id, expected=True)
|
||||
|
||||
if re.search(r'[\?&]rss($|[=&])', url):
|
||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||
if doc.tag == 'rss':
|
||||
return GenericIE()._extract_rss(url, video_id, doc)
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
||||
r'<h4 class="headline">(.*?)</h4>',
|
||||
r'<title[^>]*>(.*?)</title>'],
|
||||
webpage, 'title')
|
||||
description = self._og_search_description(webpage, default=None) or self._html_search_meta(
|
||||
'dcterms.abstract', webpage, 'description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'meta description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_regex(
|
||||
r'<p\s+class="teasertext">(.+?)</p>',
|
||||
webpage, 'teaser text', default=None)
|
||||
|
||||
# Thumbnail is sometimes not present.
|
||||
# It is in the mobile version, but that seems to use a different URL
|
||||
# structure altogether.
|
||||
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||
|
||||
media_streams = re.findall(r'''(?x)
|
||||
mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
|
||||
"([^"]+)"''', webpage)
|
||||
|
||||
if media_streams:
|
||||
QUALITIES = qualities(['lo', 'hi', 'hq'])
|
||||
formats = []
|
||||
for furl in set(media_streams):
|
||||
if furl.endswith('.f4m'):
|
||||
fid = 'f4m'
|
||||
else:
|
||||
fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
|
||||
fid = fid_m.group(1) if fid_m else None
|
||||
formats.append({
|
||||
'quality': QUALITIES(fid),
|
||||
'format_id': fid,
|
||||
'url': furl,
|
||||
})
|
||||
info = {
|
||||
'formats': formats,
|
||||
}
|
||||
else: # request JSON file
|
||||
if not document_id:
|
||||
video_id = self._search_regex(
|
||||
(r'/play/(?:config|media|sola)/(\d+)', r'contentId["\']\s*:\s*(\d+)'),
|
||||
webpage, 'media id', default=None)
|
||||
info = self._extract_media_info(
|
||||
'http://www.ardmediathek.de/play/media/%s' % video_id,
|
||||
webpage, video_id)
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
})
|
||||
info.update(self._ARD_extract_episode_info(info['title']))
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
|
||||
_TESTS = [{
|
||||
# available till 7.12.2023
|
||||
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
|
||||
'md5': '94812e6438488fb923c361a44469614b',
|
||||
'md5': 'a438f671e87a7eba04000336a119ccc4',
|
||||
'info_dict': {
|
||||
'id': 'maischberger-video-424',
|
||||
'display_id': 'maischberger-video-424',
|
||||
@ -229,36 +399,31 @@ class ARDIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARDMediathek'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
_VALID_URL = r'''(?x)https://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/]+/)?
|
||||
(?:player|live|video)/
|
||||
(?:[^?#]+/)?
|
||||
(?P<id>[a-zA-Z0-9]+)
|
||||
/?(?:[?#]|$)'''
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
_TOKEN_URL = 'https://sso.ardmediathek.de/sso/token'
|
||||
(?:(?P<client>[^/]+)/)?
|
||||
(?:player|live|video|(?P<playlist>sendung|sammlung))/
|
||||
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
|
||||
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
|
||||
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||
'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
|
||||
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
|
||||
'md5': '3fd5fead7a370a819341129c8d713136',
|
||||
'info_dict': {
|
||||
'display_id': 'Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||
'id': '12939099',
|
||||
'title': 'Liebe auf vier Pfoten',
|
||||
'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
|
||||
'duration': 5222,
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b',
|
||||
'timestamp': 1701343800,
|
||||
'upload_date': '20231130',
|
||||
'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
|
||||
'id': '12172961',
|
||||
'title': 'Wolfsland - Die traurigen Schwestern',
|
||||
'description': r're:^Als der Polizeiobermeister Raaben',
|
||||
'duration': 5241,
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
|
||||
'timestamp': 1670710500,
|
||||
'upload_date': '20221210',
|
||||
'ext': 'mp4',
|
||||
'episode': 'Liebe auf vier Pfoten',
|
||||
'series': 'Filme im MDR',
|
||||
'age_limit': 0,
|
||||
'channel': 'MDR',
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0'],
|
||||
'age_limit': 12,
|
||||
'episode': 'Wolfsland - Die traurigen Schwestern',
|
||||
'series': 'Filme im MDR'
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
||||
@ -285,49 +450,11 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'timestamp': 1636398000,
|
||||
'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
|
||||
'upload_date': '20211108',
|
||||
'display_id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
|
||||
'display_id': 'tagesschau-oder-tagesschau-20-00-uhr/das-erste',
|
||||
'duration': 915,
|
||||
'episode': 'tagesschau, 20:00 Uhr',
|
||||
'series': 'tagesschau',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
|
||||
'channel': 'ARD-Aktuell',
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/7-tage/7-tage-unter-harten-jungs/hr-fernsehen/N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'md5': 'c428b9effff18ff624d4f903bda26315',
|
||||
'info_dict': {
|
||||
'id': '94834686',
|
||||
'ext': 'mp4',
|
||||
'duration': 2670,
|
||||
'episode': '7 Tage ... unter harten Jungs',
|
||||
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
|
||||
'upload_date': '20231005',
|
||||
'timestamp': 1696491171,
|
||||
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'series': '7 Tage ...',
|
||||
'channel': 'HR',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:430c86d233afa42d?w=960&ch=fa32ba69bc87989a',
|
||||
'title': '7 Tage ... unter harten Jungs',
|
||||
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/lokalzeit-aus-duesseldorf/lokalzeit-aus-duesseldorf-oder-31-10-2024/wdr-duesseldorf/Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
|
||||
'info_dict': {
|
||||
'id': '13847165',
|
||||
'chapters': 'count:8',
|
||||
'ext': 'mp4',
|
||||
'channel': 'WDR',
|
||||
'display_id': 'Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
|
||||
'episode': 'Lokalzeit aus Düsseldorf | 31.10.2024',
|
||||
'series': 'Lokalzeit aus Düsseldorf',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f02ec9bd9b7bd5f6?w=960&ch=612491dcd5e09b0c',
|
||||
'title': 'Lokalzeit aus Düsseldorf | 31.10.2024',
|
||||
'upload_date': '20241031',
|
||||
'timestamp': 1730399400,
|
||||
'description': 'md5:12db30b3b706314efe3778b8df1a7058',
|
||||
'duration': 1759,
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz'],
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
@ -344,260 +471,203 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_episode_info(self, title):
|
||||
patterns = [
|
||||
# Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
|
||||
# from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
|
||||
r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*',
|
||||
# E.g.: title="Fritjof aus Norwegen (2) (AD)"
|
||||
# from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
|
||||
r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*',
|
||||
r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*',
|
||||
# E.g.: title="Folge 25/42: Symmetrie"
|
||||
# from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
|
||||
# E.g.: title="Folge 1063 - Vertrauen"
|
||||
# from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
|
||||
r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:| -|) ).*',
|
||||
# As a fallback use the full title
|
||||
r'(?P<title>.*)',
|
||||
]
|
||||
|
||||
return traverse_obj(patterns, (..., {functools.partial(re.match, string=title)}, {
|
||||
'season_number': ('season_number', {int_or_none}),
|
||||
'episode_number': ('episode_number', {int_or_none}),
|
||||
'episode': ((
|
||||
('episode', {str_or_none}),
|
||||
('ep_info', {lambda x: title.replace(x, '')}),
|
||||
('title', {str}),
|
||||
), {str.strip}),
|
||||
}), get_all=False)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
query = {'embedded': 'false', 'mcV6': 'true'}
|
||||
headers = {}
|
||||
|
||||
if self._get_cookies(self._TOKEN_URL).get('ams'):
|
||||
token = self._download_json(
|
||||
self._TOKEN_URL, display_id, 'Fetching token for age verification',
|
||||
'Unable to fetch age verification token', fatal=False)
|
||||
id_token = traverse_obj(token, ('idToken', {str}))
|
||||
decoded_token = traverse_obj(id_token, ({jwt_decode_hs256}, {dict}))
|
||||
user_id = traverse_obj(decoded_token, (('user_id', 'sub'), {str}), get_all=False)
|
||||
if not user_id:
|
||||
self.report_warning('Unable to extract token, continuing without authentication')
|
||||
else:
|
||||
headers['x-authorization'] = f'Bearer {id_token}'
|
||||
query['userId'] = user_id
|
||||
if decoded_token.get('age_rating') != 18:
|
||||
self.report_warning('Account is not verified as 18+; video may be unavailable')
|
||||
|
||||
page_data = self._download_json(
|
||||
f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}',
|
||||
display_id, query=query, headers=headers)
|
||||
|
||||
# For user convenience we use the old contentId instead of the longer crid
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283
|
||||
old_id = traverse_obj(page_data, ('tracking', 'atiCustomVars', 'contentId', {int}))
|
||||
if old_id is not None:
|
||||
video_id = str(old_id)
|
||||
archive_ids = [make_archive_id(ARDBetaMediathekIE, display_id)]
|
||||
else:
|
||||
self.report_warning(f'Could not extract contentId{bug_reports_message()}')
|
||||
video_id = display_id
|
||||
archive_ids = None
|
||||
|
||||
player_data = traverse_obj(
|
||||
page_data, ('widgets', lambda _, v: v['type'] in ('player_ondemand', 'player_live'), {dict}), get_all=False)
|
||||
is_live = player_data.get('type') == 'player_live'
|
||||
media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict}))
|
||||
|
||||
if player_data.get('blockedByFsk'):
|
||||
self.raise_login_required('This video is only available for age verified users or after 22:00')
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for stream in traverse_obj(media_data, ('streams', ..., {dict})):
|
||||
kind = stream.get('kind')
|
||||
# Prioritize main stream over sign language and others
|
||||
preference = 1 if kind == 'main' else None
|
||||
for media in traverse_obj(stream, ('media', lambda _, v: url_or_none(v['url']))):
|
||||
media_url = media['url']
|
||||
|
||||
audio_kind = traverse_obj(media, (
|
||||
'audios', 0, 'kind', {str}), default='').replace('standard', '')
|
||||
lang_code = traverse_obj(media, ('audios', 0, 'languageCode', {str})) or 'deu'
|
||||
lang = join_nonempty(lang_code, audio_kind)
|
||||
language_preference = 10 if lang == 'deu' else -10
|
||||
|
||||
if determine_ext(media_url) == 'm3u8':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
media_url, video_id, m3u8_id=f'hls-{kind}', preference=preference, fatal=False, live=is_live)
|
||||
for f in fmts:
|
||||
f['language'] = lang
|
||||
f['language_preference'] = language_preference
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
formats.append({
|
||||
'url': media_url,
|
||||
'format_id': f'http-{kind}',
|
||||
'preference': preference,
|
||||
'language': lang,
|
||||
'language_preference': language_preference,
|
||||
**traverse_obj(media, {
|
||||
'format_note': ('forcedLabel', {str}),
|
||||
'width': ('maxHResolutionPx', {int_or_none}),
|
||||
'height': ('maxVResolutionPx', {int_or_none}),
|
||||
'vcodec': ('videoCodec', {str}),
|
||||
}),
|
||||
})
|
||||
|
||||
for sub in traverse_obj(media_data, ('subtitles', ..., {dict})):
|
||||
for sources in traverse_obj(sub, ('sources', lambda _, v: url_or_none(v['url']))):
|
||||
subtitles.setdefault(sub.get('languageCode') or 'deu', []).append({
|
||||
'url': sources['url'],
|
||||
'ext': {'webvtt': 'vtt', 'ebutt': 'ttml'}.get(sources.get('kind')),
|
||||
})
|
||||
|
||||
age_limit = traverse_obj(page_data, ('fskRating', {lambda x: remove_start(x, 'FSK')}, {int_or_none}))
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
'age_limit': age_limit,
|
||||
**traverse_obj(media_data, {
|
||||
'chapters': ('pluginData', 'jumpmarks@all', 'chapterArray', lambda _, v: int_or_none(v['chapterTime']), {
|
||||
'start_time': ('chapterTime', {int_or_none}),
|
||||
'title': ('chapterTitle', {str}),
|
||||
}),
|
||||
}),
|
||||
**traverse_obj(media_data, ('meta', {
|
||||
'title': 'title',
|
||||
'description': 'synopsis',
|
||||
'timestamp': ('broadcastedOnDateTime', {parse_iso8601}),
|
||||
'series': 'seriesTitle',
|
||||
'thumbnail': ('images', 0, 'url', {url_or_none}),
|
||||
'duration': ('durationSeconds', {int_or_none}),
|
||||
'channel': 'clipSourceName',
|
||||
})),
|
||||
**self._extract_episode_info(page_data.get('title')),
|
||||
'_old_archive_ids': archive_ids,
|
||||
}
|
||||
|
||||
|
||||
class ARDMediathekCollectionIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/?#]+/)?
|
||||
(?P<playlist>sendung|serie|sammlung)/
|
||||
(?:(?P<display_id>[^?#]+?)/)?
|
||||
(?P<id>[a-zA-Z0-9]+)
|
||||
(?:/(?P<season>\d+)(?:/(?P<version>OV|AD))?)?/?(?:[?#]|$)'''
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ardmediathek.de/serie/quiz/staffel-1-originalversion/Y3JpZDovL3dkci5kZS9vbmUvcXVpeg/1/OV',
|
||||
'info_dict': {
|
||||
'id': 'Y3JpZDovL3dkci5kZS9vbmUvcXVpeg_1_OV',
|
||||
'display_id': 'quiz/staffel-1-originalversion',
|
||||
'title': 'Staffel 1 Originalversion',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-4-mit-audiodeskription/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/4/AD',
|
||||
'info_dict': {
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_4_AD',
|
||||
'display_id': 'babylon-berlin/staffel-4-mit-audiodeskription',
|
||||
'title': 'Staffel 4 mit Audiodeskription',
|
||||
},
|
||||
'playlist_count': 12,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/1/',
|
||||
'info_dict': {
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_1',
|
||||
'display_id': 'babylon-berlin/staffel-1',
|
||||
'title': 'Staffel 1',
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/sendung/tatort/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA',
|
||||
'info_dict': {
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA',
|
||||
'display_id': 'tatort',
|
||||
'title': 'Tatort',
|
||||
},
|
||||
'playlist_mincount': 500,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2',
|
||||
'info_dict': {
|
||||
'id': '5eOHzt8XB2sqeFXbIoJlg2',
|
||||
'display_id': 'die-kirche-bleibt-im-dorf',
|
||||
'title': 'Die Kirche bleibt im Dorf',
|
||||
'description': 'Die Kirche bleibt im Dorf',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}, {
|
||||
# playlist of type 'sendung'
|
||||
'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# playlist of type 'serie'
|
||||
'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# playlist of type 'sammlung'
|
||||
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PAGE_SIZE = 100
|
||||
def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
|
||||
""" Query the ARD server for playlist information
|
||||
and returns the data in "raw" format """
|
||||
if mode == 'sendung':
|
||||
graphQL = json.dumps({
|
||||
'query': '''{
|
||||
showPage(
|
||||
client: "%s"
|
||||
showId: "%s"
|
||||
pageNumber: %d
|
||||
) {
|
||||
pagination {
|
||||
pageSize
|
||||
totalElements
|
||||
}
|
||||
teasers { # Array
|
||||
mediumTitle
|
||||
links { target { id href title } }
|
||||
type
|
||||
}
|
||||
}}''' % (client, playlist_id, pageNumber),
|
||||
}).encode()
|
||||
else: # mode == 'sammlung'
|
||||
graphQL = json.dumps({
|
||||
'query': '''{
|
||||
morePage(
|
||||
client: "%s"
|
||||
compilationId: "%s"
|
||||
pageNumber: %d
|
||||
) {
|
||||
widget {
|
||||
pagination {
|
||||
pageSize
|
||||
totalElements
|
||||
}
|
||||
teasers { # Array
|
||||
mediumTitle
|
||||
links { target { id href title } }
|
||||
type
|
||||
}
|
||||
}
|
||||
}}''' % (client, playlist_id, pageNumber),
|
||||
}).encode()
|
||||
# Ressources for ARD graphQL debugging:
|
||||
# https://api-test.ardmediathek.de/public-gateway
|
||||
show_page = self._download_json(
|
||||
'https://api.ardmediathek.de/public-gateway',
|
||||
'[Playlist] %s' % display_id,
|
||||
data=graphQL,
|
||||
headers={'Content-Type': 'application/json'})['data']
|
||||
# align the structure of the returned data:
|
||||
if mode == 'sendung':
|
||||
show_page = show_page['showPage']
|
||||
else: # mode == 'sammlung'
|
||||
show_page = show_page['morePage']['widget']
|
||||
return show_page
|
||||
|
||||
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
|
||||
""" Collects all playlist entries and returns them as info dict.
|
||||
Supports playlists of mode 'sendung' and 'sammlung', and also nested
|
||||
playlists. """
|
||||
entries = []
|
||||
pageNumber = 0
|
||||
while True: # iterate by pageNumber
|
||||
show_page = self._ARD_load_playlist_snipped(
|
||||
playlist_id, display_id, client, mode, pageNumber)
|
||||
for teaser in show_page['teasers']: # process playlist items
|
||||
if '/compilation/' in teaser['links']['target']['href']:
|
||||
# alternativ cond.: teaser['type'] == "compilation"
|
||||
# => This is an nested compilation, e.g. like:
|
||||
# https://www.ardmediathek.de/ard/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2/
|
||||
link_mode = 'sammlung'
|
||||
else:
|
||||
link_mode = 'video'
|
||||
|
||||
item_url = 'https://www.ardmediathek.de/%s/%s/%s/%s/%s' % (
|
||||
client, link_mode, display_id,
|
||||
# perform HTLM quoting of episode title similar to ARD:
|
||||
re.sub('^-|-$', '', # remove '-' from begin/end
|
||||
re.sub('[^a-zA-Z0-9]+', '-', # replace special chars by -
|
||||
teaser['links']['target']['title'].lower()
|
||||
.replace('ä', 'ae').replace('ö', 'oe')
|
||||
.replace('ü', 'ue').replace('ß', 'ss'))),
|
||||
teaser['links']['target']['id'])
|
||||
entries.append(self.url_result(
|
||||
item_url,
|
||||
ie=ARDBetaMediathekIE.ie_key()))
|
||||
|
||||
if (show_page['pagination']['pageSize'] * (pageNumber + 1)
|
||||
>= show_page['pagination']['totalElements']):
|
||||
# we've processed enough pages to get all playlist entries
|
||||
break
|
||||
pageNumber = pageNumber + 1
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title=display_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, display_id, playlist_type, season_number, version = self._match_valid_url(url).group(
|
||||
'id', 'display_id', 'playlist', 'season', 'version')
|
||||
video_id, display_id, playlist_type, client, season_number = self._match_valid_url(url).group(
|
||||
'id', 'display_id', 'playlist', 'client', 'season')
|
||||
display_id, client = display_id or video_id, client or 'ard'
|
||||
|
||||
def call_api(page_num):
|
||||
api_path = 'compilations/ard' if playlist_type == 'sammlung' else 'widgets/ard/asset'
|
||||
return self._download_json(
|
||||
f'https://api.ardmediathek.de/page-gateway/{api_path}/{playlist_id}', playlist_id,
|
||||
f'Downloading playlist page {page_num}', query={
|
||||
'pageNumber': page_num,
|
||||
'pageSize': self._PAGE_SIZE,
|
||||
**({
|
||||
'seasoned': 'true',
|
||||
'seasonNumber': season_number,
|
||||
'withOriginalversion': 'true' if version == 'OV' else 'false',
|
||||
'withAudiodescription': 'true' if version == 'AD' else 'false',
|
||||
} if season_number else {}),
|
||||
if playlist_type:
|
||||
# TODO: Extract only specified season
|
||||
return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type)
|
||||
|
||||
player_page = self._download_json(
|
||||
'https://api.ardmediathek.de/public-gateway',
|
||||
display_id, data=json.dumps({
|
||||
'query': '''{
|
||||
playerPage(client:"%s", clipId: "%s") {
|
||||
blockedByFsk
|
||||
broadcastedOn
|
||||
maturityContentRating
|
||||
mediaCollection {
|
||||
_duration
|
||||
_geoblocked
|
||||
_isLive
|
||||
_mediaArray {
|
||||
_mediaStreamArray {
|
||||
_quality
|
||||
_server
|
||||
_stream
|
||||
}
|
||||
}
|
||||
_previewImage
|
||||
_subtitleUrl
|
||||
_type
|
||||
}
|
||||
show {
|
||||
title
|
||||
}
|
||||
image {
|
||||
src
|
||||
}
|
||||
synopsis
|
||||
title
|
||||
tracking {
|
||||
atiCustomVars {
|
||||
contentId
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % (client, video_id),
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json'
|
||||
})['data']['playerPage']
|
||||
title = player_page['title']
|
||||
content_id = str_or_none(try_get(
|
||||
player_page, lambda x: x['tracking']['atiCustomVars']['contentId']))
|
||||
media_collection = player_page.get('mediaCollection') or {}
|
||||
if not media_collection and content_id:
|
||||
media_collection = self._download_json(
|
||||
'https://www.ardmediathek.de/play/media/' + content_id,
|
||||
content_id, fatal=False) or {}
|
||||
info = self._parse_media_info(
|
||||
media_collection, content_id or video_id,
|
||||
player_page.get('blockedByFsk'))
|
||||
age_limit = None
|
||||
description = player_page.get('synopsis')
|
||||
maturity_content_rating = player_page.get('maturityContentRating')
|
||||
if maturity_content_rating:
|
||||
age_limit = int_or_none(maturity_content_rating.lstrip('FSK'))
|
||||
if not age_limit and description:
|
||||
age_limit = int_or_none(self._search_regex(
|
||||
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
|
||||
info.update({
|
||||
'age_limit': age_limit,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
|
||||
'series': try_get(player_page, lambda x: x['show']['title']),
|
||||
'thumbnail': (media_collection.get('_previewImage')
|
||||
or try_get(player_page, lambda x: update_url(x['image']['src'], query=None, fragment=None))
|
||||
or self.get_thumbnail_from_html(display_id, url)),
|
||||
})
|
||||
info.update(self._ARD_extract_episode_info(info['title']))
|
||||
return info
|
||||
|
||||
def fetch_page(page_num):
|
||||
for item in traverse_obj(call_api(page_num), ('teasers', ..., {dict})):
|
||||
item_id = traverse_obj(item, ('links', 'target', ('urlId', 'id')), 'id', get_all=False)
|
||||
if not item_id or item_id == playlist_id:
|
||||
continue
|
||||
item_mode = 'sammlung' if item.get('type') == 'compilation' else 'video'
|
||||
yield self.url_result(
|
||||
f'https://www.ardmediathek.de/{item_mode}/{item_id}',
|
||||
ie=(ARDMediathekCollectionIE if item_mode == 'sammlung' else ARDBetaMediathekIE),
|
||||
**traverse_obj(item, {
|
||||
'id': ('id', {str}),
|
||||
'title': ('longTitle', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'timestamp': ('broadcastedOn', {parse_iso8601}),
|
||||
}))
|
||||
|
||||
page_data = call_api(0)
|
||||
full_id = join_nonempty(playlist_id, season_number, version, delim='_')
|
||||
|
||||
return self.playlist_result(
|
||||
OnDemandPagedList(fetch_page, self._PAGE_SIZE), full_id, display_id=display_id,
|
||||
title=page_data.get('title'), description=page_data.get('synopsis'))
|
||||
def get_thumbnail_from_html(self, display_id, url):
|
||||
webpage = self._download_webpage(url, display_id, fatal=False) or ''
|
||||
return (
|
||||
self._og_search_thumbnail(webpage, default=None)
|
||||
or self._html_search_meta('thumbnailUrl', webpage, default=None))
|
||||
|
@ -64,7 +64,7 @@ class ArkenaIE(InfoExtractor):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
media = self._download_json(
|
||||
f'https://video.qbrick.com/api/v1/public/accounts/{account_id}/medias/{video_id}',
|
||||
'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id),
|
||||
video_id, query={
|
||||
# https://video.qbrick.com/docs/api/examples/library-api.html
|
||||
'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags',
|
||||
@ -131,8 +131,8 @@ class ArkenaIE(InfoExtractor):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
elif mime_type == 'application/dash+xml':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
href, video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
elif mime_type == 'application/vnd.ms-sstr+xml':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
href, video_id, ism_id='mss', fatal=False))
|
||||
|
@ -1,9 +1,11 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
format_field,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
@ -33,7 +35,7 @@ class ArnesIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'tags': ['linearna_algebra'],
|
||||
'start_time': 10,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
|
||||
'only_matching': True,
|
||||
@ -91,6 +93,6 @@ class ArnesIE(InfoExtractor):
|
||||
'duration': float_or_none(video.get('duration'), 1000),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'tags': video.get('hashtags'),
|
||||
'start_time': int_or_none(urllib.parse.parse_qs(
|
||||
urllib.parse.urlparse(url).query).get('t', [None])[0]),
|
||||
'start_time': int_or_none(compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user