Updated yt-dlp version
@@ -13,6 +13,7 @@ import netrc
 import os
 import random
 import re
+import subprocess
 import sys
 import time
 import types
@@ -21,9 +22,21 @@ import urllib.request
 import xml.etree.ElementTree
 
 from ..compat import functools  # isort: split
-from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
+from ..compat import (
+    compat_etree_fromstring,
+    compat_expanduser,
+    compat_os_name,
+    urllib_req_to_req,
+)
 from ..cookies import LenientSimpleCookie
 from ..downloader.f4m import get_base_url, remove_encrypted_media
+from ..downloader.hls import HlsFD
+from ..networking import HEADRequest, Request
+from ..networking.exceptions import (
+    HTTPError,
+    IncompleteRead,
+    network_exceptions,
+)
 from ..utils import (
     IDENTITY,
     JSON_LD_RE,
@@ -32,8 +45,8 @@ from ..utils import (
     FormatSorter,
     GeoRestrictedError,
     GeoUtils,
-    HEADRequest,
     LenientJSONDecoder,
+    Popen,
     RegexNotFoundError,
     RetryManager,
     UnsupportedError,
@@ -56,7 +69,7 @@ from ..utils import (
     join_nonempty,
     js_to_json,
     mimetype2ext,
-    network_exceptions,
+    netrc_from_content,
     orderedSet,
     parse_bitrate,
     parse_codecs,
@@ -66,7 +79,6 @@ from ..utils import (
     parse_resolution,
     sanitize_filename,
     sanitize_url,
-    sanitized_Request,
     smuggle_url,
     str_or_none,
     str_to_int,
@@ -78,8 +90,6 @@ from ..utils import (
     unescapeHTML,
     unified_strdate,
     unified_timestamp,
-    update_Request,
-    update_url_query,
     url_basename,
     url_or_none,
     urlhandle_detect_ext,
@@ -132,6 +142,7 @@ class InfoExtractor:
                                        is parsed from a string (in case of
                                        fragmented media)
                                    for MSS - URL of the ISM manifest.
+                    * request_data  Data to send in POST request to the URL
                     * manifest_url
                                  The URL of the manifest file in case of
                                  fragmented media:
@@ -219,7 +230,8 @@ class InfoExtractor:
                                  width : height ratio as float.
                     * no_resume  The server does not support resuming the
                                  (HTTP or RTMP) download. Boolean.
-                    * has_drm    The format has DRM and cannot be downloaded. Boolean
+                    * has_drm    True if the format has DRM and cannot be downloaded.
+                                 'maybe' if the format may have DRM and has to be tested before download.
                     * extra_param_to_segment_url  A query string to append to each
                                  fragment's URL, or to update each existing query string
                                  with. Only applied by the native HLS/DASH downloaders.
@@ -285,6 +297,7 @@ class InfoExtractor:
     channel_id:     Id of the channel.
     channel_url:    Full URL to a channel webpage.
     channel_follower_count: Number of followers of the channel.
+    channel_is_verified:  Whether the channel is verified on the platform.
     location:       Physical location where the video was filmed.
     subtitles:      The available subtitles as a dictionary in the format
                     {tag: subformats}. "tag" is usually a language code, and
@@ -313,6 +326,11 @@ class InfoExtractor:
                         * "author" - human-readable name of the comment author
                         * "author_id" - user ID of the comment author
                         * "author_thumbnail" - The thumbnail of the comment author
+                        * "author_url" - The url to the comment author's page
+                        * "author_is_verified" - Whether the author is verified
+                                                 on the platform
+                        * "author_is_uploader" - Whether the comment is made by
+                                                 the video uploader
                         * "id" - Comment ID
                         * "html" - Comment as HTML
                         * "text" - Plain text of the comment
@@ -324,8 +342,8 @@ class InfoExtractor:
                         * "dislike_count" - Number of negative ratings of the comment
                         * "is_favorited" - Whether the comment is marked as
                                            favorite by the video uploader
-                        * "author_is_uploader" - Whether the comment is made by
-                                                 the video uploader
+                        * "is_pinned" - Whether the comment is pinned to
+                                        the top of the comments
     age_limit:      Age restriction for the video, as an integer (years)
     webpage_url:    The URL to the video webpage, if given to yt-dlp it
                     should allow to get the same result again. (It will be set
@@ -349,6 +367,10 @@ class InfoExtractor:
                         * "start_time" - The start time of the chapter in seconds
                         * "end_time" - The end time of the chapter in seconds
                         * "title" (optional, string)
+    heatmap:        A list of dictionaries, with the following entries:
+                        * "start_time" - The start time of the data point in seconds
+                        * "end_time" - The end time of the data point in seconds
+                        * "value" - The normalized value of the data point (float between 0 and 1)
     playable_in_embed: Whether this video is allowed to play in embedded
                     players on other sites. Can be True (=always allowed),
                     False (=never allowed), None (=unknown), or a string
@@ -460,8 +482,8 @@ class InfoExtractor:
 
 
     Subclasses of this should also be added to the list of extractors and
-    should define a _VALID_URL regexp and, re-define the _real_extract() and
-    (optionally) _real_initialize() methods.
+    should define _VALID_URL as a regexp or a Sequence of regexps, and
+    re-define the _real_extract() and (optionally) _real_initialize() methods.
 
     Subclasses may also override suitable() if necessary, but ensure the function
     signature is preserved and that this function imports everything it needs
@@ -524,7 +546,7 @@ class InfoExtractor:
     _EMBED_REGEX = []
 
     def _login_hint(self, method=NO_DEFAULT, netrc=None):
-        password_hint = f'--username and --password, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
+        password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
         return {
             None: '',
             'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
@@ -551,8 +573,8 @@ class InfoExtractor:
         # we have cached the regexp for *this* class, whereas getattr would also
         # match the superclass
         if '_VALID_URL_RE' not in cls.__dict__:
-            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
-        return cls._VALID_URL_RE.match(url)
+            cls._VALID_URL_RE = tuple(map(re.compile, variadic(cls._VALID_URL)))
+        return next(filter(None, (regex.match(url) for regex in cls._VALID_URL_RE)), None)
 
     @classmethod
     def suitable(cls, url):
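Since `_VALID_URL` may now be a sequence, the matcher compiles every pattern and returns the first match. A minimal standalone sketch of that logic (the extractor patterns and URL are made up for illustration):

```python
import re

def match_valid_url(valid_urls, url):
    # compile each pattern once, then return the first one that matches
    valid_url_re = tuple(map(re.compile, valid_urls))
    return next(filter(None, (regex.match(url) for regex in valid_url_re)), None)

# hypothetical extractor that accepts two URL shapes
patterns = [
    r'https?://(?:www\.)?example\.com/watch/(?P<id>\d+)',
    r'https?://embed\.example\.com/(?P<id>\d+)',
]
print(match_valid_url(patterns, 'https://embed.example.com/42').group('id'))  # -> '42'
```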
@@ -707,11 +729,11 @@ class InfoExtractor:
             except UnsupportedError:
                 raise
             except ExtractorError as e:
-                e.video_id = e.video_id or self.get_temp_id(url),
+                e.video_id = e.video_id or self.get_temp_id(url)
                 e.ie = e.ie or self.IE_NAME,
                 e.traceback = e.traceback or sys.exc_info()[2]
                 raise
-            except http.client.IncompleteRead as e:
+            except IncompleteRead as e:
                 raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
             except (KeyError, StopIteration) as e:
                 raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
@@ -770,20 +792,25 @@ class InfoExtractor:
 
     @staticmethod
     def __can_accept_status_code(err, expected_status):
-        assert isinstance(err, urllib.error.HTTPError)
+        assert isinstance(err, HTTPError)
         if expected_status is None:
             return False
         elif callable(expected_status):
-            return expected_status(err.code) is True
+            return expected_status(err.status) is True
         else:
-            return err.code in variadic(expected_status)
+            return err.status in variadic(expected_status)
 
     def _create_request(self, url_or_request, data=None, headers=None, query=None):
         if isinstance(url_or_request, urllib.request.Request):
-            return update_Request(url_or_request, data=data, headers=headers, query=query)
-        if query:
-            url_or_request = update_url_query(url_or_request, query)
-        return sanitized_Request(url_or_request, data, headers or {})
+            self._downloader.deprecation_warning(
+                'Passing a urllib.request.Request to _create_request() is deprecated. '
+                'Use yt_dlp.networking.common.Request instead.')
+            url_or_request = urllib_req_to_req(url_or_request)
+        elif not isinstance(url_or_request, Request):
+            url_or_request = Request(url_or_request)
+
+        url_or_request.update(data=data, headers=headers, query=query)
+        return url_or_request
 
     def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
         """
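`_create_request()` now funnels everything into the networking framework's `Request` class and lets its `update()` method merge `data`, `headers` and `query`. A small usage sketch (the endpoint URL and payload are placeholders):

```python
from yt_dlp.networking import Request

req = Request('https://example.com/api')
# layer on body, headers and query parameters, as _create_request() now does
req.update(data=b'{"page": 1}', headers={'Content-Type': 'application/json'},
           query={'lang': 'en'})
print(req.url)     # query string merged into the URL
print(req.method)  # 'POST' once body data is attached
```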
@@ -819,14 +846,9 @@ class InfoExtractor:
         try:
             return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
         except network_exceptions as err:
-            if isinstance(err, urllib.error.HTTPError):
+            if isinstance(err, HTTPError):
                 if self.__can_accept_status_code(err, expected_status):
-                    # Retain reference to error to prevent file object from
-                    # being closed before it can be read. Works around the
-                    # effects of <https://bugs.python.org/issue15002>
-                    # introduced in Python 3.4.1.
-                    err.fp._error = err
-                    return err.fp
+                    return err.response
 
             if errnote is False:
                 return False
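With `HTTPError` coming from the networking framework, the status code is read from `err.status` and an accepted error's body from `err.response`. The acceptance rule itself is unchanged; mirrored standalone (with a tuple-wrap standing in for yt-dlp's `variadic` helper) it behaves like this:

```python
def can_accept_status_code(status, expected_status):
    # expected_status may be None, an int, a collection of ints,
    # or a predicate over the HTTP status code
    if expected_status is None:
        return False
    if callable(expected_status):
        return expected_status(status) is True
    if not isinstance(expected_status, (list, tuple, set)):
        expected_status = (expected_status,)
    return status in expected_status

print(can_accept_status_code(404, 404))                       # True
print(can_accept_status_code(429, lambda s: 400 <= s < 500))  # True
print(can_accept_status_code(503, (301, 302)))                # False
```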
@@ -958,11 +980,11 @@ class InfoExtractor:
         if prefix is not None:
             webpage_bytes = prefix + webpage_bytes
         if self.get_param('dump_intermediate_pages', False):
-            self.to_screen('Dumping request to ' + urlh.geturl())
+            self.to_screen('Dumping request to ' + urlh.url)
             dump = base64.b64encode(webpage_bytes).decode('ascii')
             self._downloader.to_screen(dump)
         if self.get_param('write_pages'):
-            filename = self._request_dump_filename(urlh.geturl(), video_id)
+            filename = self._request_dump_filename(urlh.url, video_id)
             self.to_screen(f'Saving request to {filename}')
             with open(filename, 'wb') as outf:
                 outf.write(webpage_bytes)
@@ -1020,7 +1042,7 @@ class InfoExtractor:
                           fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
         if self.get_param('load_pages'):
             url_or_request = self._create_request(url_or_request, data, headers, query)
-            filename = self._request_dump_filename(url_or_request.full_url, video_id)
+            filename = self._request_dump_filename(url_or_request.url, video_id)
             self.to_screen(f'Loading request from {filename}')
             try:
                 with open(filename, 'rb') as dumpf:
|
||||
while True:
|
||||
try:
|
||||
return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
|
||||
except http.client.IncompleteRead as e:
|
||||
except IncompleteRead as e:
|
||||
try_count += 1
|
||||
if try_count >= tries:
|
||||
raise e
|
||||
@@ -1280,45 +1302,48 @@ class InfoExtractor:
         return clean_html(res)
 
     def _get_netrc_login_info(self, netrc_machine=None):
-        username = None
-        password = None
         netrc_machine = netrc_machine or self._NETRC_MACHINE
 
-        if self.get_param('usenetrc', False):
-            try:
-                netrc_file = compat_expanduser(self.get_param('netrc_location') or '~')
-                if os.path.isdir(netrc_file):
-                    netrc_file = os.path.join(netrc_file, '.netrc')
-                info = netrc.netrc(file=netrc_file).authenticators(netrc_machine)
-                if info is not None:
-                    username = info[0]
-                    password = info[2]
-                else:
-                    raise netrc.NetrcParseError(
-                        'No authenticators for %s' % netrc_machine)
-            except (OSError, netrc.NetrcParseError) as err:
-                self.report_warning(
-                    'parsing .netrc: %s' % error_to_compat_str(err))
+        cmd = self.get_param('netrc_cmd')
+        if cmd:
+            cmd = cmd.replace('{}', netrc_machine)
+            self.to_screen(f'Executing command: {cmd}')
+            stdout, _, ret = Popen.run(cmd, text=True, shell=True, stdout=subprocess.PIPE)
+            if ret != 0:
+                raise OSError(f'Command returned error code {ret}')
+            info = netrc_from_content(stdout).authenticators(netrc_machine)
 
-        return username, password
+        elif self.get_param('usenetrc', False):
+            netrc_file = compat_expanduser(self.get_param('netrc_location') or '~')
+            if os.path.isdir(netrc_file):
+                netrc_file = os.path.join(netrc_file, '.netrc')
+            info = netrc.netrc(netrc_file).authenticators(netrc_machine)
+
+        else:
+            return None, None
+        if not info:
+            raise netrc.NetrcParseError(f'No authenticators for {netrc_machine}')
+        return info[0], info[2]
 
     def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
         """
         Get the login info as (username, password)
         First look for the manually specified credentials using username_option
         and password_option as keys in params dictionary. If no such credentials
-        available look in the netrc file using the netrc_machine or _NETRC_MACHINE
-        value.
+        are available try the netrc_cmd if it is defined or look in the
+        netrc file using the netrc_machine or _NETRC_MACHINE value.
         If there's no info available, return (None, None)
         """
 
         # Attempt to use provided username and password or .netrc data
         username = self.get_param(username_option)
         if username is not None:
             password = self.get_param(password_option)
         else:
-            username, password = self._get_netrc_login_info(netrc_machine)
+            try:
+                username, password = self._get_netrc_login_info(netrc_machine)
+            except (OSError, netrc.NetrcParseError) as err:
+                self.report_warning(f'Failed to parse .netrc: {err}')
+                return None, None
         return username, password
 
     def _get_tfa_info(self, note='two-factor verification code'):
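The new `netrc_cmd` branch shells out for credentials and feeds the command's stdout to `netrc_from_content()` instead of reading `~/.netrc` directly; `{}` in the command is replaced by the machine name, so the CLI equivalent is something like `--netrc-cmd 'gpg --decrypt ~/.authinfo.gpg'`. A sketch of the parsing step ('example', 'alice' and 'hunter2' are stand-ins):

```python
from yt_dlp.utils import netrc_from_content

# what a credential command might print to stdout
content = 'machine example login alice password hunter2'
info = netrc_from_content(content).authenticators('example')
print(info[0], info[2])  # -> alice hunter2
```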
@@ -1338,7 +1363,7 @@ class InfoExtractor:
     # Helper functions for extracting OpenGraph info
     @staticmethod
     def _og_regexes(prop):
-        content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
+        content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))'
         property_re = (r'(?:name|property)=(?:\'og%(sep)s%(prop)s\'|"og%(sep)s%(prop)s"|\s*og%(sep)s%(prop)s\b)'
                        % {'prop': re.escape(prop), 'sep': '(?::|[:-])'})
         template = r'<meta[^>]+?%s[^>]+?%s'
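The added `(?=\s|/?>)` lookahead matters for unquoted `content` attributes: the lazy `+?` could previously stop after a single character, while the lookahead forces the match to run up to the next whitespace or tag end. A quick standalone check:

```python
import re

# the updated pattern; the third alternative handles unquoted values
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))'
m = re.search(content_re, '<meta property="og:title" content=hello/>')
print(m.group(3))  # -> 'hello' (without the lookahead this could match just 'h')
```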
@@ -1788,7 +1813,7 @@ class InfoExtractor:
             return []
 
         manifest, urlh = res
-        manifest_url = urlh.geturl()
+        manifest_url = urlh.url
 
         return self._parse_f4m_formats(
             manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
@@ -1947,7 +1972,7 @@ class InfoExtractor:
             return [], {}
 
         m3u8_doc, urlh = res
-        m3u8_url = urlh.geturl()
+        m3u8_url = urlh.url
 
         return self._parse_m3u8_formats_and_subtitles(
             m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
@@ -1961,11 +1986,7 @@ class InfoExtractor:
                                  errnote=None, fatal=True, data=None, headers={}, query={},
                                  video_id=None):
         formats, subtitles = [], {}
-
-        has_drm = re.search('|'.join([
-            r'#EXT-X-FAXS-CM:',  # Adobe Flash Access
-            r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://',  # Apple FairPlay
-        ]), m3u8_doc)
+        has_drm = HlsFD._has_drm(m3u8_doc)
 
         def format_url(url):
             return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
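DRM detection for HLS is now delegated to the shared `HlsFD._has_drm` helper instead of a regex local to this method. Assuming the helper is callable on raw playlist text (as the hunk suggests), it can be exercised directly:

```python
from yt_dlp.downloader.hls import HlsFD

# a playlist carrying an Apple FairPlay key marker
playlist = '#EXTM3U\n#EXT-X-SESSION-KEY:METHOD=SAMPLE-AES,URI="skd://key-id"\n'
print(bool(HlsFD._has_drm(playlist)))  # True: a DRM marker is present
```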
@@ -2063,6 +2084,7 @@ class InfoExtractor:
                 'protocol': entry_protocol,
                 'preference': preference,
                 'quality': quality,
+                'has_drm': has_drm,
                 'vcodec': 'none' if media_type == 'AUDIO' else None,
             } for idx in _extract_m3u8_playlist_indices(manifest_url))
 
@@ -2122,6 +2144,7 @@ class InfoExtractor:
                 'protocol': entry_protocol,
                 'preference': preference,
                 'quality': quality,
+                'has_drm': has_drm,
             }
             resolution = last_stream_inf.get('RESOLUTION')
             if resolution:
@@ -2225,18 +2248,10 @@ class InfoExtractor:
         if res is False:
             assert not fatal
             return [], {}
 
         smil, urlh = res
-        smil_url = urlh.geturl()
-
-        namespace = self._parse_smil_namespace(smil)
-
-        fmts = self._parse_smil_formats(
-            smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
-        subs = self._parse_smil_subtitles(
-            smil, namespace=namespace)
-
-        return fmts, subs
+        return self._parse_smil_formats_and_subtitles(smil, urlh.url, video_id, f4m_params=f4m_params,
+                                                      namespace=self._parse_smil_namespace(smil))
 
     def _extract_smil_formats(self, *args, **kwargs):
         fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)
@@ -2250,7 +2265,7 @@ class InfoExtractor:
             return {}
 
         smil, urlh = res
-        smil_url = urlh.geturl()
+        smil_url = urlh.url
 
         return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
 
@@ -2262,9 +2277,8 @@ class InfoExtractor:
     def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
         namespace = self._parse_smil_namespace(smil)
 
-        formats = self._parse_smil_formats(
+        formats, subtitles = self._parse_smil_formats_and_subtitles(
             smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
-        subtitles = self._parse_smil_subtitles(smil, namespace=namespace)
 
         video_id = os.path.splitext(url_basename(smil_url))[0]
         title = None
@@ -2303,7 +2317,14 @@ class InfoExtractor:
         return self._search_regex(
             r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
 
-    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
+    def _parse_smil_formats(self, *args, **kwargs):
+        fmts, subs = self._parse_smil_formats_and_subtitles(*args, **kwargs)
+        if subs:
+            self._report_ignoring_subs('SMIL')
+        return fmts
+
+    def _parse_smil_formats_and_subtitles(
+            self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
         base = smil_url
         for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
             b = meta.get('base') or meta.get('httpBase')
@@ -2311,7 +2332,7 @@ class InfoExtractor:
                 base = b
                 break
 
-        formats = []
+        formats, subtitles = [], {}
         rtmp_count = 0
         http_count = 0
         m3u8_count = 0
@@ -2359,8 +2380,9 @@ class InfoExtractor:
             src_url = src_url.strip()
 
             if proto == 'm3u8' or src_ext == 'm3u8':
-                m3u8_formats = self._extract_m3u8_formats(
+                m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                     src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
+                self._merge_subtitles(m3u8_subs, target=subtitles)
                 if len(m3u8_formats) == 1:
                     m3u8_count += 1
                     m3u8_formats[0].update({
|
||||
f4m_url += urllib.parse.urlencode(f4m_params)
|
||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
|
||||
elif src_ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src_url, video_id, mpd_id='dash', fatal=False))
|
||||
mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(
|
||||
src_url, video_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(mpd_formats)
|
||||
self._merge_subtitles(mpd_subs, target=subtitles)
|
||||
elif re.search(r'\.ism/[Mm]anifest', src_url):
|
||||
formats.extend(self._extract_ism_formats(
|
||||
src_url, video_id, ism_id='mss', fatal=False))
|
||||
ism_formats, ism_subs = self._extract_ism_formats_and_subtitles(
|
||||
src_url, video_id, ism_id='mss', fatal=False)
|
||||
formats.extend(ism_formats)
|
||||
self._merge_subtitles(ism_subs, target=subtitles)
|
||||
elif src_url.startswith('http') and self._is_valid_url(src, video_id):
|
||||
http_count += 1
|
||||
formats.append({
|
||||
@@ -2416,7 +2442,10 @@ class InfoExtractor:
                 'format_note': 'SMIL storyboards',
             })
 
-        return formats
+        smil_subs = self._parse_smil_subtitles(smil, namespace=namespace)
+        self._merge_subtitles(smil_subs, target=subtitles)
+
+        return formats, subtitles
 
     def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
         urls = []
|
||||
return []
|
||||
|
||||
xspf, urlh = res
|
||||
xspf_url = urlh.geturl()
|
||||
xspf_url = urlh.url
|
||||
|
||||
return self._parse_xspf(
|
||||
xspf, playlist_id, xspf_url=xspf_url,
|
||||
@@ -2513,7 +2542,7 @@ class InfoExtractor:
             return [], {}
 
         # We could have been redirected to a new url when we retrieved our mpd file.
-        mpd_url = urlh.geturl()
+        mpd_url = urlh.url
         mpd_base_url = base_url(mpd_url)
 
         return self._parse_mpd_formats_and_subtitles(
@@ -2884,7 +2913,7 @@ class InfoExtractor:
         if ism_doc is None:
             return [], {}
 
-        return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
+        return self._parse_ism_formats_and_subtitles(ism_doc, urlh.url, ism_id)
 
     def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
         """
@@ -2980,6 +3009,8 @@ class InfoExtractor:
                 'protocol': 'ism',
                 'fragments': fragments,
+                'has_drm': ism_doc.find('Protection') is not None,
                 'language': stream_language,
+                'audio_channels': int_or_none(track.get('Channels')),
                 '_download_params': {
                     'stream_type': stream_type,
                     'duration': duration,
@@ -3435,7 +3466,7 @@ class InfoExtractor:
 
     def _get_cookies(self, url):
         """ Return a http.cookies.SimpleCookie with the cookies for the url """
-        return LenientSimpleCookie(self._downloader._calc_cookies(url))
+        return LenientSimpleCookie(self._downloader.cookiejar.get_cookie_header(url))
 
     def _apply_first_set_cookie_header(self, url_handle, cookie):
         """
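`_get_cookies()` now parses the `Cookie` header string produced by the cookiejar rather than calling the removed `_calc_cookies()` helper. A sketch of the parsing step (the header value is a made-up example of what `get_cookie_header(url)` might return):

```python
from yt_dlp.cookies import LenientSimpleCookie

header = 'session=abc123; theme=dark'
cookies = LenientSimpleCookie(header)
print(cookies['session'].value)  # -> abc123
```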
@@ -3510,8 +3541,8 @@ class InfoExtractor:
     @classmethod
     def is_single_video(cls, url):
         """Returns whether the URL is of a single video, None if unknown"""
-        assert cls.suitable(url), 'The URL must be suitable for the extractor'
-        return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE)
+        if cls.suitable(url):
+            return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE)
 
     @classmethod
     def is_suitable(cls, age_limit):
@@ -3524,7 +3555,7 @@ class InfoExtractor:
         desc = ''
         if cls._NETRC_MACHINE:
             if markdown:
-                desc += f' [<abbr title="netrc machine"><em>{cls._NETRC_MACHINE}</em></abbr>]'
+                desc += f' [*{cls._NETRC_MACHINE}*](## "netrc machine")'
             else:
                 desc += f' [{cls._NETRC_MACHINE}]'
         if cls.IE_DESC is False:
@@ -3646,6 +3677,42 @@ class InfoExtractor:
                 or urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
                 or default)
 
+    def _extract_chapters_helper(self, chapter_list, start_function, title_function, duration, strict=True):
+        if not duration:
+            return
+        chapter_list = [{
+            'start_time': start_function(chapter),
+            'title': title_function(chapter),
+        } for chapter in chapter_list or []]
+        if strict:
+            warn = self.report_warning
+        else:
+            warn = self.write_debug
+            chapter_list.sort(key=lambda c: c['start_time'] or 0)
+
+        chapters = [{'start_time': 0}]
+        for idx, chapter in enumerate(chapter_list):
+            if chapter['start_time'] is None:
+                warn(f'Incomplete chapter {idx}')
+            elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
+                chapters.append(chapter)
+            elif chapter not in chapters:
+                issue = (f'{chapter["start_time"]} > {duration}' if chapter['start_time'] > duration
+                         else f'{chapter["start_time"]} < {chapters[-1]["start_time"]}')
+                warn(f'Invalid start time ({issue}) for chapter "{chapter["title"]}"')
+        return chapters[1:]
+
+    def _extract_chapters_from_description(self, description, duration):
+        duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
+        sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
+        return self._extract_chapters_helper(
+            re.findall(sep_re % (duration_re, r'.+?'), description or ''),
+            start_function=lambda x: parse_duration(x[0]), title_function=lambda x: x[1],
+            duration=duration, strict=False) or self._extract_chapters_helper(
+            re.findall(sep_re % (r'.+?', duration_re), description or ''),
+            start_function=lambda x: parse_duration(x[1]), title_function=lambda x: x[0],
+            duration=duration, strict=False)
+
     @staticmethod
     def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
         all_known = all(map(
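The new `_extract_chapters_from_description()` scans description lines for `[h:]mm:ss` timestamps in either "timestamp title" or "title timestamp" order and hands the matches to `_extract_chapters_helper()`. A standalone re-creation of the first pass (yt-dlp uses its own `parse_duration`; a simplified stand-in is used here):

```python
import re

def parse_duration(ts):
    # simplified stand-in: 'mm:ss' or 'h:mm:ss' to seconds
    seconds = 0
    for part in ts.split(':'):
        seconds = seconds * 60 + int(part)
    return seconds

description = '''Intro text without a timestamp
0:00 Opening
1:25 Main topic
12:40 Outro'''

duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
for start, title in re.findall(sep_re % (duration_re, r'.+?'), description):
    print(parse_duration(start), title)  # 0 Opening / 85 Main topic / 760 Outro
```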