Updated ytdlp version

This commit is contained in:
2023-08-13 20:13:21 -05:00
parent 5264103f31
commit ee3e042b1b
344 changed files with 20808 additions and 7875 deletions

View File

@@ -30,7 +30,7 @@ from .hls import HlsFD
from .http import HttpFD
from .ism import IsmFD
from .mhtml import MhtmlFD
from .niconico import NiconicoDmcFD
from .niconico import NiconicoDmcFD, NiconicoLiveFD
from .rtmp import RtmpFD
from .rtsp import RtspFD
from .websocket import WebSocketFragmentFD
@@ -50,6 +50,7 @@ PROTOCOL_MAP = {
'ism': IsmFD,
'mhtml': MhtmlFD,
'niconico_dmc': NiconicoDmcFD,
'niconico_live': NiconicoLiveFD,
'fc2_live': FC2LiveFD,
'websocket_frag': WebSocketFragmentFD,
'youtube_live_chat': YoutubeLiveChatFD,

View File

@@ -49,10 +49,10 @@ class FileDownloader:
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
ratelimit: Download speed limit, in bytes/sec.
continuedl: Attempt to continue downloads if possible
throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
retries: Number of times to retry for HTTP error 5xx
file_access_retries: Number of times to retry on file access error
retries: Number of times to retry for expected network errors.
Default is 0 for API, but 10 for CLI
file_access_retries: Number of times to retry on file access error (default: 3)
buffersize: Size of download buffer in bytes.
noresizebuffer: Do not automatically resize the download buffer.
continuedl: Try to continue downloads if possible.
@@ -138,17 +138,21 @@ class FileDownloader:
def format_percent(percent):
return ' N/A%' if percent is None else f'{percent:>5.1f}%'
@staticmethod
def calc_eta(start, now, total, current):
@classmethod
def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT):
if total is NO_DEFAULT:
rate, remaining = start_or_rate, now_or_remaining
if None in (rate, remaining):
return None
return int(float(remaining) / rate)
start, now = start_or_rate, now_or_remaining
if total is None:
return None
if now is None:
now = time.time()
dif = now - start
if current == 0 or dif < 0.001: # One millisecond
return None
rate = float(current) / dif
return int((float(total) - float(current)) / rate)
rate = cls.calc_speed(start, now, current)
return rate and int((float(total) - float(current)) / rate)
@staticmethod
def calc_speed(start, now, bytes):
@@ -165,6 +169,12 @@ class FileDownloader:
def format_retries(retries):
return 'inf' if retries == float('inf') else int(retries)
@staticmethod
def filesize_or_none(unencoded_filename):
if os.path.isfile(unencoded_filename):
return os.path.getsize(unencoded_filename)
return 0
@staticmethod
def best_block_size(elapsed_time, bytes):
new_min = max(bytes / 2.0, 1.0)
@@ -225,7 +235,7 @@ class FileDownloader:
sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))
def wrapper(self, func, *args, **kwargs):
for retry in RetryManager(self.params.get('file_access_retries'), error_callback, fd=self):
for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self):
try:
return func(self, *args, **kwargs)
except OSError as err:
@@ -245,7 +255,8 @@ class FileDownloader:
@wrap_file_access('remove')
def try_remove(self, filename):
os.remove(filename)
if os.path.isfile(filename):
os.remove(filename)
@wrap_file_access('rename')
def try_rename(self, old_filename, new_filename):
@@ -285,7 +296,8 @@ class FileDownloader:
self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
else:
self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')
self._multiline.allow_colors = self.ydl._allow_colors.out and self.ydl._allow_colors.out != 'no_color'
self._multiline._HAVE_FULLCAP = self.ydl._allow_colors.out
def _finish_multiline_status(self):
self._multiline.end()
@@ -407,7 +419,6 @@ class FileDownloader:
"""Download to a filename using the info from info_dict
Return True on success and False otherwise
"""
nooverwrites_and_exists = (
not self.params.get('overwrites', True)
and os.path.exists(encodeFilename(filename))

View File

@@ -1,14 +1,16 @@
import enum
import json
import os.path
import os
import re
import subprocess
import sys
import tempfile
import time
import uuid
from .fragment import FragmentFD
from ..compat import functools
from ..networking import Request
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
from ..utils import (
Popen,
@@ -23,9 +25,7 @@ from ..utils import (
encodeArgument,
encodeFilename,
find_available_port,
handle_youtubedl_headers,
remove_end,
sanitized_Request,
traverse_obj,
)
@@ -43,6 +43,7 @@ class ExternalFD(FragmentFD):
def real_download(self, filename, info_dict):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
self._cookies_tempfile = None
try:
started = time.time()
@@ -55,6 +56,9 @@ class ExternalFD(FragmentFD):
# should take place
retval = 0
self.to_screen('[%s] Interrupted by user' % self.get_basename())
finally:
if self._cookies_tempfile:
self.try_remove(self._cookies_tempfile)
if retval == 0:
status = {
@@ -126,6 +130,16 @@ class ExternalFD(FragmentFD):
self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
keys, *args, **kwargs)
def _write_cookies(self):
if not self.ydl.cookiejar.filename:
tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
tmp_cookies.close()
self._cookies_tempfile = tmp_cookies.name
self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"')
# real_download resets _cookies_tempfile; if it's None then save() will write to cookiejar.filename
self.ydl.cookiejar.save(self._cookies_tempfile)
return self.ydl.cookiejar.filename or self._cookies_tempfile
def _call_downloader(self, tmpfilename, info_dict):
""" Either overwrite this or implement _make_cmd """
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
@@ -176,7 +190,7 @@ class ExternalFD(FragmentFD):
return 0
def _call_process(self, cmd, info_dict):
return Popen.run(cmd, text=True, stderr=subprocess.PIPE)
return Popen.run(cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None)
class CurlFD(ExternalFD):
@@ -185,6 +199,9 @@ class CurlFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
if cookie_header:
cmd += ['--cookie', cookie_header]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', f'{key}: {val}']
@@ -215,6 +232,9 @@ class AxelFD(ExternalFD):
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['-H', f'{key}: {val}']
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
if cookie_header:
cmd += ['-H', f'Cookie: {cookie_header}', '--max-redirect=0']
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
@@ -224,7 +244,9 @@ class WgetFD(ExternalFD):
AVAILABLE_OPT = '--version'
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto']
cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
cmd += ['--load-cookies', self._write_cookies()]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', f'{key}: {val}']
@@ -272,7 +294,7 @@ class Aria2cFD(ExternalFD):
return super()._call_downloader(tmpfilename, info_dict)
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c',
cmd = [self.exe, '-c', '--no-conf',
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
'--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
if 'fragments' in info_dict:
@@ -280,6 +302,8 @@ class Aria2cFD(ExternalFD):
else:
cmd += ['--min-split-size', '1M']
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
cmd += [f'--load-cookies={self._write_cookies()}']
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', f'{key}: {val}']
@@ -334,13 +358,12 @@ class Aria2cFD(ExternalFD):
'method': method,
'params': [f'token:{rpc_secret}', *params],
}).encode('utf-8')
request = sanitized_Request(
request = Request(
f'http://localhost:{rpc_port}/jsonrpc',
data=d, headers={
'Content-Type': 'application/json',
'Content-Length': f'{len(d)}',
'Ytdl-request-proxy': '__noproxy__',
})
}, proxies={'all': None})
with self.ydl.urlopen(request) as r:
resp = json.load(r)
assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server'
@@ -418,6 +441,14 @@ class HttpieFD(ExternalFD):
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += [f'{key}:{val}']
# httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
# If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
# 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
# 2: https://httpie.io/docs/cli/sessions
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
if cookie_header:
cmd += [f'Cookie:{cookie_header}']
return cmd
@@ -528,11 +559,16 @@ class FFmpegFD(ExternalFD):
selected_formats = info_dict.get('requested_formats') or [info_dict]
for i, fmt in enumerate(selected_formats):
if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']):
headers_dict = handle_youtubedl_headers(fmt['http_headers'])
is_http = re.match(r'^https?://', fmt['url'])
cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
if cookies:
args.extend(['-cookies', ''.join(
f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n'
for cookie in cookies)])
if fmt.get('http_headers') and is_http:
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in headers_dict.items())])
args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())])
if start_time:
args += ['-ss', str(start_time)]

View File

@@ -3,11 +3,11 @@ import io
import itertools
import struct
import time
import urllib.error
import urllib.parse
from .fragment import FragmentFD
from ..compat import compat_etree_fromstring
from ..networking.exceptions import HTTPError
from ..utils import fix_xml_ampersands, xpath_text
@@ -312,7 +312,7 @@ class F4mFD(FragmentFD):
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.geturl()
man_url = urlh.url
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
# and https://github.com/ytdl-org/youtube-dl/issues/7823)
@@ -407,8 +407,8 @@ class F4mFD(FragmentFD):
if box_type == b'mdat':
self._append_fragment(ctx, box_data)
break
except urllib.error.HTTPError as err:
if live and (err.code == 404 or err.code == 410):
except HTTPError as err:
if live and (err.status == 404 or err.status == 410):
# We didn't keep up with the live window. Continue
# with the next available fragment.
msg = 'Fragment %d unavailable' % frag_i

View File

@@ -1,24 +1,19 @@
import concurrent.futures
import contextlib
import http.client
import json
import math
import os
import struct
import time
import urllib.error
from .common import FileDownloader
from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_os_name
from ..utils import (
DownloadError,
RetryManager,
encodeFilename,
sanitized_Request,
traverse_obj,
)
from ..networking import Request
from ..networking.exceptions import HTTPError, IncompleteRead
from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj
from ..utils.networking import HTTPHeaderDict
class HttpQuietDownloader(HttpFD):
@@ -34,8 +29,8 @@ class FragmentFD(FileDownloader):
Available options:
fragment_retries: Number of times to retry a fragment for HTTP error (DASH
and hlsnative only)
fragment_retries: Number of times to retry a fragment for HTTP error
(DASH and hlsnative only). Default is 0 for API, but 10 for CLI
skip_unavailable_fragments:
Skip unavailable fragments (DASH and hlsnative only)
keep_fragments: Keep downloaded fragments on disk after downloading is
@@ -75,7 +70,7 @@ class FragmentFD(FileDownloader):
def _prepare_url(self, info_dict, url):
headers = info_dict.get('http_headers')
return sanitized_Request(url, None, headers) if headers else url
return Request(url, None, headers) if headers else url
def _prepare_and_start_frag_download(self, ctx, info_dict):
self._prepare_frag_download(ctx)
@@ -121,6 +116,11 @@ class FragmentFD(FileDownloader):
'request_data': request_data,
'ctx_id': ctx.get('ctx_id'),
}
frag_resume_len = 0
if ctx['dl'].params.get('continuedl', True):
frag_resume_len = self.filesize_or_none(self.temp_name(fragment_filename))
fragment_info_dict['frag_resume_len'] = ctx['frag_resume_len'] = frag_resume_len
success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
return False
@@ -155,9 +155,7 @@ class FragmentFD(FileDownloader):
del ctx['fragment_filename_sanitized']
def _prepare_frag_download(self, ctx):
if 'live' not in ctx:
ctx['live'] = False
if not ctx['live']:
if not ctx.setdefault('live', False):
total_frags_str = '%d' % ctx['total_frags']
ad_frags = ctx.get('ad_frags', 0)
if ad_frags:
@@ -170,15 +168,17 @@ class FragmentFD(FileDownloader):
**self.params,
'noprogress': True,
'test': False,
'sleep_interval': 0,
'max_sleep_interval': 0,
'sleep_interval_subtitles': 0,
})
tmpfilename = self.temp_name(ctx['filename'])
open_mode = 'wb'
resume_len = 0
# Establish possible resume length
if os.path.isfile(encodeFilename(tmpfilename)):
resume_len = self.filesize_or_none(tmpfilename)
if resume_len > 0:
open_mode = 'ab'
resume_len = os.path.getsize(encodeFilename(tmpfilename))
# Should be initialized before ytdl file check
ctx.update({
@@ -187,7 +187,9 @@ class FragmentFD(FileDownloader):
})
if self.__do_ytdl_file(ctx):
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
continuedl = self.params.get('continuedl', True)
if continuedl and ytdl_file_exists:
self._read_ytdl_file(ctx)
is_corrupt = ctx.get('ytdl_corrupt') is True
is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
@@ -201,7 +203,12 @@ class FragmentFD(FileDownloader):
if 'ytdl_corrupt' in ctx:
del ctx['ytdl_corrupt']
self._write_ytdl_file(ctx)
else:
if not continuedl:
if ytdl_file_exists:
self._read_ytdl_file(ctx)
ctx['fragment_index'] = resume_len = 0
self._write_ytdl_file(ctx)
assert ctx['fragment_index'] == 0
@@ -274,12 +281,10 @@ class FragmentFD(FileDownloader):
else:
frag_downloaded_bytes = s['downloaded_bytes']
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
if not ctx['live']:
state['eta'] = self.calc_eta(
start, time_now, estimated_size - resume_len,
state['downloaded_bytes'] - resume_len)
ctx['speed'] = state['speed'] = self.calc_speed(
ctx['fragment_started'], time_now, frag_downloaded_bytes)
ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx.get('frag_resume_len', 0))
if not ctx['live']:
state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
self._hook_progress(state, info_dict)
@@ -290,14 +295,12 @@ class FragmentFD(FileDownloader):
def _finish_frag_download(self, ctx, info_dict):
ctx['dest_stream'].close()
if self.__do_ytdl_file(ctx):
ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
if os.path.isfile(ytdl_filename):
self.try_remove(ytdl_filename)
self.try_remove(self.ytdl_filename(ctx['filename']))
elapsed = time.time() - ctx['started']
to_file = ctx['tmpfilename'] != '-'
if to_file:
downloaded_bytes = os.path.getsize(encodeFilename(ctx['tmpfilename']))
downloaded_bytes = self.filesize_or_none(ctx['tmpfilename'])
else:
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
@@ -449,7 +452,7 @@ class FragmentFD(FileDownloader):
frag_index = ctx['fragment_index'] = fragment['frag_index']
ctx['last_error'] = None
headers = info_dict.get('http_headers', {}).copy()
headers = HTTPHeaderDict(info_dict.get('http_headers'))
byte_range = fragment.get('byte_range')
if byte_range:
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
@@ -466,9 +469,10 @@ class FragmentFD(FileDownloader):
for retry in RetryManager(self.params.get('fragment_retries'), error_callback):
try:
ctx['fragment_count'] = fragment.get('fragment_count')
if not self._download_fragment(ctx, fragment['url'], info_dict, headers):
if not self._download_fragment(
ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')):
return
except (urllib.error.HTTPError, http.client.IncompleteRead) as err:
except (HTTPError, IncompleteRead) as err:
retry.error = err
continue
except DownloadError: # has own retry settings
@@ -496,7 +500,7 @@ class FragmentFD(FileDownloader):
download_fragment(fragment, ctx_copy)
return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')
self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
self.report_warning('The download speed shown is only of one thread. This is a known issue')
with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
try:
for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):

View File

@@ -28,7 +28,16 @@ class HlsFD(FragmentFD):
FD_NAME = 'hlsnative'
@staticmethod
def can_download(manifest, info_dict, allow_unplayable_formats=False):
def _has_drm(manifest): # TODO: https://github.com/yt-dlp/yt-dlp/pull/5039
return bool(re.search('|'.join((
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.apple\.streamingkeydelivery"', # Apple FairPlay
r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.microsoft\.playready"', # Microsoft PlayReady
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
)), manifest))
@classmethod
def can_download(cls, manifest, info_dict, allow_unplayable_formats=False):
UNSUPPORTED_FEATURES = [
# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
@@ -50,13 +59,15 @@ class HlsFD(FragmentFD):
]
if not allow_unplayable_formats:
UNSUPPORTED_FEATURES += [
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1], but not necessarily DRM
]
def check_results():
yield not info_dict.get('is_live')
for feature in UNSUPPORTED_FEATURES:
yield not re.search(feature, manifest)
if not allow_unplayable_formats:
yield not cls._has_drm(manifest)
return all(check_results())
def real_download(self, filename, info_dict):
@@ -64,13 +75,13 @@ class HlsFD(FragmentFD):
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.geturl()
man_url = urlh.url
s = urlh.read().decode('utf-8', 'ignore')
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
if can_download:
has_ffmpeg = FFmpegFD.available()
no_crypto = not Cryptodome and '#EXT-X-KEY:METHOD=AES-128' in s
no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s
if no_crypto and has_ffmpeg:
can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available'
elif no_crypto:
@@ -81,14 +92,13 @@ class HlsFD(FragmentFD):
message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
if not can_download:
has_drm = re.search('|'.join([
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
]), s)
if has_drm and not self.params.get('allow_unplayable_formats'):
self.report_error(
'This video is DRM protected; Try selecting another format with --format or '
'add --check-formats to automatically fallback to the next best format')
if self._has_drm(s) and not self.params.get('allow_unplayable_formats'):
if info_dict.get('has_drm') and self.params.get('test'):
self.to_screen(f'[{self.FD_NAME}] This format is DRM protected', skip_eol=True)
else:
self.report_error(
'This format is DRM protected; Try selecting another format with --format or '
'add --check-formats to automatically fallback to the next best format', tb=False)
return False
message = message or 'Unsupported features have been detected'
fd = FFmpegFD(self.ydl, self.params)

View File

@@ -1,12 +1,14 @@
import http.client
import os
import random
import socket
import ssl
import time
import urllib.error
from .common import FileDownloader
from ..networking import Request
from ..networking.exceptions import (
CertificateVerifyError,
HTTPError,
TransportError,
)
from ..utils import (
ContentTooShortError,
RetryManager,
@@ -16,18 +18,10 @@ from ..utils import (
encodeFilename,
int_or_none,
parse_http_range,
sanitized_Request,
try_call,
write_xattr,
)
RESPONSE_READ_EXCEPTIONS = (
TimeoutError,
socket.timeout, # compat: py < 3.10
ConnectionError,
ssl.SSLError,
http.client.HTTPException
)
from ..utils.networking import HTTPHeaderDict
class HttpFD(FileDownloader):
@@ -45,11 +39,8 @@ class HttpFD(FileDownloader):
ctx.tmpfilename = self.temp_name(filename)
ctx.stream = None
# Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'}
add_headers = info_dict.get('http_headers')
if add_headers:
headers.update(add_headers)
# Disable compression
headers = HTTPHeaderDict({'Accept-Encoding': 'identity'}, info_dict.get('http_headers'))
is_test = self.params.get('test', False)
chunk_size = self._TEST_FILE_SIZE if is_test else (
@@ -120,10 +111,10 @@ class HttpFD(FileDownloader):
if try_call(lambda: range_end >= ctx.content_len):
range_end = ctx.content_len - 1
request = sanitized_Request(url, request_data, headers)
request = Request(url, request_data, headers)
has_range = range_start is not None
if has_range:
request.add_header('Range', f'bytes={int(range_start)}-{int_or_none(range_end) or ""}')
request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}'
# Establish connection
try:
ctx.data = self.ydl.urlopen(request)
@@ -150,20 +141,21 @@ class HttpFD(FileDownloader):
# Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file
# and performing entire redownload
self.report_unable_to_resume()
elif range_start > 0:
self.report_unable_to_resume()
ctx.resume_len = 0
ctx.open_mode = 'wb'
ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
except urllib.error.HTTPError as err:
if err.code == 416:
ctx.data_len = ctx.content_len = int_or_none(ctx.data.headers.get('Content-length', None))
except HTTPError as err:
if err.status == 416:
# Unable to resume (requested range not satisfiable)
try:
# Open the connection again without the range header
ctx.data = self.ydl.urlopen(
sanitized_Request(url, request_data, headers))
content_length = ctx.data.info()['Content-Length']
except urllib.error.HTTPError as err:
if err.code < 500 or err.code >= 600:
Request(url, request_data, headers))
content_length = ctx.data.headers['Content-Length']
except HTTPError as err:
if err.status < 500 or err.status >= 600:
raise
else:
# Examine the reported length
@@ -191,17 +183,13 @@ class HttpFD(FileDownloader):
ctx.resume_len = 0
ctx.open_mode = 'wb'
return
elif err.code < 500 or err.code >= 600:
elif err.status < 500 or err.status >= 600:
# Unexpected HTTP error
raise
raise RetryDownload(err)
except urllib.error.URLError as err:
if isinstance(err.reason, ssl.CertificateError):
raise
raise RetryDownload(err)
# In urllib.request.AbstractHTTPHandler, the response is partially read on request.
# Any errors that occur during this will not be wrapped by URLError
except RESPONSE_READ_EXCEPTIONS as err:
except CertificateVerifyError:
raise
except TransportError as err:
raise RetryDownload(err)
def close_stream():
@@ -211,7 +199,12 @@ class HttpFD(FileDownloader):
ctx.stream = None
def download():
data_len = ctx.data.info().get('Content-length', None)
data_len = ctx.data.headers.get('Content-length')
if ctx.data.headers.get('Content-encoding'):
# Content-encoding is present, Content-length is not reliable anymore as we are
# doing auto decompression. (See: https://github.com/yt-dlp/yt-dlp/pull/6176)
data_len = None
# Range HTTP header may be ignored/unsupported by a webserver
# (e.g. extractor/scivee.py, extractor/bambuser.py).
@@ -252,7 +245,7 @@ class HttpFD(FileDownloader):
try:
# Download and write
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
except RESPONSE_READ_EXCEPTIONS as err:
except TransportError as err:
retry(err)
byte_counter += len(data_block)
@@ -333,15 +326,15 @@ class HttpFD(FileDownloader):
elif speed:
ctx.throttle_start = None
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
ctx.resume_len = byte_counter
# ctx.block_size = block_size
raise NextFragment()
if ctx.stream is None:
self.to_stderr('\n')
self.report_error('Did not get any data blocks')
return False
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
ctx.resume_len = byte_counter
raise NextFragment()
if ctx.tmpfilename != '-':
ctx.stream.close()
@@ -353,7 +346,7 @@ class HttpFD(FileDownloader):
# Update file modification time
if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None))
self._hook_progress({
'downloaded_bytes': byte_counter,

View File

@@ -2,9 +2,9 @@ import binascii
import io
import struct
import time
import urllib.error
from .fragment import FragmentFD
from ..networking.exceptions import HTTPError
from ..utils import RetryManager
u8 = struct.Struct('>B')
@@ -271,7 +271,7 @@ class IsmFD(FragmentFD):
write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
extra_state['ism_track_written'] = True
self._append_fragment(ctx, frag_content)
except urllib.error.HTTPError as err:
except HTTPError as err:
retry.error = err
continue

View File

@@ -1,8 +1,12 @@
import json
import threading
import time
from . import get_suitable_downloader
from .common import FileDownloader
from ..utils import sanitized_Request
from .external import FFmpegFD
from ..networking import Request
from ..utils import DownloadError, WebSocketsWrapper, str_or_none, try_get
class NiconicoDmcFD(FileDownloader):
@@ -24,7 +28,7 @@ class NiconicoDmcFD(FileDownloader):
heartbeat_data = heartbeat_info_dict['data'].encode()
heartbeat_interval = heartbeat_info_dict.get('interval', 30)
request = sanitized_Request(heartbeat_url, heartbeat_data)
request = Request(heartbeat_url, heartbeat_data)
def heartbeat():
try:
@@ -50,3 +54,93 @@ class NiconicoDmcFD(FileDownloader):
timer[0].cancel()
download_complete = True
return success
class NiconicoLiveFD(FileDownloader):
""" Downloads niconico live without being stopped """
def real_download(self, filename, info_dict):
video_id = info_dict['video_id']
ws_url = info_dict['url']
ws_extractor = info_dict['ws']
ws_origin_host = info_dict['origin']
cookies = info_dict.get('cookies')
live_quality = info_dict.get('live_quality', 'high')
live_latency = info_dict.get('live_latency', 'high')
dl = FFmpegFD(self.ydl, self.params or {})
new_info_dict = info_dict.copy()
new_info_dict.update({
'protocol': 'm3u8',
})
def communicate_ws(reconnect):
if reconnect:
ws = WebSocketsWrapper(ws_url, {
'Cookies': str_or_none(cookies) or '',
'Origin': f'https://{ws_origin_host}',
'Accept': '*/*',
'User-Agent': self.params['http_headers']['User-Agent'],
})
if self.ydl.params.get('verbose', False):
self.to_screen('[debug] Sending startWatching request')
ws.send(json.dumps({
'type': 'startWatching',
'data': {
'stream': {
'quality': live_quality,
'protocol': 'hls+fmp4',
'latency': live_latency,
'chasePlay': False
},
'room': {
'protocol': 'webSocket',
'commentable': True
},
'reconnect': True,
}
}))
else:
ws = ws_extractor
with ws:
while True:
recv = ws.recv()
if not recv:
continue
data = json.loads(recv)
if not data or not isinstance(data, dict):
continue
if data.get('type') == 'ping':
# pong back
ws.send(r'{"type":"pong"}')
ws.send(r'{"type":"keepSeat"}')
elif data.get('type') == 'disconnect':
self.write_debug(data)
return True
elif data.get('type') == 'error':
self.write_debug(data)
message = try_get(data, lambda x: x['body']['code'], str) or recv
return DownloadError(message)
elif self.ydl.params.get('verbose', False):
if len(recv) > 100:
recv = recv[:100] + '...'
self.to_screen('[debug] Server said: %s' % recv)
def ws_main():
reconnect = False
while True:
try:
ret = communicate_ws(reconnect)
if ret is True:
return
except BaseException as e:
self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e)))
time.sleep(10)
continue
finally:
reconnect = True
thread = threading.Thread(target=ws_main, daemon=True)
thread.start()
return dl.download(filename, new_info_dict)

View File

@@ -1,8 +1,8 @@
import json
import time
import urllib.error
from .fragment import FragmentFD
from ..networking.exceptions import HTTPError
from ..utils import (
RegexNotFoundError,
RetryManager,
@@ -10,6 +10,7 @@ from ..utils import (
int_or_none,
try_get,
)
from ..utils.networking import HTTPHeaderDict
class YoutubeLiveChatFD(FragmentFD):
@@ -37,10 +38,7 @@ class YoutubeLiveChatFD(FragmentFD):
start_time = int(time.time() * 1000)
def dl_fragment(url, data=None, headers=None):
http_headers = info_dict.get('http_headers', {})
if headers:
http_headers = http_headers.copy()
http_headers.update(headers)
http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers)
return self._download_fragment(ctx, url, info_dict, http_headers, data)
def parse_actions_replay(live_chat_continuation):
@@ -129,7 +127,7 @@ class YoutubeLiveChatFD(FragmentFD):
or frag_index == 1 and try_refresh_replay_beginning
or parse_actions_replay)
return (True, *func(live_chat_continuation))
except urllib.error.HTTPError as err:
except HTTPError as err:
retry.error = err
continue
return False, None, None, None