diff --git a/README.md b/README.md
index dd95b27..9f0ca6a 100644
--- a/README.md
+++ b/README.md
@@ -4,11 +4,24 @@ SolarFM is a Gtk+ Python file manager.
 # Notes
 Still Work in progress! Use at own risk!
+Additionally, if you are not building a .deb, just move the contents of user_config to their respective folders.
+Copy the share/solarfm folder to your user .config/ directory too.
+
+`pyrightconfig.json`
+The pyrightconfig.json file needs to stay at the same level as the .git folder in order for its settings to be detected when using pyright with LSP functionality.
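For reference, a minimal `pyrightconfig.json` sketch placed at the repository root (next to `.git`). The specific settings below are illustrative assumptions, not the project's actual configuration:

```json
{
    "venvPath": ".",
    "venv": ".venv",
    "pythonVersion": "3.10",
    "reportMissingImports": "warning"
}
```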
+["'])? + (?P(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+)) + (?(quote)(?P=quote))\s* ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) m = str_operator_rex.fullmatch(filter_spec) if m: - comparison_value = m.group('value') + if m.group('op') == '~=': + comparison_value = re.compile(m.group('value')) + else: + comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value')) str_op = STR_OPERATORS[m.group('op')] if m.group('negation'): op = lambda attr, value: not str_op(attr, value) @@ -1877,7 +2113,7 @@ class YoutubeDL(object): temp_file.close() try: success, _ = self.dl(temp_file.name, f, test=True) - except (DownloadError, IOError, OSError, ValueError) + network_exceptions: + except (DownloadError, OSError, ValueError) + network_exceptions: success = False finally: if os.path.exists(temp_file.name): @@ -1901,12 +2137,12 @@ class YoutubeDL(object): and download and ( not can_merge() - or info_dict.get('is_live', False) - or self.outtmpl_dict['default'] == '-')) + or info_dict.get('is_live') and not self.params.get('live_from_start') + or self.params['outtmpl']['default'] == '-')) compat = ( prefer_best or self.params.get('allow_multiple_audio_streams', False) - or 'format-spec' in self.params.get('compat_opts', [])) + or 'format-spec' in self.params['compat_opts']) return ( 'best/bestvideo+bestaudio' if prefer_best @@ -1917,7 +2153,7 @@ class YoutubeDL(object): def syntax_error(note, start): message = ( 'Invalid format specification: ' - '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1])) + '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1])) return SyntaxError(message) PICKFIRST = 'PICKFIRST' @@ -1929,90 +2165,88 @@ class YoutubeDL(object): allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False), 'video': self.params.get('allow_multiple_video_streams', False)} - check_formats = self.params.get('check_formats') == 'selected' - def _parse_filter(tokens): filter_parts = [] - for type, string, start, _, _ in tokens: - if type == tokenize.OP and string == ']': + for type, string_, start, _, _ in tokens: + if type == tokenize.OP and string_ == ']': return ''.join(filter_parts) else: - filter_parts.append(string) + filter_parts.append(string_) def _remove_unused_ops(tokens): - # Remove operators that we don't use and join them with the surrounding strings - # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' + # Remove operators that we don't use and join them with the surrounding strings. + # E.g. 
'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' ALLOWED_OPS = ('/', '+', ',', '(', ')') last_string, last_start, last_end, last_line = None, None, None, None - for type, string, start, end, line in tokens: - if type == tokenize.OP and string == '[': + for type, string_, start, end, line in tokens: + if type == tokenize.OP and string_ == '[': if last_string: yield tokenize.NAME, last_string, last_start, last_end, last_line last_string = None - yield type, string, start, end, line + yield type, string_, start, end, line # everything inside brackets will be handled by _parse_filter - for type, string, start, end, line in tokens: - yield type, string, start, end, line - if type == tokenize.OP and string == ']': + for type, string_, start, end, line in tokens: + yield type, string_, start, end, line + if type == tokenize.OP and string_ == ']': break - elif type == tokenize.OP and string in ALLOWED_OPS: + elif type == tokenize.OP and string_ in ALLOWED_OPS: if last_string: yield tokenize.NAME, last_string, last_start, last_end, last_line last_string = None - yield type, string, start, end, line + yield type, string_, start, end, line elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: if not last_string: - last_string = string + last_string = string_ last_start = start last_end = end else: - last_string += string + last_string += string_ if last_string: yield tokenize.NAME, last_string, last_start, last_end, last_line def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False): selectors = [] current_selector = None - for type, string, start, _, _ in tokens: + for type, string_, start, _, _ in tokens: # ENCODING is only defined in python 3.x if type == getattr(tokenize, 'ENCODING', None): continue elif type in [tokenize.NAME, tokenize.NUMBER]: - current_selector = FormatSelector(SINGLE, string, []) + current_selector = FormatSelector(SINGLE, string_, []) elif type == tokenize.OP: - if string == ')': + if string_ == ')': if not inside_group: # ')' will be handled by the parentheses group tokens.restore_last_token() break - elif inside_merge and string in ['/', ',']: + elif inside_merge and string_ in ['/', ',']: tokens.restore_last_token() break - elif inside_choice and string == ',': + elif inside_choice and string_ == ',': tokens.restore_last_token() break - elif string == ',': + elif string_ == ',': if not current_selector: raise syntax_error('"," must follow a format selector', start) selectors.append(current_selector) current_selector = None - elif string == '/': + elif string_ == '/': if not current_selector: raise syntax_error('"/" must follow a format selector', start) first_choice = current_selector second_choice = _parse_format_selection(tokens, inside_choice=True) current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), []) - elif string == '[': + elif string_ == '[': if not current_selector: current_selector = FormatSelector(SINGLE, 'best', []) format_filter = _parse_filter(tokens) current_selector.filters.append(format_filter) - elif string == '(': + elif string_ == '(': if current_selector: raise syntax_error('Unexpected "("', start) group = _parse_format_selection(tokens, inside_group=True) current_selector = FormatSelector(GROUP, group, []) - elif string == '+': + elif string_ == '+': if not current_selector: raise syntax_error('Unexpected "+"', start) selector_1 = current_selector @@ -2021,7 +2255,7 @@ class YoutubeDL(object): raise syntax_error('Expected a selector', start) current_selector = 
FormatSelector(MERGE, (selector_1, selector_2), []) else: - raise syntax_error('Operator not recognized: "{0}"'.format(string), start) + raise syntax_error(f'Operator not recognized: "{string_}"', start) elif type == tokenize.ENDMARKER: break if current_selector: @@ -2057,14 +2291,13 @@ class YoutubeDL(object): the_only_video = video_fmts[0] if len(video_fmts) == 1 else None the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None - output_ext = self.params.get('merge_output_format') - if not output_ext: - if the_only_video: - output_ext = the_only_video['ext'] - elif the_only_audio and not video_fmts: - output_ext = the_only_audio['ext'] - else: - output_ext = 'mkv' + output_ext = get_compatible_ext( + vcodecs=[f.get('vcodec') for f in video_fmts], + acodecs=[f.get('acodec') for f in audio_fmts], + vexts=[f['ext'] for f in video_fmts], + aexts=[f['ext'] for f in audio_fmts], + preferences=(try_call(lambda: self.params['merge_output_format'].split('/')) + or self.params.get('prefer_free_formats') and ('webm', 'mkv'))) filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info)) @@ -2090,6 +2323,7 @@ class YoutubeDL(object): 'vcodec': the_only_video.get('vcodec'), 'vbr': the_only_video.get('vbr'), 'stretched_ratio': the_only_video.get('stretched_ratio'), + 'aspect_ratio': the_only_video.get('aspect_ratio'), }) if the_only_audio: @@ -2097,15 +2331,25 @@ class YoutubeDL(object): 'acodec': the_only_audio.get('acodec'), 'abr': the_only_audio.get('abr'), 'asr': the_only_audio.get('asr'), + 'audio_channels': the_only_audio.get('audio_channels') }) return new_dict def _check_formats(formats): - if not check_formats: + if self.params.get('check_formats') == 'selected': + yield from self._check_formats(formats) + return + elif (self.params.get('check_formats') is not None + or self.params.get('allow_unplayable_formats')): yield from formats return - yield from self._check_formats(formats) + + for f in formats: + if f.get('has_drm'): + yield from self._check_formats([f]) + else: + yield f def _build_selector_function(selector): if isinstance(selector, list): # , @@ -2145,7 +2389,8 @@ class YoutubeDL(object): yield from _check_formats(ctx['formats'][::-1]) elif format_spec == 'mergeall': def selector_function(ctx): - formats = list(_check_formats(ctx['formats'])) + formats = list(_check_formats( + f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none')) if not formats: return merged_format = formats[-1] @@ -2154,7 +2399,7 @@ class YoutubeDL(object): yield merged_format else: - format_fallback, format_reverse, format_idx = False, True, 1 + format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1 mobj = re.match( r'(?P best|worst|b|w)(?P video|audio|v|a)?(?P \*)?(?:\.(?P [1-9]\d*))?$', format_spec) @@ -2181,6 +2426,7 @@ class YoutubeDL(object): filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' elif format_spec in self._format_selection_exts['video']: filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none' + seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none' elif format_spec in self._format_selection_exts['storyboards']: filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none' else: @@ -2189,15 +2435,19 @@ class YoutubeDL(object): def selector_function(ctx): formats = list(ctx['formats']) matches = list(filter(filter_f, formats)) if filter_f 
is not None else formats - if format_fallback and ctx['incomplete_formats'] and not matches: - # for extractors with incomplete formats (audio only (soundcloud) - # or video only (imgur)) best/worst will fallback to - # best/worst {video,audio}-only format - matches = formats + if not matches: + if format_fallback and ctx['incomplete_formats']: + # for extractors with incomplete formats (audio only (soundcloud) + # or video only (imgur)) best/worst will fallback to + # best/worst {video,audio}-only format + matches = formats + elif seperate_fallback and not ctx['has_merged_format']: + # for compatibility with youtube-dl when there is no pre-merged format + matches = list(filter(seperate_fallback, formats)) matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1])) try: yield matches[format_idx - 1] - except IndexError: + except LazyList.IndexError: return filters = [self._build_format_filter(f) for f in selector.filters] @@ -2209,13 +2459,13 @@ class YoutubeDL(object): return selector_function(ctx_copy) return final_selector - stream = io.BytesIO(format_spec.encode('utf-8')) + stream = io.BytesIO(format_spec.encode()) try: - tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline))) + tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline))) except tokenize.TokenError: raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) - class TokenIterator(object): + class TokenIterator: def __init__(self, tokens): self.tokens = tokens self.counter = 0 @@ -2238,13 +2488,34 @@ class YoutubeDL(object): parsed_selector = _parse_format_selection(iter(TokenIterator(tokens))) return _build_selector_function(parsed_selector) - def _calc_headers(self, info_dict): - res = std_headers.copy() - res.update(info_dict.get('http_headers') or {}) + def _calc_headers(self, info_dict, load_cookies=False): + res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers')) + clean_headers(res) - cookies = self._calc_cookies(info_dict) + if load_cookies: # For --load-info-json + self._load_cookies(res.get('Cookie'), autoscope=info_dict['url']) # compat + self._load_cookies(info_dict.get('cookies'), autoscope=False) + # The `Cookie` header is removed to prevent leaks and unscoped cookies. 
+ # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj + res.pop('Cookie', None) + cookies = self.cookiejar.get_cookies_for_url(info_dict['url']) if cookies: - res['Cookie'] = cookies + encoder = LenientSimpleCookie() + values = [] + for cookie in cookies: + _, value = encoder.value_encode(cookie.value) + values.append(f'{cookie.name}={value}') + if cookie.domain: + values.append(f'Domain={cookie.domain}') + if cookie.path: + values.append(f'Path={cookie.path}') + if cookie.secure: + values.append('Secure') + if cookie.expires: + values.append(f'Expires={cookie.expires}') + if cookie.version: + values.append(f'Version={cookie.version}') + info_dict['cookies'] = '; '.join(values) if 'X-Forwarded-For' not in res: x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip') @@ -2253,10 +2524,9 @@ class YoutubeDL(object): return res - def _calc_cookies(self, info_dict): - pr = sanitized_Request(info_dict['url']) - self.cookiejar.add_cookie_header(pr) - return pr.get_header('Cookie') + def _calc_cookies(self, url): + self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version') + return self.cookiejar.get_cookie_header(url) def _sort_thumbnails(self, thumbnails): thumbnails.sort(key=lambda t: ( @@ -2298,6 +2568,67 @@ class YoutubeDL(object): else: info_dict['thumbnails'] = thumbnails + def _fill_common_fields(self, info_dict, final=True): + # TODO: move sanitization here + if final: + title = info_dict['fulltitle'] = info_dict.get('title') + if not title: + if title == '': + self.write_debug('Extractor gave empty title. Creating a generic title') + else: + self.report_warning('Extractor failed to obtain "title". Creating a generic title instead') + info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}' + + if info_dict.get('duration') is not None: + info_dict['duration_string'] = formatSeconds(info_dict['duration']) + + for ts_key, date_key in ( + ('timestamp', 'upload_date'), + ('release_timestamp', 'release_date'), + ('modified_timestamp', 'modified_date'), + ): + if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None: + # Working around out-of-range timestamp values (e.g. negative ones on Windows, + # see http://bugs.python.org/issue1646728) + with contextlib.suppress(ValueError, OverflowError, OSError): + upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key]) + info_dict[date_key] = upload_date.strftime('%Y%m%d') + + live_keys = ('is_live', 'was_live') + live_status = info_dict.get('live_status') + if live_status is None: + for key in live_keys: + if info_dict.get(key) is False: + continue + if info_dict.get(key): + live_status = key + break + if all(info_dict.get(key) is False for key in live_keys): + live_status = 'not_live' + if live_status: + info_dict['live_status'] = live_status + for key in live_keys: + if info_dict.get(key) is None: + info_dict[key] = (live_status == key) + if live_status == 'post_live': + info_dict['was_live'] = True + + # Auto generate title fields corresponding to the *_number fields when missing + # in order to always have clean titles. This is very common for TV series. 
+ for field in ('chapter', 'season', 'episode'): + if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field): + info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) + + def _raise_pending_errors(self, info): + err = info.pop('__pending_error', None) + if err: + self.report_error(err, tb=False) + + def sort_formats(self, info_dict): + formats = self._get_formats(info_dict) + formats.sort(key=FormatSorter( + self, info_dict.get('_format_sort_fields') or []).calculate_preference) + def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' self._num_videos += 1 @@ -2307,14 +2638,6 @@ class YoutubeDL(object): elif not info_dict.get('id'): raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor']) - info_dict['fulltitle'] = info_dict.get('title') - if 'title' not in info_dict: - raise ExtractorError('Missing "title" field in extractor result', - video_id=info_dict['id'], ie=info_dict['extractor']) - elif not info_dict.get('title'): - self.report_warning('Extractor failed to obtain "title". Creating a generic title instead') - info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}' - def report_force_conversion(field, field_not, conversion): self.report_warning( '"%s" field is not %s - forcing %s conversion, there is an error in extractor' @@ -2322,21 +2645,39 @@ class YoutubeDL(object): def sanitize_string_field(info, string_field): field = info.get(string_field) - if field is None or isinstance(field, compat_str): + if field is None or isinstance(field, str): return report_force_conversion(string_field, 'a string', 'string') - info[string_field] = compat_str(field) + info[string_field] = str(field) def sanitize_numeric_fields(info): for numeric_field in self._NUMERIC_FIELDS: field = info.get(numeric_field) - if field is None or isinstance(field, compat_numeric_types): + if field is None or isinstance(field, (int, float)): continue report_force_conversion(numeric_field, 'numeric', 'int') info[numeric_field] = int_or_none(field) sanitize_string_field(info_dict, 'id') sanitize_numeric_fields(info_dict) + if info_dict.get('section_end') and info_dict.get('section_start') is not None: + info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3) + if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None): + self.report_warning('"duration" field is negative, there is an error in extractor') + + chapters = info_dict.get('chapters') or [] + if chapters and chapters[0].get('start_time'): + chapters.insert(0, {'start_time': 0}) + + dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')} + for idx, (prev, current, next_) in enumerate(zip( + (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1): + if current.get('start_time') is None: + current['start_time'] = prev.get('end_time') + if not current.get('end_time'): + current['end_time'] = next_.get('start_time') + if not current.get('title'): + current['title'] = f' ' if 'playlist' not in info_dict: # It isn't part of a playlist @@ -2355,45 +2696,7 @@ class YoutubeDL(object): if info_dict.get('display_id') is None and 'id' in info_dict: info_dict['display_id'] = info_dict['id'] - if info_dict.get('duration') is not None: - info_dict['duration_string'] = formatSeconds(info_dict['duration']) - - for ts_key, date_key in ( - ('timestamp', 'upload_date'), - ('release_timestamp', 'release_date'), - ('modified_timestamp', 
'modified_date'), - ): - if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None: - # Working around out-of-range timestamp values (e.g. negative ones on Windows, - # see http://bugs.python.org/issue1646728) - try: - upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key]) - info_dict[date_key] = upload_date.strftime('%Y%m%d') - except (ValueError, OverflowError, OSError): - pass - - live_keys = ('is_live', 'was_live') - live_status = info_dict.get('live_status') - if live_status is None: - for key in live_keys: - if info_dict.get(key) is False: - continue - if info_dict.get(key): - live_status = key - break - if all(info_dict.get(key) is False for key in live_keys): - live_status = 'not_live' - if live_status: - info_dict['live_status'] = live_status - for key in live_keys: - if info_dict.get(key) is None: - info_dict[key] = (live_status == key) - - # Auto generate title fields corresponding to the *_number fields when missing - # in order to always have clean titles. This is very common for TV series. - for field in ('chapter', 'season', 'episode'): - if info_dict.get('%s_number' % field) is not None and not info_dict.get(field): - info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) + self._fill_common_fields(info_dict) for cc_kind in ('subtitles', 'automatic_captions'): cc = info_dict.get(cc_kind) @@ -2411,24 +2714,32 @@ class YoutubeDL(object): info_dict['requested_subtitles'] = self.process_subtitles( info_dict['id'], subtitles, automatic_captions) - if info_dict.get('formats') is None: - # There's only one format available - formats = [info_dict] - else: - formats = info_dict['formats'] + formats = self._get_formats(info_dict) - info_dict['__has_drm'] = any(f.get('has_drm') for f in formats) + # Backward compatibility with InfoExtractor._sort_formats + field_preference = (formats or [{}])[0].pop('__sort_fields', None) + if field_preference: + info_dict['_format_sort_fields'] = field_preference + + info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it + f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None if not self.params.get('allow_unplayable_formats'): - formats = [f for f in formats if not f.get('has_drm')] + formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe'] - if info_dict.get('is_live'): - get_from_start = bool(self.params.get('live_from_start')) + if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats): + self.report_warning( + f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}' + 'only images are available for download. Use --list-formats to see them'.capitalize()) + + get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start')) + if not get_from_start: + info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + if info_dict.get('is_live') and formats: formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start] - if not get_from_start: - info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M') - - if not formats: - self.raise_no_formats(info_dict) + if get_from_start and not formats: + self.raise_no_formats(info_dict, msg=( + '--live-from-start is passed, but there are no formats that can be downloaded from the start. 
' + 'If you want to download from the current time, use --no-live-from-start')) def is_wellformed(f): url = f.get('url') @@ -2442,24 +2753,54 @@ class YoutubeDL(object): return True # Filter out malformed formats for better extraction robustness - formats = list(filter(is_wellformed, formats)) + formats = list(filter(is_wellformed, formats or [])) - formats_dict = {} + if not formats: + self.raise_no_formats(info_dict) - # We check that all the formats have the format and format_id fields - for i, format in enumerate(formats): + for format in formats: sanitize_string_field(format, 'format_id') sanitize_numeric_fields(format) format['url'] = sanitize_url(format['url']) + if format.get('ext') is None: + format['ext'] = determine_ext(format['url']).lower() + if format.get('protocol') is None: + format['protocol'] = determine_protocol(format) + if format.get('resolution') is None: + format['resolution'] = self.format_resolution(format, default=None) + if format.get('dynamic_range') is None and format.get('vcodec') != 'none': + format['dynamic_range'] = 'SDR' + if format.get('aspect_ratio') is None: + format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2)) + if (not format.get('manifest_url') # For fragmented formats, "tbr" is often max bitrate and not average + and info_dict.get('duration') and format.get('tbr') + and not format.get('filesize') and not format.get('filesize_approx')): + format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8)) + format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True) + + # Safeguard against old/insecure infojson when using --load-info-json + if info_dict.get('http_headers'): + info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers']) + info_dict['http_headers'].pop('Cookie', None) + + # This is copied to http_headers by the above _calc_headers and can now be removed + if '__x_forwarded_for_ip' in info_dict: + del info_dict['__x_forwarded_for_ip'] + + self.sort_formats({ + 'formats': formats, + '_format_sort_fields': info_dict.get('_format_sort_fields') + }) + + # Sanitize and group by format_id + formats_dict = {} + for i, format in enumerate(formats): if not format.get('format_id'): - format['format_id'] = compat_str(i) + format['format_id'] = str(i) else: # Sanitize format_id from characters used in format selector expression format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id']) - format_id = format['format_id'] - if format_id not in formats_dict: - formats_dict[format_id] = [] - formats_dict[format_id].append(format) + formats_dict.setdefault(format['format_id'], []).append(format) # Make sure all formats have unique format_id common_exts = set(itertools.chain(*self._format_selection_exts.values())) @@ -2468,40 +2809,17 @@ class YoutubeDL(object): for i, format in enumerate(ambiguous_formats): if ambigious_id: format['format_id'] = '%s-%d' % (format_id, i) - if format.get('ext') is None: - format['ext'] = determine_ext(format['url']).lower() # Ensure there is no conflict between id and ext in format selection # See https://github.com/yt-dlp/yt-dlp/issues/1282 if format['format_id'] != format['ext'] and format['format_id'] in common_exts: format['format_id'] = 'f%s' % format['format_id'] - for i, format in enumerate(formats): - if format.get('format') is None: - format['format'] = '{id} - {res}{note}'.format( - id=format['format_id'], - res=self.format_resolution(format), - note=format_field(format, 'format_note', ' (%s)'), 
- ) - if format.get('protocol') is None: - format['protocol'] = determine_protocol(format) - if format.get('resolution') is None: - format['resolution'] = self.format_resolution(format, default=None) - if format.get('dynamic_range') is None and format.get('vcodec') != 'none': - format['dynamic_range'] = 'SDR' - if (info_dict.get('duration') and format.get('tbr') - and not format.get('filesize') and not format.get('filesize_approx')): - format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8) - - # Add HTTP headers, so that external programs can use them from the - # json output - full_format_info = info_dict.copy() - full_format_info.update(format) - format['http_headers'] = self._calc_headers(full_format_info) - # Remove private housekeeping stuff - if '__x_forwarded_for_ip' in info_dict: - del info_dict['__x_forwarded_for_ip'] - - # TODO Central sorting goes here + if format.get('format') is None: + format['format'] = '{id} - {res}{note}'.format( + id=format['format_id'], + res=self.format_resolution(format), + note=format_field(format, 'format_note', ' (%s)'), + ) if self.params.get('check_formats') is True: formats = LazyList(self._check_formats(formats[::-1]), reverse=True) @@ -2515,11 +2833,16 @@ class YoutubeDL(object): info_dict, _ = self.pre_process(info_dict) - # The pre-processors may have modified the formats - formats = info_dict.get('formats', [info_dict]) + if self._match_entry(info_dict, incomplete=self._format_fields) is not None: + return info_dict - list_only = self.params.get('simulate') is None and ( - self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')) + self.post_extract(info_dict) + info_dict, _ = self.pre_process(info_dict, 'after_filter') + + # The pre-processors may have modified the formats + formats = self._get_formats(info_dict) + + list_only = self.params.get('simulate') == 'list_only' interactive_format_selection = not list_only and self.format_selector == '-' if self.params.get('list_thumbnails'): self.list_thumbnails(info_dict) @@ -2532,52 +2855,32 @@ class YoutubeDL(object): self.list_formats(info_dict) if list_only: # Without this printing, -F --print-json will not work - self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True) - return + self.__forced_printings(info_dict) + return info_dict format_selector = self.format_selector - if format_selector is None: - req_format = self._default_format_spec(info_dict, download=download) - self.write_debug('Default format spec: %s' % req_format) - format_selector = self.build_format_selector(req_format) - while True: if interactive_format_selection: - req_format = input( - self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS)) + req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS) + + '(Press ENTER for default, or Ctrl+C to quit)' + + self._format_screen(': ', self.Styles.EMPHASIS)) try: - format_selector = self.build_format_selector(req_format) + format_selector = self.build_format_selector(req_format) if req_format else None except SyntaxError as err: self.report_error(err, tb=False, is_error=False) continue - # While in format selection we may need to have an access to the original - # format set in order to calculate some metrics or do some processing. - # For now we need to be able to guess whether original formats provided - # by extractor are incomplete or not (i.e. 
whether extractor provides only - # video-only or audio-only formats) for proper formats selection for - # extractors with such incomplete formats (see - # https://github.com/ytdl-org/youtube-dl/pull/5556). - # Since formats may be filtered during format selection and may not match - # the original formats the results may be incorrect. Thus original formats - # or pre-calculated metrics should be passed to format selection routines - # as well. - # We will pass a context object containing all necessary additional data - # instead of just formats. - # This fixes incorrect format selection issue (see - # https://github.com/ytdl-org/youtube-dl/issues/10083). - incomplete_formats = ( - # All formats are video-only or - all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) - # all formats are audio-only - or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)) + if format_selector is None: + req_format = self._default_format_spec(info_dict, download=download) + self.write_debug(f'Default format spec: {req_format}') + format_selector = self.build_format_selector(req_format) - ctx = { + formats_to_download = list(format_selector({ 'formats': formats, - 'incomplete_formats': incomplete_formats, - } - - formats_to_download = list(format_selector(ctx)) + 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats), + 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video + or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio + })) if interactive_format_selection and not formats_to_download: self.report_error('Requested format is not available', tb=False, is_error=False) continue @@ -2585,29 +2888,48 @@ class YoutubeDL(object): if not formats_to_download: if not self.params.get('ignore_no_formats_error'): - raise ExtractorError('Requested format is not available', expected=True, - video_id=info_dict['id'], ie=info_dict['extractor']) + raise ExtractorError( + 'Requested format is not available. Use --list-formats for a list of available formats', + expected=True, video_id=info_dict['id'], ie=info_dict['extractor']) self.report_warning('Requested format is not available') # Process what we can, even without any available formats. 
formats_to_download = [{}] - best_format = formats_to_download[-1] + requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self)) + best_format, downloaded_formats = formats_to_download[-1], [] if download: - if best_format: - self.to_screen( - f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): ' - + ', '.join([f['format_id'] for f in formats_to_download])) + if best_format and requested_ranges: + def to_screen(*msg): + self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}') + + to_screen(f'Downloading {len(formats_to_download)} format(s):', + (f['format_id'] for f in formats_to_download)) + if requested_ranges != ({}, ): + to_screen(f'Downloading {len(requested_ranges)} time ranges:', + (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges)) max_downloads_reached = False - for i, fmt in enumerate(formats_to_download): - formats_to_download[i] = new_info = dict(info_dict) - # Save a reference to the original info_dict so that it can be modified in process_info if needed + + for fmt, chapter in itertools.product(formats_to_download, requested_ranges): + new_info = self._copy_infodict(info_dict) new_info.update(fmt) - new_info['__original_infodict'] = info_dict + offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf') + end_time = offset + min(chapter.get('end_time', duration), duration) + # duration may not be accurate. So allow deviations <1sec + if end_time == float('inf') or end_time > offset + duration + 1: + end_time = None + if chapter or offset: + new_info.update({ + 'section_start': offset + chapter.get('start_time', 0), + 'section_end': end_time, + 'section_title': chapter.get('title'), + 'section_number': chapter.get('index'), + }) + downloaded_formats.append(new_info) try: self.process_info(new_info) except MaxDownloadsReached: max_downloads_reached = True - new_info.pop('__original_infodict') + self._raise_pending_errors(new_info) # Remove copied info for key, val in tuple(new_info.items()): if info_dict.get(key) == val: @@ -2615,12 +2937,12 @@ class YoutubeDL(object): if max_downloads_reached: break - write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download) + write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats} assert write_archive.issubset({True, False, 'ignore'}) if True in write_archive and False not in write_archive: self.record_download_archive(info_dict) - info_dict['requested_downloads'] = formats_to_download + info_dict['requested_downloads'] = downloaded_formats info_dict = self.run_all_pps('after_video', info_dict) if max_downloads_reached: raise MaxDownloadsReached() @@ -2631,47 +2953,39 @@ class YoutubeDL(object): def process_subtitles(self, video_id, normal_subtitles, automatic_captions): """Select the requested subtitles and their format""" - available_subs = {} + available_subs, normal_sub_langs = {}, [] if normal_subtitles and self.params.get('writesubtitles'): available_subs.update(normal_subtitles) + normal_sub_langs = tuple(normal_subtitles.keys()) if automatic_captions and self.params.get('writeautomaticsub'): for lang, cap_info in automatic_captions.items(): if lang not in available_subs: available_subs[lang] = cap_info - if (not self.params.get('writesubtitles') and not - self.params.get('writeautomaticsub') or not - available_subs): + if not available_subs or ( + not self.params.get('writesubtitles') + and not 
self.params.get('writeautomaticsub')): return None - all_sub_langs = available_subs.keys() + all_sub_langs = tuple(available_subs.keys()) if self.params.get('allsubtitles', False): requested_langs = all_sub_langs elif self.params.get('subtitleslangs', False): - # A list is used so that the order of languages will be the same as - # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041 - requested_langs = [] - for lang_re in self.params.get('subtitleslangs'): - if lang_re == 'all': - requested_langs.extend(all_sub_langs) - continue - discard = lang_re[0] == '-' - if discard: - lang_re = lang_re[1:] - current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs) - if discard: - for lang in current_langs: - while lang in requested_langs: - requested_langs.remove(lang) - else: - requested_langs.extend(current_langs) - requested_langs = orderedSet(requested_langs) - elif 'en' in available_subs: - requested_langs = ['en'] + try: + requested_langs = orderedSet_from_options( + self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True) + except re.error as e: + raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}') else: - requested_langs = [list(all_sub_langs)[0]] + requested_langs = LazyList(itertools.chain( + ['en'] if 'en' in normal_sub_langs else [], + filter(lambda f: f.startswith('en'), normal_sub_langs), + ['en'] if 'en' in all_sub_langs else [], + filter(lambda f: f.startswith('en'), all_sub_langs), + normal_sub_langs, all_sub_langs, + ))[:1] if requested_langs: - self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs)) + self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}') formats_query = self.params.get('subtitlesformat', 'best') formats_preference = formats_query.split('/') if formats_query else [] @@ -2679,7 +2993,7 @@ class YoutubeDL(object): for lang in requested_langs: formats = available_subs.get(lang) if formats is None: - self.report_warning('%s subtitles not available for %s' % (lang, video_id)) + self.report_warning(f'{lang} subtitles not available for {video_id}') continue for ext in formats_preference: if ext == 'best': @@ -2701,66 +3015,67 @@ class YoutubeDL(object): if info_dict is None: return info_copy = info_dict.copy() + info_copy.setdefault('filename', self.prepare_filename(info_dict)) + if info_dict.get('requested_formats') is not None: + # For RTMP URLs, also include the playpath + info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats']) + elif info_dict.get('url'): + info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '') info_copy['formats_table'] = self.render_formats_table(info_dict) info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict) info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles')) info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions')) def format_tmpl(tmpl): - mobj = re.match(r'\w+(=?)$', tmpl) - if mobj and mobj.group(1): - return f'{tmpl[:-1]} = %({tmpl[:-1]})r' - elif mobj: - return f'%({tmpl})s' - return tmpl + mobj = re.fullmatch(r'([\w.:,]|-\d|(?P {([\w.:,]|-\d)+}))+=?', tmpl) + if not mobj: + return tmpl + + fmt = '%({})s' + if tmpl.startswith('{'): + tmpl, fmt = f'.{tmpl}', '%({})j' + if tmpl.endswith('='): + tmpl, fmt = tmpl[:-1], '{0} = %({0})#j' + return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(','))) for 
tmpl in self.params['forceprint'].get(key, []): self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy)) for tmpl, file_tmpl in self.params['print_to_file'].get(key, []): - filename = self.evaluate_outtmpl(file_tmpl, info_dict) + filename = self.prepare_filename(info_dict, outtmpl=file_tmpl) tmpl = format_tmpl(tmpl) self.to_screen(f'[info] Writing {tmpl!r} to: {filename}') - with io.open(filename, 'a', encoding='utf-8') as f: - f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n') + if self._ensure_dir_exists(filename): + with open(filename, 'a', encoding='utf-8', newline='') as f: + f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep) - def __forced_printings(self, info_dict, filename, incomplete): - def print_mandatory(field, actual_field=None): - if actual_field is None: - actual_field = field - if (self.params.get('force%s' % field, False) - and (not incomplete or info_dict.get(actual_field) is not None)): - self.to_stdout(info_dict[actual_field]) - - def print_optional(field): - if (self.params.get('force%s' % field, False) - and info_dict.get(field) is not None): - self.to_stdout(info_dict[field]) - - info_dict = info_dict.copy() - if filename is not None: - info_dict['filename'] = filename - if info_dict.get('requested_formats') is not None: - # For RTMP URLs, also include the playpath - info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats']) - elif 'url' in info_dict: - info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '') + return info_copy + def __forced_printings(self, info_dict, filename=None, incomplete=True): if (self.params.get('forcejson') or self.params['forceprint'].get('video') or self.params['print_to_file'].get('video')): self.post_extract(info_dict) - self._forceprint('video', info_dict) + if filename: + info_dict['filename'] = filename + info_copy = self._forceprint('video', info_dict) - print_mandatory('title') - print_mandatory('id') - print_mandatory('url', 'urls') - print_optional('thumbnail') - print_optional('description') - print_optional('filename') - if self.params.get('forceduration') and info_dict.get('duration') is not None: - self.to_stdout(formatSeconds(info_dict['duration'])) - print_mandatory('format') + def print_field(field, actual_field=None, optional=False): + if actual_field is None: + actual_field = field + if self.params.get(f'force{field}') and ( + info_copy.get(field) is not None or (not optional and not incomplete)): + self.to_stdout(info_copy[actual_field]) + + print_field('title') + print_field('id') + print_field('url', 'urls') + print_field('thumbnail', optional=True) + print_field('description', optional=True) + print_field('filename') + if self.params.get('forceduration') and info_copy.get('duration') is not None: + self.to_stdout(formatSeconds(info_copy['duration'])) + print_field('format') if self.params.get('forcejson'): self.to_stdout(json.dumps(self.sanitize_info(info_dict))) @@ -2791,7 +3106,7 @@ class YoutubeDL(object): urls = '", "'.join( (f['url'].split(',')[0] + ',' if f['url'].startswith('data:') else f['url']) for f in info.get('requested_formats', []) or [info]) - self.write_debug('Invoking downloader on "%s"' % urls) + self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"') # Note: Ideally info should be a deep-copied so that hooks cannot modify it. 
# But it may contain objects that are not deep-copyable @@ -2811,7 +3126,7 @@ class YoutubeDL(object): return None def process_info(self, info_dict): - """Process a single resolved IE result. (Modified it in-place)""" + """Process a single resolved IE result. (Modifies it in-place)""" assert info_dict.get('_type', 'video') == 'video' original_infodict = info_dict @@ -2823,7 +3138,18 @@ class YoutubeDL(object): info_dict['__write_download_archive'] = 'ignore' return + # Does nothing under normal operation - for backward compatibility of process_info self.post_extract(info_dict) + + def replace_info_dict(new_info): + nonlocal info_dict + if new_info == info_dict: + return + info_dict.clear() + info_dict.update(new_info) + + new_info, _ = self.pre_process(info_dict, 'video') + replace_info_dict(new_info) self._num_downloads += 1 # info_dict['_filename'] needs to be set for backward compatibility @@ -2834,8 +3160,13 @@ class YoutubeDL(object): # Forced printings self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict)) + def check_max_downloads(): + if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'): + raise MaxDownloadsReached() + if self.params.get('simulate'): info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') + check_max_downloads() return if full_filename is None: @@ -2883,19 +3214,21 @@ class YoutubeDL(object): else: try: self.to_screen('[info] Writing video annotations to: ' + annofn) - with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: + with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: annofile.write(info_dict['annotations']) except (KeyError, TypeError): self.report_warning('There are no annotations to write.') - except (OSError, IOError): + except OSError: self.report_error('Cannot write annotations file: ' + annofn) return # Write internet shortcut files def _write_link_file(link_type): - if 'webpage_url' not in info_dict: - self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information') - return False + url = try_get(info_dict['webpage_url'], iri_to_uri) + if not url: + self.report_warning( + f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown') + return True linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext')) if not self._ensure_dir_exists(encodeFilename(linkfn)): return False @@ -2904,13 +3237,13 @@ class YoutubeDL(object): return True try: self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}') - with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', - newline='\r\n' if link_type == 'url' else '\n') as linkfile: - template_vars = {'url': iri_to_uri(info_dict['webpage_url'])} + with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', + newline='\r\n' if link_type == 'url' else '\n') as linkfile: + template_vars = {'url': url} if link_type == 'desktop': template_vars['filename'] = linkfn[:-(len(link_type) + 1)] linkfile.write(LINK_TEMPLATES[link_type] % template_vars) - except (OSError, IOError): + except OSError: self.report_error(f'Cannot write internet shortcut {linkfn}') return False return True @@ -2930,19 +3263,8 @@ class YoutubeDL(object): for link_type, should_write in write_links.items()): return - def replace_info_dict(new_info): - nonlocal info_dict - if new_info == info_dict: - return - info_dict.clear() - 
info_dict.update(new_info) - - try: - new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) - replace_info_dict(new_info) - except PostProcessingError as err: - self.report_error('Preprocessing: %s' % str(err)) - return + new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) + replace_info_dict(new_info) if self.params.get('skip_download'): info_dict['filepath'] = temp_filename @@ -2964,40 +3286,24 @@ class YoutubeDL(object): info_dict['ext'] = os.path.splitext(file)[1][1:] return file - success = True + fd, success = None, True + if info_dict.get('protocol') or info_dict.get('url'): + fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-') + if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and ( + info_dict.get('section_start') or info_dict.get('section_end')): + msg = ('This format cannot be partially downloaded' if FFmpegFD.available() + else 'You have requested downloading the video partially, but ffmpeg is not installed') + self.report_error(f'{msg}. Aborting') + return + if info_dict.get('requested_formats') is not None: - - def compatible_formats(formats): - # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them. - video_formats = [format for format in formats if format.get('vcodec') != 'none'] - audio_formats = [format for format in formats if format.get('acodec') != 'none'] - if len(video_formats) > 2 or len(audio_formats) > 2: - return False - - # Check extension - exts = set(format.get('ext') for format in formats) - COMPATIBLE_EXTS = ( - set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')), - set(('webm',)), - ) - for ext_sets in COMPATIBLE_EXTS: - if ext_sets.issuperset(exts): - return True - # TODO: Check acodec/vcodec - return False - - requested_formats = info_dict['requested_formats'] old_ext = info_dict['ext'] if self.params.get('merge_output_format') is None: - if not compatible_formats(requested_formats): - info_dict['ext'] = 'mkv' - self.report_warning( - 'Requested formats are incompatible for merge and will be merged into mkv') if (info_dict['ext'] == 'webm' and info_dict.get('thumbnails') # check with type instead of pp_key, __name__, or isinstance # since we dont want any custom PPs to trigger this - and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): + and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721 info_dict['ext'] = 'mkv' self.report_warning( 'webm doesn\'t support embedding a thumbnail, mkv will be used') @@ -3011,27 +3317,28 @@ class YoutubeDL(object): os.path.splitext(filename)[0] if filename_real_ext in (old_ext, new_ext) else filename) - return '%s.%s' % (filename_wo_ext, ext) + return f'{filename_wo_ext}.{ext}' # Ensure filename always has a correct extension for successful merge full_filename = correct_ext(full_filename) temp_filename = correct_ext(temp_filename) dl_filename = existing_video_file(full_filename, temp_filename) + info_dict['__real_download'] = False + # NOTE: Copy so that original format dicts are not modified + info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats'])) - downloaded = [] merger = FFmpegMergerPP(self) - - fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-') + downloaded = [] if dl_filename is not None: self.report_file_already_downloaded(dl_filename) elif fd: - for f in requested_formats if fd != FFmpegFD else []: + for f in 
info_dict['requested_formats'] if fd != FFmpegFD else []: f['filepath'] = fname = prepend_extension( correct_ext(temp_filename, info_dict['ext']), 'f%s' % f['format_id'], info_dict['ext']) downloaded.append(fname) - info_dict['url'] = '\n'.join(f['url'] for f in requested_formats) + info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats']) success, real_download = self.dl(temp_filename, info_dict) info_dict['__real_download'] = real_download else: @@ -3041,9 +3348,11 @@ class YoutubeDL(object): 'while also allowing unplayable formats to be downloaded. ' 'The formats won\'t be merged to prevent data corruption.') elif not merger.available: - self.report_warning( - 'You have requested merging of multiple formats but ffmpeg is not installed. ' - 'The formats won\'t be merged.') + msg = 'You have requested merging of multiple formats but ffmpeg is not installed' + if not self.params.get('ignoreerrors'): + self.report_error(f'{msg}. Aborting due to --abort-on-error') + return + self.report_warning(f'{msg}. The formats won\'t be merged') if temp_filename == '-': reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params) @@ -3053,7 +3362,7 @@ class YoutubeDL(object): f'You have requested downloading multiple formats to stdout {reason}. ' 'The formats will be streamed one after the other') fname = temp_filename - for f in requested_formats: + for f in info_dict['requested_formats']: new_info = dict(info_dict) del new_info['requested_formats'] new_info.update(f) @@ -3094,12 +3403,13 @@ class YoutubeDL(object): except network_exceptions as err: self.report_error('unable to download video data: %s' % error_to_compat_str(err)) return - except (OSError, IOError) as err: + except OSError as err: raise UnavailableVideoError(err) except (ContentTooShortError, ) as err: - self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) + self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})') return + self._raise_pending_errors(info_dict) if success and full_filename != '-': def fixup(): @@ -3110,16 +3420,16 @@ class YoutubeDL(object): if fixup_policy in ('ignore', 'never'): return elif fixup_policy == 'warn': - do_fixup = False + do_fixup = 'warn' elif fixup_policy != 'force': assert fixup_policy in ('detect_or_warn', None) if not info_dict.get('__real_download'): do_fixup = False def ffmpeg_fixup(cndn, msg, cls): - if not cndn: + if not (do_fixup and cndn): return - if not do_fixup: + elif do_fixup == 'warn': self.report_warning(f'{vid}: {msg}') return pp = cls(self) @@ -3129,30 +3439,33 @@ class YoutubeDL(object): self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically') stretched_ratio = info_dict.get('stretched_ratio') - ffmpeg_fixup( - stretched_ratio not in (1, None), - f'Non-uniform pixel ratio {stretched_ratio}', - FFmpegFixupStretchedPP) - - ffmpeg_fixup( - (info_dict.get('requested_formats') is None - and info_dict.get('container') == 'm4a_dash' - and info_dict.get('ext') == 'm4a'), - 'writing DASH m4a. 
Only some players support this container', - FFmpegFixupM4aPP) + ffmpeg_fixup(stretched_ratio not in (1, None), + f'Non-uniform pixel ratio {stretched_ratio}', + FFmpegFixupStretchedPP) downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None - downloader = downloader.__name__ if downloader else None + downloader = downloader.FD_NAME if downloader else None - if info_dict.get('requested_formats') is None: # Not necessary if doing merger - ffmpeg_fixup(downloader == 'HlsFD', + ext = info_dict.get('ext') + postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any(( + isinstance(pp, FFmpegVideoConvertorPP) + and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None) + ) for pp in self._pps['post_process']) + + if not postprocessed_by_ffmpeg: + ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a' + and info_dict.get('container') == 'm4a_dash', + 'writing DASH m4a. Only some players support this container', + FFmpegFixupM4aPP) + ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts') + or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None, 'Possible MPEG-TS in MP4 container or malformed AAC timestamps', FFmpegFixupM3u8PP) - ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD', + ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments', 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP) - ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP) - ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP) + ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP) + ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP) fixup() try: @@ -3168,15 +3481,10 @@ class YoutubeDL(object): return info_dict['__write_download_archive'] = True + assert info_dict is original_infodict # Make sure the info_dict was modified in-place if self.params.get('force_write_download_archive'): info_dict['__write_download_archive'] = True - - # Make sure the info_dict was modified in-place - assert info_dict is original_infodict - - max_downloads = self.params.get('max_downloads') - if max_downloads is not None and self._num_downloads >= int(max_downloads): - raise MaxDownloadsReached() + check_max_downloads() def __download_wrapper(self, func): @functools.wraps(func) @@ -3185,13 +3493,11 @@ class YoutubeDL(object): res = func(*args, **kwargs) except UnavailableVideoError as e: self.report_error(e) - except MaxDownloadsReached as e: - self.to_screen(f'[info] {e}') - raise except DownloadCancelled as e: self.to_screen(f'[info] {e}') if not self.params.get('break_per_url'): raise + self._num_downloads = 0 else: if self.params.get('dump_single_json', False): self.post_extract(res) @@ -3201,7 +3507,7 @@ class YoutubeDL(object): def download(self, url_list): """Download a given list of URLs.""" url_list = variadic(url_list) # Passing a single URL is a common mistake - outtmpl = self.outtmpl_dict['default'] + outtmpl = self.params['outtmpl']['default'] if (len(url_list) > 1 and outtmpl != '-' and '%' not in outtmpl @@ -3219,18 +3525,19 @@ class YoutubeDL(object): [info_filename], mode='r', openhook=fileinput.hook_encoded('utf-8'))) as f: # FileInput doesn't have a read method, we can't call json.load - info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True)) - try: - 
self.__download_wrapper(self.process_ie_result)(info, download=True) - except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e: - if not isinstance(e, EntryNotInPlaylist): - self.to_stderr('\r') - webpage_url = info.get('webpage_url') - if webpage_url is not None: + infos = [self.sanitize_info(info, self.params.get('clean_infojson', True)) + for info in variadic(json.loads('\n'.join(f)))] + for info in infos: + try: + self.__download_wrapper(self.process_ie_result)(info, download=True) + except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e: + if not isinstance(e, EntryNotInPlaylist): + self.to_stderr('\r') + webpage_url = info.get('webpage_url') + if webpage_url is None: + raise self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}') - return self.download([webpage_url]) - else: - raise + self.download([webpage_url]) return self._download_retcode @staticmethod @@ -3240,17 +3547,21 @@ class YoutubeDL(object): return info_dict info_dict.setdefault('epoch', int(time.time())) info_dict.setdefault('_type', 'video') - remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict - keep_keys = ['_type'] # Always keep this to facilitate load-info-json + info_dict.setdefault('_version', { + 'version': __version__, + 'current_git_head': current_git_head(), + 'release_git_head': RELEASE_GIT_HEAD, + 'repository': REPOSITORY, + }) + if remove_private_keys: - remove_keys |= { + reject = lambda k, v: v is None or k.startswith('__') or k in { 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries', - 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber', + 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url', + 'playlist_autonumber', '_format_sort_fields', } - reject = lambda k, v: k not in keep_keys and ( - k.startswith('_') or k in remove_keys or v is None) else: - reject = lambda k, v: k in remove_keys + reject = lambda k, v: False def filter_fn(obj): if isinstance(obj, dict): @@ -3269,6 +3580,17 @@ class YoutubeDL(object): ''' Alias of sanitize_info for backward compatibility ''' return YoutubeDL.sanitize_info(info_dict, actually_filter) + def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None): + for filename in set(filter(None, files_to_delete)): + if msg: + self.to_screen(msg % filename) + try: + os.remove(filename) + except OSError: + self.report_warning(f'Unable to delete file {filename}') + if filename in info.get('__files_to_move', []): # NB: Delete even if None + del info['__files_to_move'][filename] + @staticmethod def post_extract(info_dict): def actual_post_extract(info_dict): @@ -3277,14 +3599,8 @@ class YoutubeDL(object): actual_post_extract(video_dict or {}) return - post_extractor = info_dict.get('__post_extractor') or (lambda: {}) - extra = post_extractor().items() - info_dict.update(extra) - info_dict.pop('__post_extractor', None) - - original_infodict = info_dict.get('__original_infodict') or {} - original_infodict.update(extra) - original_infodict.pop('__post_extractor', None) + post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {}) + info_dict.update(post_extractor()) actual_post_extract(info_dict or {}) @@ -3307,18 +3623,13 @@ class YoutubeDL(object): for f in files_to_delete: infodict['__files_to_move'].setdefault(f, '') else: - for old_filename in set(files_to_delete): - self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename) - try: - 
os.remove(encodeFilename(old_filename)) - except (IOError, OSError): - self.report_warning('Unable to remove downloaded original file') - if old_filename in infodict['__files_to_move']: - del infodict['__files_to_move'][old_filename] + self._delete_downloaded_files( + *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)') return infodict def run_all_pps(self, key, info, *, additional_pps=None): - self._forceprint(key, info) + if key != 'video': + self._forceprint(key, info) for pp in (additional_pps or []) + self._pps[key]: info = self.run_pp(pp, info) return info @@ -3326,7 +3637,12 @@ class YoutubeDL(object): def pre_process(self, ie_info, key='pre_process', files_to_move=None): info = dict(ie_info) info['__files_to_move'] = files_to_move or {} - info = self.run_all_pps(key, info) + try: + info = self.run_all_pps(key, info) + except PostProcessingError as err: + msg = f'Preprocessing: {err}' + info.setdefault('__pending_error', msg) + self.report_error(msg, is_error=False) return info, info.pop('__files_to_move', None) def post_process(self, filename, info, files_to_move=None): @@ -3356,18 +3672,15 @@ class YoutubeDL(object): break else: return - return '%s %s' % (extractor.lower(), video_id) + return make_archive_id(extractor, video_id) def in_download_archive(self, info_dict): - fn = self.params.get('download_archive') - if fn is None: + if not self.archive: return False - vid_id = self._make_archive_id(info_dict) - if not vid_id: - return False # Incomplete video information - - return vid_id in self.archive + vid_ids = [self._make_archive_id(info_dict)] + vid_ids.extend(info_dict.get('_old_archive_ids') or []) + return any(id_ in self.archive for id_ in vid_ids) def record_download_archive(self, info_dict): fn = self.params.get('download_archive') @@ -3375,9 +3688,11 @@ class YoutubeDL(object): return vid_id = self._make_archive_id(info_dict) assert vid_id + self.write_debug(f'Adding to archive: {vid_id}') - with locked_file(fn, 'a', encoding='utf-8') as archive_file: - archive_file.write(vid_id + '\n') + if is_path_like(fn): + with locked_file(fn, 'a', encoding='utf-8') as archive_file: + archive_file.write(vid_id + '\n') self.archive.add(vid_id) @staticmethod @@ -3396,7 +3711,7 @@ class YoutubeDL(object): def _list_format_headers(self, *headers): if self.params.get('listformats_table', True) is not False: - return [self._format_screen(header, self.Styles.HEADERS) for header in headers] + return [self._format_out(header, self.Styles.HEADERS) for header in headers] return headers def _format_note(self, fdict): @@ -3459,11 +3774,17 @@ class YoutubeDL(object): res += '~' + format_bytes(fdict['filesize_approx']) return res - def render_formats_table(self, info_dict): - if not info_dict.get('formats') and not info_dict.get('url'): - return None + def _get_formats(self, info_dict): + if info_dict.get('formats') is None: + if info_dict.get('url') and info_dict.get('_type', 'video') == 'video': + return [info_dict] + return [] + return info_dict['formats'] - formats = info_dict.get('formats', [info_dict]) + def render_formats_table(self, info_dict): + formats = self._get_formats(info_dict) + if not formats: + return if not self.params.get('listformats_table', True) is not False: table = [ [ @@ -3471,46 +3792,61 @@ class YoutubeDL(object): format_field(f, 'ext'), self.format_resolution(f), self._format_note(f) - ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] + ] for f in formats if (f.get('preference') or 0) >= -1000] return 
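# Sketch of the widened archive membership test introduced above: besides the
# current archive ID, any IDs recorded under `_old_archive_ids` also count as
# already downloaded. make_archive_id mirrors the "<extractor> <id>" form the
# archive file uses; the sample values are illustrative.
def make_archive_id(extractor, video_id):
    return f'{extractor.lower()} {video_id}'

def in_archive(archive, info):
    vid_ids = [make_archive_id(info['extractor'], info['id'])]
    vid_ids.extend(info.get('_old_archive_ids') or [])
    return any(id_ in archive for id_ in vid_ids)

archive = {'youtube abc123'}
assert in_archive(archive, {'extractor': 'Youtube', 'id': 'abc123'})
assert in_archive(archive, {'extractor': 'SomeSite', 'id': 'x',
                            '_old_archive_ids': ['youtube abc123']})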
render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1) - delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True) + def simplified_codec(f, field): + assert field in ('acodec', 'vcodec') + codec = f.get(field) + if not codec: + return 'unknown' + elif codec != 'none': + return '.'.join(codec.split('.')[:4]) + + if field == 'vcodec' and f.get('acodec') == 'none': + return 'images' + elif field == 'acodec' and f.get('vcodec') == 'none': + return '' + return self._format_out('audio only' if field == 'vcodec' else 'video only', + self.Styles.SUPPRESS) + + delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True) table = [ [ - self._format_screen(format_field(f, 'format_id'), self.Styles.ID), + self._format_out(format_field(f, 'format_id'), self.Styles.ID), format_field(f, 'ext'), format_field(f, func=self.format_resolution, ignore=('audio only', 'images')), - format_field(f, 'fps', '\t%d'), + format_field(f, 'fps', '\t%d', func=round), format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), - delim, - format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes), - format_field(f, 'tbr', '\t%dk'), + format_field(f, 'audio_channels', '\t%s'), + delim, ( + format_field(f, 'filesize', ' \t%s', func=format_bytes) + or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes) + or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))), + None, self._format_out('~\t%s', self.Styles.SUPPRESS))), + format_field(f, 'tbr', '\t%dk', func=round), shorten_protocol_name(f.get('protocol', '')), delim, - format_field(f, 'vcodec', default='unknown').replace( - 'none', 'images' if f.get('acodec') == 'none' - else self._format_screen('audio only', self.Styles.SUPPRESS)), - format_field(f, 'vbr', '\t%dk'), - format_field(f, 'acodec', default='unknown').replace( - 'none', '' if f.get('vcodec') == 'none' - else self._format_screen('video only', self.Styles.SUPPRESS)), - format_field(f, 'abr', '\t%dk'), - format_field(f, 'asr', '\t%dHz'), - join_nonempty( - self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None, - format_field(f, 'language', '[%s]'), - join_nonempty(format_field(f, 'format_note'), - format_field(f, 'container', ignore=(None, f.get('ext'))), - delim=', '), - delim=' '), + simplified_codec(f, 'vcodec'), + format_field(f, 'vbr', '\t%dk', func=round), + simplified_codec(f, 'acodec'), + format_field(f, 'abr', '\t%dk', func=round), + format_field(f, 'asr', '\t%s', func=format_decimal_suffix), + join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty( + self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None, + (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe' + else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None), + format_field(f, 'format_note'), + format_field(f, 'container', ignore=(None, f.get('ext'))), + delim=', '), delim=' '), ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] header_line = self._list_format_headers( - 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO', + 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO', delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO') return render_table( header_line, table, hide_empty=True, - 
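# Two small pieces of the listing table above, re-derived for illustration.
# simplified_codec keeps at most four dot-separated fields of a codec string
# (the real helper additionally maps 'none' to 'audio only'/'video only'/
# 'images'; this sketch only trims). A missing filesize is estimated from
# duration and total bitrate: tbr is in kbit/s, multiplied by 1024/8 as above.
def simplified_codec(codec):
    return '.'.join(codec.split('.')[:4]) if codec and codec != 'none' else 'unknown'

def approx_filesize(duration, tbr_kbps):
    return int(duration * tbr_kbps * (1024 / 8))

assert simplified_codec('av01.0.08M.10.0.110') == 'av01.0.08M.10'
assert approx_filesize(60, 1000) == 7_680_000  # one minute at ~1 Mbit/s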
delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)) + delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True)) def render_thumbnails_table(self, info_dict): thumbnails = list(info_dict.get('thumbnails') or []) @@ -3518,7 +3854,7 @@ class YoutubeDL(object): return None return render_table( self._list_format_headers('ID', 'Width', 'Height', 'URL'), - [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]) + [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails]) def render_subtitles_table(self, video_id, subtitles): def _row(lang, formats): @@ -3551,28 +3887,39 @@ class YoutubeDL(object): def list_subtitles(self, video_id, subtitles, name='subtitles'): self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles) - def urlopen(self, req): - """ Start an HTTP download """ - if isinstance(req, compat_basestring): - req = sanitized_Request(req) - return self._opener.open(req, timeout=self._socket_timeout) - def print_debug_header(self): if not self.params.get('verbose'): return + from . import _IN_CLI # Must be delayed import + + # These imports can be slow. So import them only as needed + from .extractor.extractors import _LAZY_LOADER + from .extractor.extractors import ( + _PLUGIN_CLASSES as plugin_ies, + _PLUGIN_OVERRIDES as plugin_ie_overrides + ) + def get_encoding(stream): - ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__) + ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) + additional_info = [] + if os.environ.get('TERM', '').lower() == 'dumb': + additional_info.append('dumb') if not supports_terminal_sequences(stream): - from .compat import WINDOWS_VT_MODE - ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)' + from .utils import WINDOWS_VT_MODE # Must be imported locally + additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI') + if additional_info: + ret = f'{ret} ({",".join(additional_info)})' return ret - encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % ( + encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % ( locale.getpreferredencoding(), sys.getfilesystemencoding(), - get_encoding(self._screen_file), get_encoding(self._err_file), - self.get_encoding()) + self.get_encoding(), + ', '.join( + f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_ + if stream is not None and key != 'console') + ) logger = self.params.get('logger') if logger: @@ -3583,55 +3930,36 @@ class YoutubeDL(object): write_debug = lambda msg: self._write_string(f'[debug] {msg}\n') source = detect_variant() + if VARIANT not in (None, 'pip'): + source += '*' + klass = type(self) write_debug(join_nonempty( - 'yt-dlp version', __version__, - f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '', + f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version', + f'{CHANNEL}@{__version__}', + f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '', '' if source == 'unknown' else f'({source})', + '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}', delim=' ')) + + if not _IN_CLI: + write_debug(f'params: {self.params}') + if not _LAZY_LOADER: if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): write_debug('Lazy loading extractors is forcibly disabled') else: write_debug('Lazy loading extractors is disabled') - if plugin_extractors or plugin_postprocessors: - 
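# Sketch of the stream-encoding report assembled in print_debug_header()
# above: the encoding name is suffixed with hints such as "dumb" (TERM=dumb)
# or "No ANSI"/"No VT" when the stream cannot render terminal sequences.
# supports_ansi stands in for yt-dlp's supports_terminal_sequences().
import os
import sys

def describe_encoding(stream, supports_ansi):
    ret = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})'))
    notes = []
    if os.environ.get('TERM', '').lower() == 'dumb':
        notes.append('dumb')
    if not supports_ansi:
        notes.append('No ANSI')
    return f'{ret} ({",".join(notes)})' if notes else ret

print(describe_encoding(sys.stdout, supports_ansi=False))  # e.g. "utf-8 (No ANSI)"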
write_debug('Plugins: %s' % [ - '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}') - for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())]) - if self.params.get('compat_opts'): - write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts'))) + if self.params['compat_opts']: + write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts'])) - if source == 'source': - try: - sp = Popen( - ['git', 'rev-parse', '--short', 'HEAD'], - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - cwd=os.path.dirname(os.path.abspath(__file__))) - out, err = sp.communicate_or_kill() - out = out.decode().strip() - if re.match('[0-9a-f]+', out): - write_debug('Git HEAD: %s' % out) - except Exception: - try: - sys.exc_clear() - except Exception: - pass - - def python_implementation(): - impl_name = platform.python_implementation() - if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'): - return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3] - return impl_name - - write_debug('Python version %s (%s %s) - %s' % ( - platform.python_version(), - python_implementation(), - platform.architecture()[0], - platform_name())) + if current_git_head(): + write_debug(f'Git HEAD: {current_git_head()}') + write_debug(system_identifier()) exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self) ffmpeg_features = {key for key, val in ffmpeg_features.items() if val} if ffmpeg_features: - exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features) + exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features)) exe_versions['rtmpdump'] = rtmpdump_version() exe_versions['phantomjs'] = PhantomJSwrapper._version() @@ -3640,84 +3968,146 @@ class YoutubeDL(object): ) or 'none' write_debug('exe versions: %s' % exe_str) - from .downloader.websocket import has_websockets - from .postprocessor.embedthumbnail import has_mutagen - from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE + from .compat.compat_utils import get_package_info + from .dependencies import available_dependencies - lib_str = join_nonempty( - compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0], - SECRETSTORAGE_AVAILABLE and 'secretstorage', - has_mutagen and 'mutagen', - SQLITE_AVAILABLE and 'sqlite', - has_websockets and 'websockets', - delim=', ') or 'none' - write_debug('Optional libraries: %s' % lib_str) + write_debug('Optional libraries: %s' % (', '.join(sorted({ + join_nonempty(*get_package_info(m)) for m in available_dependencies.values() + })) or 'none')) - proxy_map = {} - for handler in self._opener.handlers: - if hasattr(handler, 'proxies'): - proxy_map.update(handler.proxies) - write_debug(f'Proxy map: {proxy_map}') + write_debug(f'Proxy map: {self.proxies}') + # write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}') + for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items(): + display_list = ['%s%s' % ( + klass.__name__, '' if klass.__name__ == name else f' as {name}') + for name, klass in plugins.items()] + if plugin_type == 'Extractor': + display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})' + for parent, plugins in plugin_ie_overrides.items()) + if not display_list: + continue + write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}') + + plugin_dirs = plugin_directories() + if plugin_dirs: + write_debug(f'Plugin directories: {plugin_dirs}') # 
Not implemented if False and self.params.get('call_home'): - ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8') + ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() write_debug('Public IP address: %s' % ipaddr) latest_version = self.urlopen( - 'https://yt-dl.org/latest/version').read().decode('utf-8') + 'https://yt-dl.org/latest/version').read().decode() if version_tuple(latest_version) > version_tuple(__version__): self.report_warning( 'You are using an outdated version (newest version: %s)! ' 'See https://yt-dl.org/update if you need help updating.' % latest_version) - def _setup_opener(self): - timeout_val = self.params.get('socket_timeout') - self._socket_timeout = 20 if timeout_val is None else float(timeout_val) - - opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser') - opts_cookiefile = self.params.get('cookiefile') + @functools.cached_property + def proxies(self): + """Global proxy configuration""" opts_proxy = self.params.get('proxy') - - self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self) - - cookie_processor = YoutubeDLCookieProcessor(self.cookiejar) if opts_proxy is not None: if opts_proxy == '': - proxies = {} - else: - proxies = {'http': opts_proxy, 'https': opts_proxy} + opts_proxy = '__noproxy__' + proxies = {'all': opts_proxy} else: - proxies = compat_urllib_request.getproxies() - # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805) + proxies = urllib.request.getproxies() + # compat. Set HTTPS_PROXY to __noproxy__ to revert if 'http' in proxies and 'https' not in proxies: proxies['https'] = proxies['http'] - proxy_handler = PerRequestProxyHandler(proxies) - debuglevel = 1 if self.params.get('debug_printtraffic') else 0 - https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) - ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) - redirect_handler = YoutubeDLRedirectHandler() - data_handler = compat_urllib_request_DataHandler() + return proxies - # When passing our own FileHandler instance, build_opener won't add the - # default FileHandler and allows us to disable the file protocol, which - # can be used for malicious purposes (see - # https://github.com/ytdl-org/youtube-dl/issues/8227) - file_handler = compat_urllib_request.FileHandler() + @functools.cached_property + def cookiejar(self): + """Global cookiejar instance""" + return load_cookies( + self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self) - def file_open(*args, **kwargs): - raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons') - file_handler.file_open = file_open + @property + def _opener(self): + """ + Get a urllib OpenerDirector from the Urllib handler (deprecated). + """ + self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()') + handler = self._request_director.handlers['Urllib'] + return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies) - opener = compat_urllib_request.build_opener( - proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler) + def urlopen(self, req): + """ Start an HTTP download """ + if isinstance(req, str): + req = Request(req) + elif isinstance(req, urllib.request.Request): + self.deprecation_warning( + 'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. 
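# Sketch of the proxy-resolution rules the new cached `proxies` property
# encodes: an explicit --proxy value maps to the special 'all' key, an empty
# string becomes '__noproxy__' (proxies disabled), and otherwise environment
# proxies are used with HTTPS inheriting the HTTP proxy when unset.
import urllib.request

def resolve_proxies(opts_proxy):
    if opts_proxy is not None:
        return {'all': opts_proxy or '__noproxy__'}
    proxies = urllib.request.getproxies()
    if 'http' in proxies and 'https' not in proxies:
        proxies['https'] = proxies['http']
    return proxies

assert resolve_proxies('') == {'all': '__noproxy__'}
assert resolve_proxies('socks5://127.0.0.1:9050') == {'all': 'socks5://127.0.0.1:9050'}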
' + 'Use yt_dlp.networking.common.Request instead.') + req = urllib_req_to_req(req) + assert isinstance(req, Request) - # Delete the default user-agent header, which would otherwise apply in - # cases where our custom HTTP handler doesn't come into play - # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details) - opener.addheaders = [] - self._opener = opener + # compat: Assume user:pass url params are basic auth + url, basic_auth_header = extract_basic_auth(req.url) + if basic_auth_header: + req.headers['Authorization'] = basic_auth_header + req.url = sanitize_url(url) + + clean_proxies(proxies=req.proxies, headers=req.headers) + clean_headers(req.headers) + + try: + return self._request_director.send(req) + except NoSupportingHandlers as e: + for ue in e.unsupported_errors: + if not (ue.handler and ue.msg): + continue + if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower(): + raise RequestError( + 'file:// URLs are disabled by default in yt-dlp for security reasons. ' + 'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue + raise + except SSLError as e: + if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e): + raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e + elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e): + raise RequestError( + 'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. ' + 'Try using --legacy-server-connect', cause=e) from e + raise + except HTTPError as e: # TODO: Remove in a future release + raise _CompatHTTPError(e) from e + + def build_request_director(self, handlers, preferences=None): + logger = _YDLLogger(self) + headers = self.params['http_headers'].copy() + proxies = self.proxies.copy() + clean_headers(headers) + clean_proxies(proxies, headers) + + director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic')) + for handler in handlers: + director.add_handler(handler( + logger=logger, + headers=headers, + cookiejar=self.cookiejar, + proxies=proxies, + prefer_system_certs='no-certifi' in self.params['compat_opts'], + verify=not self.params.get('nocheckcertificate'), + **traverse_obj(self.params, { + 'verbose': 'debug_printtraffic', + 'source_address': 'source_address', + 'timeout': 'socket_timeout', + 'legacy_ssl_support': 'legacyserverconnect', + 'enable_file_urls': 'enable_file_urls', + 'client_cert': { + 'client_certificate': 'client_certificate', + 'client_certificate_key': 'client_certificate_key', + 'client_certificate_password': 'client_certificate_password', + }, + }), + )) + director.preferences.update(preferences or []) + return director def encode(self, s): if isinstance(s, bytes): @@ -3736,7 +4126,7 @@ class YoutubeDL(object): return encoding def _write_info_json(self, label, ie_result, infofn, overwrite=None): - ''' Write infojson and returns True = written, False = skip, None = error ''' + ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error ''' if overwrite is None: overwrite = self.params.get('overwrites', True) if not self.params.get('writeinfojson'): @@ -3748,14 +4138,15 @@ class YoutubeDL(object): return None elif not overwrite and os.path.exists(infofn): self.to_screen(f'[info] {label.title()} metadata is already present') - else: - self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}') - try: - write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn) - except 
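# The urlopen() compat step above strips user:pass credentials from the URL
# and turns them into a Basic Authorization header. extract_basic_auth below
# is a simplified stand-in for the utility of the same name.
import base64
from urllib.parse import urlsplit, urlunsplit

def extract_basic_auth(url):
    parts = urlsplit(url)
    if parts.username is None:
        return url, None
    token = base64.b64encode(
        f'{parts.username}:{parts.password or ""}'.encode()).decode()
    netloc = parts.hostname + (f':{parts.port}' if parts.port else '')
    return urlunsplit(parts._replace(netloc=netloc)), f'Basic {token}'

url, auth = extract_basic_auth('https://user:pass@example.com/feed')
assert url == 'https://example.com/feed'
assert auth == 'Basic dXNlcjpwYXNz'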
(OSError, IOError): - self.report_error(f'Cannot write {label} metadata to JSON file {infofn}') - return None - return True + return 'exists' + + self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}') + try: + write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn) + return True + except OSError: + self.report_error(f'Cannot write {label} metadata to JSON file {infofn}') + return None def _write_description(self, label, ie_result, descfn): ''' Write description and returns True = written, False = skip, None = error ''' @@ -3769,14 +4160,14 @@ class YoutubeDL(object): elif not self.params.get('overwrites', True) and os.path.exists(descfn): self.to_screen(f'[info] {label.title()} description is already present') elif ie_result.get('description') is None: - self.report_warning(f'There\'s no {label} description to write') + self.to_screen(f'[info] There\'s no {label} description to write') return False else: try: self.to_screen(f'[info] Writing {label} description to: {descfn}') - with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: + with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: descfile.write(ie_result['description']) - except (OSError, IOError): + except OSError: self.report_error(f'Cannot write {label} description file {descfn}') return None return True @@ -3785,15 +4176,18 @@ class YoutubeDL(object): ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' ret = [] subtitles = info_dict.get('requested_subtitles') - if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): + if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE return ret - + elif not subtitles: + self.to_screen('[info] There are no subtitles for the requested languages') + return ret sub_filename_base = self.prepare_filename(info_dict, 'subtitle') if not sub_filename_base: self.to_screen('[info] Skipping writing video subtitles') return ret + for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) @@ -3810,12 +4204,12 @@ class YoutubeDL(object): try: # Use newline='' to prevent conversion of newline characters # See https://github.com/ytdl-org/youtube-dl/issues/10268 - with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile: + with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile: subfile.write(sub_info['data']) sub_info['filepath'] = sub_filename ret.append((sub_filename, sub_filename_final)) continue - except (OSError, IOError): + except OSError: self.report_error(f'Cannot write video subtitles file {sub_filename}') return None @@ -3826,9 +4220,12 @@ class YoutubeDL(object): sub_info['filepath'] = sub_filename ret.append((sub_filename, sub_filename_final)) except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: + msg = f'Unable to download video subtitles for {sub_lang!r}: {err}' if self.params.get('ignoreerrors') is not True: # False or 'only_download' - raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err) - self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}') + if not self.params.get('ignoreerrors'): + 
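# The rewritten metadata writers above share a small result protocol:
# True = written, 'exists' = already present and kept, False = skipped by
# configuration, None = error. A condensed consumer of that protocol:
def describe_write_result(result, label):
    if result is None:
        return f'error while writing {label}'
    if result == 'exists':
        return f'{label} already present'
    return f'{label} written' if result else f'{label} skipped'

assert describe_write_result('exists', 'info json') == 'info json already present'
assert describe_write_result(True, 'description') == 'description written'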
self.report_error(msg) + raise DownloadError(msg) + self.report_warning(msg) return ret def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None): @@ -3837,6 +4234,9 @@ class YoutubeDL(object): thumbnails, ret = [], [] if write_all or self.params.get('writethumbnail', False): thumbnails = info_dict.get('thumbnails') or [] + if not thumbnails: + self.to_screen(f'[info] There are no {label} thumbnails to download') + return ret multiple = write_all and len(thumbnails) > 1 if thumb_filename_base is None: @@ -3860,15 +4260,18 @@ class YoutubeDL(object): else: self.to_screen(f'[info] Downloading {thumb_display_id} ...') try: - uf = self.urlopen(t['url']) + uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {}))) self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}') with open(encodeFilename(thumb_filename), 'wb') as thumbf: shutil.copyfileobj(uf, thumbf) ret.append((thumb_filename, thumb_filename_final)) t['filepath'] = thumb_filename except network_exceptions as err: + if isinstance(err, HTTPError) and err.status == 404: + self.to_screen(f'[info] {thumb_display_id.title()} does not exist') + else: + self.report_warning(f'Unable to download {thumb_display_id}: {err}') thumbnails.pop(idx) - self.report_warning(f'Unable to download {thumb_display_id}: {err}') if ret and not write_all: break return ret diff --git a/plugins/youtube_download/yt_dlp/__init__.py b/plugins/youtube_download/yt_dlp/__init__.py index b93f47e..991dbcd 100644 --- a/plugins/youtube_download/yt_dlp/__init__.py +++ b/plugins/youtube_download/yt_dlp/__init__.py @@ -1,57 +1,30 @@ -#!/usr/bin/env python3 -# coding: utf-8 - -f'You are using an unsupported version of Python. Only Python versions 3.6 and above are supported by yt-dlp' # noqa: F541 +try: + import contextvars # noqa: F401 +except Exception: + raise Exception( + f'You are using an unsupported version of Python. 
Only Python versions 3.7 and above are supported by yt-dlp') # noqa: F541 __license__ = 'Public Domain' -import codecs -import io +import collections +import getpass import itertools +import optparse import os -import random import re import sys +import traceback -from .options import ( - parseOpts, -) -from .compat import ( - compat_getpass, - compat_os_name, - compat_shlex_quote, - workaround_optparse_bug9161, -) +from .compat import compat_shlex_quote from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS -from .utils import ( - DateRange, - decodeOption, - DownloadCancelled, - DownloadError, - error_to_compat_str, - expand_path, - GeoUtils, - float_or_none, - int_or_none, - match_filter_func, - parse_duration, - preferredencoding, - read_batch_urls, - render_table, - SameFileError, - setproctitle, - std_headers, - write_string, -) -from .update import run_update -from .downloader import ( - FileDownloader, -) -from .extractor import gen_extractors, list_extractors -from .extractor.common import InfoExtractor +from .downloader.external import get_external_downloader +from .extractor import list_extractor_classes from .extractor.adobepass import MSO_INFO +from .options import parseOpts from .postprocessor import ( FFmpegExtractAudioPP, + FFmpegMergerPP, + FFmpegPostProcessor, FFmpegSubtitlesConvertorPP, FFmpegThumbnailsConvertorPP, FFmpegVideoConvertorPP, @@ -59,256 +32,106 @@ from .postprocessor import ( MetadataFromFieldPP, MetadataParserPP, ) +from .update import Updater +from .utils import ( + NO_DEFAULT, + POSTPROCESS_WHEN, + DateRange, + DownloadCancelled, + DownloadError, + FormatSorter, + GeoUtils, + PlaylistEntries, + SameFileError, + decodeOption, + download_range_func, + expand_path, + float_or_none, + format_field, + int_or_none, + match_filter_func, + parse_bytes, + parse_duration, + preferredencoding, + read_batch_urls, + read_stdin, + render_table, + setproctitle, + traverse_obj, + variadic, + write_string, +) +from .utils.networking import std_headers from .YoutubeDL import YoutubeDL +_IN_CLI = False -def _real_main(argv=None): - # Compatibility fixes for Windows - if sys.platform == 'win32': - # https://github.com/ytdl-org/youtube-dl/issues/820 - codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) - workaround_optparse_bug9161() +def _exit(status=0, *args): + for msg in args: + sys.stderr.write(msg) + raise SystemExit(status) - setproctitle('yt-dlp') - - parser, opts, args = parseOpts(argv) - warnings, deprecation_warnings = [], [] - - # Set user agent - if opts.user_agent is not None: - std_headers['User-Agent'] = opts.user_agent - - # Set referer - if opts.referer is not None: - std_headers['Referer'] = opts.referer - - # Custom HTTP headers - std_headers.update(opts.headers) - - # Dump user agent - if opts.dump_user_agent: - write_string(std_headers['User-Agent'] + '\n', out=sys.stdout) - sys.exit(0) +def get_urls(urls, batchfile, verbose): # Batch file verification batch_urls = [] - if opts.batchfile is not None: + if batchfile is not None: try: - if opts.batchfile == '-': - write_string('Reading URLs from stdin - EOF (%s) to end:\n' % ( - 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D')) - batchfd = sys.stdin - else: - batchfd = io.open( - expand_path(opts.batchfile), - 'r', encoding='utf-8', errors='ignore') - batch_urls = read_batch_urls(batchfd) - if opts.verbose: + batch_urls = read_batch_urls( + read_stdin('URLs') if batchfile == '-' + else open(expand_path(batchfile), encoding='utf-8', errors='ignore')) + if verbose: 
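# The new version gate at the top of __init__.py works by importing a stdlib
# module that only exists on supported interpreters: contextvars was added in
# Python 3.7, so the import fails on exactly the versions being rejected.
# A generalized sketch of the same trick:
def require_python_37():
    try:
        import contextvars  # noqa: F401  (stdlib since 3.7)
    except ImportError as exc:
        raise SystemExit('Python 3.7 or above is required') from exc

require_python_37()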
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') - except IOError: - sys.exit('ERROR: batch file %s could not be read' % opts.batchfile) - all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls + except OSError: + _exit(f'ERROR: batch file {batchfile} could not be read') _enc = preferredencoding() - all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] + return [ + url.strip().decode(_enc, 'ignore') if isinstance(url, bytes) else url.strip() + for url in batch_urls + urls] + +def print_extractor_information(opts, urls): + out = '' if opts.list_extractors: - for ie in list_extractors(opts.age_limit): - write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n', out=sys.stdout) - matchedUrls = [url for url in all_urls if ie.suitable(url)] - for mu in matchedUrls: - write_string(' ' + mu + '\n', out=sys.stdout) - sys.exit(0) - if opts.list_extractor_descriptions: - for ie in list_extractors(opts.age_limit): - if not ie.working(): - continue - desc = getattr(ie, 'IE_DESC', ie.IE_NAME) - if desc is False: - continue - if getattr(ie, 'SEARCH_KEY', None) is not None: - _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow') - _COUNTS = ('', '5', '10', 'all') - desc += f'; "{ie.SEARCH_KEY}:" prefix (Example: "{ie.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(_SEARCHES)}")' - write_string(desc + '\n', out=sys.stdout) - sys.exit(0) - if opts.ap_list_mso: - table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()] - write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout) - sys.exit(0) + # Importing GenericIE is currently slow since it imports YoutubeIE + from .extractor.generic import GenericIE - # Conflicting, missing and erroneous options - if opts.format == 'best': - warnings.append('.\n '.join(( - '"-f best" selects the best pre-merged format which is often not the best option', - 'To let yt-dlp download and merge the best available formats, simply do not pass any format selection', - 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning'))) - if opts.exec_cmd.get('before_dl') and opts.exec_before_dl_cmd: - parser.error('using "--exec-before-download" conflicts with "--exec before_dl:"') - if opts.usenetrc and (opts.username is not None or opts.password is not None): - parser.error('using .netrc conflicts with giving username/password') - if opts.password is not None and opts.username is None: - parser.error('account username missing\n') - if opts.ap_password is not None and opts.ap_username is None: - parser.error('TV Provider account username missing\n') - if opts.autonumber_size is not None: - if opts.autonumber_size <= 0: - parser.error('auto number size must be positive') - if opts.autonumber_start is not None: - if opts.autonumber_start < 0: - parser.error('auto number start must be positive or 0') - if opts.username is not None and opts.password is None: - opts.password = compat_getpass('Type account password and press [Return]: ') - if opts.ap_username is not None and opts.ap_password is None: - opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ') - if opts.ratelimit is not None: - numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) - if numeric_limit is None: - 
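# Sketch of the URL bookkeeping in print_extractor_information() above: each
# URL is marked once a specific extractor claims it, and the generic extractor
# is credited with whatever remains unmatched. The extractors here are
# illustrative stand-ins, not real IE classes.
def match_urls(urls, extractors, generic_name='generic'):
    claimed = dict.fromkeys(urls, False)
    report = {}
    for name, suitable in extractors:
        matched = [u for u in claimed if suitable(u)]
        claimed.update(dict.fromkeys(matched, True))
        report[name] = matched
    report[generic_name] = [u for u, done in claimed.items() if not done]
    return report

report = match_urls(
    ['https://youtu.be/x', 'https://example.com/a.mp4'],
    [('youtube', lambda u: 'youtu' in u)])
assert report == {'youtube': ['https://youtu.be/x'],
                  'generic': ['https://example.com/a.mp4']}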
parser.error('invalid rate limit specified') - opts.ratelimit = numeric_limit - if opts.throttledratelimit is not None: - numeric_limit = FileDownloader.parse_bytes(opts.throttledratelimit) - if numeric_limit is None: - parser.error('invalid rate limit specified') - opts.throttledratelimit = numeric_limit - if opts.min_filesize is not None: - numeric_limit = FileDownloader.parse_bytes(opts.min_filesize) - if numeric_limit is None: - parser.error('invalid min_filesize specified') - opts.min_filesize = numeric_limit - if opts.max_filesize is not None: - numeric_limit = FileDownloader.parse_bytes(opts.max_filesize) - if numeric_limit is None: - parser.error('invalid max_filesize specified') - opts.max_filesize = numeric_limit - if opts.sleep_interval is not None: - if opts.sleep_interval < 0: - parser.error('sleep interval must be positive or 0') - if opts.max_sleep_interval is not None: - if opts.max_sleep_interval < 0: - parser.error('max sleep interval must be positive or 0') - if opts.sleep_interval is None: - parser.error('min sleep interval must be specified, use --min-sleep-interval') - if opts.max_sleep_interval < opts.sleep_interval: - parser.error('max sleep interval must be greater than or equal to min sleep interval') + urls = dict.fromkeys(urls, False) + for ie in list_extractor_classes(opts.age_limit): + out += ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n' + if ie == GenericIE: + matched_urls = [url for url, matched in urls.items() if not matched] + else: + matched_urls = tuple(filter(ie.suitable, urls.keys())) + urls.update(dict.fromkeys(matched_urls, True)) + out += ''.join(f' {url}\n' for url in matched_urls) + elif opts.list_extractor_descriptions: + _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow') + out = '\n'.join( + ie.description(markdown=False, search_examples=_SEARCHES) + for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False) + elif opts.ap_list_mso: + out = 'Supported TV Providers:\n%s\n' % render_table( + ['mso', 'mso name'], + [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]) else: - opts.max_sleep_interval = opts.sleep_interval - if opts.sleep_interval_subtitles is not None: - if opts.sleep_interval_subtitles < 0: - parser.error('subtitles sleep interval must be positive or 0') - if opts.sleep_interval_requests is not None: - if opts.sleep_interval_requests < 0: - parser.error('requests sleep interval must be positive or 0') - if opts.ap_mso and opts.ap_mso not in MSO_INFO: - parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers') - if opts.overwrites: # --yes-overwrites implies --no-continue - opts.continue_dl = False - if opts.concurrent_fragment_downloads <= 0: - parser.error('Concurrent fragments must be positive') - if opts.wait_for_video is not None: - min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None]) - if min_wait is None or (max_wait is None and '-' in opts.wait_for_video): - parser.error('Invalid time range to wait') - elif max_wait is not None and max_wait < min_wait: - parser.error('Minimum time range to wait must not be longer than the maximum') - opts.wait_for_video = (min_wait, max_wait) + return False + write_string(out, out=sys.stdout) + return True - def parse_retries(retries, name=''): - if retries in ('inf', 'infinite'): - parsed_retries = float('inf') - else: - try: - 
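# The sleep-interval checks above encode three rules: --max-sleep-interval
# requires --min-sleep-interval, max defaults to min when omitted, and max
# must be >= min. Condensed into one helper for illustration:
def resolve_sleep_interval(min_s, max_s):
    if min_s is None:
        if max_s is not None:
            raise ValueError('min sleep interval must be specified, use --min-sleep-interval')
        return None, None
    if max_s is None:
        return min_s, min_s
    if max_s < min_s:
        raise ValueError('max sleep interval must be greater than or equal to min sleep interval')
    return min_s, max_s

assert resolve_sleep_interval(3, None) == (3, 3)
assert resolve_sleep_interval(3, 10) == (3, 10)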
parsed_retries = int(retries) - except (TypeError, ValueError): - parser.error('invalid %sretry count specified' % name) - return parsed_retries - if opts.retries is not None: - opts.retries = parse_retries(opts.retries) - if opts.file_access_retries is not None: - opts.file_access_retries = parse_retries(opts.file_access_retries, 'file access ') - if opts.fragment_retries is not None: - opts.fragment_retries = parse_retries(opts.fragment_retries, 'fragment ') - if opts.extractor_retries is not None: - opts.extractor_retries = parse_retries(opts.extractor_retries, 'extractor ') - if opts.buffersize is not None: - numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) - if numeric_buffersize is None: - parser.error('invalid buffer size specified') - opts.buffersize = numeric_buffersize - if opts.http_chunk_size is not None: - numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size) - if not numeric_chunksize: - parser.error('invalid http chunk size specified') - opts.http_chunk_size = numeric_chunksize - if opts.playliststart <= 0: - raise parser.error('Playlist start must be positive') - if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart: - raise parser.error('Playlist end must be greater than playlist start') - if opts.extractaudio: - opts.audioformat = opts.audioformat.lower() - if opts.audioformat not in ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS): - parser.error('invalid audio format specified') - if opts.audioquality: - opts.audioquality = opts.audioquality.strip('k').strip('K') - audioquality = int_or_none(float_or_none(opts.audioquality)) # int_or_none prevents inf, nan - if audioquality is None or audioquality < 0: - parser.error('invalid audio quality specified') - if opts.recodevideo is not None: - opts.recodevideo = opts.recodevideo.replace(' ', '') - if not re.match(FFmpegVideoConvertorPP.FORMAT_RE, opts.recodevideo): - parser.error('invalid video remux format specified') - if opts.remuxvideo is not None: - opts.remuxvideo = opts.remuxvideo.replace(' ', '') - if not re.match(FFmpegVideoRemuxerPP.FORMAT_RE, opts.remuxvideo): - parser.error('invalid video remux format specified') - if opts.convertsubtitles is not None: - if opts.convertsubtitles not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS: - parser.error('invalid subtitle format specified') - if opts.convertthumbnails is not None: - if opts.convertthumbnails not in FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS: - parser.error('invalid thumbnail format specified') - if opts.cookiesfrombrowser is not None: - mobj = re.match(r'(?P [^+:]+)(\s*\+\s*(?P [^:]+))?(\s*:(?P .+))?', opts.cookiesfrombrowser) - if mobj is None: - parser.error(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}') - browser_name, keyring, profile = mobj.group('name', 'keyring', 'profile') - browser_name = browser_name.lower() - if browser_name not in SUPPORTED_BROWSERS: - parser.error(f'unsupported browser specified for cookies: "{browser_name}". ' - f'Supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}') - if keyring is not None: - keyring = keyring.upper() - if keyring not in SUPPORTED_KEYRINGS: - parser.error(f'unsupported keyring specified for cookies: "{keyring}". 
' - f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}') - opts.cookiesfrombrowser = (browser_name, profile, keyring) - geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country - if geo_bypass_code is not None: - try: - GeoUtils.random_ipv4(geo_bypass_code) - except Exception: - parser.error('unsupported geo-bypass country or ip-block') - - if opts.date is not None: - date = DateRange.day(opts.date) - else: - date = DateRange(opts.dateafter, opts.datebefore) - - compat_opts = opts.compat_opts - - def report_conflict(arg1, arg2): - warnings.append(f'{arg2} is ignored since {arg1} was given') +def set_compat_opts(opts): def _unused_compat_opt(name): - if name not in compat_opts: + if name not in opts.compat_opts: return False - compat_opts.discard(name) - compat_opts.update(['*%s' % name]) + opts.compat_opts.discard(name) + opts.compat_opts.update(['*%s' % name]) return True def set_default_compat(compat_name, opt_name, default=True, remove_compat=True): attr = getattr(opts, opt_name) - if compat_name in compat_opts: + if compat_name in opts.compat_opts: if attr is None: setattr(opts, opt_name, not default) return True @@ -323,36 +146,160 @@ def _real_main(argv=None): set_default_compat('abort-on-error', 'ignoreerrors', 'only_download') set_default_compat('no-playlist-metafiles', 'allow_playlist_files') set_default_compat('no-clean-infojson', 'clean_infojson') - if 'no-attach-info-json' in compat_opts: + if 'no-attach-info-json' in opts.compat_opts: if opts.embed_infojson: _unused_compat_opt('no-attach-info-json') else: opts.embed_infojson = False - if 'format-sort' in compat_opts: - opts.format_sort.extend(InfoExtractor.FormatSort.ytdl_default) + if 'format-sort' in opts.compat_opts: + opts.format_sort.extend(FormatSorter.ytdl_default) _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False) if _video_multistreams_set is False and _audio_multistreams_set is False: _unused_compat_opt('multistreams') - outtmpl_default = opts.outtmpl.get('default') - if outtmpl_default == '': - outtmpl_default, opts.skip_download = None, True - del opts.outtmpl['default'] - if opts.useid: - if outtmpl_default is None: - outtmpl_default = opts.outtmpl['default'] = '%(id)s.%(ext)s' - else: - report_conflict('--output', '--id') - if 'filename' in compat_opts: - if outtmpl_default is None: - outtmpl_default = opts.outtmpl['default'] = '%(title)s-%(id)s.%(ext)s' + if 'filename' in opts.compat_opts: + if opts.outtmpl.get('default') is None: + opts.outtmpl.update({'default': '%(title)s-%(id)s.%(ext)s'}) else: _unused_compat_opt('filename') + +def validate_options(opts): + def validate(cndn, name, value=None, msg=None): + if cndn: + return True + raise ValueError((msg or 'invalid {name} "{value}" given').format(name=name, value=value)) + + def validate_in(name, value, items, msg=None): + return validate(value is None or value in items, name, value, msg) + + def validate_regex(name, value, regex): + return validate(value is None or re.match(regex, value), name, value) + + def validate_positive(name, value, strict=False): + return validate(value is None or value > 0 or (not strict and value == 0), + name, value, '{name} "{value}" must be positive' + ('' if strict else ' or 0')) + + def validate_minmax(min_val, max_val, min_name, max_name=None): + if max_val is None or min_val is None or max_val >= min_val: + 
return + if not max_name: + min_name, max_name = f'min {min_name}', f'max {min_name}' + raise ValueError(f'{max_name} "{max_val}" must be must be greater than or equal to {min_name} "{min_val}"') + + # Usernames and passwords + validate(sum(map(bool, (opts.usenetrc, opts.netrc_cmd, opts.username))) <= 1, '.netrc', + msg='{name}, netrc command and username/password are mutually exclusive options') + validate(opts.password is None or opts.username is not None, 'account username', msg='{name} missing') + validate(opts.ap_password is None or opts.ap_username is not None, + 'TV Provider account username', msg='{name} missing') + validate_in('TV Provider', opts.ap_mso, MSO_INFO, + 'Unsupported {name} "{value}", use --ap-list-mso to get a list of supported TV Providers') + + # Numbers + validate_positive('autonumber start', opts.autonumber_start) + validate_positive('autonumber size', opts.autonumber_size, True) + validate_positive('concurrent fragments', opts.concurrent_fragment_downloads, True) + validate_positive('playlist start', opts.playliststart, True) + if opts.playlistend != -1: + validate_minmax(opts.playliststart, opts.playlistend, 'playlist start', 'playlist end') + + # Time ranges + validate_positive('subtitles sleep interval', opts.sleep_interval_subtitles) + validate_positive('requests sleep interval', opts.sleep_interval_requests) + validate_positive('sleep interval', opts.sleep_interval) + validate_positive('max sleep interval', opts.max_sleep_interval) + if opts.sleep_interval is None: + validate( + opts.max_sleep_interval is None, 'min sleep interval', + msg='{name} must be specified; use --min-sleep-interval') + elif opts.max_sleep_interval is None: + opts.max_sleep_interval = opts.sleep_interval + else: + validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval') + + if opts.wait_for_video is not None: + min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None]) + validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video), + 'time range to wait for video', opts.wait_for_video) + validate_minmax(min_wait, max_wait, 'time range to wait for video') + opts.wait_for_video = (min_wait, max_wait) + + # Format sort + for f in opts.format_sort: + validate_regex('format sorting', f, FormatSorter.regex) + + # Postprocessor formats + validate_regex('merge output format', opts.merge_output_format, + r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS)))) + validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE) + validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS) + validate_regex('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.FORMAT_RE) + validate_regex('recode video format', opts.recodevideo, FFmpegVideoConvertorPP.FORMAT_RE) + validate_regex('remux video format', opts.remuxvideo, FFmpegVideoRemuxerPP.FORMAT_RE) + if opts.audioquality: + opts.audioquality = opts.audioquality.strip('k').strip('K') + # int_or_none prevents inf, nan + validate_positive('audio quality', int_or_none(float_or_none(opts.audioquality), default=0)) + + # Retries + def parse_retries(name, value): + if value is None: + return None + elif value in ('inf', 'infinite'): + return float('inf') + try: + return int(value) + except (TypeError, ValueError): + validate(False, f'{name} retry count', value) + + opts.retries = parse_retries('download', opts.retries) + opts.fragment_retries = parse_retries('fragment', 
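# The validator helpers introduced above replace dozens of bespoke
# parser.error() branches with declarative one-liners. A condensed,
# standalone version showing the failure mode:
def validate(cndn, name, value=None, msg=None):
    if not cndn:
        raise ValueError((msg or 'invalid {name} "{value}" given').format(name=name, value=value))

def validate_positive(name, value, strict=False):
    validate(value is None or value > 0 or (not strict and value == 0),
             name, value, '{name} "{value}" must be positive' + ('' if strict else ' or 0'))

validate_positive('autonumber start', 0)  # zero is allowed unless strict
try:
    validate_positive('concurrent fragments', 0, strict=True)
except ValueError as err:
    assert str(err) == 'concurrent fragments "0" must be positive'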
opts.fragment_retries) + opts.extractor_retries = parse_retries('extractor', opts.extractor_retries) + opts.file_access_retries = parse_retries('file access', opts.file_access_retries) + + # Retry sleep function + def parse_sleep_func(expr): + NUMBER_RE = r'\d+(?:\.\d+)?' + op, start, limit, step, *_ = tuple(re.fullmatch( + rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?', + expr.strip()).groups()) + (None, None) + + if op == 'exp': + return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf')) + else: + default_step = start if op or limit else 0 + return lambda n: min(float(start) + float(step or default_step) * n, float(limit or 'inf')) + + for key, expr in opts.retry_sleep.items(): + if not expr: + del opts.retry_sleep[key] + continue + try: + opts.retry_sleep[key] = parse_sleep_func(expr) + except AttributeError: + raise ValueError(f'invalid {key} retry sleep expression {expr!r}') + + # Bytes + def validate_bytes(name, value): + if value is None: + return None + numeric_limit = parse_bytes(value) + validate(numeric_limit is not None, 'rate limit', value) + return numeric_limit + + opts.ratelimit = validate_bytes('rate limit', opts.ratelimit) + opts.throttledratelimit = validate_bytes('throttled rate limit', opts.throttledratelimit) + opts.min_filesize = validate_bytes('min filesize', opts.min_filesize) + opts.max_filesize = validate_bytes('max filesize', opts.max_filesize) + opts.buffersize = validate_bytes('buffer size', opts.buffersize) + opts.http_chunk_size = validate_bytes('http chunk size', opts.http_chunk_size) + + # Output templates def validate_outtmpl(tmpl, msg): err = YoutubeDL.validate_outtmpl(tmpl) if err: - parser.error('invalid %s %r: %s' % (msg, tmpl, error_to_compat_str(err))) + raise ValueError(f'invalid {msg} "{tmpl}": {err}') for k, tmpl in opts.outtmpl.items(): validate_outtmpl(tmpl, f'{k} output template') @@ -361,32 +308,93 @@ def _real_main(argv=None): validate_outtmpl(tmpl, f'{type_} print template') for type_, tmpl_list in opts.print_to_file.items(): for tmpl, file in tmpl_list: - validate_outtmpl(tmpl, f'{type_} print-to-file template') - validate_outtmpl(file, f'{type_} print-to-file filename') + validate_outtmpl(tmpl, f'{type_} print to file template') + validate_outtmpl(file, f'{type_} print to file filename') validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title') for k, tmpl in opts.progress_template.items(): k = f'{k[:-6]} console title' if '-title' in k else f'{k} progress' validate_outtmpl(tmpl, f'{k} template') - if opts.extractaudio and not opts.keepvideo and opts.format is None: - opts.format = 'bestaudio/best' + outtmpl_default = opts.outtmpl.get('default') + if outtmpl_default == '': + opts.skip_download = None + del opts.outtmpl['default'] - if outtmpl_default is not None and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio: - parser.error('Cannot download a video and extract audio into the same' - ' file! 
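# Worked examples of the retry-sleep grammar parsed above, which is
# "[linear|exp=]start[:limit][:step]". These lambdas reproduce what
# parse_sleep_func builds for two concrete expressions:
linear = lambda n: min(1.0 + 2.0 * n, 30.0)   # "linear=1:30:2": +2s per retry, capped at 30s
exp = lambda n: min(1.0 * (2.0 ** n), 30.0)   # "exp=1:30": doubles per retry, capped at 30s

assert [linear(n) for n in range(4)] == [1.0, 3.0, 5.0, 7.0]
assert [exp(n) for n in range(6)] == [1.0, 2.0, 4.0, 8.0, 16.0, 30.0]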
Use "{0}.%(ext)s" instead of "{0}" as the output' - ' template'.format(outtmpl_default)) + def parse_chapters(name, value, advanced=False): + parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x) + TIMESTAMP_RE = r'''(?x)(?: + (?P -?)(?P [^-]+) + )?\s*-\s*(?: + (?P -?)(?P [^-]+) + )?''' - for f in opts.format_sort: - if re.match(InfoExtractor.FormatSort.regex, f) is None: - parser.error('invalid format sort string "%s" specified' % f) + chapters, ranges, from_url = [], [], False + for regex in value or []: + if advanced and regex == '*from-url': + from_url = True + continue + elif not regex.startswith('*'): + try: + chapters.append(re.compile(regex)) + except re.error as err: + raise ValueError(f'invalid {name} regex "{regex}" - {err}') + continue + for range_ in map(str.strip, regex[1:].split(',')): + mobj = range_ != '-' and re.fullmatch(TIMESTAMP_RE, range_) + dur = mobj and [parse_timestamp(mobj.group('start') or '0'), parse_timestamp(mobj.group('end') or 'inf')] + signs = mobj and (mobj.group('start_sign'), mobj.group('end_sign')) + + err = None + if None in (dur or [None]): + err = 'Must be of the form "*start-end"' + elif not advanced and any(signs): + err = 'Negative timestamps are not allowed' + else: + dur[0] *= -1 if signs[0] else 1 + dur[1] *= -1 if signs[1] else 1 + if dur[1] == float('-inf'): + err = '"-inf" is not a valid end' + if err: + raise ValueError(f'invalid {name} time range "{regex}". {err}') + ranges.append(dur) + + return chapters, ranges, from_url + + opts.remove_chapters, opts.remove_ranges, _ = parse_chapters('--remove-chapters', opts.remove_chapters) + opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges, True)) + + # Cookies from browser + if opts.cookiesfrombrowser: + container = None + mobj = re.fullmatch(r'''(?x) + (?P [^+:]+) + (?:\s*\+\s*(?P [^:]+))? + (?:\s*:\s*(?!:)(?P .+?))? + (?:\s*::\s*(?P .+))? + ''', opts.cookiesfrombrowser) + if mobj is None: + raise ValueError(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}') + browser_name, keyring, profile, container = mobj.group('name', 'keyring', 'profile', 'container') + browser_name = browser_name.lower() + if browser_name not in SUPPORTED_BROWSERS: + raise ValueError(f'unsupported browser specified for cookies: "{browser_name}". ' + f'Supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}') + if keyring is not None: + keyring = keyring.upper() + if keyring not in SUPPORTED_KEYRINGS: + raise ValueError(f'unsupported keyring specified for cookies: "{keyring}". 
' + f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}') + opts.cookiesfrombrowser = (browser_name, profile, keyring, container) + + # MetadataParser def metadataparser_actions(f): if isinstance(f, str): cmd = '--parse-metadata %s' % compat_shlex_quote(f) try: actions = [MetadataFromFieldPP.to_action(f)] except Exception as err: - parser.error(f'{cmd} is invalid; {err}') + raise ValueError(f'{cmd} is invalid; {err}') else: cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f)) actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(',')) @@ -395,237 +403,137 @@ def _real_main(argv=None): try: MetadataParserPP.validate_action(*action) except Exception as err: - parser.error(f'{cmd} is invalid; {err}') + raise ValueError(f'{cmd} is invalid; {err}') yield action - if opts.parse_metadata is None: - opts.parse_metadata = [] if opts.metafromtitle is not None: - opts.parse_metadata.append('title:%s' % opts.metafromtitle) - opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, opts.parse_metadata))) + opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle) + opts.parse_metadata = { + k: list(itertools.chain(*map(metadataparser_actions, v))) + for k, v in opts.parse_metadata.items() + } - any_getting = (any(opts.forceprint.values()) or opts.dumpjson or opts.dump_single_json - or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail - or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration) + # Other options + if opts.playlist_items is not None: + try: + tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items)) + except Exception as err: + raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}') - any_printing = opts.print_json - download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive + opts.geo_bypass_country, opts.geo_bypass_ip_block = None, None + if opts.geo_bypass.lower() not in ('default', 'never'): + try: + GeoUtils.random_ipv4(opts.geo_bypass) + except Exception: + raise ValueError(f'Unsupported --xff "{opts.geo_bypass}"') + if len(opts.geo_bypass) == 2: + opts.geo_bypass_country = opts.geo_bypass + else: + opts.geo_bypass_ip_block = opts.geo_bypass + opts.geo_bypass = opts.geo_bypass.lower() != 'never' - # If JSON is not printed anywhere, but comments are requested, save it to file - printing_json = opts.dumpjson or opts.print_json or opts.dump_single_json - if opts.getcomments and not printing_json: - opts.writeinfojson = True + opts.match_filter = match_filter_func(opts.match_filter, opts.breaking_match_filter) + + if opts.download_archive is not None: + opts.download_archive = expand_path(opts.download_archive) + + if opts.ffmpeg_location is not None: + opts.ffmpeg_location = expand_path(opts.ffmpeg_location) + + if opts.user_agent is not None: + opts.headers.setdefault('User-Agent', opts.user_agent) + if opts.referer is not None: + opts.headers.setdefault('Referer', opts.referer) if opts.no_sponsorblock: - opts.sponsorblock_mark = set() - opts.sponsorblock_remove = set() - sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove + opts.sponsorblock_mark = opts.sponsorblock_remove = set() - opts.remove_chapters = opts.remove_chapters or [] + default_downloader = None + for proto, path in opts.external_downloader.items(): + if path == 'native': + continue + ed = get_external_downloader(path) + if ed is None: + raise ValueError( + f'No such 
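# The browser-cookie spec parsed above is BROWSER[+KEYRING][:PROFILE][::CONTAINER],
# yielding a (browser, profile, keyring, container) tuple. Re-running the same
# regex outside the CLI (simplified: no validation against the supported sets):
import re

def parse_cookiesfrombrowser(spec):
    mobj = re.fullmatch(r'''(?x)
        (?P<name>[^+:]+)
        (?:\s*\+\s*(?P<keyring>[^:]+))?
        (?:\s*:\s*(?!:)(?P<profile>.+?))?
        (?:\s*::\s*(?P<container>.+))?
    ''', spec)
    if mobj is None:
        raise ValueError(f'invalid cookies from browser arguments: {spec}')
    name, keyring, profile, container = mobj.group('name', 'keyring', 'profile', 'container')
    return (name.lower(), profile, keyring and keyring.upper(), container)

assert parse_cookiesfrombrowser('firefox') == ('firefox', None, None, None)
assert parse_cookiesfrombrowser('chrome+gnomekeyring:Profile 1::Work') == \
    ('chrome', 'Profile 1', 'GNOMEKEYRING', 'Work')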
{format_field(proto, None, "%s ", ignore="default")}external downloader "{path}"') + elif ed and proto == 'default': + default_downloader = ed.get_basename() - if (opts.remove_chapters or sponsorblock_query) and opts.sponskrub is not False: - if opts.sponskrub: - if opts.remove_chapters: - report_conflict('--remove-chapters', '--sponskrub') - if opts.sponsorblock_mark: - report_conflict('--sponsorblock-mark', '--sponskrub') - if opts.sponsorblock_remove: - report_conflict('--sponsorblock-remove', '--sponskrub') - opts.sponskrub = False - if opts.sponskrub_cut and opts.split_chapters and opts.sponskrub is not False: - report_conflict('--split-chapter', '--sponskrub-cut') - opts.sponskrub_cut = False + for policy in opts.color.values(): + if policy not in ('always', 'auto', 'no_color', 'never'): + raise ValueError(f'"{policy}" is not a valid color policy') - if opts.remuxvideo and opts.recodevideo: - report_conflict('--recode-video', '--remux-video') - opts.remuxvideo = False + warnings, deprecation_warnings = [], [] - if opts.allow_unplayable_formats: - def report_unplayable_conflict(opt_name, arg, default=False, allowed=None): - val = getattr(opts, opt_name) - if (not allowed and val) or (allowed and not allowed(val)): - report_conflict('--allow-unplayable-formats', arg) - setattr(opts, opt_name, default) + # Common mistake: -f best + if opts.format == 'best': + warnings.append('.\n '.join(( + '"-f best" selects the best pre-merged format which is often not the best option', + 'To let yt-dlp download and merge the best available formats, simply do not pass any format selection', + 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning'))) - report_unplayable_conflict('extractaudio', '--extract-audio') - report_unplayable_conflict('remuxvideo', '--remux-video') - report_unplayable_conflict('recodevideo', '--recode-video') - report_unplayable_conflict('addmetadata', '--embed-metadata') - report_unplayable_conflict('addchapters', '--embed-chapters') - report_unplayable_conflict('embed_infojson', '--embed-info-json') - opts.embed_infojson = False - report_unplayable_conflict('embedsubtitles', '--embed-subs') - report_unplayable_conflict('embedthumbnail', '--embed-thumbnail') - report_unplayable_conflict('xattrs', '--xattrs') - report_unplayable_conflict('fixup', '--fixup', default='never', allowed=lambda x: x in (None, 'never', 'ignore')) - opts.fixup = 'never' - report_unplayable_conflict('remove_chapters', '--remove-chapters', default=[]) - report_unplayable_conflict('sponsorblock_remove', '--sponsorblock-remove', default=set()) - report_unplayable_conflict('sponskrub', '--sponskrub', default=set()) - opts.sponskrub = False + # --(postprocessor/downloader)-args without name + def report_args_compat(name, value, key1, key2=None, where=None): + if key1 in value and key2 not in value: + warnings.append(f'{name.title()} arguments given without specifying name. ' + f'The arguments will be given to {where or f"all {name}s"}') + return True + return False - if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: - opts.addchapters = True + if report_args_compat('external downloader', opts.external_downloader_args, + 'default', where=default_downloader) and default_downloader: + # Compat with youtube-dl's behavior. 
See https://github.com/ytdl-org/youtube-dl/commit/49c5293014bc11ec8c009856cd63cffa6296c1e1 + opts.external_downloader_args.setdefault(default_downloader, opts.external_downloader_args.pop('default')) - # PostProcessors - postprocessors = list(opts.add_postprocessors) - if sponsorblock_query: - postprocessors.append({ - 'key': 'SponsorBlock', - 'categories': sponsorblock_query, - 'api': opts.sponsorblock_api, - # Run this immediately after extraction is complete - 'when': 'pre_process' - }) - if opts.parse_metadata: - postprocessors.append({ - 'key': 'MetadataParser', - 'actions': opts.parse_metadata, - # Run this immediately after extraction is complete - 'when': 'pre_process' - }) - if opts.convertsubtitles: - postprocessors.append({ - 'key': 'FFmpegSubtitlesConvertor', - 'format': opts.convertsubtitles, - # Run this before the actual video download - 'when': 'before_dl' - }) - if opts.convertthumbnails: - postprocessors.append({ - 'key': 'FFmpegThumbnailsConvertor', - 'format': opts.convertthumbnails, - # Run this before the actual video download - 'when': 'before_dl' - }) - if opts.extractaudio: - postprocessors.append({ - 'key': 'FFmpegExtractAudio', - 'preferredcodec': opts.audioformat, - 'preferredquality': opts.audioquality, - 'nopostoverwrites': opts.nopostoverwrites, - }) - if opts.remuxvideo: - postprocessors.append({ - 'key': 'FFmpegVideoRemuxer', - 'preferedformat': opts.remuxvideo, - }) - if opts.recodevideo: - postprocessors.append({ - 'key': 'FFmpegVideoConvertor', - 'preferedformat': opts.recodevideo, - }) - # If ModifyChapters is going to remove chapters, subtitles must already be in the container. - if opts.embedsubtitles: - already_have_subtitle = opts.writesubtitles and 'no-keep-subs' not in compat_opts - postprocessors.append({ - 'key': 'FFmpegEmbedSubtitle', - # already_have_subtitle = True prevents the file from being deleted after embedding - 'already_have_subtitle': already_have_subtitle - }) - if not opts.writeautomaticsub and 'no-keep-subs' not in compat_opts: - opts.writesubtitles = True - # --all-sub automatically sets --write-sub if --write-auto-sub is not given - # this was the old behaviour if only --all-sub was given. - if opts.allsubtitles and not opts.writeautomaticsub: - opts.writesubtitles = True - # ModifyChapters must run before FFmpegMetadataPP - remove_chapters_patterns, remove_ranges = [], [] - for regex in opts.remove_chapters: - if regex.startswith('*'): - dur = list(map(parse_duration, regex[1:].split('-'))) - if len(dur) == 2 and all(t is not None for t in dur): - remove_ranges.append(tuple(dur)) - continue - parser.error(f'invalid --remove-chapters time range {regex!r}. Must be of the form *start-end') - try: - remove_chapters_patterns.append(re.compile(regex)) - except re.error as err: - parser.error(f'invalid --remove-chapters regex {regex!r} - {err}') - if opts.remove_chapters or sponsorblock_query: - postprocessors.append({ - 'key': 'ModifyChapters', - 'remove_chapters_patterns': remove_chapters_patterns, - 'remove_sponsor_segments': opts.sponsorblock_remove, - 'remove_ranges': remove_ranges, - 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title, - 'force_keyframes': opts.force_keyframes_at_cuts - }) - # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and - # FFmpegExtractAudioPP as containers before conversion may not support - # metadata (3gp, webm, etc.) - # By default ffmpeg preserves metadata applicable for both - # source and target containers. 
From this point the container won't change, - # so metadata can be added here. - if opts.addmetadata or opts.addchapters or opts.embed_infojson: - if opts.embed_infojson is None: - opts.embed_infojson = 'if_exists' - postprocessors.append({ - 'key': 'FFmpegMetadata', - 'add_chapters': opts.addchapters, - 'add_metadata': opts.addmetadata, - 'add_infojson': opts.embed_infojson, - }) - # Deprecated - # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment - # but must be below EmbedSubtitle and FFmpegMetadata - # See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29 - # If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found - if opts.sponskrub is not False: - postprocessors.append({ - 'key': 'SponSkrub', - 'path': opts.sponskrub_path, - 'args': opts.sponskrub_args, - 'cut': opts.sponskrub_cut, - 'force': opts.sponskrub_force, - 'ignoreerror': opts.sponskrub is None, - '_from_cli': True, - }) - if opts.embedthumbnail: - postprocessors.append({ - 'key': 'EmbedThumbnail', - # already_have_thumbnail = True prevents the file from being deleted after embedding - 'already_have_thumbnail': opts.writethumbnail - }) - if not opts.writethumbnail: - opts.writethumbnail = True - opts.outtmpl['pl_thumbnail'] = '' - if opts.split_chapters: - postprocessors.append({ - 'key': 'FFmpegSplitChapters', - 'force_keyframes': opts.force_keyframes_at_cuts, - }) - # XAttrMetadataPP should be run after post-processors that may change file contents - if opts.xattrs: - postprocessors.append({'key': 'XAttrMetadata'}) - if opts.concat_playlist != 'never': - postprocessors.append({ - 'key': 'FFmpegConcat', - 'only_multi_video': opts.concat_playlist != 'always', - 'when': 'playlist', - }) - # Exec must be the last PP of each category - if opts.exec_before_dl_cmd: - opts.exec_cmd.setdefault('before_dl', opts.exec_before_dl_cmd) - for when, exec_cmd in opts.exec_cmd.items(): - postprocessors.append({ - 'key': 'Exec', - 'exec_cmd': exec_cmd, - # Run this only after the files have been moved to their final locations - 'when': when, - }) - - def report_args_compat(arg, name): - warnings.append('%s given without specifying name. 
The arguments will be given to all %s' % (arg, name)) - - if 'default' in opts.external_downloader_args: - report_args_compat('--downloader-args', 'external downloaders') - - if 'default-compat' in opts.postprocessor_args and 'default' not in opts.postprocessor_args: - report_args_compat('--post-processor-args', 'post-processors') + if report_args_compat('post-processor', opts.postprocessor_args, 'default-compat', 'default'): + opts.postprocessor_args['default'] = opts.postprocessor_args.pop('default-compat') opts.postprocessor_args.setdefault('sponskrub', []) - opts.postprocessor_args['default'] = opts.postprocessor_args['default-compat'] + def report_conflict(arg1, opt1, arg2='--allow-unplayable-formats', opt2='allow_unplayable_formats', + val1=NO_DEFAULT, val2=NO_DEFAULT, default=False): + if val2 is NO_DEFAULT: + val2 = getattr(opts, opt2) + if not val2: + return + + if val1 is NO_DEFAULT: + val1 = getattr(opts, opt1) + if val1: + warnings.append(f'{arg1} is ignored since {arg2} was given') + setattr(opts, opt1, default) + + # Conflicting options + report_conflict('--playlist-reverse', 'playlist_reverse', '--playlist-random', 'playlist_random') + report_conflict('--playlist-reverse', 'playlist_reverse', '--lazy-playlist', 'lazy_playlist') + report_conflict('--playlist-random', 'playlist_random', '--lazy-playlist', 'lazy_playlist') + report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None) + report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None) + report_conflict('--exec-before-download', 'exec_before_dl_cmd', + '"--exec before_dl:"', 'exec_cmd', val2=opts.exec_cmd.get('before_dl')) + report_conflict('--id', 'useid', '--output', 'outtmpl', val2=opts.outtmpl.get('default')) + report_conflict('--remux-video', 'remuxvideo', '--recode-video', 'recodevideo') + report_conflict('--sponskrub', 'sponskrub', '--remove-chapters', 'remove_chapters') + report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-mark', 'sponsorblock_mark') + report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-remove', 'sponsorblock_remove') + report_conflict('--sponskrub-cut', 'sponskrub_cut', '--split-chapter', 'split_chapters', + val1=opts.sponskrub and opts.sponskrub_cut) + + # Conflicts with --allow-unplayable-formats + report_conflict('--embed-metadata', 'addmetadata') + report_conflict('--embed-chapters', 'addchapters') + report_conflict('--embed-info-json', 'embed_infojson') + report_conflict('--embed-subs', 'embedsubtitles') + report_conflict('--embed-thumbnail', 'embedthumbnail') + report_conflict('--extract-audio', 'extractaudio') + report_conflict('--fixup', 'fixup', val1=opts.fixup not in (None, 'never', 'ignore'), default='never') + report_conflict('--recode-video', 'recodevideo') + report_conflict('--remove-chapters', 'remove_chapters', default=[]) + report_conflict('--remux-video', 'remuxvideo') + report_conflict('--sponskrub', 'sponskrub') + report_conflict('--sponsorblock-remove', 'sponsorblock_remove', default=set()) + report_conflict('--xattrs', 'xattrs') + + # Fully deprecated options def report_deprecation(val, old, new=None): if not val: return @@ -635,23 +543,228 @@ def _real_main(argv=None): report_deprecation(opts.sponskrub, '--sponskrub', '--sponsorblock-mark or --sponsorblock-remove') report_deprecation(not opts.prefer_ffmpeg, '--prefer-avconv', 'ffmpeg') - report_deprecation(opts.include_ads, '--include-ads') + # report_deprecation(opts.include_ads, '--include-ads') # We may re-implement this in future # report_deprecation(opts.call_home, 
'--call-home') # We may re-implement this in future # report_deprecation(opts.writeannotations, '--write-annotations') # It's just that no website has it + # Dependent options + opts.date = DateRange.day(opts.date) if opts.date else DateRange(opts.dateafter, opts.datebefore) + + if opts.exec_before_dl_cmd: + opts.exec_cmd['before_dl'] = opts.exec_before_dl_cmd + + if opts.useid: # --id is not deprecated in youtube-dl + opts.outtmpl['default'] = '%(id)s.%(ext)s' + + if opts.overwrites: # --force-overwrites implies --no-continue + opts.continue_dl = False + + if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: + # Add chapters when adding metadata or marking sponsors + opts.addchapters = True + + if opts.extractaudio and not opts.keepvideo and opts.format is None: + # Do not unnecessarily download audio + opts.format = 'bestaudio/best' + + if opts.getcomments and opts.writeinfojson is None and not opts.embed_infojson: + # If JSON is not printed anywhere, but comments are requested, save it to file + if not opts.dumpjson or opts.print_json or opts.dump_single_json: + opts.writeinfojson = True + + if opts.allsubtitles and not (opts.embedsubtitles or opts.writeautomaticsub): + # --all-sub automatically sets --write-sub if --write-auto-sub is not given + opts.writesubtitles = True + + if opts.addmetadata and opts.embed_infojson is None: + # If embedding metadata and infojson is present, embed it + opts.embed_infojson = 'if_exists' + + # Ask for passwords + if opts.username is not None and opts.password is None: + opts.password = getpass.getpass('Type account password and press [Return]: ') + if opts.ap_username is not None and opts.ap_password is None: + opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ') + + return warnings, deprecation_warnings + + +def get_postprocessors(opts): + yield from opts.add_postprocessors + + for when, actions in opts.parse_metadata.items(): + yield { + 'key': 'MetadataParser', + 'actions': actions, + 'when': when + } + sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove + if sponsorblock_query: + yield { + 'key': 'SponsorBlock', + 'categories': sponsorblock_query, + 'api': opts.sponsorblock_api, + 'when': 'after_filter' + } + if opts.convertsubtitles: + yield { + 'key': 'FFmpegSubtitlesConvertor', + 'format': opts.convertsubtitles, + 'when': 'before_dl' + } + if opts.convertthumbnails: + yield { + 'key': 'FFmpegThumbnailsConvertor', + 'format': opts.convertthumbnails, + 'when': 'before_dl' + } + if opts.extractaudio: + yield { + 'key': 'FFmpegExtractAudio', + 'preferredcodec': opts.audioformat, + 'preferredquality': opts.audioquality, + 'nopostoverwrites': opts.nopostoverwrites, + } + if opts.remuxvideo: + yield { + 'key': 'FFmpegVideoRemuxer', + 'preferedformat': opts.remuxvideo, + } + if opts.recodevideo: + yield { + 'key': 'FFmpegVideoConvertor', + 'preferedformat': opts.recodevideo, + } + # If ModifyChapters is going to remove chapters, subtitles must already be in the container. 
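The rewritten `get_postprocessors` is a generator yielding one dict per post-processor, and each dict's optional `when` key pins it to a pipeline stage (`pre_process`, `after_filter`, `before_dl`, the default post-processing stage, or `playlist`). Before the subtitle-embedding step below, here is a hedged sketch of how such a stream of dicts can be grouped by stage (stage names as used in this diff; the grouping helper and the `post_process` default are illustrative):

```python
import itertools

def group_by_stage(postprocessors, default_stage='post_process'):
    """Group PP dicts (as yielded by get_postprocessors) by their 'when' stage."""
    key = lambda pp: pp.get('when', default_stage)
    return {
        stage: list(pps)
        for stage, pps in itertools.groupby(sorted(postprocessors, key=key), key)
    }

pps = [
    {'key': 'MetadataParser', 'actions': [], 'when': 'pre_process'},
    {'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'},
    {'key': 'FFmpegConcat', 'only_multi_video': True, 'when': 'playlist'},
]
print(group_by_stage(pps))
# {'playlist': [...], 'post_process': [...], 'pre_process': [...]}
```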
+ if opts.embedsubtitles: + keep_subs = 'no-keep-subs' not in opts.compat_opts + yield { + 'key': 'FFmpegEmbedSubtitle', + # already_have_subtitle = True prevents the file from being deleted after embedding + 'already_have_subtitle': opts.writesubtitles and keep_subs + } + if not opts.writeautomaticsub and keep_subs: + opts.writesubtitles = True + + # ModifyChapters must run before FFmpegMetadataPP + if opts.remove_chapters or sponsorblock_query: + yield { + 'key': 'ModifyChapters', + 'remove_chapters_patterns': opts.remove_chapters, + 'remove_sponsor_segments': opts.sponsorblock_remove, + 'remove_ranges': opts.remove_ranges, + 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title, + 'force_keyframes': opts.force_keyframes_at_cuts + } + # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and + # FFmpegExtractAudioPP as containers before conversion may not support + # metadata (3gp, webm, etc.) + # By default ffmpeg preserves metadata applicable for both + # source and target containers. From this point the container won't change, + # so metadata can be added here. + if opts.addmetadata or opts.addchapters or opts.embed_infojson: + yield { + 'key': 'FFmpegMetadata', + 'add_chapters': opts.addchapters, + 'add_metadata': opts.addmetadata, + 'add_infojson': opts.embed_infojson, + } + # Deprecated + # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment + # but must be below EmbedSubtitle and FFmpegMetadata + # See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29 + # If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found + if opts.sponskrub is not False: + yield { + 'key': 'SponSkrub', + 'path': opts.sponskrub_path, + 'args': opts.sponskrub_args, + 'cut': opts.sponskrub_cut, + 'force': opts.sponskrub_force, + 'ignoreerror': opts.sponskrub is None, + '_from_cli': True, + } + if opts.embedthumbnail: + yield { + 'key': 'EmbedThumbnail', + # already_have_thumbnail = True prevents the file from being deleted after embedding + 'already_have_thumbnail': opts.writethumbnail + } + if not opts.writethumbnail: + opts.writethumbnail = True + opts.outtmpl['pl_thumbnail'] = '' + if opts.split_chapters: + yield { + 'key': 'FFmpegSplitChapters', + 'force_keyframes': opts.force_keyframes_at_cuts, + } + # XAttrMetadataPP should be run after post-processors that may change file contents + if opts.xattrs: + yield {'key': 'XAttrMetadata'} + if opts.concat_playlist != 'never': + yield { + 'key': 'FFmpegConcat', + 'only_multi_video': opts.concat_playlist != 'always', + 'when': 'playlist', + } + # Exec must be the last PP of each category + for when, exec_cmd in opts.exec_cmd.items(): + yield { + 'key': 'Exec', + 'exec_cmd': exec_cmd, + 'when': when, + } + + +ParsedOptions = collections.namedtuple('ParsedOptions', ('parser', 'options', 'urls', 'ydl_opts')) + + +def parse_options(argv=None): + """@returns ParsedOptions(parser, opts, urls, ydl_opts)""" + parser, opts, urls = parseOpts(argv) + urls = get_urls(urls, opts.batchfile, opts.verbose) + + set_compat_opts(opts) + try: + warnings, deprecation_warnings = validate_options(opts) + except ValueError as err: + parser.error(f'{err}\n') + + postprocessors = list(get_postprocessors(opts)) + + print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:]) + any_getting = any(getattr(opts, k) for k in ( + 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', + 'getformat', 
'getid', 'getthumbnail', 'gettitle', 'geturl' + )) + if opts.quiet is None: + opts.quiet = any_getting or opts.print_json or bool(opts.forceprint) + + playlist_pps = [pp for pp in postprocessors if pp.get('when') == 'playlist'] + write_playlist_infojson = (opts.writeinfojson and not opts.clean_infojson + and opts.allow_playlist_files and opts.outtmpl.get('pl_infojson') != '') + if not any(( + opts.extract_flat, + opts.dump_single_json, + opts.forceprint.get('playlist'), + opts.print_to_file.get('playlist'), + write_playlist_infojson, + )): + if not playlist_pps: + opts.extract_flat = 'discard' + elif playlist_pps == [{'key': 'FFmpegConcat', 'only_multi_video': True, 'when': 'playlist'}]: + opts.extract_flat = 'discard_in_playlist' + final_ext = ( opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS - else opts.audioformat if (opts.extractaudio and opts.audioformat != 'best') + else opts.audioformat if (opts.extractaudio and opts.audioformat in FFmpegExtractAudioPP.SUPPORTED_EXTS) else None) - match_filter = ( - None if opts.match_filter is None - else match_filter_func(opts.match_filter)) - - ydl_opts = { + return ParsedOptions(parser, opts, urls, { 'usenetrc': opts.usenetrc, 'netrc_location': opts.netrc_location, + 'netrc_cmd': opts.netrc_cmd, 'username': opts.username, 'password': opts.password, 'twofactor': opts.twofactor, @@ -659,7 +772,10 @@ def _real_main(argv=None): 'ap_mso': opts.ap_mso, 'ap_username': opts.ap_username, 'ap_password': opts.ap_password, - 'quiet': (opts.quiet or any_getting or any_printing), + 'client_certificate': opts.client_certificate, + 'client_certificate_key': opts.client_certificate_key, + 'client_certificate_password': opts.client_certificate_password, + 'quiet': opts.quiet, 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, 'forcetitle': opts.gettitle, @@ -674,7 +790,7 @@ def _real_main(argv=None): 'forcejson': opts.dumpjson or opts.print_json, 'dump_single_json': opts.dump_single_json, 'force_write_download_archive': opts.force_write_download_archive, - 'simulate': (any_getting or None) if opts.simulate is None else opts.simulate, + 'simulate': (print_only or any_getting or None) if opts.simulate is None else opts.simulate, 'skip_download': opts.skip_download, 'format': opts.format, 'allow_unplayable_formats': opts.allow_unplayable_formats, @@ -695,6 +811,7 @@ def _real_main(argv=None): 'windowsfilenames': opts.windowsfilenames, 'ignoreerrors': opts.ignoreerrors, 'force_generic_extractor': opts.force_generic_extractor, + 'allowed_extractors': opts.allowed_extractors or ['default'], 'ratelimit': opts.ratelimit, 'throttledratelimit': opts.throttledratelimit, 'overwrites': opts.overwrites, @@ -702,6 +819,7 @@ def _real_main(argv=None): 'file_access_retries': opts.file_access_retries, 'fragment_retries': opts.fragment_retries, 'extractor_retries': opts.extractor_retries, + 'retry_sleep_functions': opts.retry_sleep, 'skip_unavailable_fragments': opts.skip_unavailable_fragments, 'keep_fragments': opts.keep_fragments, 'concurrent_fragment_downloads': opts.concurrent_fragment_downloads, @@ -716,8 +834,9 @@ def _real_main(argv=None): 'playlistend': opts.playlistend, 'playlistreverse': opts.playlist_reverse, 'playlistrandom': opts.playlist_random, + 'lazy_playlist': opts.lazy_playlist, 'noplaylist': opts.noplaylist, - 'logtostderr': outtmpl_default == '-', + 'logtostderr': opts.outtmpl.get('default') == '-', 'consoletitle': opts.consoletitle, 'nopart': 
opts.nopart, 'updatetime': opts.updatetime, @@ -747,17 +866,18 @@ def _real_main(argv=None): 'verbose': opts.verbose, 'dump_intermediate_pages': opts.dump_intermediate_pages, 'write_pages': opts.write_pages, + 'load_pages': opts.load_pages, 'test': opts.test, 'keepvideo': opts.keepvideo, 'min_filesize': opts.min_filesize, 'max_filesize': opts.max_filesize, 'min_views': opts.min_views, 'max_views': opts.max_views, - 'daterange': date, + 'daterange': opts.date, 'cachedir': opts.cachedir, 'youtube_print_sig_code': opts.youtube_print_sig_code, 'age_limit': opts.age_limit, - 'download_archive': download_archive_fn, + 'download_archive': opts.download_archive, 'break_on_existing': opts.break_on_existing, 'break_on_reject': opts.break_on_reject, 'break_per_url': opts.break_per_url, @@ -767,6 +887,8 @@ def _real_main(argv=None): 'legacyserverconnect': opts.legacy_server_connect, 'nocheckcertificate': opts.no_check_certificate, 'prefer_insecure': opts.prefer_insecure, + 'enable_file_urls': opts.enable_file_urls, + 'http_headers': opts.headers, 'proxy': opts.proxy, 'socket_timeout': opts.socket_timeout, 'bidi_workaround': opts.bidi_workaround, @@ -794,11 +916,13 @@ def _real_main(argv=None): 'max_sleep_interval': opts.max_sleep_interval, 'sleep_interval_subtitles': opts.sleep_interval_subtitles, 'external_downloader': opts.external_downloader, + 'download_ranges': opts.download_ranges, + 'force_keyframes_at_cuts': opts.force_keyframes_at_cuts, 'list_thumbnails': opts.list_thumbnails, 'playlist_items': opts.playlist_items, 'xattr_set_filesize': opts.xattr_set_filesize, - 'match_filter': match_filter, - 'no_color': opts.no_color, + 'match_filter': opts.match_filter, + 'color': opts.color, 'ffmpeg_location': opts.ffmpeg_location, 'hls_prefer_native': opts.hls_prefer_native, 'hls_use_mpegts': opts.hls_use_mpegts, @@ -812,60 +936,97 @@ def _real_main(argv=None): 'geo_bypass_ip_block': opts.geo_bypass_ip_block, '_warnings': warnings, '_deprecation_warnings': deprecation_warnings, - 'compat_opts': compat_opts, - } + 'compat_opts': opts.compat_opts, + }) + + +def _real_main(argv=None): + setproctitle('yt-dlp') + + parser, opts, all_urls, ydl_opts = parse_options(argv) + + # Dump user agent + if opts.dump_user_agent: + ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent']) + write_string(f'{ua}\n', out=sys.stdout) + return + + if print_extractor_information(opts, all_urls): + return + + # We may need ffmpeg_location without having access to the YoutubeDL instance + # See https://github.com/yt-dlp/yt-dlp/issues/2191 + if opts.ffmpeg_location: + FFmpegPostProcessor._ffmpeg_location.set(opts.ffmpeg_location) with YoutubeDL(ydl_opts) as ydl: + pre_process = opts.update_self or opts.rm_cachedir actual_use = all_urls or opts.load_info_filename - # Remove cache dir if opts.rm_cachedir: ydl.cache.remove() - # Update version - if opts.update_self: - # If updater returns True, exit. 
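With the option processing factored out of `_real_main`, the assembled `ydl_opts` mapping (and `parse_options` itself, exported further down) can be reused to embed yt-dlp programmatically. A hedged usage sketch, assuming the package is importable as `yt_dlp` (here it is vendored under `plugins/youtube_download/`, so the import path may differ; the URL is a placeholder):

```python
import yt_dlp

# Translate CLI-style arguments into YoutubeDL params without running the CLI.
# parse_options returns the ParsedOptions namedtuple defined above.
parser, opts, urls, ydl_opts = yt_dlp.parse_options(
    ['-f', 'b', 'https://example.com/watch?v=dummy'])

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(urls)
```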
Required for windows - if run_update(ydl): - if actual_use: - sys.exit('ERROR: The program must exit for the update to complete') - sys.exit() + try: + updater = Updater(ydl, opts.update_self) + if opts.update_self and updater.update() and actual_use: + if updater.cmd: + return updater.restart() + # This code is reachable only for zip variant in py < 3.10 + # It makes sense to exit here, but the old behavior is to continue + ydl.report_warning('Restart yt-dlp to use the updated version') + # return 100, 'ERROR: The program must exit for the update to complete' + except Exception: + traceback.print_exc() + ydl._download_retcode = 100 - # Maybe do nothing if not actual_use: - if opts.update_self or opts.rm_cachedir: - sys.exit() + if pre_process: + return ydl._download_retcode ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv) parser.error( 'You must provide at least one URL.\n' 'Type yt-dlp --help to see a list of all options.') + parser.destroy() try: if opts.load_info_filename is not None: - retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename)) + if all_urls: + ydl.report_warning('URLs are ignored due to --load-info-json') + return ydl.download_with_info_file(expand_path(opts.load_info_filename)) else: - retcode = ydl.download(all_urls) + return ydl.download(all_urls) except DownloadCancelled: ydl.to_screen('Aborting remaining downloads') - retcode = 101 - - sys.exit(retcode) + return 101 def main(argv=None): + global _IN_CLI + _IN_CLI = True try: - _real_main(argv) + _exit(*variadic(_real_main(argv))) except DownloadError: - sys.exit(1) + _exit(1) except SameFileError as e: - sys.exit(f'ERROR: {e}') + _exit(f'ERROR: {e}') except KeyboardInterrupt: - sys.exit('\nERROR: Interrupted by user') + _exit('\nERROR: Interrupted by user') except BrokenPipeError as e: # https://docs.python.org/3/library/signal.html#note-on-sigpipe devnull = os.open(os.devnull, os.O_WRONLY) os.dup2(devnull, sys.stdout.fileno()) - sys.exit(f'\nERROR: {e}') + _exit(f'\nERROR: {e}') + except optparse.OptParseError as e: + _exit(2, f'\n{e}') -__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors'] +from .extractor import gen_extractors, list_extractors + +__all__ = [ + 'main', + 'YoutubeDL', + 'parse_options', + 'gen_extractors', + 'list_extractors', +] diff --git a/plugins/youtube_download/yt_dlp/__main__.py b/plugins/youtube_download/yt_dlp/__main__.py index c9f4147..78701df 100644 --- a/plugins/youtube_download/yt_dlp/__main__.py +++ b/plugins/youtube_download/yt_dlp/__main__.py @@ -1,13 +1,11 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals # Execute with -# $ python yt_dlp/__main__.py (2.6+) -# $ python -m yt_dlp (2.7+) +# $ python -m yt_dlp import sys -if __package__ is None and not hasattr(sys, 'frozen'): +if __package__ is None and not getattr(sys, 'frozen', False): # direct call of __main__.py import os.path path = os.path.realpath(os.path.abspath(__file__)) diff --git a/plugins/youtube_download/yt_dlp/__pyinstaller/__init__.py b/plugins/youtube_download/yt_dlp/__pyinstaller/__init__.py new file mode 100644 index 0000000..1c52aad --- /dev/null +++ b/plugins/youtube_download/yt_dlp/__pyinstaller/__init__.py @@ -0,0 +1,5 @@ +import os + + +def get_hook_dirs(): + return [os.path.dirname(__file__)] diff --git a/plugins/youtube_download/yt_dlp/__pyinstaller/hook-yt_dlp.py b/plugins/youtube_download/yt_dlp/__pyinstaller/hook-yt_dlp.py new file mode 100644 index 0000000..88c2b8b --- /dev/null +++ 
b/plugins/youtube_download/yt_dlp/__pyinstaller/hook-yt_dlp.py @@ -0,0 +1,32 @@ +import sys + +from PyInstaller.utils.hooks import collect_submodules + + +def pycryptodome_module(): + try: + import Cryptodome # noqa: F401 + except ImportError: + try: + import Crypto # noqa: F401 + print('WARNING: Using Crypto since Cryptodome is not available. ' + 'Install with: pip install pycryptodomex', file=sys.stderr) + return 'Crypto' + except ImportError: + pass + return 'Cryptodome' + + +def get_hidden_imports(): + yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated') + yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated') + yield pycryptodome_module() + yield from collect_submodules('websockets') + # These are auto-detected, but explicitly add them just in case + yield from ('mutagen', 'brotli', 'certifi') + + +hiddenimports = list(get_hidden_imports()) +print(f'Adding imports: {hiddenimports}') + +excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts'] diff --git a/plugins/youtube_download/yt_dlp/aes.py b/plugins/youtube_download/yt_dlp/aes.py index b37f0dd..b3a383c 100644 --- a/plugins/youtube_download/yt_dlp/aes.py +++ b/plugins/youtube_download/yt_dlp/aes.py @@ -1,26 +1,18 @@ -from __future__ import unicode_literals - +import base64 from math import ceil -from .compat import ( - compat_b64decode, - compat_ord, - compat_pycrypto_AES, -) -from .utils import ( - bytes_to_intlist, - intlist_to_bytes, -) +from .compat import compat_ord +from .dependencies import Cryptodome +from .utils import bytes_to_intlist, intlist_to_bytes - -if compat_pycrypto_AES: +if Cryptodome.AES: def aes_cbc_decrypt_bytes(data, key, iv): """ Decrypt bytes with AES-CBC using pycryptodome """ - return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_CBC, iv).decrypt(data) + return Cryptodome.AES.new(key, Cryptodome.AES.MODE_CBC, iv).decrypt(data) def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): """ Decrypt bytes with AES-GCM using pycryptodome """ - return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) + return Cryptodome.AES.new(key, Cryptodome.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) else: def aes_cbc_decrypt_bytes(data, key, iv): @@ -32,16 +24,59 @@ else: return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce)))) -def unpad_pkcs7(data): - return data[:-compat_ord(data[-1])] +def aes_cbc_encrypt_bytes(data, key, iv, **kwargs): + return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs)) BLOCK_SIZE_BYTES = 16 +def unpad_pkcs7(data): + return data[:-compat_ord(data[-1])] + + +def pkcs7_padding(data): + """ + PKCS#7 padding + + @param {int[]} data cleartext + @returns {int[]} padding data + """ + + remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES + return data + [remaining_length] * remaining_length + + +def pad_block(block, padding_mode): + """ + Pad a block with the given padding mode + @param {int[]} block block to pad + @param padding_mode padding mode + """ + padding_size = BLOCK_SIZE_BYTES - len(block) + + PADDING_BYTE = { + 'pkcs7': padding_size, + 'iso7816': 0x0, + 'whitespace': 0x20, + 'zero': 0x0, + } + + if padding_size < 0: + raise ValueError('Block size exceeded') + elif padding_mode not in PADDING_BYTE: + raise NotImplementedError(f'Padding mode {padding_mode} is not implemented') + + if padding_mode == 'iso7816' and padding_size: + block = block + [0x80] # NB: += mutates list + padding_size 
-= 1 + + return block + [PADDING_BYTE[padding_mode]] * padding_size + + def aes_ecb_encrypt(data, key, iv=None): """ - Encrypt with aes in ECB mode + Encrypt with aes in ECB mode. Using PKCS#7 padding @param {int[]} data cleartext @param {int[]} key 16/24/32-Byte cipher key @@ -54,8 +89,7 @@ def aes_ecb_encrypt(data, key, iv=None): encrypted_data = [] for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] - encrypted_data += aes_encrypt(block, expanded_key) - encrypted_data = encrypted_data[:len(data)] + encrypted_data += aes_encrypt(pkcs7_padding(block), expanded_key) return encrypted_data @@ -145,13 +179,14 @@ def aes_cbc_decrypt(data, key, iv): return decrypted_data -def aes_cbc_encrypt(data, key, iv): +def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'): """ - Encrypt with aes in CBC mode. Using PKCS#7 padding + Encrypt with aes in CBC mode @param {int[]} data cleartext @param {int[]} key 16/24/32-Byte cipher key @param {int[]} iv 16-Byte IV + @param padding_mode Padding mode to use @returns {int[]} encrypted data """ expanded_key = key_expansion(key) @@ -161,8 +196,8 @@ def aes_cbc_encrypt(data, key, iv): previous_cipher_block = iv for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] - remaining_length = BLOCK_SIZE_BYTES - len(block) - block += [remaining_length] * remaining_length + block = pad_block(block, padding_mode) + mixed_block = xor(block, previous_cipher_block) encrypted_block = aes_encrypt(mixed_block, expanded_key) @@ -273,8 +308,8 @@ def aes_decrypt_text(data, password, key_size_bytes): """ NONCE_LENGTH_BYTES = 8 - data = bytes_to_intlist(compat_b64decode(data)) - password = bytes_to_intlist(password.encode('utf-8')) + data = bytes_to_intlist(base64.b64decode(data)) + password = bytes_to_intlist(password.encode()) key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) @@ -503,20 +538,30 @@ def ghash(subkey, data): last_y = [0] * BLOCK_SIZE_BYTES for i in range(0, len(data), BLOCK_SIZE_BYTES): - block = data[i : i + BLOCK_SIZE_BYTES] # noqa: E203 + block = data[i: i + BLOCK_SIZE_BYTES] last_y = block_product(xor(last_y, block), subkey) return last_y __all__ = [ - 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_cbc_decrypt_bytes', + 'aes_ctr_decrypt', 'aes_decrypt_text', - 'aes_encrypt', + 'aes_decrypt', + 'aes_ecb_decrypt', 'aes_gcm_decrypt_and_verify', 'aes_gcm_decrypt_and_verify_bytes', + + 'aes_cbc_encrypt', + 'aes_cbc_encrypt_bytes', + 'aes_ctr_encrypt', + 'aes_ecb_encrypt', + 'aes_encrypt', + 'key_expansion', + 'pad_block', + 'pkcs7_padding', 'unpad_pkcs7', ] diff --git a/plugins/youtube_download/yt_dlp/cache.py b/plugins/youtube_download/yt_dlp/cache.py index e5cb193..9dd4f2f 100644 --- a/plugins/youtube_download/yt_dlp/cache.py +++ b/plugins/youtube_download/yt_dlp/cache.py @@ -1,37 +1,30 @@ -from __future__ import unicode_literals - -import errno -import io +import contextlib import json import os import re import shutil import traceback +import urllib.parse -from .compat import compat_getenv -from .utils import ( - expand_path, - write_json_file, -) +from .utils import expand_path, traverse_obj, version_tuple, write_json_file +from .version import __version__ -class Cache(object): +class Cache: def __init__(self, ydl): self._ydl = ydl def _get_root_dir(self): res = self._ydl.params.get('cachedir') if res is None: - cache_root = compat_getenv('XDG_CACHE_HOME', 
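Stepping back to the new padding helpers in `aes.py` above: `pad_block` fills a short block according to the chosen mode, with ISO 7816-4 inserting a 0x80 marker byte before zero-padding. A quick worked example calling it as defined there (import path per this vendored layout; values are ints, matching the module's int-list convention):

```python
from yt_dlp.aes import pad_block

data = [0x01, 0x02, 0x03]  # 3 bytes -> 13 bytes of padding to reach a 16-byte block
print(pad_block(list(data), 'pkcs7'))
# [1, 2, 3, 13, 13, ..., 13]   (the pad length 13, repeated 13 times)
print(pad_block(list(data), 'iso7816'))
# [1, 2, 3, 128, 0, 0, ..., 0] (0x80 marker, then 12 zero bytes)
```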
'~/.cache') + cache_root = os.getenv('XDG_CACHE_HOME', '~/.cache') res = os.path.join(cache_root, 'yt-dlp') return expand_path(res) def _get_cache_fn(self, section, key, dtype): - assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ - 'invalid section %r' % section - assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key - return os.path.join( - self._get_root_dir(), section, '%s.%s' % (key, dtype)) + assert re.match(r'^[\w.-]+$', section), f'invalid section {section!r}' + key = urllib.parse.quote(key, safe='').replace('%', ',') # encode non-ascii characters + return os.path.join(self._get_root_dir(), section, f'{key}.{dtype}') @property def enabled(self): @@ -45,39 +38,39 @@ class Cache(object): fn = self._get_cache_fn(section, key, dtype) try: - try: - os.makedirs(os.path.dirname(fn)) - except OSError as ose: - if ose.errno != errno.EEXIST: - raise + os.makedirs(os.path.dirname(fn), exist_ok=True) self._ydl.write_debug(f'Saving {section}.{key} to cache') - write_json_file(data, fn) + write_json_file({'yt-dlp_version': __version__, 'data': data}, fn) except Exception: tb = traceback.format_exc() - self._ydl.report_warning( - 'Writing cache to %r failed: %s' % (fn, tb)) + self._ydl.report_warning(f'Writing cache to {fn!r} failed: {tb}') - def load(self, section, key, dtype='json', default=None): + def _validate(self, data, min_ver): + version = traverse_obj(data, 'yt-dlp_version') + if not version: # Backward compatibility + data, version = {'data': data}, '2022.08.19' + if not min_ver or version_tuple(version) >= version_tuple(min_ver): + return data['data'] + self._ydl.write_debug(f'Discarding old cache from version {version} (needs {min_ver})') + + def load(self, section, key, dtype='json', default=None, *, min_ver=None): assert dtype in ('json',) if not self.enabled: return default cache_fn = self._get_cache_fn(section, key, dtype) - try: + with contextlib.suppress(OSError): try: - with io.open(cache_fn, 'r', encoding='utf-8') as cachef: + with open(cache_fn, encoding='utf-8') as cachef: self._ydl.write_debug(f'Loading {section}.{key} from cache') - return json.load(cachef) - except ValueError: + return self._validate(json.load(cachef), min_ver) + except (ValueError, KeyError): try: file_size = os.path.getsize(cache_fn) - except (OSError, IOError) as oe: + except OSError as oe: file_size = str(oe) - self._ydl.report_warning( - 'Cache retrieval from %s failed (%s)' % (cache_fn, file_size)) - except IOError: - pass # No cache available + self._ydl.report_warning(f'Cache retrieval from {cache_fn} failed ({file_size})') return default diff --git a/plugins/youtube_download/yt_dlp/casefold.py b/plugins/youtube_download/yt_dlp/casefold.py new file mode 100644 index 0000000..41a53e5 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/casefold.py @@ -0,0 +1,5 @@ +import warnings + +warnings.warn(DeprecationWarning(f'{__name__} is deprecated')) + +casefold = str.casefold diff --git a/plugins/youtube_download/yt_dlp/compat.py b/plugins/youtube_download/yt_dlp/compat.py deleted file mode 100644 index b97d451..0000000 --- a/plugins/youtube_download/yt_dlp/compat.py +++ /dev/null @@ -1,311 +0,0 @@ -# coding: utf-8 - -import asyncio -import base64 -import collections -import ctypes -import getpass -import html -import html.parser -import http -import http.client -import http.cookiejar -import http.cookies -import http.server -import itertools -import optparse -import os -import re -import shlex -import shutil -import socket -import struct -import subprocess -import sys -import tokenize 
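Back in `cache.py`: entries are now written as `{'yt-dlp_version': ..., 'data': ...}` so that stale entries can be discarded via `load(..., min_ver=...)`, and bare pre-2022.08.19 payloads are wrapped for backward compatibility. A standalone toy re-implementation of that validation logic (the real code uses `traverse_obj` and the utils `version_tuple`; this version is simplified for clarity):

```python
def version_tuple(v):
    return tuple(int(x) for x in v.split('.'))

def validate(entry, min_ver=None):
    """Return the cached payload, or None if it predates min_ver."""
    if not isinstance(entry, dict) or 'yt-dlp_version' not in entry:
        entry = {'yt-dlp_version': '2022.08.19', 'data': entry}  # legacy format
    if not min_ver or version_tuple(entry['yt-dlp_version']) >= version_tuple(min_ver):
        return entry['data']
    return None  # caller falls back to its `default`

print(validate({'yt-dlp_version': '2023.01.06', 'data': [1, 2]}, min_ver='2022.12.01'))  # [1, 2]
print(validate('bare-legacy-value', min_ver='2023.01.01'))  # None (discarded)
```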
-import urllib -import xml.etree.ElementTree as etree -from subprocess import DEVNULL - - -# HTMLParseError has been deprecated in Python 3.3 and removed in -# Python 3.5. Introducing dummy exception for Python >3.5 for compatible -# and uniform cross-version exception handling -class compat_HTMLParseError(Exception): - pass - - -# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE -# will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines -def compat_ctypes_WINFUNCTYPE(*args, **kwargs): - return ctypes.WINFUNCTYPE(*args, **kwargs) - - -class _TreeBuilder(etree.TreeBuilder): - def doctype(self, name, pubid, system): - pass - - -def compat_etree_fromstring(text): - return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) - - -compat_os_name = os._name if os.name == 'java' else os.name - - -if compat_os_name == 'nt': - def compat_shlex_quote(s): - return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') -else: - from shlex import quote as compat_shlex_quote - - -def compat_ord(c): - if type(c) is int: - return c - else: - return ord(c) - - -def compat_setenv(key, value, env=os.environ): - env[key] = value - - -if compat_os_name == 'nt' and sys.version_info < (3, 8): - # os.path.realpath on Windows does not follow symbolic links - # prior to Python 3.8 (see https://bugs.python.org/issue9949) - def compat_realpath(path): - while os.path.islink(path): - path = os.path.abspath(os.readlink(path)) - return path -else: - compat_realpath = os.path.realpath - - -def compat_print(s): - assert isinstance(s, compat_str) - print(s) - - -# Fix https://github.com/ytdl-org/youtube-dl/issues/4223 -# See http://bugs.python.org/issue9161 for what is broken -def workaround_optparse_bug9161(): - op = optparse.OptionParser() - og = optparse.OptionGroup(op, 'foo') - try: - og.add_option('-t') - except TypeError: - real_add_option = optparse.OptionGroup.add_option - - def _compat_add_option(self, *args, **kwargs): - enc = lambda v: ( - v.encode('ascii', 'replace') if isinstance(v, compat_str) - else v) - bargs = [enc(a) for a in args] - bkwargs = dict( - (k, enc(v)) for k, v in kwargs.items()) - return real_add_option(self, *bargs, **bkwargs) - optparse.OptionGroup.add_option = _compat_add_option - - -try: - compat_Pattern = re.Pattern -except AttributeError: - compat_Pattern = type(re.compile('')) - - -try: - compat_Match = re.Match -except AttributeError: - compat_Match = type(re.compile('').match('')) - - -try: - compat_asyncio_run = asyncio.run # >= 3.7 -except AttributeError: - def compat_asyncio_run(coro): - try: - loop = asyncio.get_event_loop() - except RuntimeError: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - loop.run_until_complete(coro) - - asyncio.run = compat_asyncio_run - - -# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl -# See https://github.com/yt-dlp/yt-dlp/issues/792 -# https://docs.python.org/3/library/os.path.html#os.path.expanduser -if compat_os_name in ('nt', 'ce') and 'HOME' in os.environ: - _userhome = os.environ['HOME'] - - def compat_expanduser(path): - if not path.startswith('~'): - return path - i = path.replace('\\', '/', 1).find('/') # ~user - if i < 0: - i = len(path) - userhome = os.path.join(os.path.dirname(_userhome), path[1:i]) if i > 1 else _userhome - return userhome + path[i:] -else: - compat_expanduser = os.path.expanduser - - -try: - from Cryptodome.Cipher import AES as compat_pycrypto_AES -except ImportError: - try: - from Crypto.Cipher import AES as 
compat_pycrypto_AES - except ImportError: - compat_pycrypto_AES = None - - -WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None - - -def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075 - if compat_os_name != 'nt': - return - global WINDOWS_VT_MODE - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - try: - subprocess.Popen('', shell=True, startupinfo=startupinfo) - WINDOWS_VT_MODE = True - except Exception: - pass - - -# Deprecated - -compat_basestring = str -compat_chr = chr -compat_filter = filter -compat_input = input -compat_integer_types = (int, ) -compat_kwargs = lambda kwargs: kwargs -compat_map = map -compat_numeric_types = (int, float, complex) -compat_str = str -compat_xpath = lambda xpath: xpath -compat_zip = zip - -compat_collections_abc = collections.abc -compat_HTMLParser = html.parser.HTMLParser -compat_HTTPError = urllib.error.HTTPError -compat_Struct = struct.Struct -compat_b64decode = base64.b64decode -compat_cookiejar = http.cookiejar -compat_cookiejar_Cookie = compat_cookiejar.Cookie -compat_cookies = http.cookies -compat_cookies_SimpleCookie = compat_cookies.SimpleCookie -compat_etree_Element = etree.Element -compat_etree_register_namespace = etree.register_namespace -compat_get_terminal_size = shutil.get_terminal_size -compat_getenv = os.getenv -compat_getpass = getpass.getpass -compat_html_entities = html.entities -compat_html_entities_html5 = compat_html_entities.html5 -compat_http_client = http.client -compat_http_server = http.server -compat_itertools_count = itertools.count -compat_parse_qs = urllib.parse.parse_qs -compat_shlex_split = shlex.split -compat_socket_create_connection = socket.create_connection -compat_struct_pack = struct.pack -compat_struct_unpack = struct.unpack -compat_subprocess_get_DEVNULL = lambda: DEVNULL -compat_tokenize_tokenize = tokenize.tokenize -compat_urllib_error = urllib.error -compat_urllib_parse = urllib.parse -compat_urllib_parse_quote = urllib.parse.quote -compat_urllib_parse_quote_plus = urllib.parse.quote_plus -compat_urllib_parse_unquote = urllib.parse.unquote -compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus -compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes -compat_urllib_parse_urlencode = urllib.parse.urlencode -compat_urllib_parse_urlparse = urllib.parse.urlparse -compat_urllib_parse_urlunparse = urllib.parse.urlunparse -compat_urllib_request = urllib.request -compat_urllib_request_DataHandler = urllib.request.DataHandler -compat_urllib_response = urllib.response -compat_urlparse = urllib.parse -compat_urlretrieve = urllib.request.urlretrieve -compat_xml_parse_error = etree.ParseError - - -# Set public objects - -__all__ = [ - 'WINDOWS_VT_MODE', - 'compat_HTMLParseError', - 'compat_HTMLParser', - 'compat_HTTPError', - 'compat_Match', - 'compat_Pattern', - 'compat_Struct', - 'compat_asyncio_run', - 'compat_b64decode', - 'compat_basestring', - 'compat_chr', - 'compat_collections_abc', - 'compat_cookiejar', - 'compat_cookiejar_Cookie', - 'compat_cookies', - 'compat_cookies_SimpleCookie', - 'compat_ctypes_WINFUNCTYPE', - 'compat_etree_Element', - 'compat_etree_fromstring', - 'compat_etree_register_namespace', - 'compat_expanduser', - 'compat_filter', - 'compat_get_terminal_size', - 'compat_getenv', - 'compat_getpass', - 'compat_html_entities', - 'compat_html_entities_html5', - 'compat_http_client', - 'compat_http_server', - 'compat_input', - 'compat_integer_types', - 'compat_itertools_count', - 
'compat_kwargs', - 'compat_map', - 'compat_numeric_types', - 'compat_ord', - 'compat_os_name', - 'compat_parse_qs', - 'compat_print', - 'compat_pycrypto_AES', - 'compat_realpath', - 'compat_setenv', - 'compat_shlex_quote', - 'compat_shlex_split', - 'compat_socket_create_connection', - 'compat_str', - 'compat_struct_pack', - 'compat_struct_unpack', - 'compat_subprocess_get_DEVNULL', - 'compat_tokenize_tokenize', - 'compat_urllib_error', - 'compat_urllib_parse', - 'compat_urllib_parse_quote', - 'compat_urllib_parse_quote_plus', - 'compat_urllib_parse_unquote', - 'compat_urllib_parse_unquote_plus', - 'compat_urllib_parse_unquote_to_bytes', - 'compat_urllib_parse_urlencode', - 'compat_urllib_parse_urlparse', - 'compat_urllib_parse_urlunparse', - 'compat_urllib_request', - 'compat_urllib_request_DataHandler', - 'compat_urllib_response', - 'compat_urlparse', - 'compat_urlretrieve', - 'compat_xml_parse_error', - 'compat_xpath', - 'compat_zip', - 'windows_enable_vt_mode', - 'workaround_optparse_bug9161', -] diff --git a/plugins/youtube_download/yt_dlp/compat/__init__.py b/plugins/youtube_download/yt_dlp/compat/__init__.py new file mode 100644 index 0000000..832a913 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/compat/__init__.py @@ -0,0 +1,79 @@ +import os +import sys +import xml.etree.ElementTree as etree + +from .compat_utils import passthrough_module + +passthrough_module(__name__, '._deprecated') +del passthrough_module + + +# HTMLParseError has been deprecated in Python 3.3 and removed in +# Python 3.5. Introducing dummy exception for Python >3.5 for compatible +# and uniform cross-version exception handling +class compat_HTMLParseError(ValueError): + pass + + +class _TreeBuilder(etree.TreeBuilder): + def doctype(self, name, pubid, system): + pass + + +def compat_etree_fromstring(text): + return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) + + +compat_os_name = os._name if os.name == 'java' else os.name + + +if compat_os_name == 'nt': + def compat_shlex_quote(s): + import re + return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') +else: + from shlex import quote as compat_shlex_quote # noqa: F401 + + +def compat_ord(c): + return c if isinstance(c, int) else ord(c) + + +if compat_os_name == 'nt' and sys.version_info < (3, 8): + # os.path.realpath on Windows does not follow symbolic links + # prior to Python 3.8 (see https://bugs.python.org/issue9949) + def compat_realpath(path): + while os.path.islink(path): + path = os.path.abspath(os.readlink(path)) + return os.path.realpath(path) +else: + compat_realpath = os.path.realpath + + +# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl +# See https://github.com/yt-dlp/yt-dlp/issues/792 +# https://docs.python.org/3/library/os.path.html#os.path.expanduser +if compat_os_name in ('nt', 'ce'): + def compat_expanduser(path): + HOME = os.environ.get('HOME') + if not HOME: + return os.path.expanduser(path) + elif not path.startswith('~'): + return path + i = path.replace('\\', '/', 1).find('/') # ~user + if i < 0: + i = len(path) + userhome = os.path.join(os.path.dirname(HOME), path[1:i]) if i > 1 else HOME + return userhome + path[i:] +else: + compat_expanduser = os.path.expanduser + + +def urllib_req_to_req(urllib_request): + """Convert urllib Request to a networking Request""" + from ..networking import Request + from ..utils.networking import HTTPHeaderDict + return Request( + urllib_request.get_full_url(), data=urllib_request.data, 
method=urllib_request.get_method(), + headers=HTTPHeaderDict(urllib_request.headers, urllib_request.unredirected_hdrs), + extensions={'timeout': urllib_request.timeout} if hasattr(urllib_request, 'timeout') else None) diff --git a/plugins/youtube_download/yt_dlp/compat/_deprecated.py b/plugins/youtube_download/yt_dlp/compat/_deprecated.py new file mode 100644 index 0000000..607bae9 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/compat/_deprecated.py @@ -0,0 +1,23 @@ +"""Deprecated - New code should avoid these""" +import warnings + +from .compat_utils import passthrough_module + +# XXX: Implement this the same way as other DeprecationWarnings without circular import +passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn( + DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6)) +del passthrough_module + +import base64 +import urllib.error +import urllib.parse + +compat_str = str + +compat_b64decode = base64.b64decode + +compat_urlparse = urllib.parse +compat_parse_qs = urllib.parse.parse_qs +compat_urllib_parse_unquote = urllib.parse.unquote +compat_urllib_parse_urlencode = urllib.parse.urlencode +compat_urllib_parse_urlparse = urllib.parse.urlparse diff --git a/plugins/youtube_download/yt_dlp/compat/_legacy.py b/plugins/youtube_download/yt_dlp/compat/_legacy.py new file mode 100644 index 0000000..90ccf0f --- /dev/null +++ b/plugins/youtube_download/yt_dlp/compat/_legacy.py @@ -0,0 +1,108 @@ +""" Do not use! """ + +import base64 +import collections +import ctypes +import getpass +import html.entities +import html.parser +import http.client +import http.cookiejar +import http.cookies +import http.server +import itertools +import os +import shlex +import shutil +import socket +import struct +import subprocess +import tokenize +import urllib.error +import urllib.parse +import urllib.request +import xml.etree.ElementTree as etree + +# isort: split +import asyncio # noqa: F401 +import re # noqa: F401 +from asyncio import run as compat_asyncio_run # noqa: F401 +from re import Pattern as compat_Pattern # noqa: F401 +from re import match as compat_Match # noqa: F401 + +from . 
import compat_expanduser, compat_HTMLParseError, compat_realpath +from .compat_utils import passthrough_module +from ..dependencies import brotli as compat_brotli # noqa: F401 +from ..dependencies import websockets as compat_websockets # noqa: F401 +from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 + +passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) + + +# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE +# will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines +def compat_ctypes_WINFUNCTYPE(*args, **kwargs): + return ctypes.WINFUNCTYPE(*args, **kwargs) + + +def compat_setenv(key, value, env=os.environ): + env[key] = value + + +compat_base64_b64decode = base64.b64decode +compat_basestring = str +compat_casefold = str.casefold +compat_chr = chr +compat_collections_abc = collections.abc +compat_cookiejar = compat_http_cookiejar = http.cookiejar +compat_cookiejar_Cookie = compat_http_cookiejar_Cookie = http.cookiejar.Cookie +compat_cookies = compat_http_cookies = http.cookies +compat_cookies_SimpleCookie = compat_http_cookies_SimpleCookie = http.cookies.SimpleCookie +compat_etree_Element = compat_xml_etree_ElementTree_Element = etree.Element +compat_etree_register_namespace = compat_xml_etree_register_namespace = etree.register_namespace +compat_filter = filter +compat_get_terminal_size = shutil.get_terminal_size +compat_getenv = os.getenv +compat_getpass = compat_getpass_getpass = getpass.getpass +compat_html_entities = html.entities +compat_html_entities_html5 = html.entities.html5 +compat_html_parser_HTMLParseError = compat_HTMLParseError +compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser +compat_http_client = http.client +compat_http_server = http.server +compat_HTTPError = urllib.error.HTTPError +compat_input = input +compat_integer_types = (int, ) +compat_itertools_count = itertools.count +compat_kwargs = lambda kwargs: kwargs +compat_map = map +compat_numeric_types = (int, float, complex) +compat_os_path_expanduser = compat_expanduser +compat_os_path_realpath = compat_realpath +compat_print = print +compat_shlex_split = shlex.split +compat_socket_create_connection = socket.create_connection +compat_Struct = struct.Struct +compat_struct_pack = struct.pack +compat_struct_unpack = struct.unpack +compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL +compat_tokenize_tokenize = tokenize.tokenize +compat_urllib_error = urllib.error +compat_urllib_HTTPError = urllib.error.HTTPError +compat_urllib_parse = urllib.parse +compat_urllib_parse_parse_qs = urllib.parse.parse_qs +compat_urllib_parse_quote = urllib.parse.quote +compat_urllib_parse_quote_plus = urllib.parse.quote_plus +compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus +compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes +compat_urllib_parse_urlunparse = urllib.parse.urlunparse +compat_urllib_request = urllib.request +compat_urllib_request_DataHandler = urllib.request.DataHandler +compat_urllib_response = urllib.response +compat_urlretrieve = compat_urllib_request_urlretrieve = urllib.request.urlretrieve +compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseError +compat_xpath = lambda xpath: xpath +compat_zip = zip +workaround_optparse_bug9161 = lambda: None + +legacy = [] diff --git a/plugins/youtube_download/yt_dlp/compat/compat_utils.py b/plugins/youtube_download/yt_dlp/compat/compat_utils.py new file mode 100644 index 0000000..3ca46d2 --- /dev/null +++ 
b/plugins/youtube_download/yt_dlp/compat/compat_utils.py @@ -0,0 +1,83 @@ +import collections +import contextlib +import functools +import importlib +import sys +import types + +_NO_ATTRIBUTE = object() + +_Package = collections.namedtuple('Package', ('name', 'version')) + + +def get_package_info(module): + return _Package( + name=getattr(module, '_yt_dlp__identifier', module.__name__), + version=str(next(filter(None, ( + getattr(module, attr, None) + for attr in ('__version__', 'version_string', 'version') + )), None))) + + +def _is_package(module): + return '__path__' in vars(module) + + +def _is_dunder(name): + return name.startswith('__') and name.endswith('__') + + +class EnhancedModule(types.ModuleType): + def __bool__(self): + return vars(self).get('__bool__', lambda: True)() + + def __getattribute__(self, attr): + try: + ret = super().__getattribute__(attr) + except AttributeError: + if _is_dunder(attr): + raise + getter = getattr(self, '__getattr__', None) + if not getter: + raise + ret = getter(attr) + return ret.fget() if isinstance(ret, property) else ret + + +def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=lambda _: None): + """Passthrough parent module into a child module, creating the parent if necessary""" + def __getattr__(attr): + if _is_package(parent): + with contextlib.suppress(ModuleNotFoundError): + return importlib.import_module(f'.{attr}', parent.__name__) + + ret = from_child(attr) + if ret is _NO_ATTRIBUTE: + raise AttributeError(f'module {parent.__name__} has no attribute {attr}') + callback(attr) + return ret + + @functools.lru_cache(maxsize=None) + def from_child(attr): + nonlocal child + if attr not in allowed_attributes: + if ... not in allowed_attributes or _is_dunder(attr): + return _NO_ATTRIBUTE + + if isinstance(child, str): + child = importlib.import_module(child, parent.__name__) + + if _is_package(child): + with contextlib.suppress(ImportError): + return passthrough_module(f'{parent.__name__}.{attr}', + importlib.import_module(f'.{attr}', child.__name__)) + + with contextlib.suppress(AttributeError): + return getattr(child, attr) + + return _NO_ATTRIBUTE + + parent = sys.modules.get(parent, types.ModuleType(parent)) + parent.__class__ = EnhancedModule + parent.__getattr__ = __getattr__ + return parent diff --git a/plugins/youtube_download/yt_dlp/compat/functools.py b/plugins/youtube_download/yt_dlp/compat/functools.py new file mode 100644 index 0000000..ec003ea --- /dev/null +++ b/plugins/youtube_download/yt_dlp/compat/functools.py @@ -0,0 +1,26 @@ +# flake8: noqa: F405 +from functools import * # noqa: F403 + +from .compat_utils import passthrough_module + +passthrough_module(__name__, 'functools') +del passthrough_module + +try: + cache # >= 3.9 +except NameError: + cache = lru_cache(maxsize=None) + +try: + cached_property # >= 3.8 +except NameError: + class cached_property: + def __init__(self, func): + update_wrapper(self, func) + self.func = func + + def __get__(self, instance, _): + if instance is None: + return self + setattr(instance, self.func.__name__, self.func(instance)) + return getattr(instance, self.func.__name__) diff --git a/plugins/youtube_download/yt_dlp/compat/imghdr.py b/plugins/youtube_download/yt_dlp/compat/imghdr.py new file mode 100644 index 0000000..5d64ab0 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/compat/imghdr.py @@ -0,0 +1,16 @@ +tests = { + 'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP', + 'png': lambda h: h[:8] == b'\211PNG\r\n\032\n', + 'jpeg': lambda h: h[6:10] in 
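Among the new compat shims, `functools.cached_property` is backported for Python < 3.8 by caching through an instance attribute that shadows the (non-data) descriptor after the first access; on newer Pythons the star-import supplies the stdlib version directly. A quick check of that behavior (import path per this vendored layout):

```python
from yt_dlp.compat.functools import cached_property

class Squares:
    def __init__(self, n):
        self.n = n

    @cached_property
    def value(self):
        print('computing...')
        return self.n ** 2

s = Squares(9)
print(s.value)  # computing... then 81 (first access runs the function)
print(s.value)  # 81 (instance attribute now shadows the descriptor)
```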
(b'JFIF', b'Exif'), + 'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'), +} + + +def what(file=None, h=None): + """Detect format of image (Currently supports jpeg, png, webp, gif only) + Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py + """ + if h is None: + with open(file, 'rb') as f: + h = f.read(12) + return next((type_ for type_, test in tests.items() if test(h)), None) diff --git a/plugins/youtube_download/yt_dlp/compat/shutil.py b/plugins/youtube_download/yt_dlp/compat/shutil.py new file mode 100644 index 0000000..23239d5 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/compat/shutil.py @@ -0,0 +1,30 @@ +# flake8: noqa: F405 +from shutil import * # noqa: F403 + +from .compat_utils import passthrough_module + +passthrough_module(__name__, 'shutil') +del passthrough_module + + +import sys + +if sys.platform.startswith('freebsd'): + import errno + import os + import shutil + + # Workaround for PermissionError when using restricted ACL mode on FreeBSD + def copy2(src, dst, *args, **kwargs): + if os.path.isdir(dst): + dst = os.path.join(dst, os.path.basename(src)) + shutil.copyfile(src, dst, *args, **kwargs) + try: + shutil.copystat(src, dst, *args, **kwargs) + except PermissionError as e: + if e.errno != getattr(errno, 'EPERM', None): + raise + return dst + + def move(*args, copy_function=copy2, **kwargs): + return shutil.move(*args, copy_function=copy_function, **kwargs) diff --git a/plugins/youtube_download/yt_dlp/compat/types.py b/plugins/youtube_download/yt_dlp/compat/types.py new file mode 100644 index 0000000..4aa3b0e --- /dev/null +++ b/plugins/youtube_download/yt_dlp/compat/types.py @@ -0,0 +1,13 @@ +# flake8: noqa: F405 +from types import * # noqa: F403 + +from .compat_utils import passthrough_module + +passthrough_module(__name__, 'types') +del passthrough_module + +try: + # NB: pypy has builtin NoneType, so checking NameError won't work + from types import NoneType # >= 3.10 +except ImportError: + NoneType = type(None) diff --git a/plugins/youtube_download/yt_dlp/compat/urllib/__init__.py b/plugins/youtube_download/yt_dlp/compat/urllib/__init__.py new file mode 100644 index 0000000..b27cc61 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/compat/urllib/__init__.py @@ -0,0 +1,10 @@ +# flake8: noqa: F405 +from urllib import * # noqa: F403 + +del request +from . import request # noqa: F401 + +from ..compat_utils import passthrough_module + +passthrough_module(__name__, 'urllib') +del passthrough_module diff --git a/plugins/youtube_download/yt_dlp/compat/urllib/request.py b/plugins/youtube_download/yt_dlp/compat/urllib/request.py new file mode 100644 index 0000000..ff63b2f --- /dev/null +++ b/plugins/youtube_download/yt_dlp/compat/urllib/request.py @@ -0,0 +1,40 @@ +# flake8: noqa: F405 +from urllib.request import * # noqa: F403 + +from ..compat_utils import passthrough_module + +passthrough_module(__name__, 'urllib.request') +del passthrough_module + + +from .. import compat_os_name + +if compat_os_name == 'nt': + # On older python versions, proxies are extracted from Windows registry erroneously. [1] + # If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2] + # It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade + # it to http on these older python versions to avoid issues + # This also applies for ftp proxy type, as ftp:// proxy scheme is not supported. 
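The registry workaround that follows downgrades a spurious `https://` (or `ftp://`) prefix on proxies read from the Windows registry to `http://` on affected Python versions. The string surgery it performs, shown in isolation (hypothetical proxy value):

```python
# Assumption: the registry returned 'https://127.0.0.1:8888' for the https
# proxy on an affected Python version
proxies = {'https': 'https://127.0.0.1:8888'}
for scheme in ('https', 'ftp'):
    if scheme in proxies and proxies[scheme].startswith(f'{scheme}://'):
        proxies[scheme] = 'http' + proxies[scheme][len(scheme):]
print(proxies)  # {'https': 'http://127.0.0.1:8888'}
```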
+ # 1: https://github.com/python/cpython/issues/86793 + # 2: https://github.com/python/cpython/blob/51f1ae5ceb0673316c4e4b0175384e892e33cc6e/Lib/urllib/request.py#L2683-L2698 + import sys + from urllib.request import getproxies_environment, getproxies_registry + + def getproxies_registry_patched(): + proxies = getproxies_registry() + if ( + sys.version_info >= (3, 10, 5) # https://docs.python.org/3.10/whatsnew/changelog.html#python-3-10-5-final + or (3, 9, 13) <= sys.version_info < (3, 10) # https://docs.python.org/3.9/whatsnew/changelog.html#python-3-9-13-final + ): + return proxies + + for scheme in ('https', 'ftp'): + if scheme in proxies and proxies[scheme].startswith(f'{scheme}://'): + proxies[scheme] = 'http' + proxies[scheme][len(scheme):] + + return proxies + + def getproxies(): + return getproxies_environment() or getproxies_registry_patched() + +del compat_os_name diff --git a/plugins/youtube_download/yt_dlp/cookies.py b/plugins/youtube_download/yt_dlp/cookies.py index fc033a8..a71fbc2 100644 --- a/plugins/youtube_download/yt_dlp/cookies.py +++ b/plugins/youtube_download/yt_dlp/cookies.py @@ -1,12 +1,19 @@ +import base64 +import collections import contextlib -import ctypes +import http.cookiejar +import http.cookies +import io import json import os +import re import shutil import struct import subprocess import sys import tempfile +import time +import urllib.request from datetime import datetime, timedelta, timezone from enum import Enum, auto from hashlib import pbkdf2_hmac @@ -16,93 +23,100 @@ from .aes import ( aes_gcm_decrypt_and_verify_bytes, unpad_pkcs7, ) -from .compat import ( - compat_b64decode, - compat_cookiejar_Cookie, +from .compat import functools +from .dependencies import ( + _SECRETSTORAGE_UNAVAILABLE_REASON, + secretstorage, + sqlite3, ) +from .minicurses import MultilinePrinter, QuietMultilinePrinter from .utils import ( - expand_path, Popen, - YoutubeDLCookieJar, + error_to_str, + expand_path, + is_path_like, + sanitize_url, + str_or_none, + try_call, + write_string, ) - -try: - import sqlite3 - SQLITE_AVAILABLE = True -except ImportError: - # although sqlite3 is part of the standard library, it is possible to compile python without - # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544 - SQLITE_AVAILABLE = False - - -try: - import secretstorage - SECRETSTORAGE_AVAILABLE = True -except ImportError: - SECRETSTORAGE_AVAILABLE = False - SECRETSTORAGE_UNAVAILABLE_REASON = ( - 'as the `secretstorage` module is not installed. ' - 'Please install by running `python3 -m pip install secretstorage`.') -except Exception as _err: - SECRETSTORAGE_AVAILABLE = False - SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. 
{_err}' - +from .utils._utils import _YDLLogger +from .utils.networking import normalize_url CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'} SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'} -class YDLLogger: - def __init__(self, ydl=None): - self._ydl = ydl +class YDLLogger(_YDLLogger): + def warning(self, message, only_once=False): # compat + return super().warning(message, once=only_once) - def debug(self, message): - if self._ydl: - self._ydl.write_debug(message) + class ProgressBar(MultilinePrinter): + _DELAY, _timer = 0.1, 0 - def info(self, message): - if self._ydl: - self._ydl.to_screen(f'[Cookies] {message}') + def print(self, message): + if time.time() - self._timer > self._DELAY: + self.print_at_line(f'[Cookies] {message}', 0) + self._timer = time.time() - def warning(self, message, only_once=False): - if self._ydl: - self._ydl.report_warning(message, only_once) + def progress_bar(self): + """Return a context manager with a print method. (Optional)""" + # Do not print to files/pipes, loggers, or when --no-progress is used + if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'): + return + file = self._ydl._out_files.error + try: + if not file.isatty(): + return + except BaseException: + return + return self.ProgressBar(file, preserve_output=False) - def error(self, message): - if self._ydl: - self._ydl.report_error(message) + +def _create_progress_bar(logger): + if hasattr(logger, 'progress_bar'): + printer = logger.progress_bar() + if printer: + return printer + printer = QuietMultilinePrinter() + printer.print = lambda _: None + return printer def load_cookies(cookie_file, browser_specification, ydl): cookie_jars = [] if browser_specification is not None: - browser_name, profile, keyring = _parse_browser_specification(*browser_specification) - cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring)) + browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification) + cookie_jars.append( + extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)) if cookie_file is not None: - cookie_file = expand_path(cookie_file) + is_filename = is_path_like(cookie_file) + if is_filename: + cookie_file = expand_path(cookie_file) + jar = YoutubeDLCookieJar(cookie_file) - if os.access(cookie_file, os.R_OK): - jar.load(ignore_discard=True, ignore_expires=True) + if not is_filename or os.access(cookie_file, os.R_OK): + jar.load() cookie_jars.append(jar) return _merge_cookie_jars(cookie_jars) -def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None): +def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None): if browser_name == 'firefox': - return _extract_firefox_cookies(profile, logger) + return _extract_firefox_cookies(profile, container, logger) elif browser_name == 'safari': return _extract_safari_cookies(profile, logger) elif browser_name in CHROMIUM_BASED_BROWSERS: return _extract_chrome_cookies(browser_name, profile, keyring, logger) else: - raise ValueError('unknown browser: {}'.format(browser_name)) + raise ValueError(f'unknown browser: {browser_name}') -def _extract_firefox_cookies(profile, logger): +def _extract_firefox_cookies(profile, container, logger): logger.info('Extracting cookies from firefox') - if not SQLITE_AVAILABLE: + if not sqlite3: logger.warning('Cannot 
extract cookies from firefox without sqlite3 support. ' 'Please use a python interpreter compiled with sqlite3 support') return YoutubeDLCookieJar() @@ -114,25 +128,54 @@ def _extract_firefox_cookies(profile, logger): else: search_root = os.path.join(_firefox_browser_dir(), profile) - cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite') + cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger) if cookie_database_path is None: - raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root)) - logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path)) + raise FileNotFoundError(f'could not find firefox cookies database in {search_root}') + logger.debug(f'Extracting cookies from: "{cookie_database_path}"') + + container_id = None + if container not in (None, 'none'): + containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json') + if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK): + raise FileNotFoundError(f'could not read containers.json in {search_root}') + with open(containers_path, encoding='utf8') as containers: + identities = json.load(containers).get('identities', []) + container_id = next((context.get('userContextId') for context in identities if container in ( + context.get('name'), + try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()) + )), None) + if not isinstance(container_id, int): + raise ValueError(f'could not find firefox container "{container}" in containers.json') with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: cursor = None try: cursor = _open_database_copy(cookie_database_path, tmpdir) - cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies') + if isinstance(container_id, int): + logger.debug( + f'Only loading cookies from firefox container "{container}", ID {container_id}') + cursor.execute( + 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? 
OR originAttributes LIKE ?', + (f'%userContextId={container_id}', f'%userContextId={container_id}&%')) + elif container == 'none': + logger.debug('Only loading cookies not belonging to any container') + cursor.execute( + 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,"userContextId=")') + else: + cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies') jar = YoutubeDLCookieJar() - for host, name, value, path, expiry, is_secure in cursor.fetchall(): - cookie = compat_cookiejar_Cookie( - version=0, name=name, value=value, port=None, port_specified=False, - domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'), - path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False, - comment=None, comment_url=None, rest={}) - jar.set_cookie(cookie) - logger.info('Extracted {} cookies from firefox'.format(len(jar))) + with _create_progress_bar(logger) as progress_bar: + table = cursor.fetchall() + total_cookie_count = len(table) + for i, (host, name, value, path, expiry, is_secure) in enumerate(table): + progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}') + cookie = http.cookiejar.Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False, + comment=None, comment_url=None, rest={}) + jar.set_cookie(cookie) + logger.info(f'Extracted {len(jar)} cookies from firefox') return jar finally: if cursor is not None: @@ -140,39 +183,25 @@ def _extract_firefox_cookies(profile, logger): def _firefox_browser_dir(): - if sys.platform in ('linux', 'linux2'): - return os.path.expanduser('~/.mozilla/firefox') - elif sys.platform == 'win32': - return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles') + if sys.platform in ('cygwin', 'win32'): + return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles') elif sys.platform == 'darwin': return os.path.expanduser('~/Library/Application Support/Firefox') - else: - raise ValueError('unsupported platform: {}'.format(sys.platform)) + return os.path.expanduser('~/.mozilla/firefox') def _get_chromium_based_browser_settings(browser_name): # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md - if sys.platform in ('linux', 'linux2'): - config = _config_home() - browser_dir = { - 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'), - 'chrome': os.path.join(config, 'google-chrome'), - 'chromium': os.path.join(config, 'chromium'), - 'edge': os.path.join(config, 'microsoft-edge'), - 'opera': os.path.join(config, 'opera'), - 'vivaldi': os.path.join(config, 'vivaldi'), - }[browser_name] - - elif sys.platform == 'win32': + if sys.platform in ('cygwin', 'win32'): appdata_local = os.path.expandvars('%LOCALAPPDATA%') appdata_roaming = os.path.expandvars('%APPDATA%') browser_dir = { - 'brave': os.path.join(appdata_local, r'BraveSoftware\Brave-Browser\User Data'), - 'chrome': os.path.join(appdata_local, r'Google\Chrome\User Data'), - 'chromium': os.path.join(appdata_local, r'Chromium\User Data'), - 'edge': os.path.join(appdata_local, r'Microsoft\Edge\User Data'), - 'opera': os.path.join(appdata_roaming, r'Opera Software\Opera Stable'), - 'vivaldi': os.path.join(appdata_local, r'Vivaldi\User Data'), + 'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'), + 'chrome': 
os.path.join(appdata_local, R'Google\Chrome\User Data'), + 'chromium': os.path.join(appdata_local, R'Chromium\User Data'), + 'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'), + 'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'), + 'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'), }[browser_name] elif sys.platform == 'darwin': @@ -187,7 +216,15 @@ def _get_chromium_based_browser_settings(browser_name): }[browser_name] else: - raise ValueError('unsupported platform: {}'.format(sys.platform)) + config = _config_home() + browser_dir = { + 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'), + 'chrome': os.path.join(config, 'google-chrome'), + 'chromium': os.path.join(config, 'chromium'), + 'edge': os.path.join(config, 'microsoft-edge'), + 'opera': os.path.join(config, 'opera'), + 'vivaldi': os.path.join(config, 'vivaldi'), + }[browser_name] # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE: # dbus-monitor "interface='org.kde.KWallet'" "type=method_return" @@ -210,11 +247,11 @@ def _get_chromium_based_browser_settings(browser_name): def _extract_chrome_cookies(browser_name, profile, keyring, logger): - logger.info('Extracting cookies from {}'.format(browser_name)) + logger.info(f'Extracting cookies from {browser_name}') - if not SQLITE_AVAILABLE: - logger.warning(('Cannot extract cookies from {} without sqlite3 support. ' - 'Please use a python interpreter compiled with sqlite3 support').format(browser_name)) + if not sqlite3: + logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. ' + 'Please use a python interpreter compiled with sqlite3 support') return YoutubeDLCookieJar() config = _get_chromium_based_browser_settings(browser_name) @@ -228,13 +265,13 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): if config['supports_profiles']: search_root = os.path.join(config['browser_dir'], profile) else: - logger.error('{} does not support profiles'.format(browser_name)) + logger.error(f'{browser_name} does not support profiles') search_root = config['browser_dir'] - cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies') + cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger) if cookie_database_path is None: - raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root)) - logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path)) + raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"') + logger.debug(f'Extracting cookies from: "{cookie_database_path}"') decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring) @@ -245,45 +282,55 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): cursor.connection.text_factory = bytes column_names = _get_column_names(cursor, 'cookies') secure_column = 'is_secure' if 'is_secure' in column_names else 'secure' - cursor.execute('SELECT host_key, name, value, encrypted_value, path, ' - 'expires_utc, {} FROM cookies'.format(secure_column)) + cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies') jar = YoutubeDLCookieJar() failed_cookies = 0 unencrypted_cookies = 0 - for host_key, name, value, encrypted_value, path, expires_utc, is_secure in cursor.fetchall(): - host_key = host_key.decode('utf-8') - name = name.decode('utf-8') - value = 
value.decode('utf-8') - path = path.decode('utf-8') - - if not value and encrypted_value: - value = decryptor.decrypt(encrypted_value) - if value is None: + with _create_progress_bar(logger) as progress_bar: + table = cursor.fetchall() + total_cookie_count = len(table) + for i, line in enumerate(table): + progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}') + is_encrypted, cookie = _process_chrome_cookie(decryptor, *line) + if not cookie: failed_cookies += 1 continue - else: - unencrypted_cookies += 1 - - cookie = compat_cookiejar_Cookie( - version=0, name=name, value=value, port=None, port_specified=False, - domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'), - path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False, - comment=None, comment_url=None, rest={}) - jar.set_cookie(cookie) + elif not is_encrypted: + unencrypted_cookies += 1 + jar.set_cookie(cookie) if failed_cookies > 0: - failed_message = ' ({} could not be decrypted)'.format(failed_cookies) + failed_message = f' ({failed_cookies} could not be decrypted)' else: failed_message = '' - logger.info('Extracted {} cookies from {}{}'.format(len(jar), browser_name, failed_message)) - counts = decryptor.cookie_counts.copy() + logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}') + counts = decryptor._cookie_counts.copy() counts['unencrypted'] = unencrypted_cookies - logger.debug('cookie version breakdown: {}'.format(counts)) + logger.debug(f'cookie version breakdown: {counts}') return jar finally: if cursor is not None: cursor.connection.close() +def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure): + host_key = host_key.decode() + name = name.decode() + value = value.decode() + path = path.decode() + is_encrypted = not value and encrypted_value + + if is_encrypted: + value = decryptor.decrypt(encrypted_value) + if value is None: + return is_encrypted, None + + return is_encrypted, http.cookiejar.Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False, + comment=None, comment_url=None, rest={}) + + class ChromeCookieDecryptor: """ Overview: @@ -291,7 +338,9 @@ class ChromeCookieDecryptor: Linux: - cookies are either v10 or v11 - v10: AES-CBC encrypted with a fixed key + - also attempts empty password if decryption fails - v11: AES-CBC encrypted with an OS protected key (keyring) + - also attempts empty password if decryption fails - v11 keys can be stored in various places depending on the activate desktop environment [2] Mac: @@ -306,64 +355,71 @@ class ChromeCookieDecryptor: Sources: - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/ - - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc + - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc - KeyStorageLinux::CreateService """ - def decrypt(self, encrypted_value): - raise NotImplementedError + _cookie_counts = {} - @property - def cookie_counts(self): - raise NotImplementedError + def decrypt(self, encrypted_value): + raise NotImplementedError('Must be implemented by sub classes') def get_cookie_decryptor(browser_root, 
browser_keyring_name, logger, *, keyring=None): - if sys.platform in ('linux', 'linux2'): - return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring) - elif sys.platform == 'darwin': + if sys.platform == 'darwin': return MacChromeCookieDecryptor(browser_keyring_name, logger) - elif sys.platform == 'win32': + elif sys.platform in ('win32', 'cygwin'): return WindowsChromeCookieDecryptor(browser_root, logger) - else: - raise NotImplementedError('Chrome cookie decryption is not supported ' - 'on this platform: {}'.format(sys.platform)) + return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring) class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): def __init__(self, browser_keyring_name, logger, *, keyring=None): self._logger = logger self._v10_key = self.derive_key(b'peanuts') - password = _get_linux_keyring_password(browser_keyring_name, keyring, logger) - self._v11_key = None if password is None else self.derive_key(password) + self._empty_key = self.derive_key(b'') self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0} + self._browser_keyring_name = browser_keyring_name + self._keyring = keyring + + @functools.cached_property + def _v11_key(self): + password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger) + return None if password is None else self.derive_key(password) @staticmethod def derive_key(password): # values from - # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16) - @property - def cookie_counts(self): - return self._cookie_counts - def decrypt(self, encrypted_value): + """ + + following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt + with an empty password. 
The failure detection is not the same as what chromium uses so the + results won't be perfect + + References: + - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/ + - a bugfix to try an empty password as a fallback + """ version = encrypted_value[:3] ciphertext = encrypted_value[3:] if version == b'v10': self._cookie_counts['v10'] += 1 - return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger) + return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger) elif version == b'v11': self._cookie_counts['v11'] += 1 if self._v11_key is None: self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True) return None - return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger) + return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger) else: + self._logger.warning(f'unknown cookie version: "{version}"', only_once=True) self._cookie_counts['other'] += 1 return None @@ -378,13 +434,9 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor): @staticmethod def derive_key(password): # values from - # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16) - @property - def cookie_counts(self): - return self._cookie_counts - def decrypt(self, encrypted_value): version = encrypted_value[:3] ciphertext = encrypted_value[3:] @@ -395,12 +447,12 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor): self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) return None - return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger) + return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger) else: self._cookie_counts['other'] += 1 # other prefixes are considered 'old data' which were stored as plaintext - # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm return encrypted_value @@ -410,10 +462,6 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): self._v10_key = _get_windows_v10_key(browser_root, logger) self._cookie_counts = {'v10': 0, 'other': 0} - @property - def cookie_counts(self): - return self._cookie_counts - def decrypt(self, encrypted_value): version = encrypted_value[:3] ciphertext = encrypted_value[3:] @@ -424,7 +472,7 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) return None - # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc # kNonceLength nonce_length = 96 // 8 # boringssl @@ -441,26 +489,33 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): else: self._cookie_counts['other'] += 1 # any other prefix means the data is DPAPI encrypted - # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc - return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8') + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc + return 
_decrypt_windows_dpapi(encrypted_value, self._logger).decode() def _extract_safari_cookies(profile, logger): - if profile is not None: - logger.error('safari does not support profiles') if sys.platform != 'darwin': - raise ValueError('unsupported platform: {}'.format(sys.platform)) + raise ValueError(f'unsupported platform: {sys.platform}') - cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies') + if profile: + cookies_path = os.path.expanduser(profile) + if not os.path.isfile(cookies_path): + raise FileNotFoundError('custom safari cookies database not found') - if not os.path.isfile(cookies_path): - raise FileNotFoundError('could not find safari cookies database') + else: + cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies') + + if not os.path.isfile(cookies_path): + logger.debug('Trying secondary cookie location') + cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies') + if not os.path.isfile(cookies_path): + raise FileNotFoundError('could not find safari cookies database') with open(cookies_path, 'rb') as f: cookies_data = f.read() jar = parse_safari_cookies(cookies_data, logger=logger) - logger.info('Extracted {} cookies from safari'.format(len(jar))) + logger.info(f'Extracted {len(jar)} cookies from safari') return jar @@ -476,7 +531,7 @@ class DataParser: def read_bytes(self, num_bytes): if num_bytes < 0: - raise ParserError('invalid read of {} bytes'.format(num_bytes)) + raise ParserError(f'invalid read of {num_bytes} bytes') end = self.cursor + num_bytes if end > len(self._data): raise ParserError('reached end of input') @@ -487,7 +542,7 @@ class DataParser: def expect_bytes(self, expected_value, message): value = self.read_bytes(len(expected_value)) if value != expected_value: - raise ParserError('unexpected value: {} != {} ({})'.format(value, expected_value, message)) + raise ParserError(f'unexpected value: {value} != {expected_value} ({message})') def read_uint(self, big_endian=False): data_format = '>I' if big_endian else ' 0: - self._logger.debug('skipping {} bytes ({}): {}'.format( - num_bytes, description, self.read_bytes(num_bytes))) + self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}') elif num_bytes < 0: - raise ParserError('invalid skip of {} bytes'.format(num_bytes)) + raise ParserError(f'invalid skip of {num_bytes} bytes') def skip_to(self, offset, description='unknown'): self.skip(offset - self.cursor, description) @@ -538,15 +592,17 @@ def _parse_safari_cookies_page(data, jar, logger): number_of_cookies = p.read_uint() record_offsets = [p.read_uint() for _ in range(number_of_cookies)] if number_of_cookies == 0: - logger.debug('a cookies page of size {} has no cookies'.format(len(data))) + logger.debug(f'a cookies page of size {len(data)} has no cookies') return p.skip_to(record_offsets[0], 'unknown page header field') - for record_offset in record_offsets: - p.skip_to(record_offset, 'space between records') - record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger) - p.read_bytes(record_length) + with _create_progress_bar(logger) as progress_bar: + for i, record_offset in enumerate(record_offsets): + progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}') + p.skip_to(record_offset, 'space between records') + record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger) + p.read_bytes(record_length) p.skip_to_end('space in between pages') @@ -583,7 +639,7 @@ 
def _parse_safari_cookies_record(data, jar, logger): p.skip_to(record_size, 'space at the end of the record') - cookie = compat_cookiejar_Cookie( + cookie = http.cookiejar.Cookie( version=0, name=name, value=value, port=None, port_specified=False, domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'), path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False, @@ -616,19 +672,27 @@ class _LinuxDesktopEnvironment(Enum): """ OTHER = auto() CINNAMON = auto() + DEEPIN = auto() GNOME = auto() - KDE = auto() + KDE3 = auto() + KDE4 = auto() + KDE5 = auto() + KDE6 = auto() PANTHEON = auto() + UKUI = auto() UNITY = auto() XFCE = auto() + LXQT = auto() class _LinuxKeyring(Enum): """ - https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h + https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h SelectedLinuxBackend """ - KWALLET = auto() + KWALLET = auto() # KDE4 + KWALLET5 = auto() + KWALLET6 = auto() GNOMEKEYRING = auto() BASICTEXT = auto() @@ -636,7 +700,7 @@ class _LinuxKeyring(Enum): SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys() -def _get_linux_desktop_environment(env): +def _get_linux_desktop_environment(env, logger): """ https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc GetDesktopEnvironment @@ -651,51 +715,97 @@ def _get_linux_desktop_environment(env): return _LinuxDesktopEnvironment.GNOME else: return _LinuxDesktopEnvironment.UNITY + elif xdg_current_desktop == 'Deepin': + return _LinuxDesktopEnvironment.DEEPIN elif xdg_current_desktop == 'GNOME': return _LinuxDesktopEnvironment.GNOME elif xdg_current_desktop == 'X-Cinnamon': return _LinuxDesktopEnvironment.CINNAMON elif xdg_current_desktop == 'KDE': - return _LinuxDesktopEnvironment.KDE + kde_version = env.get('KDE_SESSION_VERSION', None) + if kde_version == '5': + return _LinuxDesktopEnvironment.KDE5 + elif kde_version == '6': + return _LinuxDesktopEnvironment.KDE6 + elif kde_version == '4': + return _LinuxDesktopEnvironment.KDE4 + else: + logger.info(f'unknown KDE version: "{kde_version}". 
Assuming KDE4') + return _LinuxDesktopEnvironment.KDE4 elif xdg_current_desktop == 'Pantheon': return _LinuxDesktopEnvironment.PANTHEON elif xdg_current_desktop == 'XFCE': return _LinuxDesktopEnvironment.XFCE + elif xdg_current_desktop == 'UKUI': + return _LinuxDesktopEnvironment.UKUI + elif xdg_current_desktop == 'LXQt': + return _LinuxDesktopEnvironment.LXQT + else: + logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"') + elif desktop_session is not None: - if desktop_session in ('mate', 'gnome'): + if desktop_session == 'deepin': + return _LinuxDesktopEnvironment.DEEPIN + elif desktop_session in ('mate', 'gnome'): return _LinuxDesktopEnvironment.GNOME - elif 'kde' in desktop_session: - return _LinuxDesktopEnvironment.KDE - elif 'xfce' in desktop_session: + elif desktop_session in ('kde4', 'kde-plasma'): + return _LinuxDesktopEnvironment.KDE4 + elif desktop_session == 'kde': + if 'KDE_SESSION_VERSION' in env: + return _LinuxDesktopEnvironment.KDE4 + else: + return _LinuxDesktopEnvironment.KDE3 + elif 'xfce' in desktop_session or desktop_session == 'xubuntu': return _LinuxDesktopEnvironment.XFCE + elif desktop_session == 'ukui': + return _LinuxDesktopEnvironment.UKUI + else: + logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"') + else: if 'GNOME_DESKTOP_SESSION_ID' in env: return _LinuxDesktopEnvironment.GNOME elif 'KDE_FULL_SESSION' in env: - return _LinuxDesktopEnvironment.KDE + if 'KDE_SESSION_VERSION' in env: + return _LinuxDesktopEnvironment.KDE4 + else: + return _LinuxDesktopEnvironment.KDE3 return _LinuxDesktopEnvironment.OTHER def _choose_linux_keyring(logger): """ - https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc - SelectBackend + SelectBackend in [1] + + There is currently support for forcing chromium to use BASIC_TEXT by creating a file called + `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1]) + does not appear to be called anywhere other than in tests, so the user would have to create this file manually + and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring. + + References: + - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc """ - desktop_environment = _get_linux_desktop_environment(os.environ) - logger.debug('detected desktop environment: {}'.format(desktop_environment.name)) - if desktop_environment == _LinuxDesktopEnvironment.KDE: + desktop_environment = _get_linux_desktop_environment(os.environ, logger) + logger.debug(f'detected desktop environment: {desktop_environment.name}') + if desktop_environment == _LinuxDesktopEnvironment.KDE4: linux_keyring = _LinuxKeyring.KWALLET - elif desktop_environment == _LinuxDesktopEnvironment.OTHER: + elif desktop_environment == _LinuxDesktopEnvironment.KDE5: + linux_keyring = _LinuxKeyring.KWALLET5 + elif desktop_environment == _LinuxDesktopEnvironment.KDE6: + linux_keyring = _LinuxKeyring.KWALLET6 + elif desktop_environment in ( + _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER + ): linux_keyring = _LinuxKeyring.BASICTEXT else: linux_keyring = _LinuxKeyring.GNOMEKEYRING return linux_keyring -def _get_kwallet_network_wallet(logger): +def _get_kwallet_network_wallet(keyring, logger): """ The name of the wallet used to store network passwords. 
- https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc + https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc KWalletDBus::NetworkWallet which does a dbus call to the following function: https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html @@ -703,28 +813,38 @@ def _get_kwallet_network_wallet(logger): """ default_wallet = 'kdewallet' try: - proc = Popen([ - 'dbus-send', '--session', '--print-reply=literal', - '--dest=org.kde.kwalletd5', - '/modules/kwalletd5', - 'org.kde.KWallet.networkWallet' - ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) + if keyring == _LinuxKeyring.KWALLET: + service_name = 'org.kde.kwalletd' + wallet_path = '/modules/kwalletd' + elif keyring == _LinuxKeyring.KWALLET5: + service_name = 'org.kde.kwalletd5' + wallet_path = '/modules/kwalletd5' + elif keyring == _LinuxKeyring.KWALLET6: + service_name = 'org.kde.kwalletd6' + wallet_path = '/modules/kwalletd6' + else: + raise ValueError(keyring) - stdout, stderr = proc.communicate_or_kill() - if proc.returncode != 0: + stdout, _, returncode = Popen.run([ + 'dbus-send', '--session', '--print-reply=literal', + f'--dest={service_name}', + wallet_path, + 'org.kde.KWallet.networkWallet' + ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) + + if returncode: logger.warning('failed to read NetworkWallet') return default_wallet else: - network_wallet = stdout.decode('utf-8').strip() - logger.debug('NetworkWallet = "{}"'.format(network_wallet)) - return network_wallet - except BaseException as e: - logger.warning('exception while obtaining NetworkWallet: {}'.format(e)) + logger.debug(f'NetworkWallet = "{stdout.strip()}"') + return stdout.strip() + except Exception as e: + logger.warning(f'exception while obtaining NetworkWallet: {e}') return default_wallet -def _get_kwallet_password(browser_keyring_name, logger): - logger.debug('using kwallet-query to obtain password from kwallet') +def _get_kwallet_password(browser_keyring_name, keyring, logger): + logger.debug(f'using kwallet-query to obtain password from {keyring.name}') if shutil.which('kwallet-query') is None: logger.error('kwallet-query command not found. KWallet and kwallet-query ' @@ -732,20 +852,19 @@ def _get_kwallet_password(browser_keyring_name, logger): 'included in the kwallet package for your distribution') return b'' - network_wallet = _get_kwallet_network_wallet(logger) + network_wallet = _get_kwallet_network_wallet(keyring, logger) try: - proc = Popen([ + stdout, _, returncode = Popen.run([ 'kwallet-query', - '--read-password', '{} Safe Storage'.format(browser_keyring_name), - '--folder', '{} Keys'.format(browser_keyring_name), + '--read-password', f'{browser_keyring_name} Safe Storage', + '--folder', f'{browser_keyring_name} Keys', network_wallet ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) - stdout, stderr = proc.communicate_or_kill() - if proc.returncode != 0: - logger.error('kwallet-query failed with return code {}. Please consult ' - 'the kwallet-query man page for details'.format(proc.returncode)) + if returncode: + logger.error(f'kwallet-query failed with return code {returncode}. ' + 'Please consult the kwallet-query man page for details') return b'' else: if stdout.lower().startswith(b'failed to read'): @@ -755,22 +874,21 @@ def _get_kwallet_password(browser_keyring_name, logger): # checks hasEntry. To verify this: # dbus-monitor "interface='org.kde.KWallet'" "type=method_return" # while starting chrome. 
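# A minimal sketch (editor example, with invented sample outputs) of the three
# kwallet-query outcomes the surrounding code distinguishes:
def interpret_kwallet_stdout(returncode, stdout):
    if returncode:
        # kwallet-query itself failed; the caller logs an error and uses b''
        return b''
    if stdout.lower().startswith(b'failed to read'):
        # wallet is open but holds no entry for the browser; chromium then
        # behaves as if the password were empty, so b'' is the right fallback
        return b''
    # success: the stored password, with the trailing newline stripped
    return stdout.rstrip(b'\n')


assert interpret_kwallet_stdout(1, b'') == b''
assert interpret_kwallet_stdout(0, b'Failed to read entry\n') == b''
assert interpret_kwallet_stdout(0, b'hunter2\n') == b'hunter2'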
- # this may be a bug as the intended behaviour is to generate a random password and store - # it, but that doesn't matter here. + # this was identified as a bug later and fixed in + # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0 + # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764 return b'' else: logger.debug('password found') - if stdout[-1:] == b'\n': - stdout = stdout[:-1] - return stdout - except BaseException as e: - logger.warning(f'exception running kwallet-query: {type(e).__name__}({e})') + return stdout.rstrip(b'\n') + except Exception as e: + logger.warning(f'exception running kwallet-query: {error_to_str(e)}') return b'' def _get_gnome_keyring_password(browser_keyring_name, logger): - if not SECRETSTORAGE_AVAILABLE: - logger.error('secretstorage not available {}'.format(SECRETSTORAGE_UNAVAILABLE_REASON)) + if not secretstorage: + logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}') return b'' # the Gnome keyring does not seem to organise keys in the same way as KWallet, # using `dbus-monitor` during startup, it can be observed that chromium lists all keys @@ -779,7 +897,7 @@ def _get_gnome_keyring_password(browser_keyring_name, logger): with contextlib.closing(secretstorage.dbus_init()) as con: col = secretstorage.get_default_collection(con) for item in col.get_all_items(): - if item.get_label() == '{} Safe Storage'.format(browser_keyring_name): + if item.get_label() == f'{browser_keyring_name} Safe Storage': return item.get_secret() else: logger.error('failed to read from keyring') @@ -796,8 +914,8 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger): keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger) logger.debug(f'Chosen keyring: {keyring.name}') - if keyring == _LinuxKeyring.KWALLET: - return _get_kwallet_password(browser_keyring_name, logger) + if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6): + return _get_kwallet_password(browser_keyring_name, keyring, logger) elif keyring == _LinuxKeyring.GNOMEKEYRING: return _get_gnome_keyring_password(browser_keyring_name, logger) elif keyring == _LinuxKeyring.BASICTEXT: @@ -809,35 +927,41 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger): def _get_mac_keyring_password(browser_keyring_name, logger): logger.debug('using find-generic-password to obtain password from OSX keychain') try: - proc = Popen( + stdout, _, returncode = Popen.run( ['security', 'find-generic-password', '-w', # write password to stdout '-a', browser_keyring_name, # match 'account' - '-s', '{} Safe Storage'.format(browser_keyring_name)], # match 'service' + '-s', f'{browser_keyring_name} Safe Storage'], # match 'service' stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) - - stdout, stderr = proc.communicate_or_kill() - if stdout[-1:] == b'\n': - stdout = stdout[:-1] - return stdout - except BaseException as e: - logger.warning(f'exception running find-generic-password: {type(e).__name__}({e})') + if returncode: + logger.warning('find-generic-password failed') + return None + return stdout.rstrip(b'\n') + except Exception as e: + logger.warning(f'exception running find-generic-password: {error_to_str(e)}') return None def _get_windows_v10_key(browser_root, logger): - path = _find_most_recently_used_file(browser_root, 'Local State') + """ + References: + - [1] 
https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc + """ + path = _find_most_recently_used_file(browser_root, 'Local State', logger) if path is None: logger.error('could not find local state file') return None - with open(path, 'r', encoding='utf8') as f: + logger.debug(f'Found local state file at "{path}"') + with open(path, encoding='utf8') as f: data = json.load(f) try: + # kOsCryptEncryptedKeyPrefName in [1] base64_key = data['os_crypt']['encrypted_key'] except KeyError: logger.error('no encrypted key in Local State') return None - encrypted_key = compat_b64decode(base64_key) + encrypted_key = base64.b64decode(base64_key) + # kDPAPIKeyPrefix in [1] prefix = b'DPAPI' if not encrypted_key.startswith(prefix): logger.error('invalid key') @@ -849,13 +973,15 @@ def pbkdf2_sha1(password, salt, iterations, key_length): return pbkdf2_hmac('sha1', password, salt, iterations, key_length) -def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16): - plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)) - try: - return plaintext.decode('utf-8') - except UnicodeDecodeError: - logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) - return None +def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16): + for key in keys: + plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)) + try: + return plaintext.decode() + except UnicodeDecodeError: + pass + logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) + return None def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): @@ -866,7 +992,7 @@ def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): return None try: - return plaintext.decode('utf-8') + return plaintext.decode() except UnicodeDecodeError: logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. 
Possibly the key is wrong?', only_once=True)
         return None
 
 
@@ -877,10 +1003,12 @@ def _decrypt_windows_dpapi(ciphertext, logger):
     References:
         - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
     """
-    from ctypes.wintypes import DWORD
+
+    import ctypes
+    import ctypes.wintypes
 
     class DATA_BLOB(ctypes.Structure):
-        _fields_ = [('cbData', DWORD),
+        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                     ('pbData', ctypes.POINTER(ctypes.c_char))]
 
     buffer = ctypes.create_string_buffer(ciphertext)
@@ -917,17 +1045,20 @@ def _open_database_copy(database_path, tmpdir):
 
 def _get_column_names(cursor, table_name):
-    table_info = cursor.execute('PRAGMA table_info({})'.format(table_name)).fetchall()
-    return [row[1].decode('utf-8') for row in table_info]
+    table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
+    return [row[1].decode() for row in table_info]
 
 
-def _find_most_recently_used_file(root, filename):
+def _find_most_recently_used_file(root, filename, logger):
     # if there are multiple browser profiles, take the most recently used one
-    paths = []
-    for root, dirs, files in os.walk(root):
-        for file in files:
-            if file == filename:
-                paths.append(os.path.join(root, file))
+    i, paths = 0, []
+    with _create_progress_bar(logger) as progress_bar:
+        for curr_root, dirs, files in os.walk(root):
+            for file in files:
+                i += 1
+                progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
+                if file == filename:
+                    paths.append(os.path.join(curr_root, file))
     return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
 
 
@@ -945,11 +1076,249 @@ def _is_path(value):
     return os.path.sep in value
 
 
-def _parse_browser_specification(browser_name, profile=None, keyring=None):
+def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
     if browser_name not in SUPPORTED_BROWSERS:
         raise ValueError(f'unsupported browser: "{browser_name}"')
     if keyring not in (None, *SUPPORTED_KEYRINGS):
         raise ValueError(f'unsupported keyring: "{keyring}"')
-    if profile is not None and _is_path(profile):
-        profile = os.path.expanduser(profile)
-    return browser_name, profile, keyring
+    if profile is not None and _is_path(expand_path(profile)):
+        profile = expand_path(profile)
+    return browser_name, profile, keyring, container
+
+
+class LenientSimpleCookie(http.cookies.SimpleCookie):
+    """More lenient version of http.cookies.SimpleCookie"""
+    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
+    # We use Morsel's legal key chars to avoid errors on setting values
+    _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
+    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
+
+    _RESERVED = {
+        "expires",
+        "path",
+        "comment",
+        "domain",
+        "max-age",
+        "secure",
+        "httponly",
+        "version",
+        "samesite",
+    }
+
+    _FLAGS = {"secure", "httponly"}
+
+    # Added 'bad' group to catch the remaining value
+    _COOKIE_PATTERN = re.compile(r"""
+        \s*                            # Optional whitespace at start of cookie
+        (?P<key>                       # Start of group 'key'
+        [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
+        )                              # End of group 'key'
+        (                              # Optional group: there may not be a value.
+        \s*=\s*                        # Equal Sign
+        (                              # Start of potential value
+        (?P<val>                       # Start of group 'val'
+        "(?:[^\\"]|\\.)*"              # Any doublequoted string
+        |                              # or
+        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT  # Special case for "expires" attr
+        |                              # or
+        [""" + _LEGAL_VALUE_CHARS + r"""]*      # Any word or empty string
+        )                              # End of group 'val'
+        |                              # or
+        (?P<bad>(?:\\;|[^;])*?)        # 'bad' group fallback for invalid values
+        )                              # End of potential value
+        )?                             # End of optional value group
+        \s*                            # Any number of spaces.
+        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
+        """, re.ASCII | re.VERBOSE)
+
+    def load(self, data):
+        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
+        if not isinstance(data, str):
+            return super().load(data)
+
+        morsel = None
+        for match in self._COOKIE_PATTERN.finditer(data):
+            if match.group('bad'):
+                morsel = None
+                continue
+
+            key, value = match.group('key', 'val')
+
+            is_attribute = False
+            if key.startswith('$'):
+                key = key[1:]
+                is_attribute = True
+
+            lower_key = key.lower()
+            if lower_key in self._RESERVED:
+                if morsel is None:
+                    continue
+
+                if value is None:
+                    if lower_key not in self._FLAGS:
+                        morsel = None
+                        continue
+                    value = True
+                else:
+                    value, _ = self.value_decode(value)
+
+                morsel[key] = value
+
+            elif is_attribute:
+                morsel = None
+
+            elif value is not None:
+                morsel = self.get(key, http.cookies.Morsel())
+                real_value, coded_value = self.value_decode(value)
+                morsel.set(key, real_value, coded_value)
+                self[key] = morsel
+
+            else:
+                morsel = None
+
+
+class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
+    """
+    See [1] for cookie file format.
+
+    1. https://curl.haxx.se/docs/http-cookies.html
+    """
+    _HTTPONLY_PREFIX = '#HttpOnly_'
+    _ENTRY_LEN = 7
+    _HEADER = '''# Netscape HTTP Cookie File
+# This file is generated by yt-dlp. Do not edit.
+
+'''
+    _CookieFileEntry = collections.namedtuple(
+        'CookieFileEntry',
+        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
+
+    def __init__(self, filename=None, *args, **kwargs):
+        super().__init__(None, *args, **kwargs)
+        if is_path_like(filename):
+            filename = os.fspath(filename)
+        self.filename = filename
+
+    @staticmethod
+    def _true_or_false(cndn):
+        return 'TRUE' if cndn else 'FALSE'
+
+    @contextlib.contextmanager
+    def open(self, file, *, write=False):
+        if is_path_like(file):
+            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
+                yield f
+        else:
+            if write:
+                file.truncate(0)
+            yield file
+
+    def _really_save(self, f, ignore_discard, ignore_expires):
+        now = time.time()
+        for cookie in self:
+            if (not ignore_discard and cookie.discard
+                    or not ignore_expires and cookie.is_expired(now)):
+                continue
+            name, value = cookie.name, cookie.value
+            if value is None:
+                # cookies.txt regards 'Set-Cookie: foo' as a cookie
+                # with no name, whereas http.cookiejar regards it as a
+                # cookie with no value.
+                name, value = '', name
+            f.write('%s\n' % '\t'.join((
+                cookie.domain,
+                self._true_or_false(cookie.domain.startswith('.')),
+                cookie.path,
+                self._true_or_false(cookie.secure),
+                str_or_none(cookie.expires, default=''),
+                name, value
+            )))
+
+    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
+        """
+        Save cookies to a file.
+ Code is taken from CPython 3.6 + https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """ + + if filename is None: + if self.filename is not None: + filename = self.filename + else: + raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT) + + # Store session cookies with `expires` set to 0 instead of an empty string + for cookie in self: + if cookie.expires is None: + cookie.expires = 0 + + with self.open(filename, write=True) as f: + f.write(self._HEADER) + self._really_save(f, ignore_discard, ignore_expires) + + def load(self, filename=None, ignore_discard=True, ignore_expires=True): + """Load cookies from a file.""" + if filename is None: + if self.filename is not None: + filename = self.filename + else: + raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT) + + def prepare_line(line): + if line.startswith(self._HTTPONLY_PREFIX): + line = line[len(self._HTTPONLY_PREFIX):] + # comments and empty lines are fine + if line.startswith('#') or not line.strip(): + return line + cookie_list = line.split('\t') + if len(cookie_list) != self._ENTRY_LEN: + raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list)) + cookie = self._CookieFileEntry(*cookie_list) + if cookie.expires_at and not cookie.expires_at.isdigit(): + raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) + return line + + cf = io.StringIO() + with self.open(filename) as f: + for line in f: + try: + cf.write(prepare_line(line)) + except http.cookiejar.LoadError as e: + if f'{line.strip()} '[0] in '[{"': + raise http.cookiejar.LoadError( + 'Cookies file must be Netscape formatted, not JSON. See ' + 'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp') + write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n') + continue + cf.seek(0) + self._really_load(cf, filename, ignore_discard, ignore_expires) + # Session cookies are denoted by either `expires` field set to + # an empty string or 0. MozillaCookieJar only recognizes the former + # (see [1]). So we need force the latter to be recognized as session + # cookies on our own. + # Session cookies may be important for cookies-based authentication, + # e.g. usually, when user does not check 'Remember me' check box while + # logging in on a site, some important cookies are stored as session + # cookies so that not recognizing them will result in failed login. + # 1. 
https://bugs.python.org/issue17164 + for cookie in self: + # Treat `expires=0` cookies as session cookies + if cookie.expires == 0: + cookie.expires = None + cookie.discard = True + + def get_cookie_header(self, url): + """Generate a Cookie HTTP header for a given url""" + cookie_req = urllib.request.Request(normalize_url(sanitize_url(url))) + self.add_cookie_header(cookie_req) + return cookie_req.get_header('Cookie') + + def get_cookies_for_url(self, url): + """Generate a list of Cookie objects for a given url""" + # Policy `_now` attribute must be set before calling `_cookies_for_request` + # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360 + self._policy._now = self._now = int(time.time()) + return self._cookies_for_request(urllib.request.Request(normalize_url(sanitize_url(url)))) + + def clear(self, *args, **kwargs): + with contextlib.suppress(KeyError): + return super().clear(*args, **kwargs) diff --git a/plugins/youtube_download/yt_dlp/dependencies/Cryptodome.py b/plugins/youtube_download/yt_dlp/dependencies/Cryptodome.py new file mode 100644 index 0000000..2cfa4c9 --- /dev/null +++ b/plugins/youtube_download/yt_dlp/dependencies/Cryptodome.py @@ -0,0 +1,38 @@ +from ..compat.compat_utils import passthrough_module + +try: + import Cryptodome as _parent +except ImportError: + try: + import Crypto as _parent + except (ImportError, SyntaxError): # Old Crypto gives SyntaxError in newer Python + _parent = passthrough_module(__name__, 'no_Cryptodome') + __bool__ = lambda: False + +del passthrough_module + +__version__ = '' +AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None +try: + if _parent.__name__ == 'Cryptodome': + from Cryptodome import __version__ + from Cryptodome.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 + from Cryptodome.Hash import CMAC, SHA1 + from Cryptodome.PublicKey import RSA + elif _parent.__name__ == 'Crypto': + from Crypto import __version__ + from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401 + from Crypto.Hash import CMAC, SHA1 # noqa: F401 + from Crypto.PublicKey import RSA # noqa: F401 +except ImportError: + __version__ = f'broken {__version__}'.strip() + + +_yt_dlp__identifier = _parent.__name__ +if AES and _yt_dlp__identifier == 'Crypto': + try: + # In pycrypto, mode defaults to ECB. See: + # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode + AES.new(b'abcdefghijklmnop') + except TypeError: + _yt_dlp__identifier = 'pycrypto' diff --git a/plugins/youtube_download/yt_dlp/dependencies/__init__.py b/plugins/youtube_download/yt_dlp/dependencies/__init__.py new file mode 100644 index 0000000..6e7d29c --- /dev/null +++ b/plugins/youtube_download/yt_dlp/dependencies/__init__.py @@ -0,0 +1,83 @@ +# flake8: noqa: F401 +"""Imports all optional dependencies for the project. 
+An attribute "_yt_dlp__identifier" may be inserted into the module if it uses an ambiguous namespace""" + +try: + import brotlicffi as brotli +except ImportError: + try: + import brotli + except ImportError: + brotli = None + + +try: + import certifi +except ImportError: + certifi = None +else: + from os.path import exists as _path_exists + + # The certificate may not be bundled in executable + if not _path_exists(certifi.where()): + certifi = None + + +try: + import mutagen +except ImportError: + mutagen = None + + +secretstorage = None +try: + import secretstorage + _SECRETSTORAGE_UNAVAILABLE_REASON = None +except ImportError: + _SECRETSTORAGE_UNAVAILABLE_REASON = ( + 'as the `secretstorage` module is not installed. ' + 'Please install by running `python3 -m pip install secretstorage`') +except Exception as _err: + _SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}' + + +try: + import sqlite3 +except ImportError: + # although sqlite3 is part of the standard library, it is possible to compile python without + # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544 + sqlite3 = None + + +try: + import websockets +except (ImportError, SyntaxError): + # websockets 3.10 on python 3.6 causes SyntaxError + # See https://github.com/yt-dlp/yt-dlp/issues/2633 + websockets = None + + +try: + import xattr # xattr or pyxattr +except ImportError: + xattr = None +else: + if hasattr(xattr, 'set'): # pyxattr + xattr._yt_dlp__identifier = 'pyxattr' + + +from . import Cryptodome + +all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')} +available_dependencies = {k: v for k, v in all_dependencies.items() if v} + + +# Deprecated +Cryptodome_AES = Cryptodome.AES + + +__all__ = [ + 'all_dependencies', + 'available_dependencies', + *all_dependencies.keys(), +] diff --git a/plugins/youtube_download/yt_dlp/downloader/__init__.py b/plugins/youtube_download/yt_dlp/downloader/__init__.py index acc19f4..51a9f28 100644 --- a/plugins/youtube_download/yt_dlp/downloader/__init__.py +++ b/plugins/youtube_download/yt_dlp/downloader/__init__.py @@ -1,10 +1,4 @@ -from __future__ import unicode_literals - -from ..compat import compat_str -from ..utils import ( - determine_protocol, - NO_DEFAULT -) +from ..utils import NO_DEFAULT, determine_protocol def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False): @@ -29,20 +23,18 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N # Some of these require get_suitable_downloader from .common import FileDownloader from .dash import DashSegmentsFD +from .external import FFmpegFD, get_external_downloader from .f4m import F4mFD +from .fc2 import FC2LiveFD from .hls import HlsFD from .http import HttpFD -from .rtmp import RtmpFD -from .rtsp import RtspFD from .ism import IsmFD from .mhtml import MhtmlFD -from .niconico import NiconicoDmcFD +from .niconico import NiconicoDmcFD, NiconicoLiveFD +from .rtmp import RtmpFD +from .rtsp import RtspFD from .websocket import WebSocketFragmentFD from .youtube_live_chat import YoutubeLiveChatFD -from .external import ( - get_external_downloader, - FFmpegFD, -) PROTOCOL_MAP = { 'rtmp': RtmpFD, @@ -58,6 +50,8 @@ PROTOCOL_MAP = { 'ism': IsmFD, 'mhtml': MhtmlFD, 'niconico_dmc': NiconicoDmcFD, + 'niconico_live': NiconicoLiveFD, + 'fc2_live': FC2LiveFD, 'websocket_frag': WebSocketFragmentFD, 'youtube_live_chat': YoutubeLiveChatFD, 'youtube_live_chat_replay': YoutubeLiveChatFD, @@ -66,10 +60,11 @@ 
PROTOCOL_MAP = { def shorten_protocol_name(proto, simplify=False): short_protocol_names = { - 'm3u8_native': 'm3u8_n', - 'rtmp_ffmpeg': 'rtmp_f', + 'm3u8_native': 'm3u8', + 'm3u8': 'm3u8F', + 'rtmp_ffmpeg': 'rtmpF', 'http_dash_segments': 'dash', - 'http_dash_segments_generator': 'dash_g', + 'http_dash_segments_generator': 'dashG', 'niconico_dmc': 'dmc', 'websocket_frag': 'WSfrag', } @@ -77,6 +72,7 @@ def shorten_protocol_name(proto, simplify=False): short_protocol_names.update({ 'https': 'http', 'ftps': 'ftp', + 'm3u8': 'm3u8', # Reverse above m3u8 mapping 'm3u8_native': 'm3u8', 'http_dash_segments_generator': 'dash', 'rtmp_ffmpeg': 'rtmp', @@ -91,13 +87,13 @@ def _get_suitable_downloader(info_dict, protocol, params, default): if default is NO_DEFAULT: default = HttpFD - # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict): - # return FFmpegFD + if (info_dict.get('section_start') or info_dict.get('section_end')) and FFmpegFD.can_download(info_dict): + return FFmpegFD info_dict['protocol'] = protocol downloaders = params.get('external_downloader') external_downloader = ( - downloaders if isinstance(downloaders, compat_str) or downloaders is None + downloaders if isinstance(downloaders, str) or downloaders is None else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default'))) if external_downloader is None: @@ -117,7 +113,7 @@ def _get_suitable_downloader(info_dict, protocol, params, default): return FFmpegFD elif (external_downloader or '').lower() == 'native': return HlsFD - elif get_suitable_downloader( + elif protocol == 'm3u8_native' and get_suitable_downloader( info_dict, params, None, protocol='m3u8_frag_urls', to_stdout=info_dict['to_stdout']): return HlsFD elif params.get('hls_prefer_native') is True: diff --git a/plugins/youtube_download/yt_dlp/downloader/common.py b/plugins/youtube_download/yt_dlp/downloader/common.py index 37321e3..b71d7ee 100644 --- a/plugins/youtube_download/yt_dlp/downloader/common.py +++ b/plugins/youtube_download/yt_dlp/downloader/common.py @@ -1,30 +1,40 @@ -from __future__ import division, unicode_literals - +import contextlib +import errno +import functools import os +import random import re import time -import random -import errno +from ..minicurses import ( + BreaklineStatusPrinter, + MultilineLogger, + MultilinePrinter, + QuietMultilinePrinter, +) from ..utils import ( + IDENTITY, + NO_DEFAULT, + LockingUnsupportedError, + Namespace, + RetryManager, + classproperty, decodeArgument, + deprecation_warning, encodeFilename, - error_to_compat_str, format_bytes, + join_nonempty, + parse_bytes, + remove_start, sanitize_open, shell_quote, timeconvert, timetuple_from_msec, -) -from ..minicurses import ( - MultilineLogger, - MultilinePrinter, - QuietMultilinePrinter, - BreaklineStatusPrinter + try_call, ) -class FileDownloader(object): +class FileDownloader: """File Downloader class. File downloader objects are the ones responsible of downloading the @@ -40,8 +50,9 @@ class FileDownloader(object): quiet: Do not print messages to stdout. ratelimit: Download speed limit, in bytes/sec. throttledratelimit: Assume the download is being throttled below this speed (bytes/sec) - retries: Number of times to retry for HTTP error 5xx - file_access_retries: Number of times to retry on file access error + retries: Number of times to retry for expected network errors. 
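The `shorten_protocol_name` change earlier in this hunk is easier to follow as a standalone function. This sketch reconstructs it using only the mapping entries visible in the diff (the real table may contain more entries):

```python
def shorten_protocol_name(proto, simplify=False):
    short = {
        'm3u8_native': 'm3u8',
        'm3u8': 'm3u8F',
        'rtmp_ffmpeg': 'rtmpF',
        'http_dash_segments': 'dash',
        'http_dash_segments_generator': 'dashG',
        'niconico_dmc': 'dmc',
        'websocket_frag': 'WSfrag',
    }
    if simplify:
        short.update({
            'https': 'http',
            'ftps': 'ftp',
            'm3u8': 'm3u8',  # reverse the m3u8 -> m3u8F mapping above
            'm3u8_native': 'm3u8',
            'http_dash_segments_generator': 'dash',
            'rtmp_ffmpeg': 'rtmp',
        })
    return short.get(proto, proto)

assert shorten_protocol_name('m3u8') == 'm3u8F'
assert shorten_protocol_name('m3u8', simplify=True) == 'm3u8'
```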
+ Default is 0 for API, but 10 for CLI + file_access_retries: Number of times to retry on file access error (default: 3) buffersize: Size of download buffer in bytes. noresizebuffer: Do not automatically resize the download buffer. continuedl: Try to continue downloads if possible. @@ -62,6 +73,7 @@ class FileDownloader(object): useful for bypassing bandwidth throttling imposed by a webserver (experimental) progress_template: See YoutubeDL.py + retry_sleep_functions: See YoutubeDL.py Subclasses of this one must re-define the real_download method. """ @@ -71,21 +83,51 @@ class FileDownloader(object): def __init__(self, ydl, params): """Create a FileDownloader object with the given options.""" - self.ydl = ydl + self._set_ydl(ydl) self._progress_hooks = [] self.params = params self._prepare_multiline_status() self.add_progress_hook(self.report_progress) + def _set_ydl(self, ydl): + self.ydl = ydl + + for func in ( + 'deprecation_warning', + 'deprecated_feature', + 'report_error', + 'report_file_already_downloaded', + 'report_warning', + 'to_console_title', + 'to_stderr', + 'trouble', + 'write_debug', + ): + if not hasattr(self, func): + setattr(self, func, getattr(ydl, func)) + + def to_screen(self, *args, **kargs): + self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs) + + __to_screen = to_screen + + @classproperty + def FD_NAME(cls): + return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', cls.__name__[:-2]).lower() + @staticmethod def format_seconds(seconds): + if seconds is None: + return ' Unknown' time = timetuple_from_msec(seconds * 1000) if time.hours > 99: return '--:--:--' - if not time.hours: - return '%02d:%02d' % time[1:-1] return '%02d:%02d:%02d' % time[:-1] + @classmethod + def format_eta(cls, seconds): + return f'{remove_start(cls.format_seconds(seconds), "00:"):>8s}' + @staticmethod def calc_percent(byte_counter, data_len): if data_len is None: @@ -94,29 +136,23 @@ class FileDownloader(object): @staticmethod def format_percent(percent): - if percent is None: - return '---.-%' - elif percent == 100: - return '100%' - return '%6s' % ('%3.1f%%' % percent) + return ' N/A%' if percent is None else f'{percent:>5.1f}%' - @staticmethod - def calc_eta(start, now, total, current): + @classmethod + def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT): + if total is NO_DEFAULT: + rate, remaining = start_or_rate, now_or_remaining + if None in (rate, remaining): + return None + return int(float(remaining) / rate) + + start, now = start_or_rate, now_or_remaining if total is None: return None if now is None: now = time.time() - dif = now - start - if current == 0 or dif < 0.001: # One millisecond - return None - rate = float(current) / dif - return int((float(total) - float(current)) / rate) - - @staticmethod - def format_eta(eta): - if eta is None: - return '--:--' - return FileDownloader.format_seconds(eta) + rate = cls.calc_speed(start, now, current) + return rate and int((float(total) - float(current)) / rate) @staticmethod def calc_speed(start, now, bytes): @@ -127,13 +163,17 @@ class FileDownloader(object): @staticmethod def format_speed(speed): - if speed is None: - return '%10s' % '---b/s' - return '%10s' % ('%s/s' % format_bytes(speed)) + return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s' @staticmethod def format_retries(retries): - return 'inf' if retries == float('inf') else '%.0f' % retries + return 'inf' if retries == float('inf') else int(retries) + + @staticmethod + def filesize_or_none(unencoded_filename): + if 
os.path.isfile(unencoded_filename): + return os.path.getsize(unencoded_filename) + return 0 @staticmethod def best_block_size(elapsed_time, bytes): @@ -151,33 +191,9 @@ class FileDownloader(object): @staticmethod def parse_bytes(bytestr): """Parse a string indicating a byte quantity into an integer.""" - matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) - if matchobj is None: - return None - number = float(matchobj.group(1)) - multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) - return int(round(number * multiplier)) - - def to_screen(self, *args, **kargs): - self.ydl.to_stdout(*args, quiet=self.params.get('quiet'), **kargs) - - def to_stderr(self, message): - self.ydl.to_stderr(message) - - def to_console_title(self, message): - self.ydl.to_console_title(message) - - def trouble(self, *args, **kargs): - self.ydl.trouble(*args, **kargs) - - def report_warning(self, *args, **kargs): - self.ydl.report_warning(*args, **kargs) - - def report_error(self, *args, **kargs): - self.ydl.report_error(*args, **kargs) - - def write_debug(self, *args, **kargs): - self.ydl.write_debug(*args, **kargs) + deprecation_warning('yt_dlp.FileDownloader.parse_bytes is deprecated and ' + 'may be removed in the future. Use yt_dlp.utils.parse_bytes instead') + return parse_bytes(bytestr) def slow_down(self, start_time, now, byte_counter): """Sleep if the download speed is over the rate limit.""" @@ -210,28 +226,43 @@ class FileDownloader(object): def ytdl_filename(self, filename): return filename + '.ytdl' - def sanitize_open(self, filename, open_mode): - file_access_retries = self.params.get('file_access_retries', 10) - retry = 0 - while True: - try: - return sanitize_open(filename, open_mode) - except (IOError, OSError) as err: - retry = retry + 1 - if retry > file_access_retries or err.errno not in (errno.EACCES,): - raise - self.to_screen( - '[download] Got file access error. Retrying (attempt %d of %s) ...' - % (retry, self.format_retries(file_access_retries))) - time.sleep(0.01) + def wrap_file_access(action, *, fatal=False): + def error_callback(err, count, retries, *, fd): + return RetryManager.report_retry( + err, count, retries, info=fd.__to_screen, + warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')), + error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'), + sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access')) + def wrapper(self, func, *args, **kwargs): + for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self): + try: + return func(self, *args, **kwargs) + except OSError as err: + if err.errno in (errno.EACCES, errno.EINVAL): + retry.error = err + continue + retry.error_callback(err, 1, 0) + + return functools.partial(functools.partialmethod, wrapper) + + @wrap_file_access('open', fatal=True) + def sanitize_open(self, filename, open_mode): + f, filename = sanitize_open(filename, open_mode) + if not getattr(f, 'locked', None): + self.write_debug(f'{LockingUnsupportedError.msg}. 
Proceeding without locking', only_once=True) + return f, filename + + @wrap_file_access('remove') + def try_remove(self, filename): + if os.path.isfile(filename): + os.remove(filename) + + @wrap_file_access('rename') def try_rename(self, old_filename, new_filename): if old_filename == new_filename: return - try: - os.replace(old_filename, new_filename) - except (IOError, OSError) as err: - self.report_error(f'unable to rename file: {err}') + os.replace(old_filename, new_filename) def try_utime(self, filename, last_modified_hdr): """Try to set the last-modified time of the given file.""" @@ -248,10 +279,8 @@ class FileDownloader(object): # Ignore obviously invalid dates if filetime == 0: return - try: + with contextlib.suppress(Exception): os.utime(filename, (time.time(), filetime)) - except Exception: - pass return filetime def report_destination(self, filename): @@ -264,26 +293,27 @@ class FileDownloader(object): elif self.ydl.params.get('logger'): self._multiline = MultilineLogger(self.ydl.params['logger'], lines) elif self.params.get('progress_with_newline'): - self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines) + self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines) else: - self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet')) - self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color') + self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet')) + self._multiline.allow_colors = self.ydl._allow_colors.out and self.ydl._allow_colors.out != 'no_color' + self._multiline._HAVE_FULLCAP = self.ydl._allow_colors.out def _finish_multiline_status(self): self._multiline.end() - _progress_styles = { - 'downloaded_bytes': 'light blue', - 'percent': 'light blue', - 'eta': 'yellow', - 'speed': 'green', - 'elapsed': 'bold white', - 'total_bytes': '', - 'total_bytes_estimate': '', - } + ProgressStyles = Namespace( + downloaded_bytes='light blue', + percent='light blue', + eta='yellow', + speed='green', + elapsed='bold white', + total_bytes='', + total_bytes_estimate='', + ) def _report_progress_status(self, s, default_template): - for name, style in self._progress_styles.items(): + for name, style in self.ProgressStyles.items_: name = f'_{name}_str' if name not in s: continue @@ -307,78 +337,73 @@ class FileDownloader(object): self._multiline.stream, self._multiline.allow_colors, *args, **kwargs) def report_progress(self, s): + def with_fields(*tups, default=''): + for *fields, tmpl in tups: + if all(s.get(f) is not None for f in fields): + return tmpl + return default + + _format_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}' + if s['status'] == 'finished': if self.params.get('noprogress'): self.to_screen('[download] Download completed') - msg_template = '100%%' - if s.get('total_bytes') is not None: - s['_total_bytes_str'] = format_bytes(s['total_bytes']) - msg_template += ' of %(_total_bytes_str)s' - if s.get('elapsed') is not None: - s['_elapsed_str'] = self.format_seconds(s['elapsed']) - msg_template += ' in %(_elapsed_str)s' - s['_percent_str'] = self.format_percent(100) - self._report_progress_status(s, msg_template) - return + speed = try_call(lambda: s['total_bytes'] / s['elapsed']) + s.update({ + 'speed': speed, + '_speed_str': self.format_speed(speed).strip(), + '_total_bytes_str': _format_bytes('total_bytes'), + '_elapsed_str': self.format_seconds(s.get('elapsed')), + '_percent_str': self.format_percent(100), + }) + 
self._report_progress_status(s, join_nonempty( + '100%%', + with_fields(('total_bytes', 'of %(_total_bytes_str)s')), + with_fields(('elapsed', 'in %(_elapsed_str)s')), + with_fields(('speed', 'at %(_speed_str)s')), + delim=' ')) if s['status'] != 'downloading': return - if s.get('eta') is not None: - s['_eta_str'] = self.format_eta(s['eta']) - else: - s['_eta_str'] = 'Unknown' + s.update({ + '_eta_str': self.format_eta(s.get('eta')).strip(), + '_speed_str': self.format_speed(s.get('speed')), + '_percent_str': self.format_percent(try_call( + lambda: 100 * s['downloaded_bytes'] / s['total_bytes'], + lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'], + lambda: s['downloaded_bytes'] == 0 and 0)), + '_total_bytes_str': _format_bytes('total_bytes'), + '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'), + '_downloaded_bytes_str': _format_bytes('downloaded_bytes'), + '_elapsed_str': self.format_seconds(s.get('elapsed')), + }) - if s.get('total_bytes') and s.get('downloaded_bytes') is not None: - s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes']) - elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None: - s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate']) - else: - if s.get('downloaded_bytes') == 0: - s['_percent_str'] = self.format_percent(0) - else: - s['_percent_str'] = 'Unknown %' + msg_template = with_fields( + ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'), + ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'), + ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'), + ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'), + default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s') - if s.get('speed') is not None: - s['_speed_str'] = self.format_speed(s['speed']) - else: - s['_speed_str'] = 'Unknown speed' - - if s.get('total_bytes') is not None: - s['_total_bytes_str'] = format_bytes(s['total_bytes']) - msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s' - elif s.get('total_bytes_estimate') is not None: - s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate']) - msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s' - else: - if s.get('downloaded_bytes') is not None: - s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes']) - if s.get('elapsed'): - s['_elapsed_str'] = self.format_seconds(s['elapsed']) - msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)' - else: - msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' - else: - msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s' - if s.get('fragment_index') and s.get('fragment_count'): - msg_template += ' (frag %(fragment_index)s/%(fragment_count)s)' - elif s.get('fragment_index'): - msg_template += ' (frag %(fragment_index)s)' + msg_template += with_fields( + ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'), + ('fragment_index', ' (frag %(fragment_index)s)')) self._report_progress_status(s, msg_template) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" self.to_screen('[download] Resuming download at byte %s' % resume_len) - def report_retry(self, err, count, retries): - """Report retry in case of 
HTTP error 5xx""" - self.to_screen( - '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...' - % (error_to_compat_str(err), count, self.format_retries(retries))) - - def report_file_already_downloaded(self, *args, **kwargs): - """Report file has already been fully downloaded.""" - return self.ydl.report_file_already_downloaded(*args, **kwargs) + def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True): + """Report retry""" + is_frag = False if frag_index is NO_DEFAULT else 'fragment' + RetryManager.report_retry( + err, count, retries, info=self.__to_screen, + warn=lambda msg: self.__to_screen(f'[download] Got error: {msg}'), + error=IDENTITY if not fatal else lambda e: self.report_error(f'\r[download] Got error: {e}'), + sleep_func=self.params.get('retry_sleep_functions', {}).get(is_frag or 'http'), + suffix=f'fragment{"s" if frag_index is None else f" {frag_index}"}' if is_frag else None) def report_unable_to_resume(self): """Report it was impossible to resume download.""" @@ -394,7 +419,6 @@ class FileDownloader(object): """Download to a filename using the info from info_dict Return True on success and False otherwise """ - nooverwrites_and_exists = ( not self.params.get('overwrites', True) and os.path.exists(encodeFilename(filename)) @@ -418,25 +442,16 @@ class FileDownloader(object): self._finish_multiline_status() return True, False - if subtitle is False: - min_sleep_interval = self.params.get('sleep_interval') - if min_sleep_interval: - max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) - sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) - self.to_screen( - '[download] Sleeping %s seconds ...' % ( - int(sleep_interval) if sleep_interval.is_integer() - else '%.2f' % sleep_interval)) - time.sleep(sleep_interval) + if subtitle: + sleep_interval = self.params.get('sleep_interval_subtitles') or 0 else: - sleep_interval_sub = 0 - if type(self.params.get('sleep_interval_subtitles')) is int: - sleep_interval_sub = self.params.get('sleep_interval_subtitles') - if sleep_interval_sub > 0: - self.to_screen( - '[download] Sleeping %s seconds ...' % ( - sleep_interval_sub)) - time.sleep(sleep_interval_sub) + min_sleep_interval = self.params.get('sleep_interval') or 0 + sleep_interval = random.uniform( + min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval) + if sleep_interval > 0: + self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...') + time.sleep(sleep_interval) + ret = self.real_download(filename, info_dict) self._finish_multiline_status() return ret, True @@ -446,8 +461,7 @@ class FileDownloader(object): raise NotImplementedError('This method must be implemented by subclasses') def _hook_progress(self, status, info_dict): - if not self._progress_hooks: - return + # Ideally we want to make a copy of the dict, but that is too slow status['info_dict'] = info_dict # youtube-dl passes the same status object to all the hooks. # Some third party scripts seems to be relying on this. 
@@ -469,4 +483,4 @@ class FileDownloader(object): if exe is None: exe = os.path.basename(str_args[0]) - self.write_debug('%s command line: %s' % (exe, shell_quote(str_args))) + self.write_debug(f'{exe} command line: {shell_quote(str_args)}') diff --git a/plugins/youtube_download/yt_dlp/downloader/dash.py b/plugins/youtube_download/yt_dlp/downloader/dash.py index a845ee7..4328d73 100644 --- a/plugins/youtube_download/yt_dlp/downloader/dash.py +++ b/plugins/youtube_download/yt_dlp/downloader/dash.py @@ -1,10 +1,9 @@ -from __future__ import unicode_literals import time +import urllib.parse -from ..downloader import get_suitable_downloader +from . import get_suitable_downloader from .fragment import FragmentFD - -from ..utils import urljoin +from ..utils import update_url_query, urljoin class DashSegmentsFD(FragmentFD): @@ -42,24 +41,29 @@ class DashSegmentsFD(FragmentFD): self._prepare_and_start_frag_download(ctx, fmt) ctx['start'] = real_start - fragments_to_download = self._get_fragments(fmt, ctx) + extra_query = None + extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') + if extra_param_to_segment_url: + extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) + + fragments_to_download = self._get_fragments(fmt, ctx, extra_query) if real_downloader: self.to_screen( - '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) + f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}') info_dict['fragments'] = list(fragments_to_download) fd = real_downloader(self.ydl, self.params) return fd.real_download(filename, info_dict) args.append([ctx, fragments_to_download, fmt]) - return self.download_and_append_fragments_multiple(*args) + return self.download_and_append_fragments_multiple(*args, is_fatal=lambda idx: idx == 0) def _resolve_fragments(self, fragments, ctx): fragments = fragments(ctx) if callable(fragments) else fragments return [next(iter(fragments))] if self.params.get('test') else fragments - def _get_fragments(self, fmt, ctx): + def _get_fragments(self, fmt, ctx, extra_query): fragment_base_url = fmt.get('fragment_base_url') fragments = self._resolve_fragments(fmt['fragments'], ctx) @@ -72,9 +76,12 @@ class DashSegmentsFD(FragmentFD): if not fragment_url: assert fragment_base_url fragment_url = urljoin(fragment_base_url, fragment['path']) + if extra_query: + fragment_url = update_url_query(fragment_url, extra_query) yield { 'frag_index': frag_index, + 'fragment_count': fragment.get('fragment_count'), 'index': i, 'url': fragment_url, } diff --git a/plugins/youtube_download/yt_dlp/downloader/external.py b/plugins/youtube_download/yt_dlp/downloader/external.py index f4fdcf1..4ce8a3b 100644 --- a/plugins/youtube_download/yt_dlp/downloader/external.py +++ b/plugins/youtube_download/yt_dlp/downloader/external.py @@ -1,39 +1,49 @@ -from __future__ import unicode_literals - -import os.path +import enum +import json +import os import re import subprocess import sys +import tempfile import time +import uuid from .fragment import FragmentFD -from ..compat import ( - compat_setenv, - compat_str, -) -from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS +from ..compat import functools +from ..networking import Request +from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor from ..utils import ( + Popen, + RetryManager, + _configuration_args, + check_executable, + classproperty, + cli_bool_option, cli_option, cli_valueless_option, - cli_bool_option, - 
_configuration_args, determine_ext, - encodeFilename, encodeArgument, - handle_youtubedl_headers, - check_executable, - Popen, + encodeFilename, + find_available_port, remove_end, + traverse_obj, ) +class Features(enum.Enum): + TO_STDOUT = enum.auto() + MULTIPLE_FORMATS = enum.auto() + + class ExternalFD(FragmentFD): SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps') - can_download_to_stdout = False + SUPPORTED_FEATURES = () + _CAPTURE_STDERR = True def real_download(self, filename, info_dict): self.report_destination(filename) tmpfilename = self.temp_name(filename) + self._cookies_tempfile = None try: started = time.time() @@ -46,6 +56,9 @@ class ExternalFD(FragmentFD): # should take place retval = 0 self.to_screen('[%s] Interrupted by user' % self.get_basename()) + finally: + if self._cookies_tempfile: + self.try_remove(self._cookies_tempfile) if retval == 0: status = { @@ -55,7 +68,6 @@ class ExternalFD(FragmentFD): } if filename != '-': fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize)) self.try_rename(tmpfilename, filename) status.update({ 'downloaded_bytes': fsize, @@ -73,23 +85,32 @@ class ExternalFD(FragmentFD): def get_basename(cls): return cls.__name__[:-2].lower() - @property + @classproperty + def EXE_NAME(cls): + return cls.get_basename() + + @functools.cached_property def exe(self): - return self.get_basename() + return self.EXE_NAME @classmethod def available(cls, path=None): - path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT]) - if path: - cls.exe = path - return path - return False + path = check_executable( + cls.EXE_NAME if path in (None, cls.get_basename()) else path, + [cls.AVAILABLE_OPT]) + if not path: + return False + cls.exe = path + return path @classmethod def supports(cls, info_dict): - return ( - (cls.can_download_to_stdout or not info_dict.get('to_stdout')) - and info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS) + return all(( + not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES, + '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES, + not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'), + all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')), + )) @classmethod def can_download(cls, info_dict, path=None): @@ -106,9 +127,19 @@ class ExternalFD(FragmentFD): def _configuration_args(self, keys=None, *args, **kwargs): return _configuration_args( - self.get_basename(), self.params.get('external_downloader_args'), self.get_basename(), + self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME, keys, *args, **kwargs) + def _write_cookies(self): + if not self.ydl.cookiejar.filename: + tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False) + tmp_cookies.close() + self._cookies_tempfile = tmp_cookies.name + self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"') + # real_download resets _cookies_tempfile; if it's None then save() will write to cookiejar.filename + self.ydl.cookiejar.save(self._cookies_tempfile) + return self.ydl.cookiejar.filename or self._cookies_tempfile + def _call_downloader(self, tmpfilename, info_dict): """ Either overwrite this or implement _make_cmd """ cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] @@ -116,33 +147,27 @@ class ExternalFD(FragmentFD): self._debug_cmd(cmd) if 'fragments' not in info_dict: - p = Popen(cmd, 
stderr=subprocess.PIPE) - _, stderr = p.communicate_or_kill() - if p.returncode != 0: - self.to_stderr(stderr.decode('utf-8', 'replace')) - return p.returncode + _, stderr, returncode = self._call_process(cmd, info_dict) + if returncode and stderr: + self.to_stderr(stderr) + return returncode - fragment_retries = self.params.get('fragment_retries', 0) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - count = 0 - while count <= fragment_retries: - p = Popen(cmd, stderr=subprocess.PIPE) - _, stderr = p.communicate_or_kill() - if p.returncode == 0: + retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry, + frag_index=None, fatal=not skip_unavailable_fragments) + for retry in retry_manager: + _, stderr, returncode = self._call_process(cmd, info_dict) + if not returncode: break # TODO: Decide whether to retry based on error code # https://aria2.github.io/manual/en/html/aria2c.html#exit-status - self.to_stderr(stderr.decode('utf-8', 'replace')) - count += 1 - if count <= fragment_retries: - self.to_screen( - '[%s] Got error. Retrying fragments (attempt %d of %s)...' - % (self.get_basename(), count, self.format_retries(fragment_retries))) - if count > fragment_retries: - if not skip_unavailable_fragments: - self.report_error('Giving up after %s fragment retries' % fragment_retries) - return -1 + if stderr: + self.to_stderr(stderr) + retry.error = Exception() + continue + if not skip_unavailable_fragments and retry_manager.error: + return -1 decrypt_fragment = self.decrypter(info_dict) dest, _ = self.sanitize_open(tmpfilename, 'wb') @@ -150,7 +175,7 @@ class ExternalFD(FragmentFD): fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) try: src, _ = self.sanitize_open(fragment_filename, 'rb') - except IOError as err: + except OSError as err: if skip_unavailable_fragments and frag_index > 1: self.report_skip_fragment(frag_index, err) continue @@ -159,20 +184,27 @@ class ExternalFD(FragmentFD): dest.write(decrypt_fragment(fragment, src.read())) src.close() if not self.params.get('keep_fragments', False): - os.remove(encodeFilename(fragment_filename)) + self.try_remove(encodeFilename(fragment_filename)) dest.close() - os.remove(encodeFilename('%s.frag.urls' % tmpfilename)) + self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) return 0 + def _call_process(self, cmd, info_dict): + return Popen.run(cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None) + class CurlFD(ExternalFD): AVAILABLE_OPT = '-V' + _CAPTURE_STDERR = False # curl writes the progress to stderr def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '--location', '-o', tmpfilename] + cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed'] + cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url']) + if cookie_header: + cmd += ['--cookie', cookie_header] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] + cmd += ['--header', f'{key}: {val}'] cmd += self._bool_option('--continue-at', 'continuedl', '-', '0') cmd += self._valueless_option('--silent', 'noprogress') @@ -191,16 +223,6 @@ class CurlFD(ExternalFD): cmd += ['--', info_dict['url']] return cmd - def _call_downloader(self, tmpfilename, info_dict): - cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] - - self._debug_cmd(cmd) - - # curl writes the progress to stderr so don't capture it. 
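The `_make_cmd` pattern used by `CurlFD` (and the other external downloaders below) is plain list building: base options, then a cookie header if the jar has one, then one `--header` pair per HTTP header, then the URL behind `--`. A standalone sketch under those assumptions; the executable name, headers, and URL are stand-ins:

```python
def make_curl_cmd(exe, tmpfilename, url, http_headers=None, cookie_header=None):
    cmd = [exe, '--location', '-o', tmpfilename, '--compressed']
    if cookie_header:
        cmd += ['--cookie', cookie_header]  # single 'name=value; ...' string
    for key, val in (http_headers or {}).items():
        cmd += ['--header', f'{key}: {val}']
    cmd += ['--', url]  # '--' ends option parsing so unusual URLs stay safe
    return cmd

print(make_curl_cmd('curl', 'out.part', 'https://example.com/file',
                    {'User-Agent': 'test'}, 'session=abc'))
```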
- p = Popen(cmd) - p.communicate_or_kill() - return p.returncode - class AxelFD(ExternalFD): AVAILABLE_OPT = '-V' @@ -209,7 +231,10 @@ class AxelFD(ExternalFD): cmd = [self.exe, '-o', tmpfilename] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): - cmd += ['-H', '%s: %s' % (key, val)] + cmd += ['-H', f'{key}: {val}'] + cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url']) + if cookie_header: + cmd += ['-H', f'Cookie: {cookie_header}', '--max-redirect=0'] cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd @@ -219,10 +244,12 @@ class WgetFD(ExternalFD): AVAILABLE_OPT = '--version' def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] + cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto'] + if self.ydl.cookiejar.get_cookie_header(info_dict['url']): + cmd += ['--load-cookies', self._write_cookies()] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] + cmd += ['--header', f'{key}: {val}'] cmd += self._option('--limit-rate', 'ratelimit') retry = self._option('--tries', 'retries') if len(retry) == 2: @@ -230,7 +257,10 @@ class WgetFD(ExternalFD): retry[1] = '0' cmd += retry cmd += self._option('--bind-address', 'source_address') - cmd += self._option('--proxy', 'proxy') + proxy = self.params.get('proxy') + if proxy: + for var in ('http_proxy', 'https_proxy'): + cmd += ['--execute', f'{var}={proxy}'] cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate') cmd += self._configuration_args() cmd += ['--', info_dict['url']] @@ -250,18 +280,33 @@ class Aria2cFD(ExternalFD): check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) return all(check_results) + @staticmethod + def _aria2c_filename(fn): + return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}' + + def _call_downloader(self, tmpfilename, info_dict): + # FIXME: Disabled due to https://github.com/yt-dlp/yt-dlp/issues/5931 + if False and 'no-external-downloader-progress' not in self.params.get('compat_opts', []): + info_dict['__rpc'] = { + 'port': find_available_port() or 19190, + 'secret': str(uuid.uuid4()), + } + return super()._call_downloader(tmpfilename, info_dict) + def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-c', + cmd = [self.exe, '-c', '--no-conf', '--console-log-level=warn', '--summary-interval=0', '--download-result=hide', - '--file-allocation=none', '-x16', '-j16', '-s16'] + '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16'] if 'fragments' in info_dict: cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true'] else: cmd += ['--min-split-size', '1M'] + if self.ydl.cookiejar.get_cookie_header(info_dict['url']): + cmd += [f'--load-cookies={self._write_cookies()}'] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] + cmd += ['--header', f'{key}: {val}'] cmd += self._option('--max-overall-download-limit', 'ratelimit') cmd += self._option('--interface', 'source_address') cmd += self._option('--all-proxy', 'proxy') @@ -270,6 +315,12 @@ class Aria2cFD(ExternalFD): cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=') cmd += self._configuration_args() + if '__rpc' in info_dict: + cmd += [ + '--enable-rpc', + f'--rpc-listen-port={info_dict["__rpc"]["port"]}', + 
f'--rpc-secret={info_dict["__rpc"]["secret"]}'] + # aria2c strips out spaces from the beginning/end of filenames and paths. # We work around this issue by adding a "./" to the beginning of the # filename and relative path, and adding a "/" at the end of the path. @@ -278,11 +329,9 @@ class Aria2cFD(ExternalFD): # https://github.com/aria2/aria2/issues/1373 dn = os.path.dirname(tmpfilename) if dn: - if not os.path.isabs(dn): - dn = '.%s%s' % (os.path.sep, dn) - cmd += ['--dir', dn + os.path.sep] + cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep] if 'fragments' not in info_dict: - cmd += ['--out', '.%s%s' % (os.path.sep, os.path.basename(tmpfilename))] + cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))] cmd += ['--auto-file-renaming=false'] if 'fragments' in info_dict: @@ -291,35 +340,121 @@ class Aria2cFD(ExternalFD): url_list = [] for frag_index, fragment in enumerate(info_dict['fragments']): fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) - url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename)) + url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename))) stream, _ = self.sanitize_open(url_list_file, 'wb') - stream.write('\n'.join(url_list).encode('utf-8')) + stream.write('\n'.join(url_list).encode()) stream.close() - cmd += ['-i', url_list_file] + cmd += ['-i', self._aria2c_filename(url_list_file)] else: cmd += ['--', info_dict['url']] return cmd + def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()): + # Does not actually need to be UUID, just unique + sanitycheck = str(uuid.uuid4()) + d = json.dumps({ + 'jsonrpc': '2.0', + 'id': sanitycheck, + 'method': method, + 'params': [f'token:{rpc_secret}', *params], + }).encode('utf-8') + request = Request( + f'http://localhost:{rpc_port}/jsonrpc', + data=d, headers={ + 'Content-Type': 'application/json', + 'Content-Length': f'{len(d)}', + }, proxies={'all': None}) + with self.ydl.urlopen(request) as r: + resp = json.load(r) + assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server' + return resp['result'] + + def _call_process(self, cmd, info_dict): + if '__rpc' not in info_dict: + return super()._call_process(cmd, info_dict) + + send_rpc = functools.partial(self.aria2c_rpc, info_dict['__rpc']['port'], info_dict['__rpc']['secret']) + started = time.time() + + fragmented = 'fragments' in info_dict + frag_count = len(info_dict['fragments']) if fragmented else 1 + status = { + 'filename': info_dict.get('_filename'), + 'status': 'downloading', + 'elapsed': 0, + 'downloaded_bytes': 0, + 'fragment_count': frag_count if fragmented else None, + 'fragment_index': 0 if fragmented else None, + } + self._hook_progress(status, info_dict) + + def get_stat(key, *obj, average=False): + val = tuple(filter(None, map(float, traverse_obj(obj, (..., ..., key))))) or [0] + return sum(val) / (len(val) if average else 1) + + with Popen(cmd, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) as p: + # Add a small sleep so that RPC client can receive response, + # or the connection stalls infinitely + time.sleep(0.2) + retval = p.poll() + while retval is None: + # We don't use tellStatus as we won't know the GID without reading stdout + # Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive + active = send_rpc('aria2.tellActive') + completed = send_rpc('aria2.tellStopped', [0, frag_count]) + + downloaded = get_stat('totalLength', completed) + get_stat('completedLength', active) + speed = 
get_stat('downloadSpeed', active) + total = frag_count * get_stat('totalLength', active, completed, average=True) + if total < downloaded: + total = None + + status.update({ + 'downloaded_bytes': int(downloaded), + 'speed': speed, + 'total_bytes': None if fragmented else total, + 'total_bytes_estimate': total, + 'eta': (total - downloaded) / (speed or 1), + 'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None, + 'elapsed': time.time() - started + }) + self._hook_progress(status, info_dict) + + if not active and len(completed) >= frag_count: + send_rpc('aria2.shutdown') + retval = p.wait() + break + + time.sleep(0.1) + retval = p.poll() + + return '', p.stderr.read(), retval + class HttpieFD(ExternalFD): AVAILABLE_OPT = '--version' - - @classmethod - def available(cls, path=None): - return super().available(path or 'http') + EXE_NAME = 'http' def _make_cmd(self, tmpfilename, info_dict): cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): - cmd += ['%s:%s' % (key, val)] + cmd += [f'{key}:{val}'] + + # httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1] + # If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2] + # 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq + # 2: https://httpie.io/docs/cli/sessions + cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url']) + if cookie_header: + cmd += [f'Cookie:{cookie_header}'] return cmd class FFmpegFD(ExternalFD): SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments') - can_download_to_stdout = True + SUPPORTED_FEATURES = (Features.TO_STDOUT, Features.MULTIPLE_FORMATS) @classmethod def available(cls, path=None): @@ -327,10 +462,6 @@ class FFmpegFD(ExternalFD): # Fixme: This may be wrong when --ffmpeg-location is used return FFmpegPostProcessor().available - @classmethod - def supports(cls, info_dict): - return all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')) - def on_process_started(self, proc, stdin): """ Override this in subclasses """ pass @@ -345,7 +476,6 @@ class FFmpegFD(ExternalFD): and cls.can_download(info_dict)) def _call_downloader(self, tmpfilename, info_dict): - urls = [f['url'] for f in info_dict.get('requested_formats', [])] or [info_dict['url']] ffpp = FFmpegPostProcessor(downloader=self) if not ffpp.available: self.report_error('m3u8 download detected but ffmpeg could not be found. Please install') @@ -361,9 +491,11 @@ class FFmpegFD(ExternalFD): if not self.params.get('verbose'): args += ['-hide_banner'] - args += info_dict.get('_ffmpeg_args', []) + args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[]) - # This option exists only for compatibility. Extractors should use `_ffmpeg_args` instead + # These exists only for compatibility. 
Extractors should use + # info_dict['downloader_options']['ffmpeg_args'] instead + args += info_dict.get('_ffmpeg_args') or [] seekable = info_dict.get('_seekable') if seekable is not None: # setting -seekable prevents ffmpeg from guessing if the server @@ -373,21 +505,6 @@ class FFmpegFD(ExternalFD): # http://trac.ffmpeg.org/ticket/6125#comment:10 args += ['-seekable', '1' if seekable else '0'] - # start_time = info_dict.get('start_time') or 0 - # if start_time: - # args += ['-ss', compat_str(start_time)] - # end_time = info_dict.get('end_time') - # if end_time: - # args += ['-t', compat_str(end_time - start_time)] - - if info_dict.get('http_headers') is not None and re.match(r'^https?://', urls[0]): - # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: - # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. - headers = handle_youtubedl_headers(info_dict['http_headers']) - args += [ - '-headers', - ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())] - env = None proxy = self.params.get('proxy') if proxy: @@ -404,8 +521,8 @@ class FFmpegFD(ExternalFD): # We could switch to the following code if we are able to detect version properly # args += ['-http_proxy', proxy] env = os.environ.copy() - compat_setenv('HTTP_PROXY', proxy, env=env) - compat_setenv('http_proxy', proxy, env=env) + env['HTTP_PROXY'] = proxy + env['http_proxy'] = proxy protocol = info_dict.get('protocol') @@ -435,20 +552,41 @@ class FFmpegFD(ExternalFD): if isinstance(conn, list): for entry in conn: args += ['-rtmp_conn', entry] - elif isinstance(conn, compat_str): + elif isinstance(conn, str): args += ['-rtmp_conn', conn] - for i, url in enumerate(urls): - args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url] + start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end') + + selected_formats = info_dict.get('requested_formats') or [info_dict] + for i, fmt in enumerate(selected_formats): + is_http = re.match(r'^https?://', fmt['url']) + cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else [] + if cookies: + args.extend(['-cookies', ''.join( + f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n' + for cookie in cookies)]) + if fmt.get('http_headers') and is_http: + # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: + # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. 
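The CRLF detail the comment above warns about is easy to get wrong, so here it is as a standalone snippet: every header line passed to ffmpeg's `-headers` option must end in `\r\n`. The header values here are placeholders:

```python
http_headers = {'User-Agent': 'test-agent', 'Referer': 'https://example.com/'}
# Without the trailing '\r\n' per line, ffmpeg logs "No trailing CRLF found in HTTP header"
headers_arg = ''.join(f'{key}: {val}\r\n' for key, val in http_headers.items())
args = ['-headers', headers_arg]  # must precede the matching '-i <url>' input
print(args)
```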
+ args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())]) + + if start_time: + args += ['-ss', str(start_time)] + if end_time: + args += ['-t', str(end_time - start_time)] + + args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']] + + if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): + args += ['-c', 'copy'] - args += ['-c', 'copy'] if info_dict.get('requested_formats') or protocol == 'http_dash_segments': - for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]): + for i, fmt in enumerate(selected_formats): stream_number = fmt.get('manifest_stream_number', 0) args.extend(['-map', f'{i}:{stream_number}']) if self.params.get('test', False): - args += ['-fs', compat_str(self._TEST_FILE_SIZE)] + args += ['-fs', str(self._TEST_FILE_SIZE)] ext = info_dict['ext'] if protocol in ('m3u8', 'm3u8_native'): @@ -483,35 +621,35 @@ class FFmpegFD(ExternalFD): args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) self._debug_cmd(args) - proc = Popen(args, stdin=subprocess.PIPE, env=env) - if url in ('-', 'pipe:'): - self.on_process_started(proc, proc.stdin) - try: - retval = proc.wait() - except BaseException as e: - # subprocces.run would send the SIGKILL signal to ffmpeg and the - # mp4 file couldn't be played, but if we ask ffmpeg to quit it - # produces a file that is playable (this is mostly useful for live - # streams). Note that Windows is not affected and produces playable - # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). - if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'): - proc.communicate_or_kill(b'q') - else: - proc.kill() - proc.wait() - raise - return retval + piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats) + with Popen(args, stdin=subprocess.PIPE, env=env) as proc: + if piped: + self.on_process_started(proc, proc.stdin) + try: + retval = proc.wait() + except BaseException as e: + # subprocces.run would send the SIGKILL signal to ffmpeg and the + # mp4 file couldn't be played, but if we ask ffmpeg to quit it + # produces a file that is playable (this is mostly useful for live + # streams). Note that Windows is not affected and produces playable + # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). + if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and not piped: + proc.communicate_or_kill(b'q') + else: + proc.kill(timeout=None) + raise + return retval class AVconvFD(FFmpegFD): pass -_BY_NAME = dict( - (klass.get_basename(), klass) +_BY_NAME = { + klass.get_basename(): klass for name, klass in globals().items() if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD') -) +} def list_external_downloaders(): @@ -519,8 +657,8 @@ def list_external_downloaders(): def get_external_downloader(external_downloader): - """ Given the name of the executable, see whether we support the given - downloader . 
""" - # Drop .exe extension on Windows + """ Given the name of the executable, see whether we support the given downloader """ bn = os.path.splitext(os.path.basename(external_downloader))[0] - return _BY_NAME.get(bn) + return _BY_NAME.get(bn) or next(( + klass for klass in _BY_NAME.values() if klass.EXE_NAME in bn + ), None) diff --git a/plugins/youtube_download/yt_dlp/downloader/f4m.py b/plugins/youtube_download/yt_dlp/downloader/f4m.py index 0008b7c..28cbba0 100644 --- a/plugins/youtube_download/yt_dlp/downloader/f4m.py +++ b/plugins/youtube_download/yt_dlp/downloader/f4m.py @@ -1,23 +1,14 @@ -from __future__ import division, unicode_literals - +import base64 import io import itertools +import struct import time +import urllib.parse from .fragment import FragmentFD -from ..compat import ( - compat_b64decode, - compat_etree_fromstring, - compat_urlparse, - compat_urllib_error, - compat_urllib_parse_urlparse, - compat_struct_pack, - compat_struct_unpack, -) -from ..utils import ( - fix_xml_ampersands, - xpath_text, -) +from ..compat import compat_etree_fromstring +from ..networking.exceptions import HTTPError +from ..utils import fix_xml_ampersands, xpath_text class DataTruncatedError(Exception): @@ -40,13 +31,13 @@ class FlvReader(io.BytesIO): # Utility functions for reading numbers and strings def read_unsigned_long_long(self): - return compat_struct_unpack('!Q', self.read_bytes(8))[0] + return struct.unpack('!Q', self.read_bytes(8))[0] def read_unsigned_int(self): - return compat_struct_unpack('!I', self.read_bytes(4))[0] + return struct.unpack('!I', self.read_bytes(4))[0] def read_unsigned_char(self): - return compat_struct_unpack('!B', self.read_bytes(1))[0] + return struct.unpack('!B', self.read_bytes(1))[0] def read_string(self): res = b'' @@ -193,7 +184,7 @@ def build_fragments_list(boot_info): first_frag_number = fragment_run_entry_table[0]['first'] fragments_counter = itertools.count(first_frag_number) for segment, fragments_count in segment_run_table['segment_run']: - # In some live HDS streams (for example Rai), `fragments_count` is + # In some live HDS streams (e.g. Rai), `fragments_count` is # abnormal and causing out-of-memory errors. It's OK to change the # number of fragments for live streams as they are updated periodically if fragments_count == 4294967295 and boot_info['live']: @@ -208,11 +199,11 @@ def build_fragments_list(boot_info): def write_unsigned_int(stream, val): - stream.write(compat_struct_pack('!I', val)) + stream.write(struct.pack('!I', val)) def write_unsigned_int_24(stream, val): - stream.write(compat_struct_pack('!I', val)[1:]) + stream.write(struct.pack('!I', val)[1:]) def write_flv_header(stream): @@ -261,8 +252,6 @@ class F4mFD(FragmentFD): A downloader for f4m manifests or AdobeHDS. """ - FD_NAME = 'f4m' - def _get_unencrypted_media(self, doc): media = doc.findall(_add_ns('media')) if not media: @@ -308,12 +297,12 @@ class F4mFD(FragmentFD): # 1. 
http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m bootstrap_url = node.get('url') if bootstrap_url: - bootstrap_url = compat_urlparse.urljoin( + bootstrap_url = urllib.parse.urljoin( base_url, bootstrap_url) boot_info = self._get_bootstrap_from_url(bootstrap_url) else: bootstrap_url = None - bootstrap = compat_b64decode(node.text) + bootstrap = base64.b64decode(node.text) boot_info = read_bootstrap_info(bootstrap) return boot_info, bootstrap_url @@ -323,7 +312,7 @@ class F4mFD(FragmentFD): self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) - man_url = urlh.geturl() + man_url = urlh.url # Some manifests may be malformed, e.g. prosiebensat1 generated manifests # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244 # and https://github.com/ytdl-org/youtube-dl/issues/7823) @@ -343,14 +332,14 @@ class F4mFD(FragmentFD): # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. man_base_url = get_base_url(doc) or man_url - base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url']) + base_url = urllib.parse.urljoin(man_base_url, media.attrib['url']) bootstrap_node = doc.find(_add_ns('bootstrapInfo')) boot_info, bootstrap_url = self._parse_bootstrap_node( bootstrap_node, man_base_url) live = boot_info['live'] metadata_node = media.find(_add_ns('metadata')) if metadata_node is not None: - metadata = compat_b64decode(metadata_node.text) + metadata = base64.b64decode(metadata_node.text) else: metadata = None @@ -378,7 +367,7 @@ class F4mFD(FragmentFD): if not live: write_metadata_tag(dest_stream, metadata) - base_url_parsed = compat_urllib_parse_urlparse(base_url) + base_url_parsed = urllib.parse.urlparse(base_url) self._start_frag_download(ctx, info_dict) @@ -398,9 +387,10 @@ class F4mFD(FragmentFD): query.append(info_dict['extra_param_to_segment_url']) url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query)) try: - success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict) + success = self._download_fragment(ctx, url_parsed.geturl(), info_dict) if not success: return False + down_data = self._read_fragment(ctx) reader = FlvReader(down_data) while True: try: @@ -417,8 +407,8 @@ class F4mFD(FragmentFD): if box_type == b'mdat': self._append_fragment(ctx, box_data) break - except (compat_urllib_error.HTTPError, ) as err: - if live and (err.code == 404 or err.code == 410): + except HTTPError as err: + if live and (err.status == 404 or err.status == 410): # We didn't keep up with the live window. Continue # with the next available fragment. msg = 'Fragment %d unavailable' % frag_i @@ -434,6 +424,4 @@ class F4mFD(FragmentFD): msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1)) self.report_warning(msg) - self._finish_frag_download(ctx, info_dict) - - return True + return self._finish_frag_download(ctx, info_dict) diff --git a/plugins/youtube_download/yt_dlp/downloader/fc2.py b/plugins/youtube_download/yt_dlp/downloader/fc2.py new file mode 100644 index 0000000..f9763de --- /dev/null +++ b/plugins/youtube_download/yt_dlp/downloader/fc2.py @@ -0,0 +1,46 @@ +import threading + +from .common import FileDownloader +from .external import FFmpegFD + + +class FC2LiveFD(FileDownloader): + """ + Downloads FC2 live without being stopped.
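The new FC2 downloader keeps its control websocket alive with a re-arming `threading.Timer`, as the file below shows. A standalone sketch of that heartbeat pattern, with `send` standing in for the websocket's send method:

```python
import threading

def start_heartbeat(send, interval=30):
    state = {'id': 0, 'stopped': False}

    def beat():
        if state['stopped']:
            return
        state['id'] += 1
        try:
            send('{"name":"heartbeat","arguments":{},"id":%d}' % state['id'])
        except Exception:
            pass  # in the real downloader a failed heartbeat is only logged
        timer = threading.Timer(interval, beat)  # re-arm for the next beat
        timer.daemon = True  # don't keep the interpreter alive
        timer.start()

    beat()  # first beat fires immediately, like the heartbeat() call below
    return lambda: state.update(stopped=True)  # call this to stop heartbeating

stop = start_heartbeat(print, interval=1)
stop()
```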
+ Note, this is not a part of public API, and will be removed without notice. + DO NOT USE + """ + + def real_download(self, filename, info_dict): + ws = info_dict['ws'] + + heartbeat_lock = threading.Lock() + heartbeat_state = [None, 1] + + def heartbeat(): + if heartbeat_state[1] < 0: + return + + try: + heartbeat_state[1] += 1 + ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1]) + except Exception: + self.to_screen('[fc2:live] Heartbeat failed') + + with heartbeat_lock: + heartbeat_state[0] = threading.Timer(30, heartbeat) + heartbeat_state[0]._daemonic = True + heartbeat_state[0].start() + + heartbeat() + + new_info_dict = info_dict.copy() + new_info_dict.update({ + 'ws': None, + 'protocol': 'live_ffmpeg', + }) + try: + return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict) + finally: + # stop heartbeating + heartbeat_state[1] = -1 diff --git a/plugins/youtube_download/yt_dlp/downloader/fragment.py b/plugins/youtube_download/yt_dlp/downloader/fragment.py index 19c0990..b4b680d 100644 --- a/plugins/youtube_download/yt_dlp/downloader/fragment.py +++ b/plugins/youtube_download/yt_dlp/downloader/fragment.py @@ -1,40 +1,26 @@ -from __future__ import division, unicode_literals - -import http.client +import concurrent.futures +import contextlib import json import math import os +import struct import time -try: - import concurrent.futures - can_threaded_download = True -except ImportError: - can_threaded_download = False - from .common import FileDownloader from .http import HttpFD from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 -from ..compat import ( - compat_os_name, - compat_urllib_error, - compat_struct_pack, -) -from ..utils import ( - DownloadError, - error_to_compat_str, - encodeFilename, - sanitized_Request, -) +from ..compat import compat_os_name +from ..networking import Request +from ..networking.exceptions import HTTPError, IncompleteRead +from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj +from ..utils.networking import HTTPHeaderDict class HttpQuietDownloader(HttpFD): def to_screen(self, *args, **kargs): pass - def report_retry(self, err, count, retries): - super().to_screen( - f'[download] Got server HTTP error: {err}. Retrying (attempt {count} of {self.format_retries(retries)}) ...') + to_console_title = to_screen class FragmentFD(FileDownloader): @@ -43,8 +29,8 @@ class FragmentFD(FileDownloader): Available options: - fragment_retries: Number of times to retry a fragment for HTTP error (DASH - and hlsnative only) + fragment_retries: Number of times to retry a fragment for HTTP error + (DASH and hlsnative only). Default is 0 for API, but 10 for CLI skip_unavailable_fragments: Skip unavailable fragments (DASH and hlsnative only) keep_fragments: Keep downloaded fragments on disk after downloading is @@ -74,9 +60,9 @@ class FragmentFD(FileDownloader): """ def report_retry_fragment(self, err, frag_index, count, retries): - self.to_screen( - '\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...' - % (error_to_compat_str(err), frag_index, count, self.format_retries(retries))) + self.deprecation_warning('yt_dlp.downloader.FragmentFD.report_retry_fragment is deprecated. 
' + 'Use yt_dlp.downloader.FileDownloader.report_retry instead') + return self.report_retry(err, count, retries, frag_index) def report_skip_fragment(self, frag_index, err=None): err = f' {err};' if err else '' @@ -84,7 +70,7 @@ class FragmentFD(FileDownloader): def _prepare_url(self, info_dict, url): headers = info_dict.get('http_headers') - return sanitized_Request(url, None, headers) if headers else url + return Request(url, None, headers) if headers else url def _prepare_and_start_frag_download(self, ctx, info_dict): self._prepare_frag_download(ctx) @@ -130,16 +116,28 @@ class FragmentFD(FileDownloader): 'request_data': request_data, 'ctx_id': ctx.get('ctx_id'), } - success = ctx['dl'].download(fragment_filename, fragment_info_dict) + frag_resume_len = 0 + if ctx['dl'].params.get('continuedl', True): + frag_resume_len = self.filesize_or_none(self.temp_name(fragment_filename)) + fragment_info_dict['frag_resume_len'] = ctx['frag_resume_len'] = frag_resume_len + + success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict) if not success: - return False, None + return False if fragment_info_dict.get('filetime'): ctx['fragment_filetime'] = fragment_info_dict.get('filetime') ctx['fragment_filename_sanitized'] = fragment_filename - return True, self._read_fragment(ctx) + return True def _read_fragment(self, ctx): - down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') + if not ctx.get('fragment_filename_sanitized'): + return None + try: + down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') + except FileNotFoundError: + if ctx.get('live'): + return None + raise ctx['fragment_filename_sanitized'] = frag_sanitized frag_content = down.read() down.close() @@ -153,42 +151,34 @@ class FragmentFD(FileDownloader): if self.__do_ytdl_file(ctx): self._write_ytdl_file(ctx) if not self.params.get('keep_fragments', False): - os.remove(encodeFilename(ctx['fragment_filename_sanitized'])) + self.try_remove(encodeFilename(ctx['fragment_filename_sanitized'])) del ctx['fragment_filename_sanitized'] def _prepare_frag_download(self, ctx): - if 'live' not in ctx: - ctx['live'] = False - if not ctx['live']: + if not ctx.setdefault('live', False): total_frags_str = '%d' % ctx['total_frags'] ad_frags = ctx.get('ad_frags', 0) if ad_frags: total_frags_str += ' (not including %d ad)' % ad_frags else: total_frags_str = 'unknown (live)' - self.to_screen( - '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str)) + self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}') self.report_destination(ctx['filename']) - dl = HttpQuietDownloader( - self.ydl, - { - 'continuedl': True, - 'quiet': self.params.get('quiet'), - 'noprogress': True, - 'ratelimit': self.params.get('ratelimit'), - 'retries': self.params.get('retries', 0), - 'nopart': self.params.get('nopart', False), - 'test': self.params.get('test', False), - } - ) + dl = HttpQuietDownloader(self.ydl, { + **self.params, + 'noprogress': True, + 'test': False, + 'sleep_interval': 0, + 'max_sleep_interval': 0, + 'sleep_interval_subtitles': 0, + }) tmpfilename = self.temp_name(ctx['filename']) open_mode = 'wb' - resume_len = 0 # Establish possible resume length - if os.path.isfile(encodeFilename(tmpfilename)): + resume_len = self.filesize_or_none(tmpfilename) + if resume_len > 0: open_mode = 'ab' - resume_len = os.path.getsize(encodeFilename(tmpfilename)) # Should be initialized before ytdl file check ctx.update({ @@ -197,7 +187,9 @@ class FragmentFD(FileDownloader): }) if 
self.__do_ytdl_file(ctx): - if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))): + ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))) + continuedl = self.params.get('continuedl', True) + if continuedl and ytdl_file_exists: self._read_ytdl_file(ctx) is_corrupt = ctx.get('ytdl_corrupt') is True is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0 @@ -211,7 +203,12 @@ class FragmentFD(FileDownloader): if 'ytdl_corrupt' in ctx: del ctx['ytdl_corrupt'] self._write_ytdl_file(ctx) + else: + if not continuedl: + if ytdl_file_exists: + self._read_ytdl_file(ctx) + ctx['fragment_index'] = resume_len = 0 self._write_ytdl_file(ctx) assert ctx['fragment_index'] == 0 @@ -253,6 +250,9 @@ class FragmentFD(FileDownloader): if s['status'] not in ('downloading', 'finished'): return + if not total_frags and ctx.get('fragment_count'): + state['fragment_count'] = ctx['fragment_count'] + if ctx_id is not None and s.get('ctx_id') != ctx_id: return @@ -281,12 +281,10 @@ class FragmentFD(FileDownloader): else: frag_downloaded_bytes = s['downloaded_bytes'] state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes'] - if not ctx['live']: - state['eta'] = self.calc_eta( - start, time_now, estimated_size - resume_len, - state['downloaded_bytes'] - resume_len) ctx['speed'] = state['speed'] = self.calc_speed( - ctx['fragment_started'], time_now, frag_downloaded_bytes) + ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx.get('frag_resume_len', 0)) + if not ctx['live']: + state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes']) ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes self._hook_progress(state, info_dict) @@ -297,23 +295,26 @@ class FragmentFD(FileDownloader): def _finish_frag_download(self, ctx, info_dict): ctx['dest_stream'].close() if self.__do_ytdl_file(ctx): - ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename'])) - if os.path.isfile(ytdl_filename): - os.remove(ytdl_filename) + self.try_remove(self.ytdl_filename(ctx['filename'])) elapsed = time.time() - ctx['started'] - if ctx['tmpfilename'] == '-': - downloaded_bytes = ctx['complete_frags_downloaded_bytes'] + to_file = ctx['tmpfilename'] != '-' + if to_file: + downloaded_bytes = self.filesize_or_none(ctx['tmpfilename']) else: + downloaded_bytes = ctx['complete_frags_downloaded_bytes'] + + if not downloaded_bytes: + if to_file: + self.try_remove(ctx['tmpfilename']) + self.report_error('The downloaded file is empty') + return False + elif to_file: self.try_rename(ctx['tmpfilename'], ctx['filename']) - if self.params.get('updatetime', True): - filetime = ctx.get('fragment_filetime') - if filetime: - try: - os.utime(ctx['filename'], (time.time(), filetime)) - except Exception: - pass - downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename'])) + filetime = ctx.get('fragment_filetime') + if self.params.get('updatetime', True) and filetime: + with contextlib.suppress(Exception): + os.utime(ctx['filename'], (time.time(), filetime)) self._hook_progress({ 'downloaded_bytes': downloaded_bytes, @@ -325,6 +326,7 @@ class FragmentFD(FileDownloader): 'max_progress': ctx.get('max_progress'), 'progress_idx': ctx.get('progress_idx'), }, info_dict) + return True def _prepare_external_frag_download(self, ctx): if 'live' not in ctx: @@ -336,8 +338,7 @@ class FragmentFD(FileDownloader): total_frags_str += ' (not including %d ad)' % ad_frags else: total_frags_str = 'unknown (live)' - self.to_screen( - '[%s] Total 
fragments: %s' % (self.FD_NAME, total_frags_str)) + self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}') tmpfilename = self.temp_name(ctx['filename']) @@ -356,11 +357,14 @@ class FragmentFD(FileDownloader): return _key_cache[url] def decrypt_fragment(fragment, frag_content): + if frag_content is None: + return decrypt_info = fragment.get('decrypt_info') if not decrypt_info or decrypt_info['METHOD'] != 'AES-128': return frag_content - iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence']) - decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI']) + iv = decrypt_info.get('IV') or struct.pack('>8xq', fragment['media_sequence']) + decrypt_info['KEY'] = (decrypt_info.get('KEY') + or _get_key(traverse_obj(info_dict, ('hls_aes', 'uri')) or decrypt_info['URI'])) # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded, # not what it decrypts to. @@ -370,7 +374,7 @@ class FragmentFD(FileDownloader): return decrypt_fragment - def download_and_append_fragments_multiple(self, *args, pack_func=None, finish_func=None): + def download_and_append_fragments_multiple(self, *args, **kwargs): ''' @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... all args must be either tuple or list @@ -378,146 +382,156 @@ class FragmentFD(FileDownloader): interrupt_trigger = [True] max_progress = len(args) if max_progress == 1: - return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func) + return self.download_and_append_fragments(*args[0], **kwargs) max_workers = self.params.get('concurrent_fragment_downloads', 1) if max_progress > 1: self._prepare_multiline_status(max_progress) + is_live = any(traverse_obj(args, (..., 2, 'is_live'))) def thread_func(idx, ctx, fragments, info_dict, tpe): ctx['max_progress'] = max_progress ctx['progress_idx'] = idx return self.download_and_append_fragments( - ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func, - tpe=tpe, interrupt_trigger=interrupt_trigger) + ctx, fragments, info_dict, **kwargs, tpe=tpe, interrupt_trigger=interrupt_trigger) class FTPE(concurrent.futures.ThreadPoolExecutor): # has to stop this or it's going to wait on the worker thread itself def __exit__(self, exc_type, exc_val, exc_tb): pass - spins = [] if compat_os_name == 'nt': - self.report_warning('Ctrl+C does not work on Windows when used with parallel threads. 
' - 'This is a known issue and patches are welcome') + def future_result(future): + while True: + try: + return future.result(0.1) + except KeyboardInterrupt: + raise + except concurrent.futures.TimeoutError: + continue + else: + def future_result(future): + return future.result() + + def interrupt_trigger_iter(fg): + for f in fg: + if not interrupt_trigger[0]: + break + yield f + + spins = [] for idx, (ctx, fragments, info_dict) in enumerate(args): tpe = FTPE(math.ceil(max_workers / max_progress)) - job = tpe.submit(thread_func, idx, ctx, fragments, info_dict, tpe) + job = tpe.submit(thread_func, idx, ctx, interrupt_trigger_iter(fragments), info_dict, tpe) spins.append((tpe, job)) result = True for tpe, job in spins: try: - result = result and job.result() + result = result and future_result(job) except KeyboardInterrupt: interrupt_trigger[0] = False finally: tpe.shutdown(wait=True) - if not interrupt_trigger[0]: + if not interrupt_trigger[0] and not is_live: raise KeyboardInterrupt() + # we expect the user wants to stop and DO WANT the preceding postprocessors to run; + # so returning a intermediate result here instead of KeyboardInterrupt on live return result def download_and_append_fragments( - self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None, - tpe=None, interrupt_trigger=None): - if not interrupt_trigger: - interrupt_trigger = (True, ) + self, ctx, fragments, info_dict, *, is_fatal=(lambda idx: False), + pack_func=(lambda content, idx: content), finish_func=None, + tpe=None, interrupt_trigger=(True, )): - fragment_retries = self.params.get('fragment_retries', 0) - is_fatal = ( - ((lambda _: False) if info_dict.get('is_live') else (lambda idx: idx == 0)) - if self.params.get('skip_unavailable_fragments', True) else (lambda _: True)) - - if not pack_func: - pack_func = lambda frag_content, _: frag_content + if not self.params.get('skip_unavailable_fragments', True): + is_fatal = lambda _: True def download_fragment(fragment, ctx): + if not interrupt_trigger[0]: + return + frag_index = ctx['fragment_index'] = fragment['frag_index'] ctx['last_error'] = None - if not interrupt_trigger[0]: - return False, frag_index - headers = info_dict.get('http_headers', {}).copy() + headers = HTTPHeaderDict(info_dict.get('http_headers')) byte_range = fragment.get('byte_range') if byte_range: headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) # Never skip the first fragment fatal = is_fatal(fragment.get('index') or (frag_index - 1)) - count, frag_content = 0, None - while count <= fragment_retries: - try: - success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers) - if not success: - return False, frag_index - break - except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err: - # Unavailable (possibly temporary) fragments may be served. - # First we try to retry then either skip or abort. - # See https://github.com/ytdl-org/youtube-dl/issues/10165, - # https://github.com/ytdl-org/youtube-dl/issues/10448). 
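The `future_result` polling helper above exists because, on Windows, a bare `future.result()` blocks in a way that never lets `KeyboardInterrupt` through; polling with a short timeout keeps the main thread responsive to Ctrl+C. A standalone sketch of that pattern (the names and the `shutdown(wait=False)` handling here are illustrative, not yt-dlp API):

```python
import concurrent.futures
import time


def wait_interruptible(future, poll=0.1):
    # Poll with a short timeout so the main thread wakes up regularly
    # and can service KeyboardInterrupt; on Windows a plain
    # future.result() blocks without ever delivering Ctrl+C.
    while True:
        try:
            return future.result(poll)
        except concurrent.futures.TimeoutError:
            continue


pool = concurrent.futures.ThreadPoolExecutor()
job = pool.submit(time.sleep, 10)
try:
    wait_interruptible(job)
except KeyboardInterrupt:
    # Mirror the no-op __exit__ of the FTPE subclass above: don't block
    # waiting for worker threads to finish their current task.
    pool.shutdown(wait=False)
    raise
```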
- count += 1 - ctx['last_error'] = err - if count <= fragment_retries: - self.report_retry_fragment(err, frag_index, count, fragment_retries) - except DownloadError: - # Don't retry fragment if error occurred during HTTP downloading - # itself since it has own retry settings - if not fatal: - break - raise - if count > fragment_retries: - if not fatal: - return False, frag_index - ctx['dest_stream'].close() - self.report_error('Giving up after %s fragment retries' % fragment_retries) - return False, frag_index - return frag_content, frag_index + def error_callback(err, count, retries): + if fatal and count > retries: + ctx['dest_stream'].close() + self.report_retry(err, count, retries, frag_index, fatal) + ctx['last_error'] = err + + for retry in RetryManager(self.params.get('fragment_retries'), error_callback): + try: + ctx['fragment_count'] = fragment.get('fragment_count') + if not self._download_fragment( + ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')): + return + except (HTTPError, IncompleteRead) as err: + retry.error = err + continue + except DownloadError: # has own retry settings + if fatal: + raise def append_fragment(frag_content, frag_index, ctx): - if not frag_content: - if not is_fatal(frag_index - 1): - self.report_skip_fragment(frag_index, 'fragment not found') - return True - else: - ctx['dest_stream'].close() - self.report_error( - 'fragment %s not found, unable to continue' % frag_index) - return False - self._append_fragment(ctx, pack_func(frag_content, frag_index)) + if frag_content: + self._append_fragment(ctx, pack_func(frag_content, frag_index)) + elif not is_fatal(frag_index - 1): + self.report_skip_fragment(frag_index, 'fragment not found') + else: + ctx['dest_stream'].close() + self.report_error(f'fragment {frag_index} not found, unable to continue') + return False return True decrypt_fragment = self.decrypter(info_dict) max_workers = math.ceil( self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1)) - if can_threaded_download and max_workers > 1: - + if max_workers > 1: def _download_fragment(fragment): ctx_copy = ctx.copy() - frag_content, frag_index = download_fragment(fragment, ctx_copy) - return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized') + download_fragment(fragment, ctx_copy) + return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized') - self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome') + self.report_warning('The download speed shown is only of one thread. This is a known issue') with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: - for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments): - if not interrupt_trigger[0]: - break - ctx['fragment_filename_sanitized'] = frag_filename - ctx['fragment_index'] = frag_index - result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) - if not result: - return False + try: + for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments): + ctx.update({ + 'fragment_filename_sanitized': frag_filename, + 'fragment_index': frag_index, + }) + if not append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx): + return False + except KeyboardInterrupt: + self._finish_multiline_status() + self.report_error( + 'Interrupted by user. 
Waiting for all threads to shutdown...', is_error=False, tb=False) + pool.shutdown(wait=False) + raise else: for fragment in fragments: if not interrupt_trigger[0]: break - frag_content, frag_index = download_fragment(fragment, ctx) - result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) + try: + download_fragment(fragment, ctx) + result = append_fragment( + decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx) + except KeyboardInterrupt: + if info_dict.get('is_live'): + break + raise if not result: return False if finish_func is not None: ctx['dest_stream'].write(finish_func()) ctx['dest_stream'].flush() - self._finish_frag_download(ctx, info_dict) - return True + return self._finish_frag_download(ctx, info_dict) diff --git a/plugins/youtube_download/yt_dlp/downloader/hls.py b/plugins/youtube_download/yt_dlp/downloader/hls.py index e932fd6..d4b3f03 100644 --- a/plugins/youtube_download/yt_dlp/downloader/hls.py +++ b/plugins/youtube_download/yt_dlp/downloader/hls.py @@ -1,23 +1,21 @@ -from __future__ import unicode_literals - -import re -import io import binascii +import io +import re +import urllib.parse -from ..downloader import get_suitable_downloader -from .fragment import FragmentFD +from . import get_suitable_downloader from .external import FFmpegFD - -from ..compat import ( - compat_pycrypto_AES, - compat_urlparse, -) -from ..utils import ( - parse_m3u8_attributes, - update_url_query, - bug_reports_message, -) +from .fragment import FragmentFD from .. import webvtt +from ..dependencies import Cryptodome +from ..utils import ( + bug_reports_message, + parse_m3u8_attributes, + remove_start, + traverse_obj, + update_url_query, + urljoin, +) class HlsFD(FragmentFD): @@ -30,7 +28,16 @@ class HlsFD(FragmentFD): FD_NAME = 'hlsnative' @staticmethod - def can_download(manifest, info_dict, allow_unplayable_formats=False): + def _has_drm(manifest): # TODO: https://github.com/yt-dlp/yt-dlp/pull/5039 + return bool(re.search('|'.join(( + r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay + r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.apple\.streamingkeydelivery"', # Apple FairPlay + r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.microsoft\.playready"', # Microsoft PlayReady + r'#EXT-X-FAXS-CM:', # Adobe Flash Access + )), manifest)) + + @classmethod + def can_download(cls, manifest, info_dict, allow_unplayable_formats=False): UNSUPPORTED_FEATURES = [ # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] @@ -52,13 +59,15 @@ class HlsFD(FragmentFD): ] if not allow_unplayable_formats: UNSUPPORTED_FEATURES += [ - r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] + r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1], but not necessarily DRM ] def check_results(): yield not info_dict.get('is_live') for feature in UNSUPPORTED_FEATURES: yield not re.search(feature, manifest) + if not allow_unplayable_formats: + yield not cls._has_drm(manifest) return all(check_results()) def real_download(self, filename, info_dict): @@ -66,25 +75,30 @@ class HlsFD(FragmentFD): self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) - man_url = urlh.geturl() + man_url = urlh.url s = urlh.read().decode('utf-8', 'ignore') can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None - if can_download and not compat_pycrypto_AES and '#EXT-X-KEY:METHOD=AES-128' in s: - if FFmpegFD.available(): + if 
can_download: + has_ffmpeg = FFmpegFD.available() + no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s + if no_crypto and has_ffmpeg: can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available' - else: + elif no_crypto: message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; ' 'Decryption will be performed natively, but will be extremely slow') + elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s): + install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and ' + message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, ' + f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command') if not can_download: - has_drm = re.search('|'.join([ - r'#EXT-X-FAXS-CM:', # Adobe Flash Access - r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay - ]), s) - if has_drm and not self.params.get('allow_unplayable_formats'): - self.report_error( - 'This video is DRM protected; Try selecting another format with --format or ' - 'add --check-formats to automatically fallback to the next best format') + if self._has_drm(s) and not self.params.get('allow_unplayable_formats'): + if info_dict.get('has_drm') and self.params.get('test'): + self.to_screen(f'[{self.FD_NAME}] This format is DRM protected', skip_eol=True) + else: + self.report_error( + 'This format is DRM protected; Try selecting another format with --format or ' + 'add --check-formats to automatically fallback to the next best format', tb=False) return False message = message or 'Unsupported features have been detected' fd = FFmpegFD(self.ydl, self.params) @@ -102,8 +116,7 @@ class HlsFD(FragmentFD): if real_downloader and not real_downloader.supports_manifest(s): real_downloader = None if real_downloader: - self.to_screen( - '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) + self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}') def is_ad_fragment_start(s): return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s @@ -150,10 +163,17 @@ class HlsFD(FragmentFD): extra_query = None extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') if extra_param_to_segment_url: - extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url) + extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) i = 0 media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} + external_aes_key = traverse_obj(info_dict, ('hls_aes', 'key')) + if external_aes_key: + external_aes_key = binascii.unhexlify(remove_start(external_aes_key, '0x')) + assert len(external_aes_key) in (16, 24, 32), 'Invalid length for HLS AES-128 key' + external_aes_iv = traverse_obj(info_dict, ('hls_aes', 'iv')) + if external_aes_iv: + external_aes_iv = binascii.unhexlify(remove_start(external_aes_iv, '0x').zfill(32)) byte_range = {} discontinuity_count = 0 frag_index = 0 @@ -169,10 +189,7 @@ class HlsFD(FragmentFD): frag_index += 1 if frag_index <= ctx['fragment_index']: continue - frag_url = ( - line - if re.match(r'^https?://', line) - else compat_urlparse.urljoin(man_url, line)) + frag_url = urljoin(man_url, line) if extra_query: frag_url = update_url_query(frag_url, extra_query) @@ -194,13 +211,18 @@ class HlsFD(FragmentFD): return False frag_index += 1 map_info = parse_m3u8_attributes(line[11:]) - frag_url = ( - map_info.get('URI') - if 
re.match(r'^https?://', map_info.get('URI')) - else compat_urlparse.urljoin(man_url, map_info.get('URI'))) + frag_url = urljoin(man_url, map_info.get('URI')) if extra_query: frag_url = update_url_query(frag_url, extra_query) + if map_info.get('BYTERANGE'): + splitted_byte_range = map_info.get('BYTERANGE').split('@') + sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] + byte_range = { + 'start': sub_range_start, + 'end': sub_range_start + int(splitted_byte_range[0]), + } + fragments.append({ 'frag_index': frag_index, 'url': frag_url, @@ -210,27 +232,22 @@ class HlsFD(FragmentFD): }) media_sequence += 1 - if map_info.get('BYTERANGE'): - splitted_byte_range = map_info.get('BYTERANGE').split('@') - sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] - byte_range = { - 'start': sub_range_start, - 'end': sub_range_start + int(splitted_byte_range[0]), - } - elif line.startswith('#EXT-X-KEY'): decrypt_url = decrypt_info.get('URI') decrypt_info = parse_m3u8_attributes(line[11:]) if decrypt_info['METHOD'] == 'AES-128': - if 'IV' in decrypt_info: + if external_aes_iv: + decrypt_info['IV'] = external_aes_iv + elif 'IV' in decrypt_info: decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32)) - if not re.match(r'^https?://', decrypt_info['URI']): - decrypt_info['URI'] = compat_urlparse.urljoin( - man_url, decrypt_info['URI']) - if extra_query: - decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) - if decrypt_url != decrypt_info['URI']: - decrypt_info['KEY'] = None + if external_aes_key: + decrypt_info['KEY'] = external_aes_key + else: + decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI']) + if extra_query: + decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) + if decrypt_url != decrypt_info['URI']: + decrypt_info['KEY'] = None elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): media_sequence = int(line[22:]) @@ -339,7 +356,7 @@ class HlsFD(FragmentFD): continue block.write_into(output) - return output.getvalue().encode('utf-8') + return output.getvalue().encode() def fin_fragments(): dedup_window = extra_state.get('webvtt_dedup_window') @@ -350,7 +367,7 @@ class HlsFD(FragmentFD): for cue in dedup_window: webvtt.CueBlock.from_json(cue).write_into(output) - return output.getvalue().encode('utf-8') + return output.getvalue().encode() self.download_and_append_fragments( ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments) diff --git a/plugins/youtube_download/yt_dlp/downloader/http.py b/plugins/youtube_download/yt_dlp/downloader/http.py index 34a1eb5..f523744 100644 --- a/plugins/youtube_download/yt_dlp/downloader/http.py +++ b/plugins/youtube_download/yt_dlp/downloader/http.py @@ -1,27 +1,27 @@ -from __future__ import unicode_literals - -import errno import os -import socket -import time import random -import re +import time from .common import FileDownloader -from ..compat import ( - compat_str, - compat_urllib_error, +from ..networking import Request +from ..networking.exceptions import ( + CertificateVerifyError, + HTTPError, + TransportError, ) from ..utils import ( ContentTooShortError, - encodeFilename, - int_or_none, - sanitized_Request, + RetryManager, ThrottledDownload, - write_xattr, XAttrMetadataError, XAttrUnavailableError, + encodeFilename, + int_or_none, + parse_http_range, + try_call, + write_xattr, ) +from ..utils.networking import HTTPHeaderDict class HttpFD(FileDownloader): @@ -39,11 +39,8 @@ class 
HttpFD(FileDownloader): ctx.tmpfilename = self.temp_name(filename) ctx.stream = None - # Do not include the Accept-Encoding header - headers = {'Youtubedl-no-compression': 'True'} - add_headers = info_dict.get('http_headers') - if add_headers: - headers.update(add_headers) + # Disable compression + headers = HTTPHeaderDict({'Accept-Encoding': 'identity'}, info_dict.get('http_headers')) is_test = self.params.get('test', False) chunk_size = self._TEST_FILE_SIZE if is_test else ( @@ -53,11 +50,11 @@ class HttpFD(FileDownloader): ctx.open_mode = 'wb' ctx.resume_len = 0 - ctx.data_len = None ctx.block_size = self.params.get('buffersize', 1024) ctx.start_time = time.time() - ctx.chunk_size = None - throttle_start = None + + # parse given Range + req_start, req_end, _ = parse_http_range(headers.get('Range')) if self.params.get('continuedl', True): # Establish possible resume length @@ -67,9 +64,6 @@ class HttpFD(FileDownloader): ctx.is_resume = ctx.resume_len > 0 - count = 0 - retries = self.params.get('retries', 0) - class SucceedDownload(Exception): pass @@ -80,43 +74,50 @@ class HttpFD(FileDownloader): class NextFragment(Exception): pass - def set_range(req, start, end): - range_header = 'bytes=%d-' % start - if end: - range_header += compat_str(end) - req.add_header('Range', range_header) - def establish_connection(): ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size) if not is_test and chunk_size else chunk_size) if ctx.resume_len > 0: range_start = ctx.resume_len + if req_start is not None: + # offset the beginning of Range to be within request + range_start += req_start if ctx.is_resume: self.report_resuming_byte(ctx.resume_len) ctx.open_mode = 'ab' + elif req_start is not None: + range_start = req_start elif ctx.chunk_size > 0: range_start = 0 else: range_start = None ctx.is_resume = False - range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None - if range_end and ctx.data_len is not None and range_end >= ctx.data_len: - range_end = ctx.data_len - 1 + + if ctx.chunk_size: + chunk_aware_end = range_start + ctx.chunk_size - 1 + # we're not allowed to download outside Range + range_end = chunk_aware_end if req_end is None else min(chunk_aware_end, req_end) + elif req_end is not None: + # there's no need for chunked downloads, so download until the end of Range + range_end = req_end + else: + range_end = None + + if try_call(lambda: range_start > range_end): + ctx.resume_len = 0 + ctx.open_mode = 'wb' + raise RetryDownload(Exception(f'Conflicting range. (start={range_start} > end={range_end})')) + + if try_call(lambda: range_end >= ctx.content_len): + range_end = ctx.content_len - 1 + + request = Request(url, request_data, headers) has_range = range_start is not None - ctx.has_range = has_range - request = sanitized_Request(url, request_data, headers) if has_range: - set_range(request, range_start, range_end) + request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}' # Establish connection try: - try: - ctx.data = self.ydl.urlopen(request) - except (compat_urllib_error.URLError, ) as err: - # reason may not be available, e.g. for urllib2.HTTPError on python 2.6 - reason = getattr(err, 'reason', None) - if isinstance(reason, socket.timeout): - raise RetryDownload(err) - raise err + ctx.data = self.ydl.urlopen(request) # When trying to resume, Content-Range HTTP header of response has to be checked # to match the value of requested Range HTTP header. 
This is due to a webservers # that don't support resuming and serve a whole file with no Content-Range @@ -124,41 +125,37 @@ class HttpFD(FileDownloader): # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799) if has_range: content_range = ctx.data.headers.get('Content-Range') - if content_range: - content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range) - # Content-Range is present and matches requested Range, resume is possible - if content_range_m: - if range_start == int(content_range_m.group(1)): - content_range_end = int_or_none(content_range_m.group(2)) - content_len = int_or_none(content_range_m.group(3)) - accept_content_len = ( - # Non-chunked download - not ctx.chunk_size - # Chunked download and requested piece or - # its part is promised to be served - or content_range_end == range_end - or content_len < range_end) - if accept_content_len: - ctx.data_len = content_len - return + content_range_start, content_range_end, content_len = parse_http_range(content_range) + # Content-Range is present and matches requested Range, resume is possible + if range_start == content_range_start and ( + # Non-chunked download + not ctx.chunk_size + # Chunked download and requested piece or + # its part is promised to be served + or content_range_end == range_end + or content_len < range_end): + ctx.content_len = content_len + if content_len or req_end: + ctx.data_len = min(content_len or req_end, req_end or content_len) - (req_start or 0) + return # Content-Range is either not present or invalid. Assuming remote webserver is # trying to send the whole file, resume is not possible, so wiping the local file # and performing entire redownload - self.report_unable_to_resume() + elif range_start > 0: + self.report_unable_to_resume() ctx.resume_len = 0 ctx.open_mode = 'wb' - ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None)) - return - except (compat_urllib_error.HTTPError, ) as err: - if err.code == 416: + ctx.data_len = ctx.content_len = int_or_none(ctx.data.headers.get('Content-length', None)) + except HTTPError as err: + if err.status == 416: # Unable to resume (requested range not satisfiable) try: # Open the connection again without the range header ctx.data = self.ydl.urlopen( - sanitized_Request(url, request_data, headers)) - content_length = ctx.data.info()['Content-Length'] - except (compat_urllib_error.HTTPError, ) as err: - if err.code < 500 or err.code >= 600: + Request(url, request_data, headers)) + content_length = ctx.data.headers['Content-Length'] + except HTTPError as err: + if err.status < 500 or err.status >= 600: raise else: # Examine the reported length @@ -186,21 +183,28 @@ class HttpFD(FileDownloader): ctx.resume_len = 0 ctx.open_mode = 'wb' return - elif err.code < 500 or err.code >= 600: + elif err.status < 500 or err.status >= 600: # Unexpected HTTP error raise raise RetryDownload(err) - except socket.timeout as err: - raise RetryDownload(err) - except socket.error as err: - if err.errno in (errno.ECONNRESET, errno.ETIMEDOUT): - # Connection reset is no problem, just retry - raise RetryDownload(err) + except CertificateVerifyError: raise + except TransportError as err: + raise RetryDownload(err) + + def close_stream(): + if ctx.stream is not None: + if not ctx.tmpfilename == '-': + ctx.stream.close() + ctx.stream = None def download(): - nonlocal throttle_start - data_len = ctx.data.info().get('Content-length', None) + data_len = ctx.data.headers.get('Content-length') + + if 
ctx.data.headers.get('Content-encoding'): + # Content-encoding is present, Content-length is not reliable anymore as we are + # doing auto decompression. (See: https://github.com/yt-dlp/yt-dlp/pull/6176) + data_len = None # Range HTTP header may be ignored/unsupported by a webserver # (e.g. extractor/scivee.py, extractor/bambuser.py). @@ -215,10 +219,12 @@ class HttpFD(FileDownloader): min_data_len = self.params.get('min_filesize') max_data_len = self.params.get('max_filesize') if min_data_len is not None and data_len < min_data_len: - self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) + self.to_screen( + f'\r[download] File is smaller than min-filesize ({data_len} bytes < {min_data_len} bytes). Aborting.') return False if max_data_len is not None and data_len > max_data_len: - self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) + self.to_screen( + f'\r[download] File is larger than max-filesize ({data_len} bytes > {max_data_len} bytes). Aborting.') return False byte_counter = 0 + ctx.resume_len @@ -230,28 +236,17 @@ class HttpFD(FileDownloader): before = start # start measuring def retry(e): - to_stdout = ctx.tmpfilename == '-' - if ctx.stream is not None: - if not to_stdout: - ctx.stream.close() - ctx.stream = None - ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename)) + close_stream() + ctx.resume_len = (byte_counter if ctx.tmpfilename == '-' + else os.path.getsize(encodeFilename(ctx.tmpfilename))) raise RetryDownload(e) while True: try: # Download and write data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) - # socket.timeout is a subclass of socket.error but may not have - # errno set - except socket.timeout as e: - retry(e) - except socket.error as e: - # SSLError on python 2 (inherits socket.error) may have - # no errno set but this error message - if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out': - retry(e) - raise + except TransportError as err: + retry(err) byte_counter += len(data_block) @@ -267,19 +262,19 @@ class HttpFD(FileDownloader): assert ctx.stream is not None ctx.filename = self.undo_temp_name(ctx.tmpfilename) self.report_destination(ctx.filename) - except (OSError, IOError) as err: + except OSError as err: self.report_error('unable to open for writing: %s' % str(err)) return False if self.params.get('xattr_set_filesize', False) and data_len is not None: try: - write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8')) + write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode()) except (XAttrUnavailableError, XAttrMetadataError) as err: self.report_error('unable to set filesize xattr: %s' % str(err)) try: ctx.stream.write(data_block) - except (IOError, OSError) as err: + except OSError as err: self.to_stderr('\n') self.report_error('unable to write data: %s' % str(err)) return False @@ -322,38 +317,36 @@ class HttpFD(FileDownloader): if speed and speed < (self.params.get('throttledratelimit') or 0): # The speed must stay below the limit for 3 seconds # This prevents raising error when the speed temporarily goes down - if throttle_start is None: - throttle_start = now - elif now - throttle_start > 3: + if ctx.throttle_start is None: + ctx.throttle_start = now + elif now - ctx.throttle_start > 3: if ctx.stream is not None and 
ctx.tmpfilename != '-': ctx.stream.close() raise ThrottledDownload() elif speed: - throttle_start = None - - if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: - ctx.resume_len = byte_counter - # ctx.block_size = block_size - raise NextFragment() + ctx.throttle_start = None if ctx.stream is None: self.to_stderr('\n') self.report_error('Did not get any data blocks') return False + + if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len: + ctx.resume_len = byte_counter + raise NextFragment() + if ctx.tmpfilename != '-': ctx.stream.close() if data_len is not None and byte_counter != data_len: err = ContentTooShortError(byte_counter, int(data_len)) - if count <= retries: - retry(err) - raise err + retry(err) self.try_rename(ctx.tmpfilename, ctx.filename) # Update file modification time if self.params.get('updatetime', True): - info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None)) + info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None)) self._hook_progress({ 'downloaded_bytes': byte_counter, @@ -366,21 +359,20 @@ class HttpFD(FileDownloader): return True - while count <= retries: + for retry in RetryManager(self.params.get('retries'), self.report_retry): try: establish_connection() return download() - except RetryDownload as e: - count += 1 - if count <= retries: - self.report_retry(e.source_error, count, retries) - else: - self.to_screen(f'[download] Got server HTTP error: {e.source_error}') + except RetryDownload as err: + retry.error = err.source_error continue except NextFragment: + retry.error = None + retry.attempt -= 1 continue except SucceedDownload: return True - - self.report_error('giving up after %s retries' % retries) + except: # noqa: E722 + close_stream() + raise return False diff --git a/plugins/youtube_download/yt_dlp/downloader/ism.py b/plugins/youtube_download/yt_dlp/downloader/ism.py index 09516ab..dd688f5 100644 --- a/plugins/youtube_download/yt_dlp/downloader/ism.py +++ b/plugins/youtube_download/yt_dlp/downloader/ism.py @@ -1,27 +1,23 @@ -from __future__ import unicode_literals - -import time import binascii import io +import struct +import time from .fragment import FragmentFD -from ..compat import ( - compat_Struct, - compat_urllib_error, -) +from ..networking.exceptions import HTTPError +from ..utils import RetryManager +u8 = struct.Struct('>B') +u88 = struct.Struct('>Bx') +u16 = struct.Struct('>H') +u1616 = struct.Struct('>Hxx') +u32 = struct.Struct('>I') +u64 = struct.Struct('>Q') -u8 = compat_Struct('>B') -u88 = compat_Struct('>Bx') -u16 = compat_Struct('>H') -u1616 = compat_Struct('>Hxx') -u32 = compat_Struct('>I') -u64 = compat_Struct('>Q') - -s88 = compat_Struct('>bx') -s16 = compat_Struct('>h') -s1616 = compat_Struct('>hxx') -s32 = compat_Struct('>i') +s88 = struct.Struct('>bx') +s16 = struct.Struct('>h') +s1616 = struct.Struct('>hxx') +s32 = struct.Struct('>i') unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000) @@ -142,6 +138,8 @@ def write_piff_header(stream, params): if fourcc == 'AACL': sample_entry_box = box(b'mp4a', sample_entry_payload) + if fourcc == 'EC-3': + sample_entry_box = box(b'ec-3', sample_entry_payload) elif stream_type == 'video': sample_entry_payload += u16.pack(0) # pre defined sample_entry_payload += u16.pack(0) # reserved @@ -156,7 +154,7 @@ def write_piff_header(stream, params): sample_entry_payload += u16.pack(0x18) # depth 
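For orientation in this `write_piff_header` hunk: the pre-compiled `u8`/`u16`/`u32` `struct.Struct` helpers above exist because PIFF/ISO-BMFF output is a tree of length-prefixed, big-endian boxes assembled innermost-first. The `box(b'mp4a', …)` calls rely on helpers defined elsewhere in `ism.py`; a sketch of that box-building idiom (treat these definitions as illustrative):

```python
import struct

u8 = struct.Struct('>B')
u32 = struct.Struct('>I')


def box(box_type, payload):
    # ISO BMFF box: 32-bit big-endian size (counting the 8-byte header
    # itself), a 4-byte fourcc, then the payload.
    return u32.pack(8 + len(payload)) + box_type + payload


def full_box(box_type, version, flags, payload):
    # A "full box" carries an extra 1-byte version and 24-bit flags
    # before its payload.
    return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload)


# Smallest possible box: 8 bytes of header and no payload.
assert box(b'free', b'') == b'\x00\x00\x00\x08free'
```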
sample_entry_payload += s16.pack(-1) # pre defined - codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8')) + codec_private_data = binascii.unhexlify(params['codec_private_data'].encode()) if fourcc in ('H264', 'AVC1'): sps, pps = codec_private_data.split(u32.pack(1))[1:] avcc_payload = u8.pack(1) # configuration version @@ -235,8 +233,6 @@ class IsmFD(FragmentFD): Download segments in a ISM manifest """ - FD_NAME = 'ism' - def real_download(self, filename, info_dict): segments = info_dict['fragments'][:1] if self.params.get( 'test', False) else info_dict['fragments'] @@ -252,7 +248,6 @@ class IsmFD(FragmentFD): 'ism_track_written': False, }) - fragment_retries = self.params.get('fragment_retries', 0) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) frag_index = 0 @@ -260,30 +255,29 @@ class IsmFD(FragmentFD): frag_index += 1 if frag_index <= ctx['fragment_index']: continue - count = 0 - while count <= fragment_retries: + + retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry, + frag_index=frag_index, fatal=not skip_unavailable_fragments) + for retry in retry_manager: try: - success, frag_content = self._download_fragment(ctx, segment['url'], info_dict) + success = self._download_fragment(ctx, segment['url'], info_dict) if not success: return False + frag_content = self._read_fragment(ctx) + if not extra_state['ism_track_written']: tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd']) info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0] write_piff_header(ctx['dest_stream'], info_dict['_download_params']) extra_state['ism_track_written'] = True self._append_fragment(ctx, frag_content) - break - except compat_urllib_error.HTTPError as err: - count += 1 - if count <= fragment_retries: - self.report_retry_fragment(err, frag_index, count, fragment_retries) - if count > fragment_retries: - if skip_unavailable_fragments: - self.report_skip_fragment(frag_index) + except HTTPError as err: + retry.error = err continue - self.report_error('giving up after %s fragment retries' % fragment_retries) - return False - self._finish_frag_download(ctx, info_dict) + if retry_manager.error: + if not skip_unavailable_fragments: + return False + self.report_skip_fragment(frag_index) - return True + return self._finish_frag_download(ctx, info_dict) diff --git a/plugins/youtube_download/yt_dlp/downloader/mhtml.py b/plugins/youtube_download/yt_dlp/downloader/mhtml.py index 1477f65..d977dce 100644 --- a/plugins/youtube_download/yt_dlp/downloader/mhtml.py +++ b/plugins/youtube_download/yt_dlp/downloader/mhtml.py @@ -1,24 +1,15 @@ -# coding: utf-8 -from __future__ import unicode_literals - import io import quopri import re import uuid from .fragment import FragmentFD -from ..utils import ( - escapeHTML, - formatSeconds, - srt_subtitles_timecode, - urljoin, -) +from ..compat import imghdr +from ..utils import escapeHTML, formatSeconds, srt_subtitles_timecode, urljoin from ..version import __version__ as YT_DLP_VERSION class MhtmlFD(FragmentFD): - FD_NAME = 'mhtml' - _STYLESHEET = """\ html, body { margin: 0; @@ -62,7 +53,7 @@ body > figure > img { def _escape_mime(s): return '=?utf-8?Q?' 
+ (b''.join( bytes((b,)) if b >= 0x20 else b'=%02X' % b - for b in quopri.encodestring(s.encode('utf-8'), header=True) + for b in quopri.encodestring(s.encode(), header=True) )).decode('us-ascii') + '?=' def _gen_cid(self, i, fragment, frag_boundary): @@ -159,25 +150,22 @@ body > figure > img { length=len(stub), title=self._escape_mime(title), stub=stub - ).encode('utf-8')) + ).encode()) extra_state['header_written'] = True for i, fragment in enumerate(fragments): if (i + 1) <= ctx['fragment_index']: continue - fragment_url = urljoin(fragment_base_url, fragment['path']) - success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) + fragment_url = fragment.get('url') + if not fragment_url: + assert fragment_base_url + fragment_url = urljoin(fragment_base_url, fragment['path']) + + success = self._download_fragment(ctx, fragment_url, info_dict) if not success: continue - - mime_type = b'image/jpeg' - if frag_content.startswith(b'\x89PNG\r\n\x1a\n'): - mime_type = b'image/png' - if frag_content.startswith((b'GIF87a', b'GIF89a')): - mime_type = b'image/gif' - if frag_content.startswith(b'RIFF') and frag_content[8:12] == 'WEBP': - mime_type = b'image/webp' + frag_content = self._read_fragment(ctx) frag_header = io.BytesIO() frag_header.write( @@ -185,7 +173,7 @@ body > figure > img { frag_header.write( b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii')) frag_header.write( - b'Content-type: %b\r\n' % mime_type) + b'Content-type: %b\r\n' % f'image/{imghdr.what(h=frag_content) or "jpeg"}'.encode()) frag_header.write( b'Content-length: %u\r\n' % len(frag_content)) frag_header.write( @@ -198,5 +186,4 @@ body > figure > img { ctx['dest_stream'].write( b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii')) - self._finish_frag_download(ctx, info_dict) - return True + return self._finish_frag_download(ctx, info_dict) diff --git a/plugins/youtube_download/yt_dlp/downloader/niconico.py b/plugins/youtube_download/yt_dlp/downloader/niconico.py index 521dfec..5720f6e 100644 --- a/plugins/youtube_download/yt_dlp/downloader/niconico.py +++ b/plugins/youtube_download/yt_dlp/downloader/niconico.py @@ -1,22 +1,21 @@ -# coding: utf-8 -from __future__ import unicode_literals - +import json import threading +import time +from . 
import get_suitable_downloader from .common import FileDownloader -from ..downloader import get_suitable_downloader -from ..extractor.niconico import NiconicoIE -from ..utils import sanitized_Request +from .external import FFmpegFD +from ..networking import Request +from ..utils import DownloadError, WebSocketsWrapper, str_or_none, try_get class NiconicoDmcFD(FileDownloader): """ Downloading niconico douga from DMC with heartbeat """ - FD_NAME = 'niconico_dmc' - def real_download(self, filename, info_dict): - self.to_screen('[%s] Downloading from DMC' % self.FD_NAME) + from ..extractor.niconico import NiconicoIE + self.to_screen('[%s] Downloading from DMC' % self.FD_NAME) ie = NiconicoIE(self.ydl) info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict) @@ -29,7 +28,7 @@ class NiconicoDmcFD(FileDownloader): heartbeat_data = heartbeat_info_dict['data'].encode() heartbeat_interval = heartbeat_info_dict.get('interval', 30) - request = sanitized_Request(heartbeat_url, heartbeat_data) + request = Request(heartbeat_url, heartbeat_data) def heartbeat(): try: @@ -54,4 +53,94 @@ class NiconicoDmcFD(FileDownloader): with heartbeat_lock: timer[0].cancel() download_complete = True - return success + return success + + +class NiconicoLiveFD(FileDownloader): + """ Downloads niconico live without being stopped """ + + def real_download(self, filename, info_dict): + video_id = info_dict['video_id'] + ws_url = info_dict['url'] + ws_extractor = info_dict['ws'] + ws_origin_host = info_dict['origin'] + cookies = info_dict.get('cookies') + live_quality = info_dict.get('live_quality', 'high') + live_latency = info_dict.get('live_latency', 'high') + dl = FFmpegFD(self.ydl, self.params or {}) + + new_info_dict = info_dict.copy() + new_info_dict.update({ + 'protocol': 'm3u8', + }) + + def communicate_ws(reconnect): + if reconnect: + ws = WebSocketsWrapper(ws_url, { + 'Cookies': str_or_none(cookies) or '', + 'Origin': f'https://{ws_origin_host}', + 'Accept': '*/*', + 'User-Agent': self.params['http_headers']['User-Agent'], + }) + if self.ydl.params.get('verbose', False): + self.to_screen('[debug] Sending startWatching request') + ws.send(json.dumps({ + 'type': 'startWatching', + 'data': { + 'stream': { + 'quality': live_quality, + 'protocol': 'hls+fmp4', + 'latency': live_latency, + 'chasePlay': False + }, + 'room': { + 'protocol': 'webSocket', + 'commentable': True + }, + 'reconnect': True, + } + })) + else: + ws = ws_extractor + with ws: + while True: + recv = ws.recv() + if not recv: + continue + data = json.loads(recv) + if not data or not isinstance(data, dict): + continue + if data.get('type') == 'ping': + # pong back + ws.send(r'{"type":"pong"}') + ws.send(r'{"type":"keepSeat"}') + elif data.get('type') == 'disconnect': + self.write_debug(data) + return True + elif data.get('type') == 'error': + self.write_debug(data) + message = try_get(data, lambda x: x['body']['code'], str) or recv + return DownloadError(message) + elif self.ydl.params.get('verbose', False): + if len(recv) > 100: + recv = recv[:100] + '...' 
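The `communicate_ws` loop above is a receive-dispatch keepalive: every server `ping` must be answered (and niconico additionally expects `keepSeat` to hold the viewing slot), `disconnect` ends the session cleanly, and `error` is surfaced to the caller. A condensed standalone sketch of that dispatch shape (the message names follow the code above; the `ws` object and the error handling are illustrative):

```python
import json


def pump_messages(ws):
    # Receive-dispatch loop: answer pings as they arrive, keep the seat,
    # stop cleanly on 'disconnect', and surface 'error' to the caller.
    while True:
        raw = ws.recv()
        if not raw:
            continue
        data = json.loads(raw)
        if not isinstance(data, dict):
            continue
        kind = data.get('type')
        if kind == 'ping':
            ws.send(json.dumps({'type': 'pong'}))
            ws.send(json.dumps({'type': 'keepSeat'}))
        elif kind == 'disconnect':
            return
        elif kind == 'error':
            raise RuntimeError(data.get('body') or raw)
```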
+ self.to_screen('[debug] Server said: %s' % recv) + + def ws_main(): + reconnect = False + while True: + try: + ret = communicate_ws(reconnect) + if ret is True: + return + except BaseException as e: + self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e))) + time.sleep(10) + continue + finally: + reconnect = True + + thread = threading.Thread(target=ws_main, daemon=True) + thread.start() + + return dl.download(filename, new_info_dict) diff --git a/plugins/youtube_download/yt_dlp/downloader/rtmp.py b/plugins/youtube_download/yt_dlp/downloader/rtmp.py index 90f1acf..0e09525 100644 --- a/plugins/youtube_download/yt_dlp/downloader/rtmp.py +++ b/plugins/youtube_download/yt_dlp/downloader/rtmp.py @@ -1,18 +1,15 @@ -from __future__ import unicode_literals - import os import re import subprocess import time from .common import FileDownloader -from ..compat import compat_str from ..utils import ( - check_executable, - encodeFilename, - encodeArgument, - get_exe_version, Popen, + check_executable, + encodeArgument, + encodeFilename, + get_exe_version, ) @@ -94,8 +91,7 @@ class RtmpFD(FileDownloader): self.to_screen('') return proc.wait() except BaseException: # Including KeyboardInterrupt - proc.kill() - proc.wait() + proc.kill(timeout=None) raise url = info_dict['url'] @@ -146,7 +142,7 @@ class RtmpFD(FileDownloader): if isinstance(conn, list): for entry in conn: basic_args += ['--conn', entry] - elif isinstance(conn, compat_str): + elif isinstance(conn, str): basic_args += ['--conn', conn] if protocol is not None: basic_args += ['--protocol', protocol] diff --git a/plugins/youtube_download/yt_dlp/downloader/rtsp.py b/plugins/youtube_download/yt_dlp/downloader/rtsp.py index 7815d59..e89269f 100644 --- a/plugins/youtube_download/yt_dlp/downloader/rtsp.py +++ b/plugins/youtube_download/yt_dlp/downloader/rtsp.py @@ -1,13 +1,8 @@ -from __future__ import unicode_literals - import os import subprocess from .common import FileDownloader -from ..utils import ( - check_executable, - encodeFilename, -) +from ..utils import check_executable, encodeFilename class RtspFD(FileDownloader): @@ -32,7 +27,7 @@ class RtspFD(FileDownloader): retval = subprocess.call(args) if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('\r[%s] %s bytes' % (args[0], fsize)) + self.to_screen(f'\r[{args[0]}] {fsize} bytes') self.try_rename(tmpfilename, filename) self._hook_progress({ 'downloaded_bytes': fsize, diff --git a/plugins/youtube_download/yt_dlp/downloader/websocket.py b/plugins/youtube_download/yt_dlp/downloader/websocket.py index daac348..6837ff1 100644 --- a/plugins/youtube_download/yt_dlp/downloader/websocket.py +++ b/plugins/youtube_download/yt_dlp/downloader/websocket.py @@ -1,19 +1,12 @@ +import asyncio +import contextlib import os import signal -import asyncio import threading -try: - import websockets -except (ImportError, SyntaxError): - # websockets 3.10 on python 3.6 causes SyntaxError - # See https://github.com/yt-dlp/yt-dlp/issues/2633 - has_websockets = False -else: - has_websockets = True - from .common import FileDownloader from .external import FFmpegFD +from ..dependencies import websockets class FFmpegSinkFD(FileDownloader): @@ -26,14 +19,12 @@ class FFmpegSinkFD(FileDownloader): async def call_conn(proc, stdin): try: await self.real_connection(stdin, info_dict) - except (BrokenPipeError, OSError): + except OSError: pass finally: - try: + with contextlib.suppress(OSError): stdin.flush() 
stdin.close() - except OSError: - pass os.kill(os.getpid(), signal.SIGINT) class FFmpegStdinFD(FFmpegFD): diff --git a/plugins/youtube_download/yt_dlp/downloader/youtube_live_chat.py b/plugins/youtube_download/yt_dlp/downloader/youtube_live_chat.py index ef4205e..c7a8637 100644 --- a/plugins/youtube_download/yt_dlp/downloader/youtube_live_chat.py +++ b/plugins/youtube_download/yt_dlp/downloader/youtube_live_chat.py @@ -1,29 +1,28 @@ -from __future__ import division, unicode_literals - import json import time from .fragment import FragmentFD -from ..compat import compat_urllib_error +from ..networking.exceptions import HTTPError from ..utils import ( - try_get, + RegexNotFoundError, + RetryManager, dict_get, int_or_none, - RegexNotFoundError, + try_get, ) -from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE +from ..utils.networking import HTTPHeaderDict class YoutubeLiveChatFD(FragmentFD): """ Downloads YouTube live chats fragment by fragment """ - FD_NAME = 'youtube_live_chat' - def real_download(self, filename, info_dict): video_id = info_dict['video_id'] self.to_screen('[%s] Downloading live chat' % self.FD_NAME) + if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat': + self.report_warning('Live chat download runs until the livestream ends. ' + 'If you wish to download the video simultaneously, run a separate yt-dlp instance') - fragment_retries = self.params.get('fragment_retries', 0) test = self.params.get('test', False) ctx = { @@ -32,15 +31,14 @@ class YoutubeLiveChatFD(FragmentFD): 'total_frags': None, } - ie = YT_BaseIE(self.ydl) + from ..extractor.youtube import YoutubeBaseInfoExtractor + + ie = YoutubeBaseInfoExtractor(self.ydl) start_time = int(time.time() * 1000) def dl_fragment(url, data=None, headers=None): - http_headers = info_dict.get('http_headers', {}) - if headers: - http_headers = http_headers.copy() - http_headers.update(headers) + http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers) return self._download_fragment(ctx, url, info_dict, http_headers, data) def parse_actions_replay(live_chat_continuation): @@ -51,7 +49,7 @@ class YoutubeLiveChatFD(FragmentFD): replay_chat_item_action = action['replayChatItemAction'] offset = int(replay_chat_item_action['videoOffsetTimeMsec']) processed_fragment.extend( - json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n') + json.dumps(action, ensure_ascii=False).encode() + b'\n') if offset is not None: continuation = try_get( live_chat_continuation, @@ -93,7 +91,7 @@ class YoutubeLiveChatFD(FragmentFD): 'isLive': True, } processed_fragment.extend( - json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n') + json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n') continuation_data_getters = [ lambda x: x['continuations'][0]['invalidationContinuationData'], lambda x: x['continuations'][0]['timedContinuationData'], @@ -109,12 +107,12 @@ class YoutubeLiveChatFD(FragmentFD): return continuation_id, live_offset, click_tracking_params def download_and_parse_fragment(url, frag_index, request_data=None, headers=None): - count = 0 - while count <= fragment_retries: + for retry in RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index): try: - success, raw_fragment = dl_fragment(url, request_data, headers) + success = dl_fragment(url, request_data, headers) if not success: return False, None, None, None + raw_fragment = self._read_fragment(ctx) try: data = ie.extract_yt_initial_data(video_id, 
raw_fragment.decode('utf-8', 'replace')) except RegexNotFoundError: @@ -124,27 +122,22 @@ class YoutubeLiveChatFD(FragmentFD): live_chat_continuation = try_get( data, lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} - if info_dict['protocol'] == 'youtube_live_chat_replay': - if frag_index == 1: - continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation) - else: - continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation) - elif info_dict['protocol'] == 'youtube_live_chat': - continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation) - return True, continuation_id, offset, click_tracking_params - except compat_urllib_error.HTTPError as err: - count += 1 - if count <= fragment_retries: - self.report_retry_fragment(err, frag_index, count, fragment_retries) - if count > fragment_retries: - self.report_error('giving up after %s fragment retries' % fragment_retries) - return False, None, None, None + + func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live + or frag_index == 1 and try_refresh_replay_beginning + or parse_actions_replay) + return (True, *func(live_chat_continuation)) + except HTTPError as err: + retry.error = err + continue + return False, None, None, None self._prepare_and_start_frag_download(ctx, info_dict) - success, raw_fragment = dl_fragment(info_dict['url']) + success = dl_fragment(info_dict['url']) if not success: return False + raw_fragment = self._read_fragment(ctx) try: data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) except RegexNotFoundError: @@ -185,7 +178,7 @@ class YoutubeLiveChatFD(FragmentFD): request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params} headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data) headers.update({'content-type': 'application/json'}) - fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n' + fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n' success, continuation_id, offset, click_tracking_params = download_and_parse_fragment( url, frag_index, fragment_request_data, headers) else: @@ -196,8 +189,7 @@ class YoutubeLiveChatFD(FragmentFD): if test: break - self._finish_frag_download(ctx, info_dict) - return True + return self._finish_frag_download(ctx, info_dict) @staticmethod def parse_live_timestamp(action): diff --git a/plugins/youtube_download/yt_dlp/extractor/__init__.py b/plugins/youtube_download/yt_dlp/extractor/__init__.py index b354842..6bfa4bd 100644 --- a/plugins/youtube_download/yt_dlp/extractor/__init__.py +++ b/plugins/youtube_download/yt_dlp/extractor/__init__.py @@ -1,33 +1,15 @@ -import os +from ..compat.compat_utils import passthrough_module -from ..utils import load_plugins - -_LAZY_LOADER = False -if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): - try: - from .lazy_extractors import * - from .lazy_extractors import _ALL_CLASSES - _LAZY_LOADER = True - except ImportError: - pass - -if not _LAZY_LOADER: - from .extractors import * - _ALL_CLASSES = [ - klass - for name, klass in globals().items() - if name.endswith('IE') and name != 'GenericIE' - ] - _ALL_CLASSES.append(GenericIE) - -_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals()) -_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES +passthrough_module(__name__, '.extractors') +del passthrough_module def gen_extractor_classes(): """ Return a 
list of supported extractors. The order does matter; the first extractor matched is the one handling the URL. """ + from .extractors import _ALL_CLASSES + return _ALL_CLASSES @@ -38,17 +20,23 @@ def gen_extractors(): return [klass() for klass in gen_extractor_classes()] -def list_extractors(age_limit): - """ - Return a list of extractors that are suitable for the given age, - sorted by extractor ID. - """ +def list_extractor_classes(age_limit=None): + """Return a list of extractors that are suitable for the given age, sorted by extractor name""" + from .generic import GenericIE - return sorted( - filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()), - key=lambda ie: ie.IE_NAME.lower()) + yield from sorted(filter( + lambda ie: ie.is_suitable(age_limit) and ie != GenericIE, + gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower()) + yield GenericIE + + +def list_extractors(age_limit=None): + """Return a list of extractor instances that are suitable for the given age, sorted by extractor name""" + return [ie() for ie in list_extractor_classes(age_limit)] def get_info_extractor(ie_name): """Returns the info extractor class with the given ie_name""" - return globals()[ie_name + 'IE'] + from . import extractors + + return getattr(extractors, f'{ie_name}IE') diff --git a/plugins/youtube_download/yt_dlp/extractor/_extractors.py b/plugins/youtube_download/yt_dlp/extractor/_extractors.py new file mode 100644 index 0000000..63bb55e --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/_extractors.py @@ -0,0 +1,2528 @@ +# flake8: noqa: F401 + +from .youtube import ( # Youtube is moved to the top to improve performance + YoutubeIE, + YoutubeClipIE, + YoutubeFavouritesIE, + YoutubeNotificationsIE, + YoutubeHistoryIE, + YoutubeTabIE, + YoutubeLivestreamEmbedIE, + YoutubePlaylistIE, + YoutubeRecommendedIE, + YoutubeSearchDateIE, + YoutubeSearchIE, + YoutubeSearchURLIE, + YoutubeMusicSearchURLIE, + YoutubeSubscriptionsIE, + YoutubeTruncatedIDIE, + YoutubeTruncatedURLIE, + YoutubeYtBeIE, + YoutubeYtUserIE, + YoutubeWatchLaterIE, + YoutubeShortsAudioPivotIE, + YoutubeConsentRedirectIE, +) + +from .abc import ( + ABCIE, + ABCIViewIE, + ABCIViewShowSeriesIE, +) +from .abcnews import ( + AbcNewsIE, + AbcNewsVideoIE, +) +from .abcotvs import ( + ABCOTVSIE, + ABCOTVSClipsIE, +) +from .abematv import ( + AbemaTVIE, + AbemaTVTitleIE, +) +from .academicearth import AcademicEarthCourseIE +from .acast import ( + ACastIE, + ACastChannelIE, +) +from .acfun import AcFunVideoIE, AcFunBangumiIE +from .adn import ADNIE +from .adobeconnect import AdobeConnectIE +from .adobetv import ( + AdobeTVEmbedIE, + AdobeTVIE, + AdobeTVShowIE, + AdobeTVChannelIE, + AdobeTVVideoIE, +) +from .adultswim import AdultSwimIE +from .aenetworks import ( + AENetworksIE, + AENetworksCollectionIE, + AENetworksShowIE, + HistoryTopicIE, + HistoryPlayerIE, + BiographyIE, +) +from .aeonco import AeonCoIE +from .afreecatv import ( + AfreecaTVIE, + AfreecaTVLiveIE, + AfreecaTVUserIE, +) +from .agora import ( + TokFMAuditionIE, + TokFMPodcastIE, + WyborczaPodcastIE, + WyborczaVideoIE, +) +from .airmozilla import AirMozillaIE +from .airtv import AirTVIE +from .aitube import AitubeKZVideoIE +from .aljazeera import AlJazeeraIE +from .alphaporno import AlphaPornoIE +from .amara import AmaraIE +from .alura import ( + AluraIE, + AluraCourseIE +) +from .amcnetworks import AMCNetworksIE +from .amazon import ( + AmazonStoreIE, + AmazonReviewsIE, +) +from .amazonminitv import ( + AmazonMiniTVIE, + AmazonMiniTVSeasonIE, + 
AmazonMiniTVSeriesIE, +) +from .americastestkitchen import ( + AmericasTestKitchenIE, + AmericasTestKitchenSeasonIE, +) +from .anchorfm import AnchorFMEpisodeIE +from .angel import AngelIE +from .anvato import AnvatoIE +from .aol import AolIE +from .allocine import AllocineIE +from .aliexpress import AliExpressLiveIE +from .alsace20tv import ( + Alsace20TVIE, + Alsace20TVEmbedIE, +) +from .apa import APAIE +from .aparat import AparatIE +from .appleconnect import AppleConnectIE +from .appletrailers import ( + AppleTrailersIE, + AppleTrailersSectionIE, +) +from .applepodcasts import ApplePodcastsIE +from .archiveorg import ( + ArchiveOrgIE, + YoutubeWebArchiveIE, + VLiveWebArchiveIE, +) +from .arcpublishing import ArcPublishingIE +from .arkena import ArkenaIE +from .ard import ( + ARDBetaMediathekIE, + ARDIE, + ARDMediathekIE, +) +from .arte import ( + ArteTVIE, + ArteTVEmbedIE, + ArteTVPlaylistIE, + ArteTVCategoryIE, +) +from .arnes import ArnesIE +from .asiancrush import ( + AsianCrushIE, + AsianCrushPlaylistIE, +) +from .atresplayer import AtresPlayerIE +from .atscaleconf import AtScaleConfEventIE +from .atttechchannel import ATTTechChannelIE +from .atvat import ATVAtIE +from .audimedia import AudiMediaIE +from .audioboom import AudioBoomIE +from .audiodraft import ( + AudiodraftCustomIE, + AudiodraftGenericIE, +) +from .audiomack import AudiomackIE, AudiomackAlbumIE +from .audius import ( + AudiusIE, + AudiusTrackIE, + AudiusPlaylistIE, + AudiusProfileIE, +) +from .awaan import ( + AWAANIE, + AWAANVideoIE, + AWAANLiveIE, + AWAANSeasonIE, +) +from .azmedien import AZMedienIE +from .baidu import BaiduVideoIE +from .banbye import ( + BanByeIE, + BanByeChannelIE, +) +from .bandaichannel import BandaiChannelIE +from .bandcamp import ( + BandcampIE, + BandcampAlbumIE, + BandcampWeeklyIE, + BandcampUserIE, +) +from .bannedvideo import BannedVideoIE +from .bbc import ( + BBCCoUkIE, + BBCCoUkArticleIE, + BBCCoUkIPlayerEpisodesIE, + BBCCoUkIPlayerGroupIE, + BBCCoUkPlaylistIE, + BBCIE, +) +from .beeg import BeegIE +from .behindkink import BehindKinkIE +from .bellmedia import BellMediaIE +from .beatbump import ( + BeatBumpVideoIE, + BeatBumpPlaylistIE, +) +from .beatport import BeatportIE +from .berufetv import BerufeTVIE +from .bet import BetIE +from .bfi import BFIPlayerIE +from .bfmtv import ( + BFMTVIE, + BFMTVLiveIE, + BFMTVArticleIE, +) +from .bibeltv import ( + BibelTVLiveIE, + BibelTVSeriesIE, + BibelTVVideoIE, +) +from .bigflix import BigflixIE +from .bigo import BigoIE +from .bild import BildIE +from .bilibili import ( + BiliBiliIE, + BiliBiliBangumiIE, + BiliBiliBangumiSeasonIE, + BiliBiliBangumiMediaIE, + BiliBiliSearchIE, + BilibiliCategoryIE, + BilibiliAudioIE, + BilibiliAudioAlbumIE, + BiliBiliPlayerIE, + BilibiliSpaceVideoIE, + BilibiliSpaceAudioIE, + BilibiliSpacePlaylistIE, + BiliIntlIE, + BiliIntlSeriesIE, + BiliLiveIE, +) +from .biobiochiletv import BioBioChileTVIE +from .bitchute import ( + BitChuteIE, + BitChuteChannelIE, +) +from .bitwave import ( + BitwaveReplayIE, + BitwaveStreamIE, +) +from .biqle import BIQLEIE +from .blackboardcollaborate import BlackboardCollaborateIE +from .bleacherreport import ( + BleacherReportIE, + BleacherReportCMSIE, +) +from .blerp import BlerpIE +from .blogger import BloggerIE +from .bloomberg import BloombergIE +from .bokecc import BokeCCIE +from .bongacams import BongaCamsIE +from .bostonglobe import BostonGlobeIE +from .box import BoxIE +from .boxcast import BoxCastVideoIE +from .bpb import BpbIE +from .br import ( + BRIE, + BRMediathekIE, +) 
+from .bravotv import BravoTVIE +from .brainpop import ( + BrainPOPIE, + BrainPOPJrIE, + BrainPOPELLIE, + BrainPOPEspIE, + BrainPOPFrIE, + BrainPOPIlIE, +) +from .breakcom import BreakIE +from .breitbart import BreitBartIE +from .brightcove import ( + BrightcoveLegacyIE, + BrightcoveNewIE, +) +from .businessinsider import BusinessInsiderIE +from .bundesliga import BundesligaIE +from .buzzfeed import BuzzFeedIE +from .byutv import BYUtvIE +from .c56 import C56IE +from .cableav import CableAVIE +from .callin import CallinIE +from .caltrans import CaltransIE +from .cam4 import CAM4IE +from .camdemy import ( + CamdemyIE, + CamdemyFolderIE +) +from .camfm import ( + CamFMEpisodeIE, + CamFMShowIE +) +from .cammodels import CamModelsIE +from .camsoda import CamsodaIE +from .camtasia import CamtasiaEmbedIE +from .camwithher import CamWithHerIE +from .canalalpha import CanalAlphaIE +from .canalplus import CanalplusIE +from .canalc2 import Canalc2IE +from .carambatv import ( + CarambaTVIE, + CarambaTVPageIE, +) +from .cartoonnetwork import CartoonNetworkIE +from .cbc import ( + CBCIE, + CBCPlayerIE, + CBCGemIE, + CBCGemPlaylistIE, + CBCGemLiveIE, +) +from .cbs import ( + CBSIE, + ParamountPressExpressIE, +) +from .cbsinteractive import CBSInteractiveIE +from .cbsnews import ( + CBSNewsEmbedIE, + CBSNewsIE, + CBSLocalIE, + CBSLocalArticleIE, + CBSLocalLiveIE, + CBSNewsLiveIE, + CBSNewsLiveVideoIE, +) +from .cbssports import ( + CBSSportsEmbedIE, + CBSSportsIE, + TwentyFourSevenSportsIE, +) +from .ccc import ( + CCCIE, + CCCPlaylistIE, +) +from .ccma import CCMAIE +from .cctv import CCTVIE +from .cda import CDAIE +from .cellebrite import CellebriteIE +from .ceskatelevize import CeskaTelevizeIE +from .cgtn import CGTNIE +from .channel9 import Channel9IE +from .charlierose import CharlieRoseIE +from .chaturbate import ChaturbateIE +from .chilloutzone import ChilloutzoneIE +from .chingari import ( + ChingariIE, + ChingariUserIE, +) +from .chirbit import ( + ChirbitIE, + ChirbitProfileIE, +) +from .cinchcast import CinchcastIE +from .cinemax import CinemaxIE +from .cinetecamilano import CinetecaMilanoIE +from .ciscolive import ( + CiscoLiveSessionIE, + CiscoLiveSearchIE, +) +from .ciscowebex import CiscoWebexIE +from .cjsw import CJSWIE +from .clipchamp import ClipchampIE +from .cliphunter import CliphunterIE +from .clippit import ClippitIE +from .cliprs import ClipRsIE +from .clipsyndicate import ClipsyndicateIE +from .closertotruth import CloserToTruthIE +from .cloudflarestream import CloudflareStreamIE +from .cloudy import CloudyIE +from .clubic import ClubicIE +from .clyp import ClypIE +from .cmt import CMTIE +from .cnbc import ( + CNBCIE, + CNBCVideoIE, +) +from .cnn import ( + CNNIE, + CNNBlogsIE, + CNNArticleIE, + CNNIndonesiaIE, +) +from .coub import CoubIE +from .comedycentral import ( + ComedyCentralIE, + ComedyCentralTVIE, +) +from .commonmistakes import CommonMistakesIE, UnicodeBOMIE +from .commonprotocols import ( + MmsIE, + RtmpIE, + ViewSourceIE, +) +from .condenast import CondeNastIE +from .contv import CONtvIE +from .corus import CorusIE +from .cpac import ( + CPACIE, + CPACPlaylistIE, +) +from .cozytv import CozyTVIE +from .cracked import CrackedIE +from .crackle import CrackleIE +from .craftsy import CraftsyIE +from .crooksandliars import CrooksAndLiarsIE +from .crowdbunker import ( + CrowdBunkerIE, + CrowdBunkerChannelIE, +) +from .crtvg import CrtvgIE +from .crunchyroll import ( + CrunchyrollBetaIE, + CrunchyrollBetaShowIE, + CrunchyrollMusicIE, + CrunchyrollArtistIE, +) +from .cspan 
import CSpanIE, CSpanCongressIE +from .ctsnews import CtsNewsIE +from .ctv import CTVIE +from .ctvnews import CTVNewsIE +from .cultureunplugged import CultureUnpluggedIE +from .curiositystream import ( + CuriosityStreamIE, + CuriosityStreamCollectionsIE, + CuriosityStreamSeriesIE, +) +from .cwtv import CWTVIE +from .cybrary import ( + CybraryIE, + CybraryCourseIE +) +from .dacast import ( + DacastVODIE, + DacastPlaylistIE, +) +from .daftsex import DaftsexIE +from .dailymail import DailyMailIE +from .dailymotion import ( + DailymotionIE, + DailymotionPlaylistIE, + DailymotionUserIE, +) +from .dailywire import ( + DailyWireIE, + DailyWirePodcastIE, +) +from .damtomo import ( + DamtomoRecordIE, + DamtomoVideoIE, +) +from .daum import ( + DaumIE, + DaumClipIE, + DaumPlaylistIE, + DaumUserIE, +) +from .daystar import DaystarClipIE +from .dbtv import DBTVIE +from .dctp import DctpTvIE +from .deezer import ( + DeezerPlaylistIE, + DeezerAlbumIE, +) +from .democracynow import DemocracynowIE +from .detik import DetikEmbedIE +from .dlf import ( + DLFIE, + DLFCorpusIE, +) +from .dfb import DFBIE +from .dhm import DHMIE +from .digg import DiggIE +from .dotsub import DotsubIE +from .douyutv import ( + DouyuShowIE, + DouyuTVIE, +) +from .dplay import ( + DPlayIE, + DiscoveryPlusIE, + HGTVDeIE, + GoDiscoveryIE, + TravelChannelIE, + CookingChannelIE, + HGTVUsaIE, + FoodNetworkIE, + InvestigationDiscoveryIE, + DestinationAmericaIE, + AmHistoryChannelIE, + ScienceChannelIE, + DIYNetworkIE, + DiscoveryLifeIE, + AnimalPlanetIE, + TLCIE, + MotorTrendIE, + MotorTrendOnDemandIE, + DiscoveryPlusIndiaIE, + DiscoveryNetworksDeIE, + DiscoveryPlusItalyIE, + DiscoveryPlusItalyShowIE, + DiscoveryPlusIndiaShowIE, + GlobalCyclingNetworkPlusIE, +) +from .dreisat import DreiSatIE +from .drbonanza import DRBonanzaIE +from .drtuber import DrTuberIE +from .drtv import ( + DRTVIE, + DRTVLiveIE, + DRTVSeasonIE, + DRTVSeriesIE, +) +from .dtube import DTubeIE +from .dvtv import DVTVIE +from .duboku import ( + DubokuIE, + DubokuPlaylistIE +) +from .dumpert import DumpertIE +from .defense import DefenseGouvFrIE +from .deuxm import ( + DeuxMIE, + DeuxMNewsIE +) +from .digitalconcerthall import DigitalConcertHallIE +from .discogs import DiscogsReleasePlaylistIE +from .discovery import DiscoveryIE +from .disney import DisneyIE +from .dispeak import DigitallySpeakingIE +from .dropbox import DropboxIE +from .dropout import ( + DropoutSeasonIE, + DropoutIE +) +from .dw import ( + DWIE, + DWArticleIE, +) +from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE +from .ebaumsworld import EbaumsWorldIE +from .ebay import EbayIE +from .echomsk import EchoMskIE +from .egghead import ( + EggheadCourseIE, + EggheadLessonIE, +) +from .ehow import EHowIE +from .eighttracks import EightTracksIE +from .einthusan import EinthusanIE +from .eitb import EitbIE +from .elevensports import ElevenSportsIE +from .ellentube import ( + EllenTubeIE, + EllenTubeVideoIE, + EllenTubePlaylistIE, +) +from .elonet import ElonetIE +from .elpais import ElPaisIE +from .embedly import EmbedlyIE +from .engadget import EngadgetIE +from .epicon import ( + EpiconIE, + EpiconSeriesIE, +) +from .epoch import EpochIE +from .eporner import EpornerIE +from .eroprofile import ( + EroProfileIE, + EroProfileAlbumIE, +) +from .ertgr import ( + ERTFlixCodenameIE, + ERTFlixIE, + ERTWebtvEmbedIE, +) +from .escapist import EscapistIE +from .espn import ( + ESPNIE, + WatchESPNIE, + ESPNArticleIE, + FiveThirtyEightIE, + ESPNCricInfoIE, +) +from .esri import EsriVideoIE +from .ettutv 
import EttuTvIE +from .europa import EuropaIE, EuroParlWebstreamIE +from .europeantour import EuropeanTourIE +from .eurosport import EurosportIE +from .euscreen import EUScreenIE +from .expotv import ExpoTVIE +from .expressen import ExpressenIE +from .extremetube import ExtremeTubeIE +from .eyedotv import EyedoTVIE +from .facebook import ( + FacebookIE, + FacebookPluginsVideoIE, + FacebookRedirectURLIE, + FacebookReelIE, +) +from .fancode import ( + FancodeVodIE, + FancodeLiveIE +) + +from .faz import FazIE +from .fc2 import ( + FC2IE, + FC2EmbedIE, + FC2LiveIE, +) +from .fczenit import FczenitIE +from .fifa import FifaIE +from .filmmodu import FilmmoduIE +from .filmon import ( + FilmOnIE, + FilmOnChannelIE, +) +from .filmweb import FilmwebIE +from .firsttv import FirstTVIE +from .fivetv import FiveTVIE +from .flickr import FlickrIE +from .folketinget import FolketingetIE +from .footyroom import FootyRoomIE +from .formula1 import Formula1IE +from .fourtube import ( + FourTubeIE, + PornTubeIE, + PornerBrosIE, + FuxIE, +) +from .fourzerostudio import ( + FourZeroStudioArchiveIE, + FourZeroStudioClipIE, +) +from .fox import FOXIE +from .fox9 import ( + FOX9IE, + FOX9NewsIE, +) +from .foxgay import FoxgayIE +from .foxnews import ( + FoxNewsIE, + FoxNewsArticleIE, + FoxNewsVideoIE, +) +from .foxsports import FoxSportsIE +from .fptplay import FptplayIE +from .franceinter import FranceInterIE +from .francetv import ( + FranceTVIE, + FranceTVSiteIE, + FranceTVInfoIE, +) +from .freesound import FreesoundIE +from .freespeech import FreespeechIE +from .frontendmasters import ( + FrontendMastersIE, + FrontendMastersLessonIE, + FrontendMastersCourseIE +) +from .freetv import ( + FreeTvIE, + FreeTvMoviesIE, +) +from .fujitv import FujiTVFODPlus7IE +from .funimation import ( + FunimationIE, + FunimationPageIE, + FunimationShowIE, +) +from .funk import FunkIE +from .funker530 import Funker530IE +from .fusion import FusionIE +from .fuyintv import FuyinTVIE +from .gab import ( + GabTVIE, + GabIE, +) +from .gaia import GaiaIE +from .gameinformer import GameInformerIE +from .gamejolt import ( + GameJoltIE, + GameJoltUserIE, + GameJoltGameIE, + GameJoltGameSoundtrackIE, + GameJoltCommunityIE, + GameJoltSearchIE, +) +from .gamespot import GameSpotIE +from .gamestar import GameStarIE +from .gaskrank import GaskrankIE +from .gazeta import GazetaIE +from .gdcvault import GDCVaultIE +from .gedidigital import GediDigitalIE +from .generic import GenericIE +from .genius import ( + GeniusIE, + GeniusLyricsIE, +) +from .gettr import ( + GettrIE, + GettrStreamingIE, +) +from .gfycat import GfycatIE +from .giantbomb import GiantBombIE +from .giga import GigaIE +from .glide import GlideIE +from .globalplayer import ( + GlobalPlayerLiveIE, + GlobalPlayerLivePlaylistIE, + GlobalPlayerAudioIE, + GlobalPlayerAudioEpisodeIE, + GlobalPlayerVideoIE +) +from .globo import ( + GloboIE, + GloboArticleIE, +) +from .gmanetwork import GMANetworkVideoIE +from .go import GoIE +from .godtube import GodTubeIE +from .gofile import GofileIE +from .golem import GolemIE +from .goodgame import GoodGameIE +from .googledrive import ( + GoogleDriveIE, + GoogleDriveFolderIE, +) +from .googlepodcasts import ( + GooglePodcastsIE, + GooglePodcastsFeedIE, +) +from .googlesearch import GoogleSearchIE +from .gopro import GoProIE +from .goplay import GoPlayIE +from .goshgay import GoshgayIE +from .gotostage import GoToStageIE +from .gputechconf import GPUTechConfIE +from .gronkh import ( + GronkhIE, + GronkhFeedIE, + GronkhVodsIE +) +from .groupon import 
GrouponIE +from .harpodeon import HarpodeonIE +from .hbo import HBOIE +from .hearthisat import HearThisAtIE +from .heise import HeiseIE +from .hellporno import HellPornoIE +from .helsinki import HelsinkiIE +from .hgtv import HGTVComShowIE +from .hketv import HKETVIE +from .hidive import HiDiveIE +from .historicfilms import HistoricFilmsIE +from .hitbox import HitboxIE, HitboxLiveIE +from .hitrecord import HitRecordIE +from .hollywoodreporter import ( + HollywoodReporterIE, + HollywoodReporterPlaylistIE, +) +from .holodex import HolodexIE +from .hotnewhiphop import HotNewHipHopIE +from .hotstar import ( + HotStarIE, + HotStarPrefixIE, + HotStarPlaylistIE, + HotStarSeasonIE, + HotStarSeriesIE, +) +from .howcast import HowcastIE +from .howstuffworks import HowStuffWorksIE +from .hrefli import HrefLiRedirectIE +from .hrfensehen import HRFernsehenIE +from .hrti import ( + HRTiIE, + HRTiPlaylistIE, +) +from .hse import ( + HSEShowIE, + HSEProductIE, +) +from .genericembeds import ( + HTML5MediaEmbedIE, + QuotedHTMLIE, +) +from .huajiao import HuajiaoIE +from .huya import HuyaLiveIE +from .huffpost import HuffPostIE +from .hungama import ( + HungamaIE, + HungamaSongIE, + HungamaAlbumPlaylistIE, +) +from .hypem import HypemIE +from .hypergryph import MonsterSirenHypergryphMusicIE +from .hytale import HytaleIE +from .icareus import IcareusIE +from .ichinanalive import ( + IchinanaLiveIE, + IchinanaLiveClipIE, +) +from .idolplus import IdolPlusIE +from .ign import ( + IGNIE, + IGNVideoIE, + IGNArticleIE, +) +from .iheart import ( + IHeartRadioIE, + IHeartRadioPodcastIE, +) +from .iltalehti import IltalehtiIE +from .imdb import ( + ImdbIE, + ImdbListIE +) +from .imgur import ( + ImgurIE, + ImgurAlbumIE, + ImgurGalleryIE, +) +from .ina import InaIE +from .inc import IncIE +from .indavideo import IndavideoEmbedIE +from .infoq import InfoQIE +from .instagram import ( + InstagramIE, + InstagramIOSIE, + InstagramUserIE, + InstagramTagIE, + InstagramStoryIE, +) +from .internazionale import InternazionaleIE +from .internetvideoarchive import InternetVideoArchiveIE +from .iprima import ( + IPrimaIE, + IPrimaCNNIE +) +from .iqiyi import ( + IqiyiIE, + IqIE, + IqAlbumIE +) +from .islamchannel import ( + IslamChannelIE, + IslamChannelSeriesIE, +) +from .israelnationalnews import IsraelNationalNewsIE +from .itprotv import ( + ITProTVIE, + ITProTVCourseIE +) +from .itv import ( + ITVIE, + ITVBTCCIE, +) +from .ivi import ( + IviIE, + IviCompilationIE +) +from .ivideon import IvideonIE +from .iwara import ( + IwaraIE, + IwaraPlaylistIE, + IwaraUserIE, +) +from .ixigua import IxiguaIE +from .izlesene import IzleseneIE +from .jable import ( + JableIE, + JablePlaylistIE, +) +from .jamendo import ( + JamendoIE, + JamendoAlbumIE, +) +from .japandiet import ( + ShugiinItvLiveIE, + ShugiinItvLiveRoomIE, + ShugiinItvVodIE, + SangiinInstructionIE, + SangiinIE, +) +from .jeuxvideo import JeuxVideoIE +from .jove import JoveIE +from .joj import JojIE +from .jstream import JStreamIE +from .jwplatform import JWPlatformIE +from .kakao import KakaoIE +from .kaltura import KalturaIE +from .kanal2 import Kanal2IE +from .kankanews import KankaNewsIE +from .karaoketv import KaraoketvIE +from .karrierevideos import KarriereVideosIE +from .keezmovies import KeezMoviesIE +from .kelbyone import KelbyOneIE +from .khanacademy import ( + KhanAcademyIE, + KhanAcademyUnitIE, +) +from .kick import ( + KickIE, + KickVODIE, +) +from .kicker import KickerIE +from .kickstarter import KickStarterIE +from .kinja import KinjaEmbedIE +from .kinopoisk 
import KinoPoiskIE +from .kommunetv import KommunetvIE +from .kompas import KompasVideoIE +from .konserthusetplay import KonserthusetPlayIE +from .koo import KooIE +from .kth import KTHIE +from .krasview import KrasViewIE +from .ku6 import Ku6IE +from .kusi import KUSIIE +from .kuwo import ( + KuwoIE, + KuwoAlbumIE, + KuwoChartIE, + KuwoSingerIE, + KuwoCategoryIE, + KuwoMvIE, +) +from .la7 import ( + LA7IE, + LA7PodcastEpisodeIE, + LA7PodcastIE, +) +from .laola1tv import ( + Laola1TvEmbedIE, + Laola1TvIE, + EHFTVIE, + ITTFIE, +) +from .lastfm import ( + LastFMIE, + LastFMPlaylistIE, + LastFMUserIE, +) +from .lbry import ( + LBRYIE, + LBRYChannelIE, +) +from .lci import LCIIE +from .lcp import ( + LcpPlayIE, + LcpIE, +) +from .lecture2go import Lecture2GoIE +from .lecturio import ( + LecturioIE, + LecturioCourseIE, + LecturioDeCourseIE, +) +from .leeco import ( + LeIE, + LePlaylistIE, + LetvCloudIE, +) +from .lefigaro import ( + LeFigaroVideoEmbedIE, + LeFigaroVideoSectionIE, +) +from .lego import LEGOIE +from .lemonde import LemondeIE +from .lenta import LentaIE +from .libraryofcongress import LibraryOfCongressIE +from .libsyn import LibsynIE +from .lifenews import ( + LifeNewsIE, + LifeEmbedIE, +) +from .likee import ( + LikeeIE, + LikeeUserIE +) +from .limelight import ( + LimelightMediaIE, + LimelightChannelIE, + LimelightChannelListIE, +) +from .linkedin import ( + LinkedInIE, + LinkedInLearningIE, + LinkedInLearningCourseIE, +) +from .linuxacademy import LinuxAcademyIE +from .liputan6 import Liputan6IE +from .listennotes import ListenNotesIE +from .litv import LiTVIE +from .livejournal import LiveJournalIE +from .livestream import ( + LivestreamIE, + LivestreamOriginalIE, + LivestreamShortenerIE, +) +from .livestreamfails import LivestreamfailsIE +from .lnkgo import ( + LnkGoIE, + LnkIE, +) +from .localnews8 import LocalNews8IE +from .lovehomeporn import LoveHomePornIE +from .lrt import ( + LRTVODIE, + LRTStreamIE +) +from .lumni import ( + LumniIE +) +from .lynda import ( + LyndaIE, + LyndaCourseIE +) +from .m6 import M6IE +from .magellantv import MagellanTVIE +from .magentamusik360 import MagentaMusik360IE +from .mailru import ( + MailRuIE, + MailRuMusicIE, + MailRuMusicSearchIE, +) +from .mainstreaming import MainStreamingIE +from .malltv import MallTVIE +from .mangomolo import ( + MangomoloVideoIE, + MangomoloLiveIE, +) +from .manoto import ( + ManotoTVIE, + ManotoTVShowIE, + ManotoTVLiveIE, +) +from .manyvids import ManyVidsIE +from .maoritv import MaoriTVIE +from .markiza import ( + MarkizaIE, + MarkizaPageIE, +) +from .massengeschmacktv import MassengeschmackTVIE +from .masters import MastersIE +from .matchtv import MatchTVIE +from .mdr import MDRIE +from .medaltv import MedalTVIE +from .mediaite import MediaiteIE +from .mediaklikk import MediaKlikkIE +from .mediaset import ( + MediasetIE, + MediasetShowIE, +) +from .mediasite import ( + MediasiteIE, + MediasiteCatalogIE, + MediasiteNamedCatalogIE, +) +from .mediastream import ( + MediaStreamIE, + WinSportsVideoIE, +) +from .mediaworksnz import MediaWorksNZVODIE +from .medici import MediciIE +from .megaphone import MegaphoneIE +from .meipai import MeipaiIE +from .melonvod import MelonVODIE +from .meta import METAIE +from .metacafe import MetacafeIE +from .metacritic import MetacriticIE +from .mgoon import MgoonIE +from .mgtv import MGTVIE +from .miaopai import MiaoPaiIE +from .microsoftstream import MicrosoftStreamIE +from .microsoftvirtualacademy import ( + MicrosoftVirtualAcademyIE, + MicrosoftVirtualAcademyCourseIE, +) +from 
.microsoftembed import MicrosoftEmbedIE +from .mildom import ( + MildomIE, + MildomVodIE, + MildomClipIE, + MildomUserVodIE, +) +from .minds import ( + MindsIE, + MindsChannelIE, + MindsGroupIE, +) +from .ministrygrid import MinistryGridIE +from .minoto import MinotoIE +from .miomio import MioMioIE +from .mirrativ import ( + MirrativIE, + MirrativUserIE, +) +from .mirrorcouk import MirrorCoUKIE +from .mit import TechTVMITIE, OCWMITIE +from .mitele import MiTeleIE +from .mixch import ( + MixchIE, + MixchArchiveIE, +) +from .mixcloud import ( + MixcloudIE, + MixcloudUserIE, + MixcloudPlaylistIE, +) +from .mlb import ( + MLBIE, + MLBVideoIE, + MLBTVIE, + MLBArticleIE, +) +from .mlssoccer import MLSSoccerIE +from .mnet import MnetIE +from .mocha import MochaVideoIE +from .moevideo import MoeVideoIE +from .mofosex import ( + MofosexIE, + MofosexEmbedIE, +) +from .mojvideo import MojvideoIE +from .morningstar import MorningstarIE +from .motherless import ( + MotherlessIE, + MotherlessGroupIE, + MotherlessGalleryIE, +) +from .motorsport import MotorsportIE +from .movieclips import MovieClipsIE +from .moviepilot import MoviepilotIE +from .moview import MoviewPlayIE +from .moviezine import MoviezineIE +from .movingimage import MovingImageIE +from .msn import MSNIE +from .mtv import ( + MTVIE, + MTVVideoIE, + MTVServicesEmbeddedIE, + MTVDEIE, + MTVJapanIE, + MTVItaliaIE, + MTVItaliaProgrammaIE, +) +from .muenchentv import MuenchenTVIE +from .murrtube import MurrtubeIE, MurrtubeUserIE +from .museai import MuseAIIE +from .musescore import MuseScoreIE +from .musicdex import ( + MusicdexSongIE, + MusicdexAlbumIE, + MusicdexArtistIE, + MusicdexPlaylistIE, +) +from .mwave import MwaveIE, MwaveMeetGreetIE +from .mxplayer import ( + MxplayerIE, + MxplayerShowIE, +) +from .mychannels import MyChannelsIE +from .myspace import MySpaceIE, MySpaceAlbumIE +from .myspass import MySpassIE +from .myvi import ( + MyviIE, + MyviEmbedIE, +) +from .myvideoge import MyVideoGeIE +from .myvidster import MyVidsterIE +from .mzaalo import MzaaloIE +from .n1 import ( + N1InfoAssetIE, + N1InfoIIE, +) +from .nate import ( + NateIE, + NateProgramIE, +) +from .nationalgeographic import ( + NationalGeographicVideoIE, + NationalGeographicTVIE, +) +from .naver import ( + NaverIE, + NaverLiveIE, + NaverNowIE, +) +from .nba import ( + NBAWatchEmbedIE, + NBAWatchIE, + NBAWatchCollectionIE, + NBAEmbedIE, + NBAIE, + NBAChannelIE, +) +from .nbc import ( + NBCIE, + NBCNewsIE, + NBCOlympicsIE, + NBCOlympicsStreamIE, + NBCSportsIE, + NBCSportsStreamIE, + NBCSportsVPlayerIE, + NBCStationsIE, +) +from .ndr import ( + NDRIE, + NJoyIE, + NDREmbedBaseIE, + NDREmbedIE, + NJoyEmbedIE, +) +from .ndtv import NDTVIE +from .nebula import ( + NebulaIE, + NebulaSubscriptionsIE, + NebulaChannelIE, +) +from .nekohacker import NekoHackerIE +from .nerdcubed import NerdCubedFeedIE +from .netzkino import NetzkinoIE +from .neteasemusic import ( + NetEaseMusicIE, + NetEaseMusicAlbumIE, + NetEaseMusicSingerIE, + NetEaseMusicListIE, + NetEaseMusicMvIE, + NetEaseMusicProgramIE, + NetEaseMusicDjRadioIE, +) +from .netverse import ( + NetverseIE, + NetversePlaylistIE, + NetverseSearchIE, +) +from .newgrounds import ( + NewgroundsIE, + NewgroundsPlaylistIE, + NewgroundsUserIE, +) +from .newspicks import NewsPicksIE +from .newstube import NewstubeIE +from .newsy import NewsyIE +from .nextmedia import ( + NextMediaIE, + NextMediaActionNewsIE, + AppleDailyIE, + NextTVIE, +) +from .nexx import ( + NexxIE, + NexxEmbedIE, +) +from .nfb import NFBIE +from .nfhsnetwork import 
NFHSNetworkIE +from .nfl import ( + NFLIE, + NFLArticleIE, + NFLPlusEpisodeIE, + NFLPlusReplayIE, +) +from .nhk import ( + NhkVodIE, + NhkVodProgramIE, + NhkForSchoolBangumiIE, + NhkForSchoolSubjectIE, + NhkForSchoolProgramListIE, + NhkRadioNewsPageIE, + NhkRadiruIE, + NhkRadiruLiveIE, +) +from .nhl import NHLIE +from .nick import ( + NickIE, + NickBrIE, + NickDeIE, + NickNightIE, + NickRuIE, +) +from .niconico import ( + NiconicoIE, + NiconicoPlaylistIE, + NiconicoUserIE, + NiconicoSeriesIE, + NiconicoHistoryIE, + NicovideoSearchDateIE, + NicovideoSearchIE, + NicovideoSearchURLIE, + NicovideoTagURLIE, + NiconicoLiveIE, +) +from .ninecninemedia import ( + NineCNineMediaIE, + CPTwentyFourIE, +) +from .ninegag import NineGagIE +from .ninenow import NineNowIE +from .nintendo import NintendoIE +from .nitter import NitterIE +from .njpwworld import NJPWWorldIE +from .nobelprize import NobelPrizeIE +from .noice import NoicePodcastIE +from .nonktube import NonkTubeIE +from .noodlemagazine import NoodleMagazineIE +from .noovo import NoovoIE +from .normalboots import NormalbootsIE +from .nosvideo import NosVideoIE +from .nosnl import NOSNLArticleIE +from .nova import ( + NovaEmbedIE, + NovaIE, +) +from .novaplay import NovaPlayIE +from .nowness import ( + NownessIE, + NownessPlaylistIE, + NownessSeriesIE, +) +from .noz import NozIE +from .npo import ( + AndereTijdenIE, + NPOIE, + NPOLiveIE, + NPORadioIE, + NPORadioFragmentIE, + SchoolTVIE, + HetKlokhuisIE, + VPROIE, + WNLIE, +) +from .npr import NprIE +from .nrk import ( + NRKIE, + NRKPlaylistIE, + NRKSkoleIE, + NRKTVIE, + NRKTVDirekteIE, + NRKRadioPodkastIE, + NRKTVEpisodeIE, + NRKTVEpisodesIE, + NRKTVSeasonIE, + NRKTVSeriesIE, +) +from .nrl import NRLTVIE +from .ntvcojp import NTVCoJpCUIE +from .ntvde import NTVDeIE +from .ntvru import NTVRuIE +from .nubilesporn import NubilesPornIE +from .nytimes import ( + NYTimesIE, + NYTimesArticleIE, + NYTimesCookingIE, +) +from .nuvid import NuvidIE +from .nzherald import NZHeraldIE +from .nzonscreen import NZOnScreenIE +from .nzz import NZZIE +from .odatv import OdaTVIE +from .odkmedia import OnDemandChinaEpisodeIE +from .odnoklassniki import OdnoklassnikiIE +from .oftv import ( + OfTVIE, + OfTVPlaylistIE +) +from .oktoberfesttv import OktoberfestTVIE +from .olympics import OlympicsReplayIE +from .on24 import On24IE +from .ondemandkorea import OnDemandKoreaIE +from .onefootball import OneFootballIE +from .onenewsnz import OneNewsNZIE +from .oneplace import OnePlacePodcastIE +from .onet import ( + OnetIE, + OnetChannelIE, + OnetMVPIE, + OnetPlIE, +) +from .onionstudios import OnionStudiosIE +from .ooyala import ( + OoyalaIE, + OoyalaExternalIE, +) +from .opencast import ( + OpencastIE, + OpencastPlaylistIE, +) +from .openrec import ( + OpenRecIE, + OpenRecCaptureIE, + OpenRecMovieIE, +) +from .ora import OraTVIE +from .orf import ( + ORFTVthekIE, + ORFFM4StoryIE, + ORFRadioIE, + ORFIPTVIE, +) +from .outsidetv import OutsideTVIE +from .owncloud import OwnCloudIE +from .packtpub import ( + PacktPubIE, + PacktPubCourseIE, +) +from .palcomp3 import ( + PalcoMP3IE, + PalcoMP3ArtistIE, + PalcoMP3VideoIE, +) +from .pandoratv import PandoraTVIE +from .panopto import ( + PanoptoIE, + PanoptoListIE, + PanoptoPlaylistIE +) +from .paramountplus import ( + ParamountPlusIE, + ParamountPlusSeriesIE, +) +from .parler import ParlerIE +from .parlview import ParlviewIE +from .patreon import ( + PatreonIE, + PatreonCampaignIE +) +from .pbs import PBSIE, PBSKidsIE +from .pearvideo import PearVideoIE +from .peekvids import 
PeekVidsIE, PlayVidsIE +from .peertube import ( + PeerTubeIE, + PeerTubePlaylistIE, +) +from .peertv import PeerTVIE +from .peloton import ( + PelotonIE, + PelotonLiveIE +) +from .people import PeopleIE +from .performgroup import PerformGroupIE +from .periscope import ( + PeriscopeIE, + PeriscopeUserIE, +) +from .pgatour import PGATourIE +from .philharmoniedeparis import PhilharmonieDeParisIE +from .phoenix import PhoenixIE +from .photobucket import PhotobucketIE +from .piapro import PiaproIE +from .picarto import ( + PicartoIE, + PicartoVodIE, +) +from .piksel import PikselIE +from .pinkbike import PinkbikeIE +from .pinterest import ( + PinterestIE, + PinterestCollectionIE, +) +from .pixivsketch import ( + PixivSketchIE, + PixivSketchUserIE, +) +from .pladform import PladformIE +from .planetmarathi import PlanetMarathiIE +from .platzi import ( + PlatziIE, + PlatziCourseIE, +) +from .playfm import PlayFMIE +from .playplustv import PlayPlusTVIE +from .plays import PlaysTVIE +from .playstuff import PlayStuffIE +from .playsuisse import PlaySuisseIE +from .playtvak import PlaytvakIE +from .playvid import PlayvidIE +from .playwire import PlaywireIE +from .plutotv import PlutoTVIE +from .pluralsight import ( + PluralsightIE, + PluralsightCourseIE, +) +from .podbayfm import PodbayFMIE, PodbayFMChannelIE +from .podchaser import PodchaserIE +from .podomatic import PodomaticIE +from .pokemon import ( + PokemonIE, + PokemonWatchIE, +) +from .pokergo import ( + PokerGoIE, + PokerGoCollectionIE, +) +from .polsatgo import PolsatGoIE +from .polskieradio import ( + PolskieRadioIE, + PolskieRadioLegacyIE, + PolskieRadioAuditionIE, + PolskieRadioCategoryIE, + PolskieRadioPlayerIE, + PolskieRadioPodcastIE, + PolskieRadioPodcastListIE, +) +from .popcorntimes import PopcorntimesIE +from .popcorntv import PopcornTVIE +from .porn91 import Porn91IE +from .porncom import PornComIE +from .pornflip import PornFlipIE +from .pornhd import PornHdIE +from .pornhub import ( + PornHubIE, + PornHubUserIE, + PornHubPlaylistIE, + PornHubPagedVideoListIE, + PornHubUserVideosUploadIE, +) +from .pornotube import PornotubeIE +from .pornovoisines import PornoVoisinesIE +from .pornoxo import PornoXOIE +from .pornez import PornezIE +from .puhutv import ( + PuhuTVIE, + PuhuTVSerieIE, +) +from .pr0gramm import Pr0grammStaticIE, Pr0grammIE +from .prankcast import PrankCastIE +from .premiershiprugby import PremiershipRugbyIE +from .presstv import PressTVIE +from .projectveritas import ProjectVeritasIE +from .prosiebensat1 import ProSiebenSat1IE +from .prx import ( + PRXStoryIE, + PRXSeriesIE, + PRXAccountIE, + PRXStoriesSearchIE, + PRXSeriesSearchIE +) +from .puls4 import Puls4IE +from .pyvideo import PyvideoIE +from .qdance import QDanceIE +from .qingting import QingTingIE +from .qqmusic import ( + QQMusicIE, + QQMusicSingerIE, + QQMusicAlbumIE, + QQMusicToplistIE, + QQMusicPlaylistIE, +) +from .r7 import ( + R7IE, + R7ArticleIE, +) +from .radiko import RadikoIE, RadikoRadioIE +from .radiocanada import ( + RadioCanadaIE, + RadioCanadaAudioVideoIE, +) +from .radiode import RadioDeIE +from .radiojavan import RadioJavanIE +from .radiobremen import RadioBremenIE +from .radiofrance import FranceCultureIE, RadioFranceIE +from .radiozet import RadioZetPodcastIE +from .radiokapital import ( + RadioKapitalIE, + RadioKapitalShowIE, +) +from .radlive import ( + RadLiveIE, + RadLiveChannelIE, + RadLiveSeasonIE, +) +from .rai import ( + RaiIE, + RaiCulturaIE, + RaiPlayIE, + RaiPlayLiveIE, + RaiPlayPlaylistIE, + RaiPlaySoundIE, + 
RaiPlaySoundLiveIE, + RaiPlaySoundPlaylistIE, + RaiNewsIE, + RaiSudtirolIE, +) +from .raywenderlich import ( + RayWenderlichIE, + RayWenderlichCourseIE, +) +from .rbmaradio import RBMARadioIE +from .rbgtum import ( + RbgTumIE, + RbgTumCourseIE, +) +from .rcs import ( + RCSIE, + RCSEmbedsIE, + RCSVariousIE, +) +from .rcti import ( + RCTIPlusIE, + RCTIPlusSeriesIE, + RCTIPlusTVIE, +) +from .rds import RDSIE +from .recurbate import RecurbateIE +from .redbee import ParliamentLiveUKIE, RTBFIE +from .redbulltv import ( + RedBullTVIE, + RedBullEmbedIE, + RedBullTVRrnContentIE, + RedBullIE, +) +from .reddit import RedditIE +from .redgifs import ( + RedGifsIE, + RedGifsSearchIE, + RedGifsUserIE, +) +from .redtube import RedTubeIE +from .regiotv import RegioTVIE +from .rentv import ( + RENTVIE, + RENTVArticleIE, +) +from .restudy import RestudyIE +from .reuters import ReutersIE +from .reverbnation import ReverbNationIE +from .rheinmaintv import RheinMainTVIE +from .rice import RICEIE +from .rmcdecouverte import RMCDecouverteIE +from .rockstargames import RockstarGamesIE +from .rokfin import ( + RokfinIE, + RokfinStackIE, + RokfinChannelIE, + RokfinSearchIE, +) +from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE +from .rottentomatoes import RottenTomatoesIE +from .rozhlas import ( + RozhlasIE, + RozhlasVltavaIE, + MujRozhlasIE, +) +from .rte import RteIE, RteRadioIE +from .rtlnl import ( + RtlNlIE, + RTLLuTeleVODIE, + RTLLuArticleIE, + RTLLuLiveIE, + RTLLuRadioIE, +) +from .rtl2 import ( + RTL2IE, + RTL2YouIE, + RTL2YouSeriesIE, +) +from .rtnews import ( + RTNewsIE, + RTDocumentryIE, + RTDocumentryPlaylistIE, + RuptlyIE, +) +from .rtp import RTPIE +from .rtrfm import RTRFMIE +from .rts import RTSIE +from .rtvcplay import ( + RTVCPlayIE, + RTVCPlayEmbedIE, + RTVCKalturaIE, +) +from .rtve import ( + RTVEALaCartaIE, + RTVEAudioIE, + RTVELiveIE, + RTVEInfantilIE, + RTVETelevisionIE, +) +from .rtvnh import RTVNHIE +from .rtvs import RTVSIE +from .rtvslo import RTVSLOIE +from .ruhd import RUHDIE +from .rule34video import Rule34VideoIE +from .rumble import ( + RumbleEmbedIE, + RumbleIE, + RumbleChannelIE, +) +from .rutube import ( + RutubeIE, + RutubeChannelIE, + RutubeEmbedIE, + RutubeMovieIE, + RutubePersonIE, + RutubePlaylistIE, + RutubeTagsIE, +) +from .glomex import ( + GlomexIE, + GlomexEmbedIE, +) +from .megatvcom import ( + MegaTVComIE, + MegaTVComEmbedIE, +) +from .ant1newsgr import ( + Ant1NewsGrWatchIE, + Ant1NewsGrArticleIE, + Ant1NewsGrEmbedIE, +) +from .rutv import RUTVIE +from .ruutu import RuutuIE +from .ruv import ( + RuvIE, + RuvSpilaIE +) +from .s4c import S4CIE +from .safari import ( + SafariIE, + SafariApiIE, + SafariCourseIE, +) +from .saitosan import SaitosanIE +from .samplefocus import SampleFocusIE +from .sapo import SapoIE +from .savefrom import SaveFromIE +from .sbs import SBSIE +from .screen9 import Screen9IE +from .screencast import ScreencastIE +from .screencastify import ScreencastifyIE +from .screencastomatic import ScreencastOMaticIE +from .scrippsnetworks import ( + ScrippsNetworksWatchIE, + ScrippsNetworksIE, +) +from .scte import ( + SCTEIE, + SCTECourseIE, +) +from .scrolller import ScrolllerIE +from .seeker import SeekerIE +from .senalcolombia import SenalColombiaLiveIE +from .senategov import SenateISVPIE, SenateGovIE +from .sendtonews import SendtoNewsIE +from .servus import ServusIE +from .sevenplus import SevenPlusIE +from .sexu import SexuIE +from .seznamzpravy import ( + SeznamZpravyIE, + SeznamZpravyArticleIE, +) +from .shahid import ( + ShahidIE, + 
ShahidShowIE, +) +from .shared import ( + SharedIE, + VivoIE, +) +from .sharevideos import ShareVideosEmbedIE +from .sibnet import SibnetEmbedIE +from .shemaroome import ShemarooMeIE +from .showroomlive import ShowRoomLiveIE +from .simplecast import ( + SimplecastIE, + SimplecastEpisodeIE, + SimplecastPodcastIE, +) +from .sina import SinaIE +from .sixplay import SixPlayIE +from .skeb import SkebIE +from .skyit import ( + SkyItPlayerIE, + SkyItVideoIE, + SkyItVideoLiveIE, + SkyItIE, + SkyItArteIE, + CieloTVItIE, + TV8ItIE, +) +from .skylinewebcams import SkylineWebcamsIE +from .skynewsarabia import ( + SkyNewsArabiaIE, + SkyNewsArabiaArticleIE, +) +from .skynewsau import SkyNewsAUIE +from .sky import ( + SkyNewsIE, + SkyNewsStoryIE, + SkySportsIE, + SkySportsNewsIE, +) +from .slideshare import SlideshareIE +from .slideslive import SlidesLiveIE +from .slutload import SlutloadIE +from .smotrim import SmotrimIE +from .snotr import SnotrIE +from .sohu import SohuIE +from .sonyliv import ( + SonyLIVIE, + SonyLIVSeriesIE, +) +from .soundcloud import ( + SoundcloudEmbedIE, + SoundcloudIE, + SoundcloudSetIE, + SoundcloudRelatedIE, + SoundcloudUserIE, + SoundcloudUserPermalinkIE, + SoundcloudTrackStationIE, + SoundcloudPlaylistIE, + SoundcloudSearchIE, +) +from .soundgasm import ( + SoundgasmIE, + SoundgasmProfileIE +) +from .southpark import ( + SouthParkIE, + SouthParkDeIE, + SouthParkDkIE, + SouthParkEsIE, + SouthParkLatIE, + SouthParkNlIE +) +from .sovietscloset import ( + SovietsClosetIE, + SovietsClosetPlaylistIE +) +from .spankbang import ( + SpankBangIE, + SpankBangPlaylistIE, +) +from .spankwire import SpankwireIE +from .spiegel import SpiegelIE +from .spike import ( + BellatorIE, + ParamountNetworkIE, +) +from .stageplus import StagePlusVODConcertIE +from .startrek import StarTrekIE +from .stitcher import ( + StitcherIE, + StitcherShowIE, +) +from .sport5 import Sport5IE +from .sportbox import SportBoxIE +from .sportdeutschland import SportDeutschlandIE +from .spotify import ( + SpotifyIE, + SpotifyShowIE, +) +from .spreaker import ( + SpreakerIE, + SpreakerPageIE, + SpreakerShowIE, + SpreakerShowPageIE, +) +from .springboardplatform import SpringboardPlatformIE +from .sprout import SproutIE +from .srgssr import ( + SRGSSRIE, + SRGSSRPlayIE, +) +from .srmediathek import SRMediathekIE +from .stacommu import ( + StacommuLiveIE, + StacommuVODIE, +) +from .stanfordoc import StanfordOpenClassroomIE +from .startv import StarTVIE +from .steam import ( + SteamIE, + SteamCommunityBroadcastIE, +) +from .storyfire import ( + StoryFireIE, + StoryFireUserIE, + StoryFireSeriesIE, +) +from .streamable import StreamableIE +from .streamcloud import StreamcloudIE +from .streamcz import StreamCZIE +from .streamff import StreamFFIE +from .streetvoice import StreetVoiceIE +from .stretchinternet import StretchInternetIE +from .stripchat import StripchatIE +from .stv import STVPlayerIE +from .substack import SubstackIE +from .sunporno import SunPornoIE +from .sverigesradio import ( + SverigesRadioEpisodeIE, + SverigesRadioPublicationIE, +) +from .svt import ( + SVTIE, + SVTPageIE, + SVTPlayIE, + SVTSeriesIE, +) +from .swearnet import SwearnetEpisodeIE +from .swrmediathek import SWRMediathekIE +from .syvdk import SYVDKIE +from .syfy import SyfyIE +from .sztvhu import SztvHuIE +from .tagesschau import TagesschauIE +from .tass import TassIE +from .tbs import TBSIE +from .tdslifeway import TDSLifewayIE +from .teachable import ( + TeachableIE, + TeachableCourseIE, +) +from .teachertube import ( + TeacherTubeIE, + 
TeacherTubeUserIE, +) +from .teachingchannel import TeachingChannelIE +from .teamcoco import ( + TeamcocoIE, + ConanClassicIE, +) +from .teamtreehouse import TeamTreeHouseIE +from .techtalks import TechTalksIE +from .ted import ( + TedEmbedIE, + TedPlaylistIE, + TedSeriesIE, + TedTalkIE, +) +from .tele5 import Tele5IE +from .tele13 import Tele13IE +from .telebruxelles import TeleBruxellesIE +from .telecaribe import TelecaribePlayIE +from .telecinco import TelecincoIE +from .telegraaf import TelegraafIE +from .telegram import TelegramEmbedIE +from .telemb import TeleMBIE +from .telemundo import TelemundoIE +from .telequebec import ( + TeleQuebecIE, + TeleQuebecSquatIE, + TeleQuebecEmissionIE, + TeleQuebecLiveIE, + TeleQuebecVideoIE, +) +from .teletask import TeleTaskIE +from .telewebion import TelewebionIE +from .tempo import TempoIE, IVXPlayerIE +from .tencent import ( + IflixEpisodeIE, + IflixSeriesIE, + VQQSeriesIE, + VQQVideoIE, + WeTvEpisodeIE, + WeTvSeriesIE, +) +from .tennistv import TennisTVIE +from .tenplay import TenPlayIE +from .testurl import TestURLIE +from .tf1 import TF1IE +from .tfo import TFOIE +from .theholetv import TheHoleTvIE +from .theintercept import TheInterceptIE +from .theplatform import ( + ThePlatformIE, + ThePlatformFeedIE, +) +from .thestar import TheStarIE +from .thesun import TheSunIE +from .theta import ( + ThetaVideoIE, + ThetaStreamIE, +) +from .theweatherchannel import TheWeatherChannelIE +from .thisamericanlife import ThisAmericanLifeIE +from .thisav import ThisAVIE +from .thisoldhouse import ThisOldHouseIE +from .thisvid import ( + ThisVidIE, + ThisVidMemberIE, + ThisVidPlaylistIE, +) +from .threespeak import ( + ThreeSpeakIE, + ThreeSpeakUserIE, +) +from .threeqsdn import ThreeQSDNIE +from .tiktok import ( + TikTokIE, + TikTokUserIE, + TikTokSoundIE, + TikTokEffectIE, + TikTokTagIE, + TikTokVMIE, + TikTokLiveIE, + DouyinIE, +) +from .tinypic import TinyPicIE +from .tmz import TMZIE +from .tnaflix import ( + TNAFlixNetworkEmbedIE, + TNAFlixIE, + EMPFlixIE, + MovieFapIE, +) +from .toggle import ( + ToggleIE, + MeWatchIE, +) +from .toggo import ( + ToggoIE, +) +from .tokentube import ( + TokentubeIE, + TokentubeChannelIE +) +from .tonline import TOnlineIE +from .toongoggles import ToonGogglesIE +from .toutv import TouTvIE +from .toypics import ToypicsUserIE, ToypicsIE +from .traileraddict import TrailerAddictIE +from .triller import ( + TrillerIE, + TrillerUserIE, + TrillerShortIE, +) +from .trilulilu import TriluliluIE +from .trovo import ( + TrovoIE, + TrovoVodIE, + TrovoChannelVodIE, + TrovoChannelClipIE, +) +from .trtcocuk import TrtCocukVideoIE +from .trueid import TrueIDIE +from .trunews import TruNewsIE +from .truth import TruthIE +from .trutv import TruTVIE +from .tube8 import Tube8IE +from .tubetugraz import TubeTuGrazIE, TubeTuGrazSeriesIE +from .tubitv import ( + TubiTvIE, + TubiTvShowIE, +) +from .tumblr import TumblrIE +from .tunein import ( + TuneInStationIE, + TuneInPodcastIE, + TuneInPodcastEpisodeIE, + TuneInShortenerIE, +) +from .tunepk import TunePkIE +from .turbo import TurboIE +from .tv2 import ( + TV2IE, + TV2ArticleIE, + KatsomoIE, + MTVUutisetArticleIE, +) +from .tv24ua import ( + TV24UAVideoIE, +) +from .tv2dk import ( + TV2DKIE, + TV2DKBornholmPlayIE, +) +from .tv2hu import ( + TV2HuIE, + TV2HuSeriesIE, +) +from .tv4 import TV4IE +from .tv5mondeplus import TV5MondePlusIE +from .tv5unis import ( + TV5UnisVideoIE, + TV5UnisIE, +) +from .tva import ( + TVAIE, + QubIE, +) +from .tvanouvelles import ( + TVANouvellesIE, + 
TVANouvellesArticleIE, +) +from .tvc import ( + TVCIE, + TVCArticleIE, +) +from .tver import TVerIE +from .tvigle import TvigleIE +from .tviplayer import TVIPlayerIE +from .tvland import TVLandIE +from .tvn24 import TVN24IE +from .tvnet import TVNetIE +from .tvnoe import TVNoeIE +from .tvnow import ( + TVNowIE, + TVNowFilmIE, + TVNowNewIE, + TVNowSeasonIE, + TVNowAnnualIE, + TVNowShowIE, +) +from .tvopengr import ( + TVOpenGrWatchIE, + TVOpenGrEmbedIE, +) +from .tvp import ( + TVPEmbedIE, + TVPIE, + TVPStreamIE, + TVPVODSeriesIE, + TVPVODVideoIE, +) +from .tvplay import ( + TVPlayIE, + TVPlayHomeIE, +) +from .tvplayer import TVPlayerIE +from .tweakers import TweakersIE +from .twentyfourvideo import TwentyFourVideoIE +from .twentymin import TwentyMinutenIE +from .twentythreevideo import TwentyThreeVideoIE +from .twitcasting import ( + TwitCastingIE, + TwitCastingLiveIE, + TwitCastingUserIE, +) +from .twitch import ( + TwitchVodIE, + TwitchCollectionIE, + TwitchVideosIE, + TwitchVideosClipsIE, + TwitchVideosCollectionsIE, + TwitchStreamIE, + TwitchClipsIE, +) +from .twitter import ( + TwitterCardIE, + TwitterIE, + TwitterAmplifyIE, + TwitterBroadcastIE, + TwitterSpacesIE, + TwitterShortenerIE, +) +from .txxx import ( + TxxxIE, + PornTopIE, +) +from .udemy import ( + UdemyIE, + UdemyCourseIE +) +from .udn import UDNEmbedIE +from .ufctv import ( + UFCTVIE, + UFCArabiaIE, +) +from .ukcolumn import UkColumnIE +from .uktvplay import UKTVPlayIE +from .digiteka import DigitekaIE +from .dlive import ( + DLiveVODIE, + DLiveStreamIE, +) +from .drooble import DroobleIE +from .umg import UMGDeIE +from .unistra import UnistraIE +from .unity import UnityIE +from .unscripted import UnscriptedNewsVideoIE +from .unsupported import KnownDRMIE, KnownPiracyIE +from .uol import UOLIE +from .uplynk import ( + UplynkIE, + UplynkPreplayIE, +) +from .urort import UrortIE +from .urplay import URPlayIE +from .usanetwork import USANetworkIE +from .usatoday import USATodayIE +from .ustream import UstreamIE, UstreamChannelIE +from .ustudio import ( + UstudioIE, + UstudioEmbedIE, +) +from .utreon import UtreonIE +from .varzesh3 import Varzesh3IE +from .vbox7 import Vbox7IE +from .veehd import VeeHDIE +from .veo import VeoIE +from .veoh import ( + VeohIE, + VeohUserIE +) +from .vesti import VestiIE +from .vevo import ( + VevoIE, + VevoPlaylistIE, +) +from .vgtv import ( + BTArticleIE, + BTVestlendingenIE, + VGTVIE, +) +from .vh1 import VH1IE +from .vice import ( + ViceIE, + ViceArticleIE, + ViceShowIE, +) +from .vidbit import VidbitIE +from .viddler import ViddlerIE +from .videa import VideaIE +from .videocampus_sachsen import ( + VideocampusSachsenIE, + ViMPPlaylistIE, +) +from .videodetective import VideoDetectiveIE +from .videofyme import VideofyMeIE +from .videoken import ( + VideoKenIE, + VideoKenPlayerIE, + VideoKenPlaylistIE, + VideoKenCategoryIE, + VideoKenTopicIE, +) +from .videomore import ( + VideomoreIE, + VideomoreVideoIE, + VideomoreSeasonIE, +) +from .videopress import VideoPressIE +from .vidio import ( + VidioIE, + VidioPremierIE, + VidioLiveIE +) +from .vidlii import VidLiiIE +from .viewlift import ( + ViewLiftIE, + ViewLiftEmbedIE, +) +from .viidea import ViideaIE +from .vimeo import ( + VimeoIE, + VimeoAlbumIE, + VimeoChannelIE, + VimeoGroupsIE, + VimeoLikesIE, + VimeoOndemandIE, + VimeoProIE, + VimeoReviewIE, + VimeoUserIE, + VimeoWatchLaterIE, + VHXEmbedIE, +) +from .vimm import ( + VimmIE, + VimmRecordingIE, +) +from .vimple import VimpleIE +from .vine import ( + VineIE, + VineUserIE, +) +from .viki 
import ( + VikiIE, + VikiChannelIE, +) +from .viqeo import ViqeoIE +from .viu import ( + ViuIE, + ViuPlaylistIE, + ViuOTTIE, + ViuOTTIndonesiaIE, +) +from .vk import ( + VKIE, + VKUserVideosIE, + VKWallPostIE, + VKPlayIE, + VKPlayLiveIE, +) +from .vocaroo import VocarooIE +from .vodlocker import VodlockerIE +from .vodpl import VODPlIE +from .vodplatform import VODPlatformIE +from .voicerepublic import VoiceRepublicIE +from .voicy import ( + VoicyIE, + VoicyChannelIE, +) +from .volejtv import VolejTVIE +from .voot import ( + VootIE, + VootSeriesIE, +) +from .voxmedia import ( + VoxMediaVolumeIE, + VoxMediaIE, +) +from .vrt import ( + VRTIE, + VrtNUIE, + KetnetIE, + DagelijkseKostIE, +) +from .vrak import VrakIE +from .vrv import ( + VRVIE, + VRVSeriesIE, +) +from .vshare import VShareIE +from .vtm import VTMIE +from .medialaan import MedialaanIE +from .vuclip import VuClipIE +from .vupload import VuploadIE +from .vvvvid import ( + VVVVIDIE, + VVVVIDShowIE, +) +from .vyborymos import VyboryMosIE +from .vzaar import VzaarIE +from .wakanim import WakanimIE +from .walla import WallaIE +from .washingtonpost import ( + WashingtonPostIE, + WashingtonPostArticleIE, +) +from .wasdtv import ( + WASDTVStreamIE, + WASDTVRecordIE, + WASDTVClipIE, +) +from .wat import WatIE +from .watchbox import WatchBoxIE +from .watchindianporn import WatchIndianPornIE +from .wdr import ( + WDRIE, + WDRPageIE, + WDRElefantIE, + WDRMobileIE, +) +from .webcamerapl import WebcameraplIE +from .webcaster import ( + WebcasterIE, + WebcasterFeedIE, +) +from .webofstories import ( + WebOfStoriesIE, + WebOfStoriesPlaylistIE, +) +from .weibo import ( + WeiboIE, + WeiboMobileIE +) +from .weiqitv import WeiqiTVIE +from .weverse import ( + WeverseIE, + WeverseMediaIE, + WeverseMomentIE, + WeverseLiveTabIE, + WeverseMediaTabIE, + WeverseLiveIE, +) +from .wevidi import WeVidiIE +from .weyyak import WeyyakIE +from .whyp import WhypIE +from .wikimedia import WikimediaIE +from .willow import WillowIE +from .wimbledon import WimbledonIE +from .wimtv import WimTVIE +from .whowatch import WhoWatchIE +from .wistia import ( + WistiaIE, + WistiaPlaylistIE, + WistiaChannelIE, +) +from .wordpress import ( + WordpressPlaylistEmbedIE, + WordpressMiniAudioPlayerEmbedIE, +) +from .worldstarhiphop import WorldStarHipHopIE +from .wppilot import ( + WPPilotIE, + WPPilotChannelsIE, +) +from .wrestleuniverse import ( + WrestleUniverseVODIE, + WrestleUniversePPVIE, +) +from .wsj import ( + WSJIE, + WSJArticleIE, +) +from .wwe import WWEIE +from .wykop import ( + WykopDigIE, + WykopDigCommentIE, + WykopPostIE, + WykopPostCommentIE, +) +from .xanimu import XanimuIE +from .xbef import XBefIE +from .xboxclips import XboxClipsIE +from .xfileshare import XFileShareIE +from .xhamster import ( + XHamsterIE, + XHamsterEmbedIE, + XHamsterUserIE, +) +from .ximalaya import ( + XimalayaIE, + XimalayaAlbumIE +) +from .xinpianchang import XinpianchangIE +from .xminus import XMinusIE +from .xnxx import XNXXIE +from .xstream import XstreamIE +from .xtube import XTubeUserIE, XTubeIE +from .xuite import XuiteIE +from .xvideos import ( + XVideosIE, + XVideosQuickiesIE +) +from .xxxymovies import XXXYMoviesIE +from .yahoo import ( + YahooIE, + YahooSearchIE, + YahooJapanNewsIE, +) +from .yandexdisk import YandexDiskIE +from .yandexmusic import ( + YandexMusicTrackIE, + YandexMusicAlbumIE, + YandexMusicPlaylistIE, + YandexMusicArtistTracksIE, + YandexMusicArtistAlbumsIE, +) +from .yandexvideo import ( + YandexVideoIE, + YandexVideoPreviewIE, + ZenYandexIE, + 
ZenYandexChannelIE, +) +from .yapfiles import YapFilesIE +from .yappy import ( + YappyIE, + YappyProfileIE, +) +from .yesjapan import YesJapanIE +from .yinyuetai import YinYueTaiIE +from .yle_areena import YleAreenaIE +from .ynet import YnetIE +from .youjizz import YouJizzIE +from .youku import ( + YoukuIE, + YoukuShowIE, +) +from .younow import ( + YouNowLiveIE, + YouNowChannelIE, + YouNowMomentIE, +) +from .youporn import YouPornIE +from .yourporn import YourPornIE +from .yourupload import YourUploadIE +from .zaiko import ( + ZaikoIE, + ZaikoETicketIE, +) +from .zapiks import ZapiksIE +from .zattoo import ( + BBVTVIE, + BBVTVLiveIE, + BBVTVRecordingsIE, + EinsUndEinsTVIE, + EinsUndEinsTVLiveIE, + EinsUndEinsTVRecordingsIE, + EWETVIE, + EWETVLiveIE, + EWETVRecordingsIE, + GlattvisionTVIE, + GlattvisionTVLiveIE, + GlattvisionTVRecordingsIE, + MNetTVIE, + MNetTVLiveIE, + MNetTVRecordingsIE, + NetPlusTVIE, + NetPlusTVLiveIE, + NetPlusTVRecordingsIE, + OsnatelTVIE, + OsnatelTVLiveIE, + OsnatelTVRecordingsIE, + QuantumTVIE, + QuantumTVLiveIE, + QuantumTVRecordingsIE, + SaltTVIE, + SaltTVLiveIE, + SaltTVRecordingsIE, + SAKTVIE, + SAKTVLiveIE, + SAKTVRecordingsIE, + VTXTVIE, + VTXTVLiveIE, + VTXTVRecordingsIE, + WalyTVIE, + WalyTVLiveIE, + WalyTVRecordingsIE, + ZattooIE, + ZattooLiveIE, + ZattooMoviesIE, + ZattooRecordingsIE, +) +from .zdf import ZDFIE, ZDFChannelIE +from .zee5 import ( + Zee5IE, + Zee5SeriesIE, +) +from .zeenews import ZeeNewsIE +from .zhihu import ZhihuIE +from .zingmp3 import ( + ZingMp3IE, + ZingMp3AlbumIE, + ZingMp3ChartHomeIE, + ZingMp3WeekChartIE, + ZingMp3ChartMusicVideoIE, + ZingMp3UserIE, + ZingMp3HubIE, +) +from .zoom import ZoomIE +from .zype import ZypeIE diff --git a/plugins/youtube_download/yt_dlp/extractor/abc.py b/plugins/youtube_download/yt_dlp/extractor/abc.py index 9d6f5a4..f56133e 100644 --- a/plugins/youtube_download/yt_dlp/extractor/abc.py +++ b/plugins/youtube_download/yt_dlp/extractor/abc.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import hashlib import hmac import re @@ -14,6 +12,7 @@ from ..utils import ( int_or_none, parse_iso8601, str_or_none, + traverse_obj, try_get, unescapeHTML, update_url_query, @@ -87,6 +86,15 @@ class ABCIE(InfoExtractor): 'uploader': 'Behind the News', 'uploader_id': 'behindthenews', } + }, { + 'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540', + 'info_dict': { + 'id': '102520540', + 'title': 'Wagner Group retreating from Russia, leader Prigozhin to move to Belarus', + 'ext': 'mp4', + 'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.', + 'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485', + } }] def _real_extract(self, url): @@ -109,7 +117,7 @@ class ABCIE(InfoExtractor): video = True if mobj is None: - mobj = re.search(r'(?P<type>)"sources": (?P<json_data>\[[^\]]+\]),', webpage) + mobj = re.search(r'(?P<type>)"(?:sources|files|renditions)":\s*(?P<json_data>\[[^\]]+\])', webpage) if mobj is None: mobj = re.search( r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);', @@ -123,7 +131,8 @@ class ABCIE(InfoExtractor): urls_info = self._parse_json( mobj.group('json_data'), video_id, transform_source=js_to_json) youtube = mobj.group('type') == 'YouTube' - video = mobj.group('type') == 'Video' or 
urls_info[0]['contentType'] == 'video/mp4' + video = mobj.group('type') == 'Video' or traverse_obj( + urls_info, (0, ('contentType', 'MIMEType')), get_all=False) == 'video/mp4' if not isinstance(urls_info, list): urls_info = [urls_info] @@ -157,8 +166,6 @@ class ABCIE(InfoExtractor): 'format_id': format_id }) - self._sort_formats(formats) - return { 'id': video_id, 'title': self._og_search_title(webpage), @@ -213,7 +220,7 @@ class ABCIViewIE(InfoExtractor): 'hdnea': token, }) - for sd in ('720', 'sd', 'sd-low'): + for sd in ('1080', '720', 'sd', 'sd-low'): sd_url = try_get( stream, lambda x: x['streams']['hls'][sd], compat_str) if not sd_url: @@ -223,7 +230,6 @@ class ABCIViewIE(InfoExtractor): entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) if formats: break - self._sort_formats(formats) subtitles = {} src_vtt = stream.get('captions', {}).get('src-vtt') diff --git a/plugins/youtube_download/yt_dlp/extractor/abcnews.py b/plugins/youtube_download/yt_dlp/extractor/abcnews.py index 296b8ce..a57295b 100644 --- a/plugins/youtube_download/yt_dlp/extractor/abcnews.py +++ b/plugins/youtube_download/yt_dlp/extractor/abcnews.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .amp import AMPIE from .common import InfoExtractor from ..utils import ( diff --git a/plugins/youtube_download/yt_dlp/extractor/abcotvs.py b/plugins/youtube_download/yt_dlp/extractor/abcotvs.py index 5bff466..6dca19d 100644 --- a/plugins/youtube_download/yt_dlp/extractor/abcotvs.py +++ b/plugins/youtube_download/yt_dlp/extractor/abcotvs.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( @@ -82,7 +78,6 @@ class ABCOTVSIE(InfoExtractor): 'url': mp4_url, 'width': 640, }) - self._sort_formats(formats) image = video.get('image') or {} @@ -123,7 +118,6 @@ class ABCOTVSClipsIE(InfoExtractor): title = video_data['title'] formats = self._extract_m3u8_formats( video_data['videoURL'].split('?')[0], video_id, 'mp4') - self._sort_formats(formats) return { 'id': video_id, diff --git a/plugins/youtube_download/yt_dlp/extractor/abematv.py b/plugins/youtube_download/yt_dlp/extractor/abematv.py new file mode 100644 index 0000000..163b83c --- /dev/null +++ b/plugins/youtube_download/yt_dlp/extractor/abematv.py @@ -0,0 +1,472 @@ +import base64 +import binascii +import functools +import hashlib +import hmac +import io +import json +import re +import struct +import time +import urllib.parse +import urllib.request +import urllib.response +import uuid + +from .common import InfoExtractor +from ..aes import aes_ecb_decrypt +from ..utils import ( + ExtractorError, + bytes_to_intlist, + decode_base_n, + int_or_none, + intlist_to_bytes, + OnDemandPagedList, + time_seconds, + traverse_obj, + update_url_query, +) + + +def add_opener(ydl, handler): # FIXME: Create proper API in .networking + """Add a handler for opening URLs, like _download_webpage""" + # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426 + # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605 + rh = ydl._request_director.handlers['Urllib'] + if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES: + return + opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies) + assert isinstance(opener, urllib.request.OpenerDirector) + opener.add_handler(handler) + rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license') + + +class 
AbemaLicenseHandler(urllib.request.BaseHandler): + handler_order = 499 + STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' + HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E' + + def __init__(self, ie: 'AbemaTVIE'): + # the protocol that this should really handle is 'abematv-license://' + # abematv_license_open is just a placeholder for development purposes + # ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510 + setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open')) + self.ie = ie + + def _get_videokey_from_ticket(self, ticket): + to_show = self.ie.get_param('verbose', False) + media_token = self.ie._get_media_token(to_show=to_show) + + license_response = self.ie._download_json( + 'https://license.abema.io/abematv-hls', None, note='Requesting playback license' if to_show else False, + query={'t': media_token}, + data=json.dumps({ + 'kv': 'a', + 'lt': ticket + }).encode('utf-8'), + headers={ + 'Content-Type': 'application/json', + }) + + res = decode_base_n(license_response['k'], table=self.STRTABLE) + encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff)) + + h = hmac.new( + binascii.unhexlify(self.HKEY), + (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'), + digestmod=hashlib.sha256) + enckey = bytes_to_intlist(h.digest()) + + return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey)) + + def abematv_license_open(self, url): + url = url.get_full_url() if isinstance(url, urllib.request.Request) else url + ticket = urllib.parse.urlparse(url).netloc + response_data = self._get_videokey_from_ticket(ticket) + return urllib.response.addinfourl(io.BytesIO(response_data), headers={ + 'Content-Length': str(len(response_data)), + }, url=url, code=200) + + +class AbemaTVBaseIE(InfoExtractor): + _USERTOKEN = None + _DEVICE_ID = None + _MEDIATOKEN = None + + _SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe' + + @classmethod + def _generate_aks(cls, deviceid): + deviceid = deviceid.encode('utf-8') + # add 1 hour and then drop minute and secs + ts_1hour = int((time_seconds() // 3600 + 1) * 3600) + time_struct = time.gmtime(ts_1hour) + ts_1hour_str = str(ts_1hour).encode('utf-8') + + tmp = None + + def mix_once(nonce): + nonlocal tmp + h = hmac.new(cls._SECRETKEY, digestmod=hashlib.sha256) + h.update(nonce) + tmp = h.digest() + + def mix_tmp(count): + nonlocal tmp + for i in range(count): + mix_once(tmp) + + def mix_twist(nonce): + nonlocal tmp + mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce) + + mix_once(cls._SECRETKEY) + mix_tmp(time_struct.tm_mon) + mix_twist(deviceid) + mix_tmp(time_struct.tm_mday % 5) + mix_twist(ts_1hour_str) + mix_tmp(time_struct.tm_hour % 5) + + return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8') + + def _get_device_token(self): + if self._USERTOKEN: + return self._USERTOKEN + + username, _ = self._get_login_info() + AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username) + if AbemaTVBaseIE._USERTOKEN: + # try authentication with locally stored token + try: + self._get_media_token(True) + return + except ExtractorError as e: + self.report_warning(f'Failed to login with cached user token; obtaining a fresh one ({e})') + + AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4()) + aks = self._generate_aks(self._DEVICE_ID) + user_data = self._download_json( + 
'https://api.abema.io/v1/users', None, note='Authorizing', + data=json.dumps({ + 'deviceId': self._DEVICE_ID, + 'applicationKeySecret': aks, + }).encode('utf-8'), + headers={ + 'Content-Type': 'application/json', + }) + AbemaTVBaseIE._USERTOKEN = user_data['token'] + + add_opener(self._downloader, AbemaLicenseHandler(self)) + return self._USERTOKEN + + def _get_media_token(self, invalidate=False, to_show=True): + if not invalidate and self._MEDIATOKEN: + return self._MEDIATOKEN + + AbemaTVBaseIE._MEDIATOKEN = self._download_json( + 'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False, + query={ + 'osName': 'android', + 'osVersion': '6.0.1', + 'osLang': 'ja_JP', + 'osTimezone': 'Asia/Tokyo', + 'appId': 'tv.abema', + 'appVersion': '3.27.1' + }, headers={ + 'Authorization': f'bearer {self._get_device_token()}', + })['token'] + + return self._MEDIATOKEN + + def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'): + return self._download_json( + f'https://api.abema.io/{endpoint}', video_id, query=query or {}, + note=note, + headers={ + 'Authorization': f'bearer {self._get_device_token()}', + }) + + def _extract_breadcrumb_list(self, webpage, video_id): + for jld in re.finditer( + r'(?is)
(.+?)
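
For context on what `_extract_breadcrumb_list` is scanning for: AbemaTV pages embed schema.org `BreadcrumbList` metadata in `application/ld+json` script tags, and the helper collects the breadcrumb names from those blobs. A minimal sketch of that parsing step, assuming standard schema.org markup — the sample page fragment, the simplified regex, and the variable names here are invented for illustration and are not the extractor's actual code:

```python
import json
import re

# Hypothetical page fragment in the shape the extractor scans for; real pages
# embed BreadcrumbList metadata inside <script type="application/ld+json"> tags.
webpage = '''
<script type="application/ld+json">
{"@type": "BreadcrumbList",
 "itemListElement": [
    {"@type": "ListItem", "position": 1, "name": "Anime"},
    {"@type": "ListItem", "position": 2, "name": "Some Series"}
 ]}
</script>
'''

for match in re.finditer(
        r'(?is)<script[^>]+type="application/ld\+json"[^>]*>(.+?)</script>', webpage):
    data = json.loads(match.group(1))
    if data.get('@type') != 'BreadcrumbList':
        continue  # skip unrelated JSON-LD blobs on the same page
    # Collect the human-readable crumb names in page order
    print([item.get('name') for item in data.get('itemListElement', [])])
    # -> ['Anime', 'Some Series']
```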