Upgrade yt_dlp and download script

This commit is contained in:
2025-05-02 16:11:08 -05:00
parent 3a2e8eeb08
commit d68d9ce4f9
1194 changed files with 60099 additions and 44436 deletions

View File

@@ -1,7 +1,7 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..compat import compat_str
from ..networking import HEADRequest, Request
from ..utils import (
ExtractorError,
@@ -22,7 +22,7 @@ from ..utils import (
def _media_xml_tag(tag):
return '{http://search.yahoo.com/mrss/}%s' % tag
return f'{{http://search.yahoo.com/mrss/}}{tag}'
class MTVServicesInfoExtractor(InfoExtractor):
@@ -42,7 +42,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
return self._FEED_URL
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
search_path = '{}/{}'.format(_media_xml_tag('group'), _media_xml_tag('thumbnail'))
thumb_node = itemdoc.find(search_path)
if thumb_node is None:
return None
@@ -60,7 +60,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
response = self._request_webpage(req, mtvn_id, 'Resolving url')
url = response.url
# Transform the url to get the best quality:
url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, count=1)
return [{'url': url, 'ext': 'mp4'}]
def _extract_video_formats(self, mdoc, mtvn_id, video_id):
@@ -86,7 +86,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
rtmp_video_url = rendition.find('./src').text
if 'error_not_available.swf' in rtmp_video_url:
raise ExtractorError(
'%s said: video is not available' % self.IE_NAME,
f'{self.IE_NAME} said: video is not available',
expected=True)
if rtmp_video_url.endswith('siteunavail.png'):
continue
@@ -117,8 +117,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
if ext == 'cea-608':
ext = 'scc'
subtitles.setdefault(lang, []).append({
'url': compat_str(sub_src),
'ext': ext
'url': str(sub_src),
'ext': ext,
})
return subtitles
@@ -126,7 +126,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
uri = itemdoc.find('guid').text
video_id = self._id_from_uri(uri)
self.report_extraction(video_id)
content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content')))
content_el = itemdoc.find('{}/{}'.format(_media_xml_tag('group'), _media_xml_tag('content')))
mediagen_url = self._remove_template_parameter(content_el.attrib['url'])
mediagen_url = mediagen_url.replace('device={device}', '')
if 'acceptMethods' not in mediagen_url:
@@ -137,14 +137,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
mediagen_doc = self._download_xml(
mediagen_url, video_id, 'Downloading video urls', fatal=False)
if mediagen_doc is False:
if not isinstance(mediagen_doc, xml.etree.ElementTree.Element):
return None
item = mediagen_doc.find('./video/item')
if item is not None and item.get('type') == 'text':
message = '%s returned error: ' % self.IE_NAME
message = f'{self.IE_NAME} returned error: '
if item.get('code') is not None:
message += '%s - ' % item.get('code')
message += '{} - '.format(item.get('code'))
message += item.text
raise ExtractorError(message, expected=True)
@@ -183,7 +183,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
episode = episode.text if episode is not None else None
if season and episode:
# episode number includes season, so remove it
episode = re.sub(r'^%s' % season, '', episode)
episode = re.sub(rf'^{season}', '', episode)
# This is a short id that's used in the webpage urls
mtvn_id = None
@@ -254,7 +254,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
feed_url = try_get(
triforce_feed, lambda x: x['manifest']['zones'][data_zone]['feed'],
compat_str)
str)
if not feed_url:
return
@@ -262,7 +262,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
if not feed:
return
return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
return try_get(feed, lambda x: x['result']['data']['id'], str)
@staticmethod
def _extract_child_with_type(parent, t):
@@ -319,8 +319,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
title = url_basename(url)
webpage = self._download_webpage(url, title)
mgid = self._extract_mgid(webpage)
videos_info = self._get_videos_info(mgid, url=url)
return videos_info
return self._get_videos_info(mgid, url=url)
class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
@@ -345,7 +344,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
def _get_feed_url(self, uri, url=None):
    """Download the edge configuration for *uri* and return its feed URL.

    The configuration JSON is requested from media.mtvnservices.com with
    *uri* interpolated into the query string. Its ``feedWithQueryParams``
    entry is passed through ``_remove_template_parameter`` and returned.

    *url* is accepted but not used in this method.
    """
    video_id = self._id_from_uri(uri)
    config = self._download_json(
        f'http://media.mtvnservices.com/pmt/e1/access/index.html?uri={uri}&configtype=edge', video_id)
    return self._remove_template_parameter(config['feedWithQueryParams'])
def _real_extract(self, url):
@@ -442,14 +441,15 @@ class MTVVideoIE(MTVServicesInfoExtractor):
r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage)
if m_vevo:
vevo_id = m_vevo.group(1)
self.to_screen('Vevo video detected: %s' % vevo_id)
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
self.to_screen(f'Vevo video detected: {vevo_id}')
return self.url_result(f'vevo:{vevo_id}', ie='Vevo')
uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri')
return self._get_videos_info(uri)
class MTVDEIE(MTVServicesInfoExtractor):
_WORKING = False
IE_NAME = 'mtv.de'
_VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:musik/videoclips|folgen|news)/(?P<id>[0-9a-z]+)'
_TESTS = [{
@@ -575,9 +575,9 @@ class MTVItaliaProgrammaIE(MTVItaliaIE): # XXX: Do not subclass from concrete I
def _get_entries(self, title, url):
while True:
pg = self._search_regex(r'/(\d+)$', url, 'entries', '1')
entries = self._download_json(url, title, 'page %s' % pg)
entries = self._download_json(url, title, f'page {pg}')
url = try_get(
entries, lambda x: x['result']['nextPageURL'], compat_str)
entries, lambda x: x['result']['nextPageURL'], str)
entries = try_get(
entries, (
lambda x: x['result']['data']['items'],
@@ -596,15 +596,15 @@ class MTVItaliaProgrammaIE(MTVItaliaIE): # XXX: Do not subclass from concrete I
info = self._download_json(info_url, video_id).get('manifest')
redirect = try_get(
info, lambda x: x['newLocation']['url'], compat_str)
info, lambda x: x['newLocation']['url'], str)
if redirect:
return self.url_result(redirect)
title = info.get('title')
video_id = try_get(
info, lambda x: x['reporting']['itemId'], compat_str)
info, lambda x: x['reporting']['itemId'], str)
parent_id = try_get(
info, lambda x: x['reporting']['parentId'], compat_str)
info, lambda x: x['reporting']['parentId'], str)
playlist_url = current_url = None
for z in (info.get('zones') or {}).values():
@@ -628,15 +628,15 @@ class MTVItaliaProgrammaIE(MTVItaliaIE): # XXX: Do not subclass from concrete I
info, (
lambda x: x['title'],
lambda x: x['headline']),
compat_str)
description = try_get(info, lambda x: x['content'], compat_str)
str)
description = try_get(info, lambda x: x['content'], str)
if current_url:
season = try_get(
self._download_json(playlist_url, video_id, 'Seasons info'),
lambda x: x['result']['data'], dict)
current = try_get(
season, lambda x: x['currentSeason'], compat_str)
season, lambda x: x['currentSeason'], str)
seasons = try_get(
season, lambda x: x['seasons'], list) or []