Upgrade yt_dlp and download script

This commit is contained in:
2025-05-02 16:11:08 -05:00
parent 3a2e8eeb08
commit d68d9ce4f9
1194 changed files with 60099 additions and 44436 deletions

View File

@@ -21,16 +21,20 @@ class TubeTuGrazBaseIE(InfoExtractor):
if not urlh:
return
content, urlh = self._download_webpage_handle(
response = self._download_webpage_handle(
urlh.url, None, fatal=False, headers={'referer': urlh.url},
note='logging in', errnote='unable to log in',
data=urlencode_postdata({
'lang': 'de',
'_eventId_proceed': '',
'j_username': username,
'j_password': password
'j_password': password,
}))
if not urlh or urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
if not response:
return
content, urlh = response
if urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
return
if not self._html_search_regex(
@@ -39,7 +43,7 @@ class TubeTuGrazBaseIE(InfoExtractor):
self.report_warning('unable to login: incorrect password')
return
content, urlh = self._download_webpage_handle(
urlh = self._request_webpage(
urlh.url, None, fatal=False, headers={'referer': urlh.url},
note='logging in with TFA', errnote='unable to log in with TFA',
data=urlencode_postdata({
@@ -53,33 +57,33 @@ class TubeTuGrazBaseIE(InfoExtractor):
self.report_warning('unable to login: incorrect TFA code')
def _extract_episode(self, episode_info):
id = episode_info.get('id')
video_id = episode_info.get('id')
formats = list(self._extract_formats(
traverse_obj(episode_info, ('mediapackage', 'media', 'track')), id))
traverse_obj(episode_info, ('mediapackage', 'media', 'track')), video_id))
title = traverse_obj(episode_info, ('mediapackage', 'title'), 'dcTitle')
series_title = traverse_obj(episode_info, ('mediapackage', 'seriestitle'))
creator = ', '.join(variadic(traverse_obj(
episode_info, ('mediapackage', 'creators', 'creator'), 'dcCreator', default='')))
return {
'id': id,
'id': video_id,
'title': title,
'creator': creator or None,
'duration': traverse_obj(episode_info, ('mediapackage', 'duration'), 'dcExtent'),
'series': series_title,
'series_id': traverse_obj(episode_info, ('mediapackage', 'series'), 'dcIsPartOf'),
'episode': series_title and title,
'formats': formats
'formats': formats,
}
def _set_format_type(self, formats, type):
def _set_format_type(self, formats, fmt_type):
for f in formats:
f['format_note'] = type
if not type.startswith(self._FORMAT_TYPES[0]):
f['format_note'] = fmt_type
if not fmt_type.startswith(self._FORMAT_TYPES[0]):
f['preference'] = -2
return formats
def _extract_formats(self, format_list, id):
def _extract_formats(self, format_list, video_id):
has_hls, has_dash = False, False
for format_info in format_list or []:
@@ -87,7 +91,7 @@ class TubeTuGrazBaseIE(InfoExtractor):
if url is None:
continue
type = format_info.get('type') or 'unknown'
fmt_type = format_info.get('type') or 'unknown'
transport = (format_info.get('transport') or 'https').lower()
if transport == 'https':
@@ -100,10 +104,10 @@ class TubeTuGrazBaseIE(InfoExtractor):
}]
elif transport == 'hls':
has_hls, formats = True, self._extract_m3u8_formats(
url, id, 'mp4', fatal=False, note=f'downloading {type} HLS manifest')
url, video_id, 'mp4', fatal=False, note=f'downloading {fmt_type} HLS manifest')
elif transport == 'dash':
has_dash, formats = True, self._extract_mpd_formats(
url, id, fatal=False, note=f'downloading {type} DASH manifest')
url, video_id, fatal=False, note=f'downloading {fmt_type} DASH manifest')
else:
# RTMP, HDS, SMOOTH, and unknown formats
# - RTMP url fails on every tested entry until now
@@ -111,21 +115,21 @@ class TubeTuGrazBaseIE(InfoExtractor):
# - SMOOTH url 404's on every tested entry until now
continue
yield from self._set_format_type(formats, type)
yield from self._set_format_type(formats, fmt_type)
# TODO: Add test for these
for type in self._FORMAT_TYPES:
for fmt_type in self._FORMAT_TYPES:
if not has_hls:
hls_formats = self._extract_m3u8_formats(
f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{id}_{type}.smil/playlist.m3u8',
id, 'mp4', fatal=False, note=f'Downloading {type} HLS manifest', errnote=False) or []
yield from self._set_format_type(hls_formats, type)
f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{video_id}_{fmt_type}.smil/playlist.m3u8',
video_id, 'mp4', fatal=False, note=f'Downloading {fmt_type} HLS manifest', errnote=False) or []
yield from self._set_format_type(hls_formats, fmt_type)
if not has_dash:
dash_formats = self._extract_mpd_formats(
f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{id}_{type}.smil/manifest_mpm4sav_mvlist.mpd',
id, fatal=False, note=f'Downloading {type} DASH manifest', errnote=False)
yield from self._set_format_type(dash_formats, type)
f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{video_id}_{fmt_type}.smil/manifest_mpm4sav_mvlist.mpd',
video_id, fatal=False, note=f'Downloading {fmt_type} DASH manifest', errnote=False)
yield from self._set_format_type(dash_formats, fmt_type)
class TubeTuGrazIE(TubeTuGrazBaseIE):
@@ -148,7 +152,7 @@ class TubeTuGrazIE(TubeTuGrazBaseIE):
'creator': 'Safran C',
'duration': 3295818,
'series_id': 'b1192fff-2aa7-4bf0-a5cf-7b15c3bd3b34',
}
},
}, {
'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=2df6d787-e56a-428d-8ef4-d57f07eef238',
'md5': 'de0d854a56bf7318d2b693fe1adb89a5',
@@ -158,7 +162,7 @@ class TubeTuGrazIE(TubeTuGrazBaseIE):
'ext': 'mp4',
},
'expected_warnings': ['Extractor failed to obtain "title"'],
}
},
]
def _real_extract(self, url):
@@ -193,7 +197,7 @@ class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
'series': '[209351] Strassenwesen',
'creator': 'Neuhold R',
'duration': 6127024,
}
},
},
{
'info_dict': {
@@ -205,7 +209,7 @@ class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
'series': '[209351] Strassenwesen',
'creator': 'Neuhold R',
'duration': 5374422,
}
},
},
{
'info_dict': {
@@ -217,7 +221,7 @@ class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
'series': '[209351] Strassenwesen',
'creator': 'Neuhold R',
'duration': 5566404,
}
},
},
{
'info_dict': {
@@ -229,24 +233,25 @@ class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
'series': '[209351] Strassenwesen',
'creator': 'Neuhold R',
'duration': 5420200,
}
}
},
},
],
'min_playlist_count': 4
'playlist_mincount': 4,
}]
def _real_extract(self, url):
id = self._match_id(url)
episodes_data = self._download_json(self._API_EPISODE, id, query={'sid': id}, note='Downloading episode list')
playlist_id = self._match_id(url)
episodes_data = self._download_json(
self._API_EPISODE, playlist_id, query={'sid': playlist_id}, note='Downloading episode list')
series_data = self._download_json(
'https://tube.tugraz.at/series/series.json', id, fatal=False,
'https://tube.tugraz.at/series/series.json', playlist_id, fatal=False,
note='downloading series metadata', errnote='failed to download series metadata',
query={
'seriesId': id,
'seriesId': playlist_id,
'count': 1,
'sort': 'TITLE'
'sort': 'TITLE',
})
return self.playlist_result(
map(self._extract_episode, episodes_data['search-results']['result']), id,
map(self._extract_episode, episodes_data['search-results']['result']), playlist_id,
traverse_obj(series_data, ('catalogs', 0, 'http://purl.org/dc/terms/', 'title', 0, 'value')))