Upgrade yt_dlp and download script

This commit is contained in:
2025-05-02 16:11:08 -05:00
parent 3a2e8eeb08
commit d68d9ce4f9
1194 changed files with 60099 additions and 44436 deletions

View File

@@ -1,5 +1,6 @@
import re
import urllib.parse
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -24,8 +25,8 @@ class SlidesLiveIE(InfoExtractor):
'id': '38902413',
'ext': 'mp4',
'title': 'GCC IA16 backend',
'timestamp': 1648189972,
'upload_date': '20220325',
'timestamp': 1697793372,
'upload_date': '20231020',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:42',
'chapters': 'count:41',
@@ -41,8 +42,8 @@ class SlidesLiveIE(InfoExtractor):
'id': '38935785',
'ext': 'mp4',
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
'upload_date': '20211115',
'timestamp': 1636996003,
'upload_date': '20231020',
'timestamp': 1697807002,
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:640',
'chapters': 'count:639',
@@ -58,9 +59,9 @@ class SlidesLiveIE(InfoExtractor):
'id': '38973182',
'ext': 'mp4',
'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
'upload_date': '20220201',
'upload_date': '20231020',
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1643728135,
'timestamp': 1697822521,
'thumbnails': 'count:3',
'chapters': 'count:2',
'duration': 5889,
@@ -69,37 +70,22 @@ class SlidesLiveIE(InfoExtractor):
'skip_download': 'm3u8',
},
}, {
# service_name = youtube, only XML slides info
# formerly youtube, converted to native
'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
'info_dict': {
'id': 'jmg02wCJD5M',
'display_id': '38897546',
'id': '38897546',
'ext': 'mp4',
'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
'description': 'Watch full version of this video at https://slideslive.com/38897546.',
'channel_url': 'https://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
'channel': 'SlidesLive Videos - G1',
'channel_id': 'UCZWdAkNYFncuX0khyvhqnxw',
'uploader_id': 'UCZWdAkNYFncuX0khyvhqnxw',
'uploader': 'SlidesLive Videos - G1',
'uploader_url': 'http://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
'live_status': 'not_live',
'upload_date': '20160710',
'timestamp': 1618786715,
'duration': 6827,
'like_count': int,
'view_count': int,
'comment_count': int,
'channel_follower_count': int,
'age_limit': 0,
'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
'thumbnail': r're:^https?://.*\.jpg',
'upload_date': '20231029',
'timestamp': 1698588144,
'thumbnails': 'count:169',
'playable_in_embed': True,
'availability': 'unlisted',
'tags': [],
'categories': ['People & Blogs'],
'chapters': 'count:168',
'duration': 6827,
},
'params': {
'skip_download': 'm3u8',
},
}, {
# embed-only presentation, only XML slides info
@@ -110,8 +96,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Towards a Deep Network Architecture for Structured Smoothness',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:8',
'timestamp': 1629671508,
'upload_date': '20210822',
'timestamp': 1697803109,
'upload_date': '20231020',
'chapters': 'count:7',
'duration': 326,
},
@@ -127,8 +113,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'MoReL: Multi-omics Relational Learning',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:7',
'timestamp': 1654714970,
'upload_date': '20220608',
'timestamp': 1697824939,
'upload_date': '20231020',
'chapters': 'count:6',
'duration': 171,
},
@@ -144,8 +130,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Decentralized Attribution of Generative Models',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:16',
'timestamp': 1622806321,
'upload_date': '20210604',
'timestamp': 1697814901,
'upload_date': '20231020',
'chapters': 'count:15',
'duration': 306,
},
@@ -161,8 +147,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Efficient Active Search for Combinatorial Optimization Problems',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:9',
'timestamp': 1654714896,
'upload_date': '20220608',
'timestamp': 1697824757,
'upload_date': '20231020',
'chapters': 'count:8',
'duration': 295,
},
@@ -176,10 +162,10 @@ class SlidesLiveIE(InfoExtractor):
'id': '38979880',
'ext': 'mp4',
'title': 'The Representation Power of Neural Networks',
'timestamp': 1654714962,
'timestamp': 1697824919,
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:22',
'upload_date': '20220608',
'upload_date': '20231020',
'chapters': 'count:21',
'duration': 294,
},
@@ -199,10 +185,10 @@ class SlidesLiveIE(InfoExtractor):
'id': '38979682',
'ext': 'mp4',
'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
'timestamp': 1654714920,
'timestamp': 1697824815,
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:30',
'upload_date': '20220608',
'upload_date': '20231020',
'chapters': 'count:31',
'duration': 272,
},
@@ -212,8 +198,8 @@ class SlidesLiveIE(InfoExtractor):
'ext': 'mp4',
'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021',
'duration': 3,
'timestamp': 1654714920,
'upload_date': '20220608',
'timestamp': 1697824815,
'upload_date': '20231020',
},
}, {
'info_dict': {
@@ -221,8 +207,8 @@ class SlidesLiveIE(InfoExtractor):
'ext': 'mp4',
'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024',
'duration': 4,
'timestamp': 1654714920,
'upload_date': '20220608',
'timestamp': 1697824815,
'upload_date': '20231020',
},
}],
'params': {
@@ -241,10 +227,10 @@ class SlidesLiveIE(InfoExtractor):
'id': '38979481',
'ext': 'mp4',
'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
'timestamp': 1654714877,
'timestamp': 1697824716,
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:43',
'upload_date': '20220608',
'upload_date': '20231020',
'chapters': 'count:43',
'duration': 315,
},
@@ -254,8 +240,8 @@ class SlidesLiveIE(InfoExtractor):
'ext': 'mp4',
'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013',
'duration': 3,
'timestamp': 1654714877,
'upload_date': '20220608',
'timestamp': 1697824716,
'upload_date': '20231020',
},
}],
'params': {
@@ -274,10 +260,10 @@ class SlidesLiveIE(InfoExtractor):
'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
'uploader': 'SlidesLive Videos - A',
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
'uploader_url': 'http://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
'uploader_id': '@slideslivevideos-a6075',
'uploader_url': 'https://www.youtube.com/@slideslivevideos-a6075',
'upload_date': '20200903',
'timestamp': 1602599092,
'timestamp': 1697805922,
'duration': 942,
'age_limit': 0,
'live_status': 'not_live',
@@ -302,8 +288,8 @@ class SlidesLiveIE(InfoExtractor):
'id': '38983994',
'ext': 'mp4',
'title': 'Zero-Shot AutoML with Pretrained Models',
'timestamp': 1662384834,
'upload_date': '20220905',
'timestamp': 1697826708,
'upload_date': '20231020',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:23',
'chapters': 'count:22',
@@ -335,8 +321,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Towards a Deep Network Architecture for Structured Smoothness',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:8',
'timestamp': 1629671508,
'upload_date': '20210822',
'timestamp': 1697803109,
'upload_date': '20231020',
'chapters': 'count:7',
'duration': 326,
},
@@ -385,7 +371,7 @@ class SlidesLiveIE(InfoExtractor):
if not line.startswith('#EXT-SL-'):
continue
tag, _, value = line.partition(':')
key = lookup.get(tag.lstrip('#EXT-SL-'))
key = lookup.get(tag[8:])
if not key:
continue
m3u8_dict[key] = value
@@ -469,11 +455,12 @@ class SlidesLiveIE(InfoExtractor):
slides = self._download_xml(
player_info['slides_xml_url'], video_id, fatal=False,
note='Downloading slides XML', errnote='Failed to download slides info')
slide_url_template = 'https://cdn.slideslive.com/data/presentations/%s/slides/big/%s%s'
for slide_id, slide in enumerate(slides.findall('./slide') if slides else [], 1):
slides_info.append((
slide_id, xpath_text(slide, './slideName', 'name'), '.jpg',
int_or_none(xpath_text(slide, './timeSec', 'time'))))
if isinstance(slides, xml.etree.ElementTree.Element):
slide_url_template = 'https://cdn.slideslive.com/data/presentations/%s/slides/big/%s%s'
for slide_id, slide in enumerate(slides.findall('./slide')):
slides_info.append((
slide_id, xpath_text(slide, './slideName', 'name'), '.jpg',
int_or_none(xpath_text(slide, './timeSec', 'time'))))
chapters, thumbnails = [], []
if url_or_none(player_info.get('thumbnail')):
@@ -528,7 +515,7 @@ class SlidesLiveIE(InfoExtractor):
if service_name == 'vimeo':
info['url'] = smuggle_url(
f'https://player.vimeo.com/video/{service_id}',
{'http_headers': {'Referer': url}})
{'referer': url})
video_slides = traverse_obj(slides, ('slides', ..., 'video', 'id'))
if not video_slides:
@@ -545,7 +532,7 @@ class SlidesLiveIE(InfoExtractor):
}, note='Downloading video slides info', errnote='Failed to download video slides info') or {}
for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...)), 1):
if not traverse_obj(slide, ('video', 'service')) == 'yoda':
if traverse_obj(slide, ('video', 'service')) != 'yoda':
continue
video_path = traverse_obj(slide, ('video', 'id'))
cdn_hostname = traverse_obj(service_data, (