Upgrade yt_dlp and download script

This commit is contained in:
2025-05-02 16:11:08 -05:00
parent 3a2e8eeb08
commit d68d9ce4f9
1194 changed files with 60099 additions and 44436 deletions

View File

@@ -2,13 +2,12 @@ import itertools
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
clean_html,
determine_ext,
dict_get,
extract_attributes,
ExtractorError,
float_or_none,
int_or_none,
parse_duration,
@@ -21,18 +20,18 @@ from ..utils import (
class XHamsterIE(InfoExtractor):
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com|xhvid\.com)'
_VALID_URL = r'''(?x)
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.(?:com|desi)|xhday\.com|xhvid\.com)'
_VALID_URL = rf'''(?x)
https?://
(?:.+?\.)?%s/
(?:[^/?#]+\.)?{_DOMAINS}/
(?:
movies/(?P<id>[\dA-Za-z]+)/(?P<display_id>[^/]*)\.html|
videos/(?P<display_id_2>[^/]*)-(?P<id_2>[\dA-Za-z]+)
)
''' % _DOMAINS
'''
_TESTS = [{
'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
'md5': '34e1ab926db5dc2750fed9e1f34304bb',
'md5': 'e009ea6b849b129e3bebaeb9cf0dee51',
'info_dict': {
'id': '1509445',
'display_id': 'femaleagent-shy-beauty-takes-the-bait',
@@ -44,6 +43,11 @@ class XHamsterIE(InfoExtractor):
'uploader_id': 'ruseful2011',
'duration': 893,
'age_limit': 18,
'thumbnail': 'https://thumb-nss.xhcdn.com/a/u3Vr5F2vvcU3yK59_jJqVA/001/509/445/1280x720.8.jpg',
'uploader_url': 'https://xhamster.com/users/ruseful2011',
'description': '',
'view_count': int,
'comment_count': int,
},
}, {
'url': 'https://xhamster.com/videos/britney-spears-sexy-booty-2221348?hd=',
@@ -57,6 +61,10 @@ class XHamsterIE(InfoExtractor):
'uploader': 'jojo747400',
'duration': 200,
'age_limit': 18,
'description': '',
'view_count': int,
'thumbnail': 'https://thumb-nss.xhcdn.com/a/kk5nio_iR-h4Z3frfVtoDw/002/221/348/1280x720.4.jpg',
'comment_count': int,
},
'params': {
'skip_download': True,
@@ -74,6 +82,11 @@ class XHamsterIE(InfoExtractor):
'uploader_id': 'parejafree',
'duration': 72,
'age_limit': 18,
'comment_count': int,
'uploader_url': 'https://xhamster.com/users/parejafree',
'description': '',
'view_count': int,
'thumbnail': 'https://thumb-nss.xhcdn.com/a/xc8MSwVKcsQeRRiTT-saMQ/005/667/973/1280x720.2.jpg',
},
'params': {
'skip_download': True,
@@ -123,6 +136,9 @@ class XHamsterIE(InfoExtractor):
}, {
'url': 'https://xhvid.com/videos/lk-mm-xhc6wn6',
'only_matching': True,
}, {
'url': 'https://xhamster20.desi/videos/my-verification-video-scottishmistress23-11937369',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -177,7 +193,7 @@ class XHamsterIE(InfoExtractor):
continue
format_urls.add(format_url)
formats.append({
'format_id': '%s-%s' % (format_id, quality),
'format_id': f'{format_id}-{quality}',
'url': format_url,
'ext': determine_ext(format_url, 'mp4'),
'height': get_height(quality),
@@ -228,7 +244,7 @@ class XHamsterIE(InfoExtractor):
or str_or_none(standard_format.get('label'))
or '')
formats.append({
'format_id': '%s-%s' % (format_id, quality),
'format_id': f'{format_id}-{quality}',
'url': standard_url,
'ext': ext,
'height': get_height(quality),
@@ -245,7 +261,7 @@ class XHamsterIE(InfoExtractor):
if not isinstance(c, dict):
continue
c_name = c.get('name')
if isinstance(c_name, compat_str):
if isinstance(c_name, str):
categories.append(c_name)
else:
categories = None
@@ -258,7 +274,7 @@ class XHamsterIE(InfoExtractor):
'description': video.get('description'),
'timestamp': int_or_none(video.get('created')),
'uploader': try_get(
video, lambda x: x['author']['name'], compat_str),
video, lambda x: x['author']['name'], str),
'uploader_url': uploader_url,
'uploader_id': uploader_url.split('/')[-1] if uploader_url else None,
'thumbnail': video.get('thumbURL'),
@@ -268,7 +284,7 @@ class XHamsterIE(InfoExtractor):
video, lambda x: x['rating']['likes'], int)),
'dislike_count': int_or_none(try_get(
video, lambda x: x['rating']['dislikes'], int)),
'comment_count': int_or_none(video.get('views')),
'comment_count': int_or_none(video.get('comments')),
'age_limit': age_limit if age_limit is not None else 18,
'categories': categories,
'formats': formats,
@@ -372,7 +388,7 @@ class XHamsterIE(InfoExtractor):
class XHamsterEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS
_VALID_URL = rf'https?://(?:[^/?#]+\.)?{XHamsterIE._DOMAINS}/xembed\.php\?video=(?P<id>\d+)'
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1']
_TEST = {
'url': 'http://xhamster.com/xembed.php?video=3328539',
@@ -385,7 +401,7 @@ class XHamsterEmbedIE(InfoExtractor):
'uploader': 'ManyakisArt',
'duration': 5,
'age_limit': 18,
}
},
}
def _real_extract(self, url):
@@ -394,20 +410,20 @@ class XHamsterEmbedIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r'href="(https?://xhamster\.com/(?:movies/{0}/[^"]*\.html|videos/[^/]*-{0})[^"]*)"'.format(video_id),
rf'href="(https?://xhamster\.com/(?:movies/{video_id}/[^"]*\.html|videos/[^/]*-{video_id})[^"]*)"',
webpage, 'xhamster url', default=None)
if not video_url:
vars = self._parse_json(
player_vars = self._parse_json(
self._search_regex(r'vars\s*:\s*({.+?})\s*,\s*\n', webpage, 'vars'),
video_id)
video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))
video_url = dict_get(player_vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))
return self.url_result(video_url, 'XHamster')
class XHamsterUserIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)' % XHamsterIE._DOMAINS
_VALID_URL = rf'https?://(?:[^/?#]+\.)?{XHamsterIE._DOMAINS}/(?:(?P<user>users)|creators)/(?P<id>[^/?#&]+)'
_TESTS = [{
# Paginated user profile
'url': 'https://xhamster.com/users/netvideogirls/videos',
@@ -422,6 +438,12 @@ class XHamsterUserIE(InfoExtractor):
'id': 'firatkaan',
},
'playlist_mincount': 1,
}, {
'url': 'https://xhamster.com/creators/squirt-orgasm-69',
'info_dict': {
'id': 'squirt-orgasm-69',
},
'playlist_mincount': 150,
}, {
'url': 'https://xhday.com/users/mobhunter',
'only_matching': True,
@@ -430,11 +452,12 @@ class XHamsterUserIE(InfoExtractor):
'only_matching': True,
}]
def _entries(self, user_id):
next_page_url = 'https://xhamster.com/users/%s/videos/1' % user_id
def _entries(self, user_id, is_user):
prefix, suffix = ('users', 'videos') if is_user else ('creators', 'exclusive')
next_page_url = f'https://xhamster.com/{prefix}/{user_id}/{suffix}/1'
for pagenum in itertools.count(1):
page = self._download_webpage(
next_page_url, user_id, 'Downloading page %s' % pagenum)
next_page_url, user_id, f'Downloading page {pagenum}')
for video_tag in re.findall(
r'(<a[^>]+class=["\'].*?\bvideo-thumb__image-container[^>]+>)',
page):
@@ -454,5 +477,5 @@ class XHamsterUserIE(InfoExtractor):
break
def _real_extract(self, url):
user_id = self._match_id(url)
return self.playlist_result(self._entries(user_id), user_id)
user, user_id = self._match_valid_url(url).group('user', 'id')
return self.playlist_result(self._entries(user_id, bool(user)), user_id)