restructured manifest and plugins loading; updated plugins

This commit is contained in:
2025-12-29 22:50:05 -06:00
parent c74f97aca7
commit 21120cd61e
324 changed files with 18088 additions and 15974 deletions

View File

@@ -1,5 +1,8 @@
import base64
import codecs
import itertools
import re
import string
from .common import InfoExtractor
from ..utils import (
@@ -10,15 +13,57 @@ from ..utils import (
extract_attributes,
float_or_none,
int_or_none,
join_nonempty,
parse_duration,
str_or_none,
try_call,
try_get,
unified_strdate,
url_or_none,
urljoin,
)
def to_signed_32(n):
    """Reduce *n* modulo 2**32 while keeping the sign of *n*.

    Non-negative input maps into ``[0, 2**32)`` and negative input into
    ``(-2**32, 0]``.

    NOTE: despite the name, the result is not confined to the two's-complement
    int32 range; it is only guaranteed to be congruent to *n* modulo 2**32.
    """
    modulus = 2**32
    if n < 0:
        # A negative modulus makes Python's % yield a non-positive remainder
        return n % -modulus
    return n % modulus
class _ByteGenerator:
    """Iterator yielding one pseudo-random byte per step.

    The algorithm is selected by ``algo_id`` (resolved to the ``_algo<algo_id>``
    method) and seeded with ``seed``. Every step advances the internal 32-bit
    state and returns the low 8 bits of the algorithm's output.

    Raises ExtractorError if no ``_algo<algo_id>`` method exists.
    """

    def __init__(self, algo_id, seed):
        try:
            self._algorithm = getattr(self, f'_algo{algo_id}')
        except AttributeError:
            # The failed attribute lookup is an implementation detail; do not chain it
            raise ExtractorError(f'Unknown algorithm ID: {algo_id}') from None
        self._s = to_signed_32(seed)

    def _algo1(self, s):
        # LCG (a=1664525, c=1013904223, m=2^32)
        # Ref: https://en.wikipedia.org/wiki/Linear_congruential_generator
        s = self._s = to_signed_32(s * 1664525 + 1013904223)
        return s

    def _algo2(self, s):
        # xorshift32
        # Ref: https://en.wikipedia.org/wiki/Xorshift
        s = to_signed_32(s ^ (s << 13))
        # Mask to 32 bits first so that the right shift is a logical (unsigned) one
        s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 17))
        s = self._s = to_signed_32(s ^ (s << 5))
        return s

    def _algo3(self, s):
        # Weyl Sequence (k≈2^32*φ, m=2^32) + MurmurHash3 (fmix32)
        # Ref: https://en.wikipedia.org/wiki/Weyl_sequence
        #      https://commons.apache.org/proper/commons-codec/jacoco/org.apache.commons.codec.digest/MurmurHash3.java.html
        # Only the Weyl increment is stored as state; the mixed value is output only
        s = self._s = to_signed_32(s + 0x9e3779b9)
        s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 16))
        s = to_signed_32(s * to_signed_32(0x85ebca77))
        s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 13))
        s = to_signed_32(s * to_signed_32(0xc2b2ae3d))
        return to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 16))

    def __iter__(self):
        # Completes the iterator protocol so instances work with for/zip/islice
        return self

    def __next__(self):
        return self._algorithm(self._s) & 0xFF
class XHamsterIE(InfoExtractor):
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.(?:com|desi)|xhday\.com|xhvid\.com)'
_VALID_URL = rf'''(?x)
@@ -43,7 +88,7 @@ class XHamsterIE(InfoExtractor):
'uploader_id': 'ruseful2011',
'duration': 893,
'age_limit': 18,
'thumbnail': 'https://thumb-nss.xhcdn.com/a/u3Vr5F2vvcU3yK59_jJqVA/001/509/445/1280x720.8.jpg',
'thumbnail': r're:https?://.+\.jpg',
'uploader_url': 'https://xhamster.com/users/ruseful2011',
'description': '',
'view_count': int,
@@ -63,11 +108,12 @@ class XHamsterIE(InfoExtractor):
'age_limit': 18,
'description': '',
'view_count': int,
'thumbnail': 'https://thumb-nss.xhcdn.com/a/kk5nio_iR-h4Z3frfVtoDw/002/221/348/1280x720.4.jpg',
'thumbnail': r're:https?://.+\.jpg',
'comment_count': int,
},
'params': {
'skip_download': True,
'extractor_args': {'generic': {'impersonate': ['chrome']}},
'skip_download': 'm3u8',
},
}, {
# empty seo, unavailable via new URL schema
@@ -86,11 +132,9 @@ class XHamsterIE(InfoExtractor):
'uploader_url': 'https://xhamster.com/users/parejafree',
'description': '',
'view_count': int,
'thumbnail': 'https://thumb-nss.xhcdn.com/a/xc8MSwVKcsQeRRiTT-saMQ/005/667/973/1280x720.2.jpg',
},
'params': {
'skip_download': True,
'thumbnail': r're:https?://.+\.jpg',
},
'skip': 'Invalid URL',
}, {
# mobile site
'url': 'https://m.xhamster.com/videos/cute-teen-jacqueline-solo-masturbation-8559111',
@@ -141,6 +185,43 @@ class XHamsterIE(InfoExtractor):
'only_matching': True,
}]
# Key that _decipher_format_url cycles over byte-wise for 'xor'-type ciphertexts
_XOR_KEY = b'xh7999'
def _decipher_format_url(self, format_url, format_id):
    """Decipher an obfuscated format URL.

    Two encodings are handled:
      * a hex string: byte 0 is the `_ByteGenerator` algorithm ID, bytes 1-4
        are its seed (little-endian, signed) and the remaining bytes are the
        URL XORed with the generated byte stream
      * base64 of ``<cipher_type>_<ciphertext>``, where the cipher type is
        ``xor`` (repeating `_XOR_KEY`) or ``rot13``

    @param format_url   The obfuscated URL string
    @param format_id    Format identifier, only used in warning messages
    @returns            The deciphered URL, or None (after reporting a warning)
                        if the URL could not be deciphered
    """
    failure_msg = f'Skipping format "{format_id}": failed to decipher URL'

    # all() is vacuously true for an empty string, so require content explicitly
    if format_url and all(char in string.hexdigits for char in format_url):
        try:
            byte_data = bytes.fromhex(format_url)
        except ValueError:
            # Odd number of hex digits
            byte_data = b''
        # 1 byte algorithm ID + 4 bytes seed must be present
        if len(byte_data) < 5:
            self.report_warning(failure_msg)
            return None

        algo_id = byte_data[0]
        seed = int.from_bytes(byte_data[1:5], byteorder='little', signed=True)
        try:
            byte_gen = _ByteGenerator(algo_id, seed)
        except ExtractorError:
            # An unknown algorithm should only cost this format, not the whole extraction
            self.report_warning(
                f'Skipping format "{format_id}": unsupported algorithm ID "{algo_id}"')
            return None

        return bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1')

    # Falls back to three Nones when try_call yields a falsy result
    # (presumably on a base64/UTF-8 decoding error)
    cipher_type, _, ciphertext = try_call(
        lambda: base64.b64decode(format_url).decode().partition('_')) or [None] * 3

    if not cipher_type or not ciphertext:
        self.report_warning(failure_msg)
        return None

    if cipher_type == 'xor':
        return bytes(
            a ^ b for a, b in
            zip(ciphertext.encode(), itertools.cycle(self._XOR_KEY))).decode()

    if cipher_type == 'rot13':
        return codecs.decode(ciphertext, cipher_type)

    self.report_warning(f'Skipping format "{format_id}": unsupported cipher type "{cipher_type}"')
    return None
def _fixup_formats(self, formats):
for f in formats:
if f.get('vcodec'):
continue
for vcodec in ('av1', 'h264'):
if any(f'.{vcodec}.' in f_url for f_url in (f['url'], f.get('manifest_url', ''))):
f['vcodec'] = vcodec
break
return formats
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id') or mobj.group('id_2')
@@ -211,7 +292,7 @@ class XHamsterIE(InfoExtractor):
hls_url = hls_sources.get(hls_format_key)
if not hls_url:
continue
hls_url = urljoin(url, hls_url)
hls_url = self._decipher_format_url(hls_url, f'hls-{hls_format_key}')
if not hls_url or hls_url in format_urls:
continue
format_urls.add(hls_url)
@@ -220,7 +301,7 @@ class XHamsterIE(InfoExtractor):
m3u8_id='hls', fatal=False))
standard_sources = xplayer_sources.get('standard')
if isinstance(standard_sources, dict):
for format_id, formats_list in standard_sources.items():
for identifier, formats_list in standard_sources.items():
if not isinstance(formats_list, list):
continue
for standard_format in formats_list:
@@ -230,7 +311,11 @@ class XHamsterIE(InfoExtractor):
standard_url = standard_format.get(standard_format_key)
if not standard_url:
continue
standard_url = urljoin(url, standard_url)
quality = (str_or_none(standard_format.get('quality'))
or str_or_none(standard_format.get('label'))
or '')
format_id = join_nonempty(identifier, quality)
standard_url = self._decipher_format_url(standard_url, format_id)
if not standard_url or standard_url in format_urls:
continue
format_urls.add(standard_url)
@@ -240,11 +325,9 @@ class XHamsterIE(InfoExtractor):
standard_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
quality = (str_or_none(standard_format.get('quality'))
or str_or_none(standard_format.get('label'))
or '')
formats.append({
'format_id': f'{format_id}-{quality}',
'format_id': format_id,
'url': standard_url,
'ext': ext,
'height': get_height(quality),
@@ -287,7 +370,8 @@ class XHamsterIE(InfoExtractor):
'comment_count': int_or_none(video.get('comments')),
'age_limit': age_limit if age_limit is not None else 18,
'categories': categories,
'formats': formats,
'formats': self._fixup_formats(formats),
'_format_sort_fields': ('res', 'proto', 'tbr'),
}
# Old layout fallback
@@ -390,19 +474,48 @@ class XHamsterIE(InfoExtractor):
class XHamsterEmbedIE(InfoExtractor):
_VALID_URL = rf'https?://(?:[^/?#]+\.)?{XHamsterIE._DOMAINS}/xembed\.php\?video=(?P<id>\d+)'
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1']
_TEST = {
_TESTS = [{
'url': 'http://xhamster.com/xembed.php?video=3328539',
'info_dict': {
'id': '3328539',
'ext': 'mp4',
'title': 'Pen Masturbation',
'comment_count': int,
'description': '',
'display_id': 'pen-masturbation',
'timestamp': 1406581861,
'upload_date': '20140728',
'uploader': 'ManyakisArt',
'duration': 5,
'age_limit': 18,
'thumbnail': r're:https?://.+\.jpg',
'uploader_id': 'manyakisart',
'uploader_url': 'https://xhamster.com/users/manyakisart',
'view_count': int,
},
}
}]
_WEBPAGE_TESTS = [{
# FIXME: Embed detection
'url': 'https://xhamster.com/awards/2023',
'info_dict': {
'id': 'xh2VnYn',
'ext': 'mp4',
'title': 'xHamster Awards 2023 - The Winners',
'age_limit': 18,
'comment_count': int,
'description': '',
'display_id': 'xhamster-awards-2023-the-winners',
'duration': 292,
'thumbnail': r're:https?://ic-vt-nss\.xhcdn\.com/.+',
'timestamp': 1700122082,
'upload_date': '20231116',
'uploader': 'xHamster',
'uploader_id': 'xhamster',
'uploader_url': 'https://xhamster.com/users/xhamster',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -437,13 +550,13 @@ class XHamsterUserIE(InfoExtractor):
'info_dict': {
'id': 'firatkaan',
},
'playlist_mincount': 1,
'playlist_mincount': 0,
}, {
'url': 'https://xhamster.com/creators/squirt-orgasm-69',
'info_dict': {
'id': 'squirt-orgasm-69',
},
'playlist_mincount': 150,
'playlist_mincount': 46,
}, {
'url': 'https://xhday.com/users/mobhunter',
'only_matching': True,