Upgraded youtube_download plugin

This commit is contained in:
2026-05-26 20:50:58 -05:00
parent d55bc3ae97
commit 38ea00ec8f
87 changed files with 3385 additions and 1424 deletions

View File

@@ -4,6 +4,7 @@ import urllib.parse
from .common import InfoExtractor
from ..compat import compat_etree_fromstring
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
clean_html,
@@ -1017,6 +1018,7 @@ class FacebookAdsIE(InfoExtractor):
'upload_date': '20240812',
'like_count': int,
},
'skip': 'Invalid URL',
}, {
'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
'info_dict': {
@@ -1031,6 +1033,42 @@ class FacebookAdsIE(InfoExtractor):
},
'playlist_count': 3,
'skip': 'Invalid URL',
}, {
'url': 'https://www.facebook.com/ads/library/?id=312304267031140',
'info_dict': {
'id': '312304267031140',
'title': 'Casper Wave Hybrid Mattress',
'uploader': 'Casper',
'uploader_id': '224110981099062',
'uploader_url': 'https://www.facebook.com/Casper/',
'like_count': int,
},
'playlist_count': 2,
}, {
'url': 'https://www.facebook.com/ads/library/?id=874812092000430',
'info_dict': {
'id': '874812092000430',
'title': 'TikTok',
'uploader': 'Case \u00e0 Chocs',
'uploader_id': '112960472096793',
'uploader_url': 'https://www.facebook.com/Caseachocs/',
'like_count': int,
'description': 'md5:f02a255fcf7dce6ed40e9494cf4bc49a',
},
'playlist_count': 3,
}, {
'url': 'https://www.facebook.com/ads/library/?id=1704834754236452',
'info_dict': {
'id': '1704834754236452',
'ext': 'mp4',
'title': 'Get answers now!',
'description': 'Ask the best psychics and get accurate answers on questions that bother you!',
'uploader': 'Your Relationship Advisor',
'uploader_id': '108939234726306',
'uploader_url': 'https://www.facebook.com/100068970634636/',
'like_count': int,
'thumbnail': r're:https://.+/.+\.jpg',
},
}, {
'url': 'https://es-la.facebook.com/ads/library/?id=901230958115569',
'only_matching': True,
@@ -1060,15 +1098,45 @@ class FacebookAdsIE(InfoExtractor):
})
return formats
def _download_fb_webpage_and_verify(self, url, video_id):
# See https://github.com/yt-dlp/yt-dlp/issues/15577
try:
return self._download_webpage(url, video_id)
except ExtractorError as e:
if (
not isinstance(e.cause, HTTPError)
or e.cause.status != 403
or e.cause.reason != 'Client challenge'
):
raise
error_page = self._webpage_read_content(e.cause.response, url, video_id)
self.write_debug('Received a client challenge response')
challenge_path = self._search_regex(
r'fetch\s*\(\s*["\'](/__rd_verify[^"\']+)["\']',
error_page, 'challenge path')
# Successful response will set the necessary cookie
self._request_webpage(
urljoin(url, challenge_path), video_id, 'Requesting verification cookie',
'Unable to get verification cookie', data=b'')
return self._download_webpage(url, video_id)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage = self._download_fb_webpage_and_verify(url, video_id)
post_data = traverse_obj(
re.findall(r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage), (..., {json.loads}))
data = get_first(post_data, (
'require', ..., ..., ..., '__bbox', 'require', ..., ..., ...,
'entryPointRoot', 'otherProps', 'deeplinkAdCard', 'snapshot', {dict}))
'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., (
('__bbox', 'result', 'data', 'ad_library_main', 'deeplink_ad_archive_result', 'deeplink_ad_archive'),
# old path
('entryPointRoot', 'otherProps', 'deeplinkAdCard'),
), 'snapshot', {dict}))
if not data:
raise ExtractorError('Unable to extract ad data')
@@ -1084,11 +1152,12 @@ class FacebookAdsIE(InfoExtractor):
'title': title,
'description': markup or None,
}, traverse_obj(data, {
'description': ('link_description', {lambda x: x if not x.startswith('{{product.') else None}),
'description': (
(('body', 'text'), 'link_description'),
{lambda x: x if not x.startswith('{{product.') else None}, any),
'uploader': ('page_name', {str}),
'uploader_id': ('page_id', {str_or_none}),
'uploader_url': ('page_profile_uri', {url_or_none}),
'timestamp': ('creation_time', {int_or_none}),
'like_count': ('page_like_count', {int_or_none}),
}))
@@ -1099,7 +1168,8 @@ class FacebookAdsIE(InfoExtractor):
entries.append({
'id': f'{video_id}_{idx}',
'title': entry.get('title') or title,
'description': traverse_obj(entry, 'body', 'link_description') or info_dict.get('description'),
'description': traverse_obj(
entry, 'body', 'link_description', expected_type=str) or info_dict.get('description'),
'thumbnail': url_or_none(entry.get('video_preview_image_url')),
'formats': self._extract_formats(entry),
})