Upgrade yt_dlp and download script
This commit is contained in:
@@ -1,21 +1,18 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
strip_jsonp,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
update_url_query,
|
||||
urlhandle_detect_ext,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -39,7 +36,7 @@ class WDRIE(InfoExtractor):
|
||||
|
||||
def _asset_url(self, wdr_id):
|
||||
id_len = max(len(wdr_id), 5)
|
||||
return ''.join(('https:', self.__API_URL_TPL % (wdr_id[:id_len - 4], wdr_id, ), '.js'))
|
||||
return ''.join(('https:', self.__API_URL_TPL % (wdr_id[:id_len - 4], wdr_id), '.js'))
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -94,7 +91,7 @@ class WDRIE(InfoExtractor):
|
||||
medium_url, 'stream', fatal=False))
|
||||
else:
|
||||
a_format = {
|
||||
'url': medium_url
|
||||
'url': medium_url,
|
||||
}
|
||||
if ext == 'unknown_video':
|
||||
urlh = self._request_webpage(
|
||||
@@ -168,11 +165,12 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE
|
||||
'upload_date': '20160312',
|
||||
'description': 'md5:e127d320bc2b1f149be697ce044a3dd7',
|
||||
'is_live': False,
|
||||
'subtitles': {}
|
||||
'subtitles': {},
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
},
|
||||
{
|
||||
# FIXME: Asset JSON is directly embedded in webpage
|
||||
'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
|
||||
'info_dict': {
|
||||
'id': 'mdb-2296252',
|
||||
@@ -201,7 +199,7 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE
|
||||
'upload_date': 're:^[0-9]{8}$',
|
||||
'title': 're:^Die Sendung (?:mit der Maus )?vom [0-9.]{10}$',
|
||||
},
|
||||
'skip': 'The id changes from week to week because of the new episode'
|
||||
'skip': 'The id changes from week to week because of the new episode',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.wdrmaus.de/filme/sachgeschichten/achterbahn.php5',
|
||||
@@ -221,11 +219,13 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE
|
||||
'id': 'mdb-869971',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^COSMO Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'alt_title': 'COSMO Livestream',
|
||||
'live_status': 'is_live',
|
||||
'upload_date': '20160101',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.sportschau.de/handballem2018/handball-nationalmannschaft-em-stolperstein-vorrunde-100.html',
|
||||
@@ -248,6 +248,16 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE
|
||||
'url': 'https://kinder.wdr.de/tv/die-sendung-mit-dem-elefanten/av/video-folge---astronaut-100.html',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www1.wdr.de/mediathek/video/sendungen/rockpalast/video-baroness---freak-valley-festival--100.html',
|
||||
'info_dict': {
|
||||
'id': 'mdb-2741028',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baroness - Freak Valley Festival 2022',
|
||||
'alt_title': 'Rockpalast',
|
||||
'upload_date': '20220725',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -259,7 +269,7 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
# Article with several videos
|
||||
|
||||
# for wdr.de the data-extension is in a tag with the class "mediaLink"
|
||||
# for wdr.de the data-extension-ard is in a tag with the class "mediaLink"
|
||||
# for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
|
||||
# for wdrmaus, in a tag with the class "videoButton" (previously a link
|
||||
# to the page in a multiline "videoLink"-tag)
|
||||
@@ -268,7 +278,7 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE
|
||||
(?:
|
||||
(["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
|
||||
(["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
|
||||
)data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3
|
||||
)data-extension(?:-ard)?=(["\'])(?P<data>(?:(?!\3).)+)\3
|
||||
''', webpage):
|
||||
media_link_obj = self._parse_json(
|
||||
mobj.group('data'), display_id, transform_source=js_to_json,
|
||||
@@ -276,14 +286,14 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE
|
||||
if not media_link_obj:
|
||||
continue
|
||||
jsonp_url = try_get(
|
||||
media_link_obj, lambda x: x['mediaObj']['url'], compat_str)
|
||||
media_link_obj, lambda x: x['mediaObj']['url'], str)
|
||||
if jsonp_url:
|
||||
# metadata, or player JS with ['ref'] giving WDR id, or just media, perhaps
|
||||
clip_id = media_link_obj['mediaObj'].get('ref')
|
||||
if jsonp_url.endswith('.assetjsonp'):
|
||||
asset = self._download_json(
|
||||
jsonp_url, display_id, fatal=False, transform_source=strip_jsonp)
|
||||
clip_id = try_get(asset, lambda x: x['trackerData']['trackerClipId'], compat_str)
|
||||
clip_id = try_get(asset, lambda x: x['trackerData']['trackerClipId'], str)
|
||||
if clip_id:
|
||||
jsonp_url = self._asset_url(clip_id[4:])
|
||||
entries.append(self.url_result(jsonp_url, ie=WDRIE.ie_key()))
|
||||
@@ -292,10 +302,10 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE
|
||||
if not entries:
|
||||
entries = [
|
||||
self.url_result(
|
||||
compat_urlparse.urljoin(url, mobj.group('href')),
|
||||
urllib.parse.urljoin(url, mobj.group('href')),
|
||||
ie=WDRPageIE.ie_key())
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+\bhref=(["\'])(?P<href>(?:(?!\1).)+)\1[^>]+\bdata-extension=',
|
||||
r'<a[^>]+\bhref=(["\'])(?P<href>(?:(?!\1).)+)\1[^>]+\bdata-extension(?:-ard)?=',
|
||||
webpage) if re.match(self._PAGE_REGEX, mobj.group('href'))
|
||||
]
|
||||
|
||||
@@ -311,8 +321,7 @@ class WDRElefantIE(InfoExtractor):
|
||||
'title': 'Wippe',
|
||||
'id': 'mdb-1198320',
|
||||
'ext': 'mp4',
|
||||
'age_limit': None,
|
||||
'upload_date': '20071003'
|
||||
'upload_date': '20071003',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -336,7 +345,7 @@ class WDRElefantIE(InfoExtractor):
|
||||
zmdb_url_element = xml_metadata.find('./movie/zmdb_url')
|
||||
if zmdb_url_element is None:
|
||||
raise ExtractorError(
|
||||
'%s is not a video' % display_id, expected=True)
|
||||
f'{display_id} is not a video', expected=True)
|
||||
return self.url_result(zmdb_url_element.text, ie=WDRIE.ie_key())
|
||||
|
||||
|
||||
@@ -356,7 +365,7 @@ class WDRMobileIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'skip': 'Problems with loading data.'
|
||||
'skip': 'Problems with loading data.',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
Reference in New Issue
Block a user