Upgrade yt_dlp and download script
This commit is contained in:
@@ -1,12 +1,12 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
US_RATINGS,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
orderedSet,
|
||||
strip_jsonp,
|
||||
@@ -14,7 +14,6 @@ from ..utils import (
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
US_RATINGS,
|
||||
)
|
||||
|
||||
|
||||
@@ -48,7 +47,7 @@ class PBSIE(InfoExtractor):
|
||||
(r'video\.kpbs\.org', 'KPBS San Diego (KPBS)'), # http://www.kpbs.org/
|
||||
(r'video\.kqed\.org', 'KQED (KQED)'), # http://www.kqed.org
|
||||
(r'vids\.kvie\.org', 'KVIE Public Television (KVIE)'), # http://www.kvie.org
|
||||
(r'video\.pbssocal\.org', 'PBS SoCal/KOCE (KOCE)'), # http://www.pbssocal.org/
|
||||
(r'(?:video\.|www\.)pbssocal\.org', 'PBS SoCal/KOCE (KOCE)'), # http://www.pbssocal.org/
|
||||
(r'video\.valleypbs\.org', 'ValleyPBS (KVPT)'), # http://www.valleypbs.org/
|
||||
(r'video\.cptv\.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # http://cptv.org
|
||||
(r'watch\.knpb\.org', 'KNPB Channel 5 (KNPB)'), # http://www.knpb.org/
|
||||
@@ -62,7 +61,7 @@ class PBSIE(InfoExtractor):
|
||||
(r'video\.wyomingpbs\.org', 'Wyoming PBS (KCWC)'), # http://www.wyomingpbs.org
|
||||
(r'video\.cpt12\.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # http://www.cpt12.org/
|
||||
(r'video\.kbyueleven\.org', 'KBYU-TV (KBYU)'), # http://www.kbyutv.org/
|
||||
(r'video\.thirteen\.org', 'Thirteen/WNET New York (WNET)'), # http://www.thirteen.org
|
||||
(r'(?:video\.|www\.)thirteen\.org', 'Thirteen/WNET New York (WNET)'), # http://www.thirteen.org
|
||||
(r'video\.wgbh\.org', 'WGBH/Channel 2 (WGBH)'), # http://wgbh.org
|
||||
(r'video\.wgby\.org', 'WGBY (WGBY)'), # http://www.wgby.org
|
||||
(r'watch\.njtvonline\.org', 'NJTV Public Media NJ (WNJT)'), # http://www.njtvonline.org/
|
||||
@@ -182,18 +181,19 @@ class PBSIE(InfoExtractor):
|
||||
)
|
||||
|
||||
IE_NAME = 'pbs'
|
||||
IE_DESC = 'Public Broadcasting Service (PBS) and member stations: %s' % ', '.join(list(zip(*_STATIONS))[1])
|
||||
IE_DESC = 'Public Broadcasting Service (PBS) and member stations: {}'.format(', '.join(list(zip(*_STATIONS))[1]))
|
||||
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:
|
||||
# Direct video URL
|
||||
(?:%s)/(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) |
|
||||
# Article with embedded player (or direct video)
|
||||
(?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
|
||||
# Player
|
||||
(?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)
|
||||
# Player
|
||||
(?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/?#]+) |
|
||||
# Direct video URL, or article with embedded player
|
||||
(?:{})/(?:
|
||||
(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/#]|$) |
|
||||
(?:[^/?#]+/){{1,5}}(?P<presumptive_id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])
|
||||
)
|
||||
)
|
||||
''' % '|'.join(list(zip(*_STATIONS))[0])
|
||||
'''.format('|'.join(next(zip(*_STATIONS))))
|
||||
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
@@ -208,16 +208,40 @@ class PBSIE(InfoExtractor):
|
||||
'description': 'md5:31b664af3c65fd07fa460d306b837d00',
|
||||
'duration': 3190,
|
||||
},
|
||||
'skip': 'dead URL',
|
||||
},
|
||||
{
|
||||
'url': 'https://www.thirteen.org/programs/the-woodwrights-shop/carving-away-with-mary-may-tioglz/',
|
||||
'info_dict': {
|
||||
'id': '3004803331',
|
||||
'ext': 'mp4',
|
||||
'title': "The Woodwright's Shop - Carving Away with Mary May",
|
||||
'description': 'md5:7cbaaaa8b9bcc78bd8f0e31911644e28',
|
||||
'duration': 1606,
|
||||
'display_id': 'carving-away-with-mary-may-tioglz',
|
||||
'chapters': [],
|
||||
'thumbnail': 'https://image.pbs.org/video-assets/NcnTxNl-asset-mezzanine-16x9-K0Keoyv.jpg',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
|
||||
'md5': '6f722cb3c3982186d34b0f13374499c7',
|
||||
'md5': '372b12b670070de39438b946474df92f',
|
||||
'info_dict': {
|
||||
'id': '2365297690',
|
||||
'ext': 'mp4',
|
||||
'title': 'FRONTLINE - Losing Iraq',
|
||||
'description': 'md5:5979a4d069b157f622d02bff62fbe654',
|
||||
'duration': 5050,
|
||||
'chapters': [
|
||||
{'start_time': 0.0, 'end_time': 1234.0, 'title': 'After Saddam, Chaos'},
|
||||
{'start_time': 1233.0, 'end_time': 1719.0, 'title': 'The Insurgency Takes Root'},
|
||||
{'start_time': 1718.0, 'end_time': 2461.0, 'title': 'A Light Footprint'},
|
||||
{'start_time': 2460.0, 'end_time': 3589.0, 'title': 'The Surge '},
|
||||
{'start_time': 3588.0, 'end_time': 4355.0, 'title': 'The Withdrawal '},
|
||||
{'start_time': 4354.0, 'end_time': 5051.0, 'title': 'ISIS on the March '},
|
||||
],
|
||||
'display_id': 'losing-iraq',
|
||||
'thumbnail': 'https://image.pbs.org/video-assets/pbs/frontline/138098/images/mezzanine_401.jpg',
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -404,6 +428,19 @@ class PBSIE(InfoExtractor):
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
},
|
||||
{
|
||||
'url': 'https://www.pbssocal.org/shows/newshour/clip/capehart-johnson-1715984001',
|
||||
'info_dict': {
|
||||
'id': '3091549094',
|
||||
'ext': 'mp4',
|
||||
'title': 'PBS NewsHour - Capehart and Johnson on the unusual Biden-Trump debate plans',
|
||||
'description': 'Capehart and Johnson on how the Biden-Trump debates could shape the campaign season',
|
||||
'display_id': 'capehart-johnson-1715984001',
|
||||
'duration': 593,
|
||||
'thumbnail': 'https://image.pbs.org/video-assets/mF3oSVn-asset-mezzanine-16x9-QeXjXPy.jpg',
|
||||
'chapters': [],
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
|
||||
'only_matching': True,
|
||||
@@ -415,7 +452,7 @@ class PBSIE(InfoExtractor):
|
||||
{
|
||||
'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=',
|
||||
'only_matching': True,
|
||||
}
|
||||
},
|
||||
]
|
||||
_ERRORS = {
|
||||
101: 'We\'re sorry, but this video is not yet available.',
|
||||
@@ -464,10 +501,12 @@ class PBSIE(InfoExtractor):
|
||||
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
|
||||
r'class="coveplayerid">([^<]+)<', # coveplayer
|
||||
r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/
|
||||
r'\sclass="passportcoveplayer"[^>]*\sdata-media="(\d+)', # https://www.thirteen.org/programs/the-woodwrights-shop/who-wrote-the-book-of-sloyd-fggvvq/
|
||||
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
|
||||
r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
|
||||
r'<div[^>]+\bdata-cove-id=["\'](\d+)"', # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/
|
||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)', # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/
|
||||
r'\bhttps?://player\.pbs\.org/[\w-]+player/(\d+)', # last pattern to avoid false positives
|
||||
]
|
||||
|
||||
media_id = self._search_regex(
|
||||
@@ -518,7 +557,7 @@ class PBSIE(InfoExtractor):
|
||||
if not video_id:
|
||||
video_info = self._extract_video_data(
|
||||
player_page, 'video data', display_id)
|
||||
video_id = compat_str(
|
||||
video_id = str(
|
||||
video_info.get('id') or video_info['contentID'])
|
||||
else:
|
||||
video_id = mobj.group('id')
|
||||
@@ -539,7 +578,7 @@ class PBSIE(InfoExtractor):
|
||||
|
||||
if isinstance(video_id, list):
|
||||
entries = [self.url_result(
|
||||
'http://video.pbs.org/video/%s' % vid_id, 'PBS', vid_id)
|
||||
f'http://video.pbs.org/video/{vid_id}', 'PBS', vid_id)
|
||||
for vid_id in video_id]
|
||||
return self.playlist_result(entries, display_id)
|
||||
|
||||
@@ -568,11 +607,11 @@ class PBSIE(InfoExtractor):
|
||||
# Player pages may also serve different qualities
|
||||
for page in ('widget/partnerplayer', 'portalplayer'):
|
||||
player = self._download_webpage(
|
||||
'http://player.pbs.org/%s/%s' % (page, video_id),
|
||||
display_id, 'Downloading %s page' % page, fatal=False)
|
||||
f'http://player.pbs.org/{page}/{video_id}',
|
||||
display_id, f'Downloading {page} page', fatal=False)
|
||||
if player:
|
||||
video_info = self._extract_video_data(
|
||||
player, '%s video data' % page, display_id, fatal=False)
|
||||
player, f'{page} video data', display_id, fatal=False)
|
||||
if video_info:
|
||||
extract_redirect_urls(video_info)
|
||||
if not info:
|
||||
@@ -603,7 +642,7 @@ class PBSIE(InfoExtractor):
|
||||
redirect_id = redirect.get('eeid')
|
||||
|
||||
redirect_info = self._download_json(
|
||||
'%s?format=json' % redirect['url'], display_id,
|
||||
'{}?format=json'.format(redirect['url']), display_id,
|
||||
'Downloading %s video url info' % (redirect_id or num),
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
@@ -614,7 +653,7 @@ class PBSIE(InfoExtractor):
|
||||
self.raise_geo_restricted(
|
||||
msg=message, countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||
f'{self.IE_NAME} said: {message}', expected=True)
|
||||
|
||||
format_url = redirect_info.get('url')
|
||||
if not format_url:
|
||||
@@ -649,7 +688,7 @@ class PBSIE(InfoExtractor):
|
||||
f_url = re.sub(r'\d+k|baseline', bitrate + 'k', http_url)
|
||||
# This may produce invalid links sometimes (e.g.
|
||||
# http://www.pbs.org/wgbh/frontline/film/suicide-plan)
|
||||
if not self._is_valid_url(f_url, display_id, 'http-%sk video' % bitrate):
|
||||
if not self._is_valid_url(f_url, display_id, f'http-{bitrate}k video'):
|
||||
continue
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
@@ -671,7 +710,7 @@ class PBSIE(InfoExtractor):
|
||||
captions = info.get('cc') or {}
|
||||
for caption_url in captions.values():
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': caption_url
|
||||
'url': caption_url,
|
||||
})
|
||||
subtitles = self._merge_subtitles(subtitles, hls_subs)
|
||||
|
||||
@@ -715,7 +754,7 @@ class PBSKidsIE(InfoExtractor):
|
||||
'description': 'md5:d006b2211633685d8ebc8d03b6d5611e',
|
||||
'categories': ['Episode'],
|
||||
'upload_date': '20190718',
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://pbskids.org/video/plum-landing/2365205059',
|
||||
@@ -730,8 +769,8 @@ class PBSKidsIE(InfoExtractor):
|
||||
'description': 'md5:657e5fc4356a84ead1c061eb280ff05d',
|
||||
'categories': ['Episode'],
|
||||
'upload_date': '20140302',
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -753,5 +792,5 @@ class PBSKidsIE(InfoExtractor):
|
||||
'series': ('video_obj', 'program_title', {str}),
|
||||
'title': ('video_obj', 'title', {str}),
|
||||
'upload_date': ('video_obj', 'air_date', {unified_strdate}),
|
||||
})
|
||||
}),
|
||||
}
|
||||
|
Reference in New Issue
Block a user