Upgrade yt_dlp and download script
This commit is contained in:
@@ -3,7 +3,6 @@ import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -13,6 +12,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
try_get,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
@@ -28,6 +28,12 @@ class NRKBaseIE(InfoExtractor):
|
||||
)/'''
|
||||
|
||||
def _extract_nrk_formats(self, asset_url, video_id):
|
||||
asset_url = update_url_query(asset_url, {
|
||||
# Remove 'adap' to return all streams (known values are: small, large, small_h265, large_h265)
|
||||
'adap': [],
|
||||
# Disable subtitles since they are fetched separately
|
||||
's': 0,
|
||||
})
|
||||
if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url):
|
||||
return self._extract_akamai_formats(asset_url, video_id)
|
||||
asset_url = re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url)
|
||||
@@ -53,13 +59,16 @@ class NRKBaseIE(InfoExtractor):
|
||||
msg=MESSAGES.get('ProgramIsGeoBlocked'),
|
||||
countries=self._GEO_COUNTRIES)
|
||||
message = data.get('endUserMessage') or MESSAGES.get(message_type, message_type)
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {message}', expected=True)
|
||||
|
||||
def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
|
||||
return self._download_json(
|
||||
urljoin('https://psapi.nrk.no/', path),
|
||||
video_id, note or 'Downloading %s JSON' % item,
|
||||
fatal=fatal, query=query)
|
||||
video_id, note or f'Downloading {item} JSON',
|
||||
fatal=fatal, query=query, headers={
|
||||
# Needed for working stream URLs, see https://github.com/yt-dlp/yt-dlp/issues/12192
|
||||
'Accept': 'application/vnd.nrk.psapi+json; version=9; player=tv-player; device=player-core',
|
||||
})
|
||||
|
||||
|
||||
class NRKIE(NRKBaseIE):
|
||||
@@ -78,14 +87,18 @@ class NRKIE(NRKBaseIE):
|
||||
_TESTS = [{
|
||||
# video
|
||||
'url': 'http://www.nrk.no/video/PS*150533',
|
||||
'md5': 'f46be075326e23ad0e524edfcb06aeb6',
|
||||
'md5': '2b88a652ad2e275591e61cf550887eec',
|
||||
'info_dict': {
|
||||
'id': '150533',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dompap og andre fugler i Piip-Show',
|
||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||
'duration': 262,
|
||||
}
|
||||
'upload_date': '20140325',
|
||||
'thumbnail': r're:^https?://gfx\.nrk\.no/.*$',
|
||||
'timestamp': 1395751833,
|
||||
'alt_title': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||
},
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.nrk.no/video/PS*154915',
|
||||
@@ -96,7 +109,11 @@ class NRKIE(NRKBaseIE):
|
||||
'title': 'Slik høres internett ut når du er blind',
|
||||
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
||||
'duration': 20,
|
||||
}
|
||||
'timestamp': 1398429565,
|
||||
'alt_title': 'Cathrine Lie Wathne er blind, og bruker hurtigtaster for å navigere seg rundt på ulike nettsider.',
|
||||
'thumbnail': 'https://gfx.nrk.no/urxQMSXF-WnbfjBH5ke2igLGyN27EdJVWZ6FOsEAclhA',
|
||||
'upload_date': '20140425',
|
||||
},
|
||||
}, {
|
||||
'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
|
||||
'only_matching': True,
|
||||
@@ -153,10 +170,10 @@ class NRKIE(NRKBaseIE):
|
||||
return self._call_api(f'playback/{item}/{video_id}', video_id, item, query=query)
|
||||
raise
|
||||
|
||||
# known values for preferredCdn: akamai, iponly, minicdn and telenor
|
||||
# known values for preferredCdn: akamai, globalconnect and telenor
|
||||
manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'})
|
||||
|
||||
video_id = try_get(manifest, lambda x: x['id'], compat_str) or video_id
|
||||
video_id = try_get(manifest, lambda x: x['id'], str) or video_id
|
||||
|
||||
if manifest.get('playability') == 'nonPlayable':
|
||||
self._raise_error(manifest['nonPlayable'])
|
||||
@@ -216,13 +233,13 @@ class NRKIE(NRKBaseIE):
|
||||
sub_key = str_or_none(sub.get('language')) or 'nb'
|
||||
sub_type = str_or_none(sub.get('type'))
|
||||
if sub_type:
|
||||
sub_key += '-%s' % sub_type
|
||||
sub_key += f'-{sub_type}'
|
||||
subtitles.setdefault(sub_key, []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
|
||||
legal_age = try_get(
|
||||
data, lambda x: x['legalAge']['body']['rating']['code'], compat_str)
|
||||
data, lambda x: x['legalAge']['body']['rating']['code'], str)
|
||||
# https://en.wikipedia.org/wiki/Norwegian_Media_Authority
|
||||
age_limit = None
|
||||
if legal_age:
|
||||
@@ -243,13 +260,13 @@ class NRKIE(NRKBaseIE):
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'timestamp': parse_iso8601(try_get(manifest, lambda x: x['availability']['onDemand']['from'], str))
|
||||
'timestamp': parse_iso8601(try_get(manifest, lambda x: x['availability']['onDemand']['from'], str)),
|
||||
}
|
||||
|
||||
if is_series:
|
||||
series = season_id = season_number = episode = episode_number = None
|
||||
programs = self._call_api(
|
||||
'programs/%s' % video_id, video_id, 'programs', fatal=False)
|
||||
f'programs/{video_id}', video_id, 'programs', fatal=False)
|
||||
if programs and isinstance(programs, dict):
|
||||
series = str_or_none(programs.get('seriesTitle'))
|
||||
season_id = str_or_none(programs.get('seasonId'))
|
||||
@@ -259,7 +276,7 @@ class NRKIE(NRKBaseIE):
|
||||
if not series:
|
||||
series = title
|
||||
if alt_title:
|
||||
title += ' - %s' % alt_title
|
||||
title += f' - {alt_title}'
|
||||
if not season_number:
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'Sesong\s+(\d+)', description or '', 'season number',
|
||||
@@ -289,7 +306,7 @@ class NRKIE(NRKBaseIE):
|
||||
class NRKTVIE(InfoExtractor):
|
||||
IE_DESC = 'NRK TV and NRK Radio'
|
||||
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*%s' % _EPISODE_RE
|
||||
_VALID_URL = rf'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*{_EPISODE_RE}'
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/program/MDDP12000117',
|
||||
'md5': 'c4a5960f1b00b40d47db65c1064e0ab1',
|
||||
@@ -306,8 +323,15 @@ class NRKTVIE(InfoExtractor):
|
||||
}],
|
||||
'nb-ttv': [{
|
||||
'ext': 'vtt',
|
||||
}]
|
||||
}],
|
||||
},
|
||||
'upload_date': '20170627',
|
||||
'timestamp': 1498591822,
|
||||
'thumbnail': 'https://gfx.nrk.no/myRSc4vuFlahB60P3n6swwRTQUZI1LqJZl9B7icZFgzA',
|
||||
'alt_title': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
@@ -322,6 +346,13 @@ class NRKTVIE(InfoExtractor):
|
||||
'series': '20 spørsmål',
|
||||
'episode': '23. mai 2014',
|
||||
'age_limit': 0,
|
||||
'timestamp': 1584593700,
|
||||
'thumbnail': 'https://gfx.nrk.no/u7uCe79SEfPVGRAGVp2_uAZnNc4mfz_kjXg6Bgek8lMQ',
|
||||
'season_id': '126936',
|
||||
'upload_date': '20200319',
|
||||
'season': 'Season 2014',
|
||||
'season_number': 2014,
|
||||
'episode_number': 3,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||
@@ -411,7 +442,7 @@ class NRKTVIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'nrk:%s' % video_id, ie=NRKIE.ie_key(), video_id=video_id)
|
||||
f'nrk:{video_id}', ie=NRKIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
||||
class NRKTVEpisodeIE(InfoExtractor):
|
||||
@@ -461,14 +492,14 @@ class NRKTVEpisodeIE(InfoExtractor):
|
||||
info = self._search_json_ld(webpage, display_id, default={})
|
||||
nrk_id = info.get('@id') or self._html_search_meta(
|
||||
'nrk:program-id', webpage, default=None) or self._search_regex(
|
||||
r'data-program-id=["\'](%s)' % NRKTVIE._EPISODE_RE, webpage,
|
||||
rf'data-program-id=["\']({NRKTVIE._EPISODE_RE})', webpage,
|
||||
'nrk id')
|
||||
assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
|
||||
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'id': nrk_id,
|
||||
'url': 'nrk:%s' % nrk_id,
|
||||
'url': f'nrk:{nrk_id}',
|
||||
'ie_key': NRKIE.ie_key(),
|
||||
'season_number': int(season_number),
|
||||
'episode_number': int(episode_number),
|
||||
@@ -483,13 +514,13 @@ class NRKTVSerieBaseIE(NRKBaseIE):
|
||||
entries = []
|
||||
for episode in entry_list:
|
||||
nrk_id = episode.get('prfId') or episode.get('episodeId')
|
||||
if not nrk_id or not isinstance(nrk_id, compat_str):
|
||||
if not nrk_id or not isinstance(nrk_id, str):
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
|
||||
f'nrk:{nrk_id}', ie=NRKIE.ie_key(), video_id=nrk_id))
|
||||
return entries
|
||||
|
||||
_ASSETS_KEYS = ('episodes', 'instalments',)
|
||||
_ASSETS_KEYS = ('episodes', 'instalments')
|
||||
|
||||
def _extract_assets_key(self, embedded):
|
||||
for asset_key in self._ASSETS_KEYS:
|
||||
@@ -514,19 +545,18 @@ class NRKTVSerieBaseIE(NRKBaseIE):
|
||||
(lambda x: x[assets_key]['_embedded'][assets_key],
|
||||
lambda x: x[assets_key]),
|
||||
list)
|
||||
for e in self._extract_entries(entries):
|
||||
yield e
|
||||
yield from self._extract_entries(entries)
|
||||
# Find next URL
|
||||
next_url_path = try_get(
|
||||
data,
|
||||
(lambda x: x['_links']['next']['href'],
|
||||
lambda x: x['_embedded'][assets_key]['_links']['next']['href']),
|
||||
compat_str)
|
||||
str)
|
||||
if not next_url_path:
|
||||
break
|
||||
data = self._call_api(
|
||||
next_url_path, display_id,
|
||||
note='Downloading %s JSON page %d' % (assets_key, page_num),
|
||||
note=f'Downloading {assets_key} JSON page {page_num}',
|
||||
fatal=False)
|
||||
if not data:
|
||||
break
|
||||
@@ -593,7 +623,7 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url) or NRKRadioPodkastIE.suitable(url)
|
||||
else super(NRKTVSeasonIE, cls).suitable(url))
|
||||
else super().suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
@@ -601,14 +631,13 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||
serie_kind = mobj.group('serie_kind')
|
||||
serie = mobj.group('serie')
|
||||
season_id = mobj.group('id') or mobj.group('id_2')
|
||||
display_id = '%s/%s' % (serie, season_id)
|
||||
display_id = f'{serie}/{season_id}'
|
||||
|
||||
data = self._call_api(
|
||||
'%s/catalog/%s/%s/seasons/%s'
|
||||
% (domain, self._catalog_name(serie_kind), serie, season_id),
|
||||
f'{domain}/catalog/{self._catalog_name(serie_kind)}/{serie}/seasons/{season_id}',
|
||||
display_id, 'season', query={'pageSize': 50})
|
||||
|
||||
title = try_get(data, lambda x: x['titles']['title'], compat_str) or display_id
|
||||
title = try_get(data, lambda x: x['titles']['title'], str) or display_id
|
||||
return self.playlist_result(
|
||||
self._entries(data, display_id),
|
||||
display_id, title)
|
||||
@@ -691,7 +720,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
return (
|
||||
False if any(ie.suitable(url)
|
||||
for ie in (NRKTVIE, NRKTVEpisodeIE, NRKRadioPodkastIE, NRKTVSeasonIE))
|
||||
else super(NRKTVSeriesIE, cls).suitable(url))
|
||||
else super().suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
site, serie_kind, series_id = self._match_valid_url(url).groups()
|
||||
@@ -700,8 +729,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
|
||||
size_prefix = 'p' if is_radio else 'embeddedInstalmentsP'
|
||||
series = self._call_api(
|
||||
'%s/catalog/%s/%s'
|
||||
% (domain, self._catalog_name(serie_kind), series_id),
|
||||
f'{domain}/catalog/{self._catalog_name(serie_kind)}/{series_id}',
|
||||
series_id, 'serie', query={size_prefix + 'ageSize': 50})
|
||||
titles = try_get(series, [
|
||||
lambda x: x['titles'],
|
||||
@@ -719,8 +747,8 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
season_url = urljoin(url, season.get('href'))
|
||||
if not season_url:
|
||||
season_name = season.get('name')
|
||||
if season_name and isinstance(season_name, compat_str):
|
||||
season_url = 'https://%s.nrk.no/serie/%s/sesong/%s' % (domain, series_id, season_name)
|
||||
if season_name and isinstance(season_name, str):
|
||||
season_url = f'https://{domain}.nrk.no/serie/{series_id}/sesong/{season_name}'
|
||||
if season_url:
|
||||
entries.append(self.url_result(
|
||||
season_url, ie=NRKTVSeasonIE.ie_key(),
|
||||
@@ -777,7 +805,7 @@ class NRKRadioPodkastIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'nrk:%s' % video_id, ie=NRKIE.ie_key(), video_id=video_id)
|
||||
f'nrk:{video_id}', ie=NRKIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
||||
class NRKPlaylistBaseIE(InfoExtractor):
|
||||
@@ -790,7 +818,7 @@ class NRKPlaylistBaseIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result('nrk:%s' % video_id, NRKIE.ie_key())
|
||||
self.url_result(f'nrk:{video_id}', NRKIE.ie_key())
|
||||
for video_id in re.findall(self._ITEM_RE, webpage)
|
||||
]
|
||||
|
||||
@@ -831,7 +859,7 @@ class NRKPlaylistIE(NRKPlaylistBaseIE):
|
||||
|
||||
class NRKTVEpisodesIE(NRKPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P<id>\d+)'
|
||||
_ITEM_RE = r'data-episode=["\']%s' % NRKTVIE._EPISODE_RE
|
||||
_ITEM_RE = rf'data-episode=["\']{NRKTVIE._EPISODE_RE}'
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031',
|
||||
'info_dict': {
|
||||
@@ -869,7 +897,7 @@ class NRKSkoleIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
nrk_id = self._download_json(
|
||||
'https://nrkno-skole-prod.kube.nrk.no/skole/api/media/%s' % video_id,
|
||||
f'https://nrkno-skole-prod.kube.nrk.no/skole/api/media/{video_id}',
|
||||
video_id)['psId']
|
||||
|
||||
return self.url_result('nrk:%s' % nrk_id)
|
||||
return self.url_result(f'nrk:{nrk_id}')
|
||||
|
Reference in New Issue
Block a user