Plugin cleanup and tweaks
This commit is contained in:
@@ -1,18 +1,16 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
merge_dicts,
|
||||
NO_DEFAULT,
|
||||
orderedSet,
|
||||
parse_codecs,
|
||||
qualities,
|
||||
traverse_obj,
|
||||
@@ -26,7 +24,7 @@ from ..utils import (
|
||||
|
||||
class ZDFBaseIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd')
|
||||
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'uhd')
|
||||
|
||||
def _call_api(self, url, video_id, item, api_token=None, referrer=None):
|
||||
headers = {}
|
||||
@@ -72,6 +70,7 @@ class ZDFBaseIE(InfoExtractor):
|
||||
f.update({
|
||||
'url': format_url,
|
||||
'format_id': join_nonempty('http', meta.get('type'), meta.get('quality')),
|
||||
'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None))
|
||||
})
|
||||
new_formats = [f]
|
||||
formats.extend(merge_dicts(f, {
|
||||
@@ -111,7 +110,6 @@ class ZDFBaseIE(InfoExtractor):
|
||||
'class': track.get('class'),
|
||||
'language': track.get('language'),
|
||||
})
|
||||
self._sort_formats(formats, ('hasaud', 'res', 'quality', 'language_preference'))
|
||||
|
||||
duration = float_or_none(try_get(
|
||||
ptmd, lambda x: x['attributes']['duration']['value']), scale=1000)
|
||||
@@ -122,6 +120,7 @@ class ZDFBaseIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': self._extract_subtitles(ptmd),
|
||||
'_format_sort_fields': ('tbr', 'res', 'quality', 'language_preference'),
|
||||
}
|
||||
|
||||
def _extract_player(self, webpage, video_id, fatal=True):
|
||||
@@ -136,6 +135,34 @@ class ZDFBaseIE(InfoExtractor):
|
||||
class ZDFIE(ZDFBaseIE):
|
||||
_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
|
||||
_TESTS = [{
|
||||
# Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
|
||||
'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html',
|
||||
'md5': '34ec321e7eb34231fd88616c65c92db0',
|
||||
'info_dict': {
|
||||
'id': '210222_phx_nachgehakt_corona_protest',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wohin führt der Protest in der Pandemie?',
|
||||
'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
|
||||
'duration': 1691,
|
||||
'timestamp': 1613948400,
|
||||
'upload_date': '20210221',
|
||||
},
|
||||
'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
|
||||
}, {
|
||||
# Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
|
||||
'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
|
||||
'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
|
||||
'info_dict': {
|
||||
'id': '141007_ab18_10wochensommer_film',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ab 18! - 10 Wochen Sommer',
|
||||
'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
|
||||
'duration': 2660,
|
||||
'timestamp': 1608604200,
|
||||
'upload_date': '20201222',
|
||||
},
|
||||
'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/nachrichten/heute-journal/heute-journal-vom-30-12-2021-100.html',
|
||||
'info_dict': {
|
||||
'id': '211230_sendung_hjo',
|
||||
@@ -147,7 +174,8 @@ class ZDFIE(ZDFBaseIE):
|
||||
'thumbnail': 'md5:e65f459f741be5455c952cd820eb188e',
|
||||
'title': 'heute journal vom 30.12.2021',
|
||||
'timestamp': 1640897100,
|
||||
}
|
||||
},
|
||||
'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
|
||||
'info_dict': {
|
||||
@@ -162,7 +190,7 @@ class ZDFIE(ZDFBaseIE):
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
|
||||
'md5': '3d6f1049e9682178a11c54b91f3dd065',
|
||||
'md5': '57af4423db0455a3975d2dc4578536bc',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': 'video_funk_1770473',
|
||||
@@ -171,7 +199,7 @@ class ZDFIE(ZDFBaseIE):
|
||||
'title': 'Alles ist verzaubert',
|
||||
'timestamp': 1635520560,
|
||||
'upload_date': '20211029',
|
||||
'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-100~1920x1080?cb=1636466431799',
|
||||
'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-102~1920x1080?cb=1663848412907',
|
||||
},
|
||||
}, {
|
||||
# Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche
|
||||
@@ -195,30 +223,57 @@ class ZDFIE(ZDFBaseIE):
|
||||
'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
|
||||
'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html',
|
||||
'only_matching': True
|
||||
'url': 'https://www.zdf.de/arte/todliche-flucht/page-video-artede-toedliche-flucht-16-100.html',
|
||||
'info_dict': {
|
||||
'id': 'video_artede_083871-001-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tödliche Flucht (1/6)',
|
||||
'description': 'md5:e34f96a9a5f8abd839ccfcebad3d5315',
|
||||
'duration': 3193.0,
|
||||
'timestamp': 1641355200,
|
||||
'upload_date': '20220105',
|
||||
},
|
||||
'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"'
|
||||
}, {
|
||||
# Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
|
||||
'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
|
||||
'only_matching': True
|
||||
'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html',
|
||||
'info_dict': {
|
||||
'id': '191205_1800_sendung_sok8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Das Geld anderer Leute',
|
||||
'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d',
|
||||
'duration': 2581.0,
|
||||
'timestamp': 1675160100,
|
||||
'upload_date': '20230131',
|
||||
'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/dokumentation/terra-x/unser-gruener-planet-wuesten-doku-100.html',
|
||||
'info_dict': {
|
||||
'id': '220605_dk_gruener_planet_wuesten_tex',
|
||||
'ext': 'mp4',
|
||||
'title': 'Unser grüner Planet - Wüsten',
|
||||
'description': 'md5:4fc647b6f9c3796eea66f4a0baea2862',
|
||||
'duration': 2613.0,
|
||||
'timestamp': 1654450200,
|
||||
'upload_date': '20220605',
|
||||
'format_note': 'uhd, main',
|
||||
'thumbnail': 'https://www.zdf.de/assets/saguaro-kakteen-102~3840x2160?cb=1655910690796',
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_entry(self, url, player, content, video_id):
|
||||
title = content.get('title') or content['teaserHeadline']
|
||||
|
||||
t = content['mainVideoContent']['http://zdf.de/rels/target']
|
||||
|
||||
ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
|
||||
|
||||
ptmd_path = traverse_obj(t, (
|
||||
(('streams', 'default'), None),
|
||||
('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template')
|
||||
), get_all=False)
|
||||
if not ptmd_path:
|
||||
ptmd_path = traverse_obj(
|
||||
t, ('streams', 'default', 'http://zdf.de/rels/streams/ptmd-template'),
|
||||
'http://zdf.de/rels/streams/ptmd-template').replace(
|
||||
'{playerId}', 'ngplayer_2_4')
|
||||
raise ExtractorError('Could not extract ptmd_path')
|
||||
|
||||
info = self._extract_ptmd(
|
||||
urljoin(url, ptmd_path), video_id, player['apiToken'], url)
|
||||
urljoin(url, ptmd_path.replace('{playerId}', 'android_native_5')), video_id, player['apiToken'], url)
|
||||
|
||||
thumbnails = []
|
||||
layouts = try_get(
|
||||
@@ -267,16 +322,16 @@ class ZDFIE(ZDFBaseIE):
|
||||
'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
|
||||
video_id)
|
||||
|
||||
document = video['document']
|
||||
|
||||
title = document['titel']
|
||||
content_id = document['basename']
|
||||
|
||||
formats = []
|
||||
format_urls = set()
|
||||
for f in document['formitaeten']:
|
||||
self._extract_format(content_id, formats, format_urls, f)
|
||||
self._sort_formats(formats)
|
||||
formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
|
||||
document = formitaeten and video['document']
|
||||
if formitaeten:
|
||||
title = document['titel']
|
||||
content_id = document['basename']
|
||||
|
||||
format_urls = set()
|
||||
for f in formitaeten or []:
|
||||
self._extract_format(content_id, formats, format_urls, f)
|
||||
|
||||
thumbnails = []
|
||||
teaser_bild = document.get('teaserBild')
|
||||
@@ -322,9 +377,9 @@ class ZDFChannelIE(ZDFBaseIE):
|
||||
'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
|
||||
'info_dict': {
|
||||
'id': 'das-aktuelle-sportstudio',
|
||||
'title': 'das aktuelle sportstudio | ZDF',
|
||||
'title': 'das aktuelle sportstudio',
|
||||
},
|
||||
'playlist_mincount': 23,
|
||||
'playlist_mincount': 18,
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/dokumentation/planet-e',
|
||||
'info_dict': {
|
||||
@@ -332,6 +387,14 @@ class ZDFChannelIE(ZDFBaseIE):
|
||||
'title': 'planet e.',
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
|
||||
'info_dict': {
|
||||
'id': 'aktenzeichen-xy-ungeloest',
|
||||
'title': 'Aktenzeichen XY... ungelöst',
|
||||
'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)",
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/filme/taunuskrimi/',
|
||||
'only_matching': True,
|
||||
@@ -341,60 +404,36 @@ class ZDFChannelIE(ZDFBaseIE):
|
||||
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs that ZDFIE accepts are always declined here; for anything else
    the decision is delegated to the inherited ``suitable``.
    """
    if ZDFIE.suitable(url):
        # Let the single-video extractor take its own URLs.
        return False
    return super(ZDFChannelIE, cls).suitable(url)
|
||||
|
||||
def _og_search_title(self, webpage, fatal=False):
    """Return the inherited OpenGraph title without a trailing ZDF suffix.

    The title is looked up via the parent class's ``_og_search_title``.
    A trailing separator (``-`` or ``|`` surrounded by whitespace) followed
    by ``ZDF`` or ``ZDFmediathek`` at the very end is cut off.

    Returns None when the parent lookup yields nothing or when nothing
    remains in front of the suffix.
    """
    raw_title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal)
    if not raw_title:
        return None
    # Splitting on the end-anchored suffix leaves the bare title in slot 0.
    bare_title = re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', raw_title)[0]
    return bare_title or None
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, channel_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(item_url, ie=ZDFIE.ie_key())
|
||||
for item_url in orderedSet(re.findall(
|
||||
r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
|
||||
matches = re.finditer(
|
||||
r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL,
|
||||
webpage)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, channel_id, self._og_search_title(webpage, fatal=False))
|
||||
if self._downloader.params.get('noplaylist', False):
|
||||
entry = next(
|
||||
(self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches),
|
||||
None)
|
||||
self.to_screen('Downloading just the main video because of --no-playlist')
|
||||
if entry:
|
||||
return entry
|
||||
else:
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, ))
|
||||
|
||||
r"""
|
||||
player = self._extract_player(webpage, channel_id)
|
||||
def check_video(m):
    # Find the <a ...> tag whose data-target-id equals the 'p_id' group of
    # this match. default='' is passed, so a miss presumably produces an
    # empty string instead of an error -- confirm against _search_regex.
    anchor_tag = self._search_regex(
        r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ),
        webpage, 'check id', default='')
    anchor_attrs = extract_attributes(anchor_tag)
    # Reject only entries whose tag is explicitly flagged as 'novideo'.
    flagged_novideo = anchor_attrs.get('data-target-video-type') == 'novideo'
    return not flagged_novideo
|
||||
|
||||
channel_id = self._search_regex(
|
||||
r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage,
|
||||
'channel id', group='id')
|
||||
|
||||
channel = self._call_api(
|
||||
'https://api.zdf.de/content/documents/%s.json' % channel_id,
|
||||
player, url, channel_id)
|
||||
|
||||
items = []
|
||||
for module in channel['module']:
|
||||
for teaser in try_get(module, lambda x: x['teaser'], list) or []:
|
||||
t = try_get(
|
||||
teaser, lambda x: x['http://zdf.de/rels/target'], dict)
|
||||
if not t:
|
||||
continue
|
||||
items.extend(try_get(
|
||||
t,
|
||||
lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
|
||||
list) or [])
|
||||
items.extend(try_get(
|
||||
module,
|
||||
lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
|
||||
list) or [])
|
||||
|
||||
entries = []
|
||||
entry_urls = set()
|
||||
for item in items:
|
||||
t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
|
||||
if not t:
|
||||
continue
|
||||
sharing_url = t.get('http://zdf.de/rels/sharing-url')
|
||||
if not sharing_url or not isinstance(sharing_url, compat_str):
|
||||
continue
|
||||
if sharing_url in entry_urls:
|
||||
continue
|
||||
entry_urls.add(sharing_url)
|
||||
entries.append(self.url_result(
|
||||
sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
|
||||
|
||||
return self.playlist_result(entries, channel_id, channel.get('title'))
|
||||
"""
|
||||
return self.playlist_from_matches(
|
||||
(m.group('url') for m in matches if check_video(m)),
|
||||
channel_id, self._og_search_title(webpage, fatal=False))
|
||||
|
Reference in New Issue
Block a user