Plugin cleanup and tweaks
This commit is contained in:
@@ -1,33 +1,15 @@
|
||||
import os
|
||||
from ..compat.compat_utils import passthrough_module
|
||||
|
||||
from ..utils import load_plugins
|
||||
|
||||
_LAZY_LOADER = False
|
||||
if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
|
||||
try:
|
||||
from .lazy_extractors import *
|
||||
from .lazy_extractors import _ALL_CLASSES
|
||||
_LAZY_LOADER = True
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
if not _LAZY_LOADER:
|
||||
from .extractors import *
|
||||
_ALL_CLASSES = [
|
||||
klass
|
||||
for name, klass in globals().items()
|
||||
if name.endswith('IE') and name != 'GenericIE'
|
||||
]
|
||||
_ALL_CLASSES.append(GenericIE)
|
||||
|
||||
_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
|
||||
_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
|
||||
passthrough_module(__name__, '.extractors')
|
||||
del passthrough_module
|
||||
|
||||
|
||||
def gen_extractor_classes():
|
||||
""" Return a list of supported extractors.
|
||||
The order does matter; the first extractor matched is the one handling the URL.
|
||||
"""
|
||||
from .extractors import _ALL_CLASSES
|
||||
|
||||
return _ALL_CLASSES
|
||||
|
||||
|
||||
@@ -38,17 +20,23 @@ def gen_extractors():
|
||||
return [klass() for klass in gen_extractor_classes()]
|
||||
|
||||
|
||||
def list_extractors(age_limit):
|
||||
"""
|
||||
Return a list of extractors that are suitable for the given age,
|
||||
sorted by extractor ID.
|
||||
"""
|
||||
def list_extractor_classes(age_limit=None):
|
||||
"""Return a list of extractors that are suitable for the given age, sorted by extractor name"""
|
||||
from .generic import GenericIE
|
||||
|
||||
return sorted(
|
||||
filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
|
||||
key=lambda ie: ie.IE_NAME.lower())
|
||||
yield from sorted(filter(
|
||||
lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,
|
||||
gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
|
||||
yield GenericIE
|
||||
|
||||
|
||||
def list_extractors(age_limit=None):
|
||||
"""Return a list of extractor instances that are suitable for the given age, sorted by extractor name"""
|
||||
return [ie() for ie in list_extractor_classes(age_limit)]
|
||||
|
||||
|
||||
def get_info_extractor(ie_name):
|
||||
"""Returns the info extractor class with the given ie_name"""
|
||||
return globals()[ie_name + 'IE']
|
||||
from . import extractors
|
||||
|
||||
return getattr(extractors, f'{ie_name}IE')
|
||||
|
2404
plugins/youtube_download/yt_dlp/extractor/_extractors.py
Normal file
2404
plugins/youtube_download/yt_dlp/extractor/_extractors.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import re
|
||||
@@ -157,8 +155,6 @@ class ABCIE(InfoExtractor):
|
||||
'format_id': format_id
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
@@ -213,7 +209,7 @@ class ABCIViewIE(InfoExtractor):
|
||||
'hdnea': token,
|
||||
})
|
||||
|
||||
for sd in ('720', 'sd', 'sd-low'):
|
||||
for sd in ('1080', '720', 'sd', 'sd-low'):
|
||||
sd_url = try_get(
|
||||
stream, lambda x: x['streams']['hls'][sd], compat_str)
|
||||
if not sd_url:
|
||||
@@ -223,7 +219,6 @@ class ABCIViewIE(InfoExtractor):
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
||||
if formats:
|
||||
break
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
src_vtt = stream.get('captions', {}).get('src-vtt')
|
||||
|
@@ -1,7 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from .amp import AMPIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
|
@@ -1,7 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
@@ -82,7 +78,6 @@ class ABCOTVSIE(InfoExtractor):
|
||||
'url': mp4_url,
|
||||
'width': 640,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
image = video.get('image') or {}
|
||||
|
||||
@@ -123,7 +118,6 @@ class ABCOTVSClipsIE(InfoExtractor):
|
||||
title = video_data['title']
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['videoURL'].split('?')[0], video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
522
plugins/youtube_download/yt_dlp/extractor/abematv.py
Normal file
522
plugins/youtube_download/yt_dlp/extractor/abematv.py
Normal file
@@ -0,0 +1,522 @@
|
||||
import base64
|
||||
import binascii
|
||||
import functools
|
||||
import hashlib
|
||||
import hmac
|
||||
import io
|
||||
import json
|
||||
import re
|
||||
import struct
|
||||
import time
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import urllib.response
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_decrypt
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bytes_to_intlist,
|
||||
decode_base_n,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
OnDemandPagedList,
|
||||
request_to_url,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
# NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862)
|
||||
|
||||
|
||||
def add_opener(ydl, handler):
|
||||
''' Add a handler for opening URLs, like _download_webpage '''
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
|
||||
assert isinstance(ydl._opener, urllib.request.OpenerDirector)
|
||||
ydl._opener.add_handler(handler)
|
||||
|
||||
|
||||
def remove_opener(ydl, handler):
|
||||
'''
|
||||
Remove handler(s) for opening URLs
|
||||
@param handler Either handler object itself or handler type.
|
||||
Specifying handler type will remove all handler which isinstance returns True.
|
||||
'''
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
|
||||
opener = ydl._opener
|
||||
assert isinstance(ydl._opener, urllib.request.OpenerDirector)
|
||||
if isinstance(handler, (type, tuple)):
|
||||
find_cp = lambda x: isinstance(x, handler)
|
||||
else:
|
||||
find_cp = lambda x: x is handler
|
||||
|
||||
removed = []
|
||||
for meth in dir(handler):
|
||||
if meth in ["redirect_request", "do_open", "proxy_open"]:
|
||||
# oops, coincidental match
|
||||
continue
|
||||
|
||||
i = meth.find("_")
|
||||
protocol = meth[:i]
|
||||
condition = meth[i + 1:]
|
||||
|
||||
if condition.startswith("error"):
|
||||
j = condition.find("_") + i + 1
|
||||
kind = meth[j + 1:]
|
||||
try:
|
||||
kind = int(kind)
|
||||
except ValueError:
|
||||
pass
|
||||
lookup = opener.handle_error.get(protocol, {})
|
||||
opener.handle_error[protocol] = lookup
|
||||
elif condition == "open":
|
||||
kind = protocol
|
||||
lookup = opener.handle_open
|
||||
elif condition == "response":
|
||||
kind = protocol
|
||||
lookup = opener.process_response
|
||||
elif condition == "request":
|
||||
kind = protocol
|
||||
lookup = opener.process_request
|
||||
else:
|
||||
continue
|
||||
|
||||
handlers = lookup.setdefault(kind, [])
|
||||
if handlers:
|
||||
handlers[:] = [x for x in handlers if not find_cp(x)]
|
||||
|
||||
removed.append(x for x in handlers if find_cp(x))
|
||||
|
||||
if removed:
|
||||
for x in opener.handlers:
|
||||
if find_cp(x):
|
||||
x.add_parent(None)
|
||||
opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
|
||||
|
||||
|
||||
class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
handler_order = 499
|
||||
STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
||||
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
|
||||
|
||||
def __init__(self, ie: 'AbemaTVIE'):
|
||||
# the protocol that this should really handle is 'abematv-license://'
|
||||
# abematv_license_open is just a placeholder for development purposes
|
||||
# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
|
||||
setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open'))
|
||||
self.ie = ie
|
||||
|
||||
def _get_videokey_from_ticket(self, ticket):
|
||||
to_show = self.ie.get_param('verbose', False)
|
||||
media_token = self.ie._get_media_token(to_show=to_show)
|
||||
|
||||
license_response = self.ie._download_json(
|
||||
'https://license.abema.io/abematv-hls', None, note='Requesting playback license' if to_show else False,
|
||||
query={'t': media_token},
|
||||
data=json.dumps({
|
||||
'kv': 'a',
|
||||
'lt': ticket
|
||||
}).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
res = decode_base_n(license_response['k'], table=self.STRTABLE)
|
||||
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
|
||||
|
||||
h = hmac.new(
|
||||
binascii.unhexlify(self.HKEY),
|
||||
(license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
|
||||
digestmod=hashlib.sha256)
|
||||
enckey = bytes_to_intlist(h.digest())
|
||||
|
||||
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
|
||||
def abematv_license_open(self, url):
|
||||
url = request_to_url(url)
|
||||
ticket = urllib.parse.urlparse(url).netloc
|
||||
response_data = self._get_videokey_from_ticket(ticket)
|
||||
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
|
||||
'Content-Length': len(response_data),
|
||||
}, url=url, code=200)
|
||||
|
||||
|
||||
class AbemaTVBaseIE(InfoExtractor):
|
||||
_USERTOKEN = None
|
||||
_DEVICE_ID = None
|
||||
_MEDIATOKEN = None
|
||||
|
||||
_SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'
|
||||
|
||||
@classmethod
|
||||
def _generate_aks(cls, deviceid):
|
||||
deviceid = deviceid.encode('utf-8')
|
||||
# add 1 hour and then drop minute and secs
|
||||
ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
|
||||
time_struct = time.gmtime(ts_1hour)
|
||||
ts_1hour_str = str(ts_1hour).encode('utf-8')
|
||||
|
||||
tmp = None
|
||||
|
||||
def mix_once(nonce):
|
||||
nonlocal tmp
|
||||
h = hmac.new(cls._SECRETKEY, digestmod=hashlib.sha256)
|
||||
h.update(nonce)
|
||||
tmp = h.digest()
|
||||
|
||||
def mix_tmp(count):
|
||||
nonlocal tmp
|
||||
for i in range(count):
|
||||
mix_once(tmp)
|
||||
|
||||
def mix_twist(nonce):
|
||||
nonlocal tmp
|
||||
mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce)
|
||||
|
||||
mix_once(cls._SECRETKEY)
|
||||
mix_tmp(time_struct.tm_mon)
|
||||
mix_twist(deviceid)
|
||||
mix_tmp(time_struct.tm_mday % 5)
|
||||
mix_twist(ts_1hour_str)
|
||||
mix_tmp(time_struct.tm_hour % 5)
|
||||
|
||||
return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')
|
||||
|
||||
def _get_device_token(self):
|
||||
if self._USERTOKEN:
|
||||
return self._USERTOKEN
|
||||
|
||||
username, _ = self._get_login_info()
|
||||
AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
|
||||
if AbemaTVBaseIE._USERTOKEN:
|
||||
# try authentication with locally stored token
|
||||
try:
|
||||
self._get_media_token(True)
|
||||
return
|
||||
except ExtractorError as e:
|
||||
self.report_warning(f'Failed to login with cached user token; obtaining a fresh one ({e})')
|
||||
|
||||
AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
|
||||
aks = self._generate_aks(self._DEVICE_ID)
|
||||
user_data = self._download_json(
|
||||
'https://api.abema.io/v1/users', None, note='Authorizing',
|
||||
data=json.dumps({
|
||||
'deviceId': self._DEVICE_ID,
|
||||
'applicationKeySecret': aks,
|
||||
}).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
AbemaTVBaseIE._USERTOKEN = user_data['token']
|
||||
|
||||
# don't allow adding it 2 times or more, though it's guarded
|
||||
remove_opener(self._downloader, AbemaLicenseHandler)
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
|
||||
return self._USERTOKEN
|
||||
|
||||
def _get_media_token(self, invalidate=False, to_show=True):
|
||||
if not invalidate and self._MEDIATOKEN:
|
||||
return self._MEDIATOKEN
|
||||
|
||||
AbemaTVBaseIE._MEDIATOKEN = self._download_json(
|
||||
'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
|
||||
query={
|
||||
'osName': 'android',
|
||||
'osVersion': '6.0.1',
|
||||
'osLang': 'ja_JP',
|
||||
'osTimezone': 'Asia/Tokyo',
|
||||
'appId': 'tv.abema',
|
||||
'appVersion': '3.27.1'
|
||||
}, headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
})['token']
|
||||
|
||||
return self._MEDIATOKEN
|
||||
|
||||
def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
|
||||
return self._download_json(
|
||||
f'https://api.abema.io/{endpoint}', video_id, query=query or {},
|
||||
note=note,
|
||||
headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
})
|
||||
|
||||
def _extract_breadcrumb_list(self, webpage, video_id):
|
||||
for jld in re.finditer(
|
||||
r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
||||
webpage):
|
||||
jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
|
||||
if traverse_obj(jsonld, '@type') != 'BreadcrumbList':
|
||||
continue
|
||||
items = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
|
||||
if items:
|
||||
return items
|
||||
return []
|
||||
|
||||
|
||||
class AbemaTVIE(AbemaTVBaseIE):
|
||||
_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
_TESTS = [{
|
||||
'url': 'https://abema.tv/video/episode/194-25_s2_p1',
|
||||
'info_dict': {
|
||||
'id': '194-25_s2_p1',
|
||||
'title': '第1話 「チーズケーキ」 「モーニング再び」',
|
||||
'series': '異世界食堂2',
|
||||
'series_number': 2,
|
||||
'episode': '第1話 「チーズケーキ」 「モーニング再び」',
|
||||
'episode_number': 1,
|
||||
},
|
||||
'skip': 'expired',
|
||||
}, {
|
||||
'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
|
||||
'info_dict': {
|
||||
'id': 'E8tvAnMJ7a9a5d',
|
||||
'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
|
||||
'series': 'ゆるキャン△ SEASON2',
|
||||
'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
|
||||
'series_number': 2,
|
||||
'episode_number': 1,
|
||||
'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
|
||||
},
|
||||
'skip': 'expired',
|
||||
}, {
|
||||
'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
|
||||
'info_dict': {
|
||||
'id': 'E8tvAnMJ7a9a5d',
|
||||
'title': '第5話『光射す』',
|
||||
'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
|
||||
'thumbnail': r're:https://hayabusa\.io/.+',
|
||||
'series': '相棒',
|
||||
'episode': '第5話『光射す』',
|
||||
},
|
||||
'skip': 'expired',
|
||||
}, {
|
||||
'url': 'https://abema.tv/now-on-air/abema-anime',
|
||||
'info_dict': {
|
||||
'id': 'abema-anime',
|
||||
# this varies
|
||||
# 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
|
||||
'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
|
||||
'is_live': True,
|
||||
},
|
||||
'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
|
||||
}]
|
||||
_TIMETABLE = None
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._get_device_token()
|
||||
if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
|
||||
self.write_debug('Skipping logging in')
|
||||
return
|
||||
|
||||
if '@' in username: # don't strictly check if it's email address or not
|
||||
ep, method = 'user/email', 'email'
|
||||
else:
|
||||
ep, method = 'oneTimePassword', 'userId'
|
||||
|
||||
login_response = self._download_json(
|
||||
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
|
||||
data=json.dumps({
|
||||
method: username,
|
||||
'password': password
|
||||
}).encode('utf-8'), headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
'Origin': 'https://abema.tv',
|
||||
'Referer': 'https://abema.tv/',
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
AbemaTVBaseIE._USERTOKEN = login_response['token']
|
||||
self._get_media_token(True)
|
||||
self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN)
|
||||
|
||||
def _real_extract(self, url):
|
||||
# starting download using infojson from this extractor is undefined behavior,
|
||||
# and never be fixed in the future; you must trigger downloads by directly specifying URL.
|
||||
# (unless there's a way to hook before downloading by extractor)
|
||||
video_id, video_type = self._match_valid_url(url).group('id', 'type')
|
||||
headers = {
|
||||
'Authorization': 'Bearer ' + self._get_device_token(),
|
||||
}
|
||||
video_type = video_type.split('/')[-1]
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
canonical_url = self._search_regex(
|
||||
r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
|
||||
default=url)
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
title = self._search_regex(
|
||||
r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
|
||||
if not title:
|
||||
jsonld = None
|
||||
for jld in re.finditer(
|
||||
r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
||||
webpage):
|
||||
jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
|
||||
if jsonld:
|
||||
break
|
||||
if jsonld:
|
||||
title = jsonld.get('caption')
|
||||
if not title and video_type == 'now-on-air':
|
||||
if not self._TIMETABLE:
|
||||
# cache the timetable because it goes to 5MiB in size (!!)
|
||||
self._TIMETABLE = self._download_json(
|
||||
'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
|
||||
headers=headers)
|
||||
now = time_seconds(hours=9)
|
||||
for slot in self._TIMETABLE.get('slots', []):
|
||||
if slot.get('channelId') != video_id:
|
||||
continue
|
||||
if slot['startAt'] <= now and now < slot['endAt']:
|
||||
title = slot['title']
|
||||
break
|
||||
|
||||
# read breadcrumb on top of page
|
||||
breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
|
||||
if breadcrumb:
|
||||
# breadcrumb list translates to: (e.g. 1st test for this IE)
|
||||
# Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
|
||||
# hence this works
|
||||
info['series'] = breadcrumb[-2]
|
||||
info['episode'] = breadcrumb[-1]
|
||||
if not title:
|
||||
title = info['episode']
|
||||
|
||||
description = self._html_search_regex(
|
||||
(r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
|
||||
r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
|
||||
webpage, 'description', default=None, group=1)
|
||||
if not description:
|
||||
og_desc = self._html_search_meta(
|
||||
('description', 'og:description', 'twitter:description'), webpage)
|
||||
if og_desc:
|
||||
description = re.sub(r'''(?sx)
|
||||
^(.+?)(?:
|
||||
アニメの動画を無料で見るならABEMA!| # anime
|
||||
等、.+ # applies for most of categories
|
||||
)?
|
||||
''', r'\1', og_desc)
|
||||
|
||||
# canonical URL may contain series and episode number
|
||||
mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
|
||||
if mobj:
|
||||
seri = int_or_none(mobj.group(1), default=float('inf'))
|
||||
epis = int_or_none(mobj.group(2), default=float('inf'))
|
||||
info['series_number'] = seri if seri < 100 else None
|
||||
# some anime like Detective Conan (though not available in AbemaTV)
|
||||
# has more than 1000 episodes (1026 as of 2021/11/15)
|
||||
info['episode_number'] = epis if epis < 2000 else None
|
||||
|
||||
is_live, m3u8_url = False, None
|
||||
if video_type == 'now-on-air':
|
||||
is_live = True
|
||||
channel_url = 'https://api.abema.io/v1/channels'
|
||||
if video_id == 'news-global':
|
||||
channel_url = update_url_query(channel_url, {'division': '1'})
|
||||
onair_channels = self._download_json(channel_url, video_id)
|
||||
for ch in onair_channels['channels']:
|
||||
if video_id == ch['id']:
|
||||
m3u8_url = ch['playback']['hls']
|
||||
break
|
||||
else:
|
||||
raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
|
||||
elif video_type == 'episode':
|
||||
api_response = self._download_json(
|
||||
f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
|
||||
note='Checking playability',
|
||||
headers=headers)
|
||||
ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
|
||||
if 3 not in ondemand_types:
|
||||
# cannot acquire decryption key for these streams
|
||||
self.report_warning('This is a premium-only stream')
|
||||
|
||||
m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
|
||||
elif video_type == 'slots':
|
||||
api_response = self._download_json(
|
||||
f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
|
||||
note='Checking playability',
|
||||
headers=headers)
|
||||
if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
|
||||
self.report_warning('This is a premium-only stream')
|
||||
|
||||
m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
|
||||
else:
|
||||
raise ExtractorError('Unreachable')
|
||||
|
||||
if is_live:
|
||||
self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
|
||||
self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', live=is_live)
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class AbemaTVTitleIE(AbemaTVBaseIE):
|
||||
_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
|
||||
_PAGE_SIZE = 25
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://abema.tv/video/title/90-1597',
|
||||
'info_dict': {
|
||||
'id': '90-1597',
|
||||
'title': 'シャッフルアイランド',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}, {
|
||||
'url': 'https://abema.tv/video/title/193-132',
|
||||
'info_dict': {
|
||||
'id': '193-132',
|
||||
'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
|
||||
},
|
||||
'playlist_mincount': 16,
|
||||
}, {
|
||||
'url': 'https://abema.tv/video/title/25-102',
|
||||
'info_dict': {
|
||||
'id': '25-102',
|
||||
'title': 'ソードアート・オンライン アリシゼーション',
|
||||
},
|
||||
'playlist_mincount': 24,
|
||||
}]
|
||||
|
||||
def _fetch_page(self, playlist_id, series_version, page):
|
||||
programs = self._call_api(
|
||||
f'v1/video/series/{playlist_id}/programs', playlist_id,
|
||||
note=f'Downloading page {page + 1}',
|
||||
query={
|
||||
'seriesVersion': series_version,
|
||||
'offset': str(page * self._PAGE_SIZE),
|
||||
'order': 'seq',
|
||||
'limit': str(self._PAGE_SIZE),
|
||||
})
|
||||
yield from (
|
||||
self.url_result(f'https://abema.tv/video/episode/{x}')
|
||||
for x in traverse_obj(programs, ('programs', ..., 'id')))
|
||||
|
||||
def _entries(self, playlist_id, series_version):
|
||||
return OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, playlist_id, series_version),
|
||||
self._PAGE_SIZE)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(playlist_id, series_info['version']), playlist_id=playlist_id,
|
||||
playlist_title=series_info.get('title'),
|
||||
playlist_description=series_info.get('content'))
|
@@ -1,5 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -1,7 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
|
199
plugins/youtube_download/yt_dlp/extractor/acfun.py
Normal file
199
plugins/youtube_download/yt_dlp/extractor/acfun.py
Normal file
@@ -0,0 +1,199 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
parse_codecs,
|
||||
parse_qs,
|
||||
)
|
||||
|
||||
|
||||
class AcFunVideoBaseIE(InfoExtractor):
|
||||
def _extract_metadata(self, video_id, video_info):
|
||||
playjson = self._parse_json(video_info['ksPlayJson'], video_id)
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for video in traverse_obj(playjson, ('adaptationSet', 0, 'representation')):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(video['url'], video_id, 'mp4', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
for f in fmts:
|
||||
f.update({
|
||||
'fps': float_or_none(video.get('frameRate')),
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'tbr': float_or_none(video.get('avgBitrate')),
|
||||
**parse_codecs(video.get('codecs', ''))
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'duration': float_or_none(video_info.get('durationMillis'), 1000),
|
||||
'timestamp': int_or_none(video_info.get('uploadTime'), 1000),
|
||||
'http_headers': {'Referer': 'https://www.acfun.cn/'},
|
||||
}
|
||||
|
||||
|
||||
class AcFunVideoIE(AcFunVideoBaseIE):
|
||||
_VALID_URL = r'https?://www\.acfun\.cn/v/ac(?P<id>[_\d]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.acfun.cn/v/ac35457073',
|
||||
'info_dict': {
|
||||
'id': '35457073',
|
||||
'ext': 'mp4',
|
||||
'duration': 174.208,
|
||||
'timestamp': 1656403967,
|
||||
'title': '1 8 岁 现 状',
|
||||
'description': '“赶紧回去!班主任查班了!”',
|
||||
'uploader': '锤子game',
|
||||
'uploader_id': '51246077',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
|
||||
'upload_date': '20220628',
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'tags': list,
|
||||
},
|
||||
}, {
|
||||
# example for len(video_list) > 1
|
||||
'url': 'https://www.acfun.cn/v/ac35468952_2',
|
||||
'info_dict': {
|
||||
'id': '35468952_2',
|
||||
'ext': 'mp4',
|
||||
'title': '【动画剧集】Rocket & Groot Season 1(2022)/火箭浣熊与格鲁特第1季 P02 S01E02 十拿九穩',
|
||||
'duration': 90.459,
|
||||
'uploader': '比令',
|
||||
'uploader_id': '37259967',
|
||||
'upload_date': '20220629',
|
||||
'timestamp': 1656479962,
|
||||
'tags': list,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
|
||||
'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
json_all = self._search_json(r'window.videoInfo\s*=', webpage, 'videoInfo', video_id)
|
||||
|
||||
title = json_all.get('title')
|
||||
video_list = json_all.get('videoList') or []
|
||||
video_internal_id = traverse_obj(json_all, ('currentVideoInfo', 'id'))
|
||||
if video_internal_id and len(video_list) > 1:
|
||||
part_idx, part_video_info = next(
|
||||
(idx + 1, v) for (idx, v) in enumerate(video_list)
|
||||
if v['id'] == video_internal_id)
|
||||
title = f'{title} P{part_idx:02d} {part_video_info["title"]}'
|
||||
|
||||
return {
|
||||
**self._extract_metadata(video_id, json_all['currentVideoInfo']),
|
||||
'title': title,
|
||||
'thumbnail': json_all.get('coverUrl'),
|
||||
'description': json_all.get('description'),
|
||||
'uploader': traverse_obj(json_all, ('user', 'name')),
|
||||
'uploader_id': traverse_obj(json_all, ('user', 'href')),
|
||||
'tags': traverse_obj(json_all, ('tagList', ..., 'name')),
|
||||
'view_count': int_or_none(json_all.get('viewCount')),
|
||||
'like_count': int_or_none(json_all.get('likeCountShow')),
|
||||
'comment_count': int_or_none(json_all.get('commentCountShow')),
|
||||
}
|
||||
|
||||
|
||||
class AcFunBangumiIE(AcFunVideoBaseIE):
|
||||
_VALID_URL = r'https?://www\.acfun\.cn/bangumi/(?P<id>aa[_\d]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.acfun.cn/bangumi/aa6002917_36188_1745457?ac=2',
|
||||
'info_dict': {
|
||||
'id': 'aa6002917_36188_1745457__2',
|
||||
'ext': 'mp4',
|
||||
'title': '【7月】租借女友 水原千鹤角色曲『DATE』特别PV',
|
||||
'upload_date': '20200916',
|
||||
'timestamp': 1600243813,
|
||||
'duration': 92.091,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.acfun.cn/bangumi/aa5023171_36188_1750645',
|
||||
'info_dict': {
|
||||
'id': 'aa5023171_36188_1750645',
|
||||
'ext': 'mp4',
|
||||
'title': '红孩儿之趴趴蛙寻石记 第5话 ',
|
||||
'duration': 760.0,
|
||||
'season': '红孩儿之趴趴蛙寻石记',
|
||||
'season_id': 5023171,
|
||||
'season_number': 1, # series has only 1 season
|
||||
'episode': 'Episode 5',
|
||||
'episode_number': 5,
|
||||
'upload_date': '20181223',
|
||||
'timestamp': 1545552185,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.acfun.cn/bangumi/aa6065485_36188_1885061',
|
||||
'info_dict': {
|
||||
'id': 'aa6065485_36188_1885061',
|
||||
'ext': 'mp4',
|
||||
'title': '叽歪老表(第二季) 第5话 坚不可摧',
|
||||
'season': '叽歪老表(第二季)',
|
||||
'season_number': 2,
|
||||
'season_id': 6065485,
|
||||
'episode': '坚不可摧',
|
||||
'episode_number': 5,
|
||||
'upload_date': '20220324',
|
||||
'timestamp': 1648082786,
|
||||
'duration': 105.002,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
|
||||
'comment_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
ac_idx = parse_qs(url).get('ac', [None])[-1]
|
||||
video_id = f'{video_id}{format_field(ac_idx, None, "__%s")}'
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
json_bangumi_data = self._search_json(r'window.bangumiData\s*=', webpage, 'bangumiData', video_id)
|
||||
|
||||
if ac_idx:
|
||||
video_info = json_bangumi_data['hlVideoInfo']
|
||||
return {
|
||||
**self._extract_metadata(video_id, video_info),
|
||||
'title': video_info.get('title'),
|
||||
}
|
||||
|
||||
video_info = json_bangumi_data['currentVideoInfo']
|
||||
|
||||
season_id = json_bangumi_data.get('bangumiId')
|
||||
season_number = season_id and next((
|
||||
idx for idx, v in enumerate(json_bangumi_data.get('relatedBangumis') or [], 1)
|
||||
if v.get('id') == season_id), 1)
|
||||
|
||||
json_bangumi_list = self._search_json(
|
||||
r'window\.bangumiList\s*=', webpage, 'bangumiList', video_id, fatal=False)
|
||||
video_internal_id = int_or_none(traverse_obj(json_bangumi_data, ('currentVideoInfo', 'id')))
|
||||
episode_number = video_internal_id and next((
|
||||
idx for idx, v in enumerate(json_bangumi_list.get('items') or [], 1)
|
||||
if v.get('videoId') == video_internal_id), None)
|
||||
|
||||
return {
|
||||
**self._extract_metadata(video_id, video_info),
|
||||
'title': json_bangumi_data.get('showTitle'),
|
||||
'thumbnail': json_bangumi_data.get('image'),
|
||||
'season': json_bangumi_data.get('bangumiTitle'),
|
||||
'season_id': season_id,
|
||||
'season_number': season_number,
|
||||
'episode': json_bangumi_data.get('title'),
|
||||
'episode_number': episode_number,
|
||||
'comment_count': int_or_none(json_bangumi_data.get('commentCount')),
|
||||
}
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import json
|
||||
@@ -31,30 +28,34 @@ from ..utils import (
|
||||
|
||||
|
||||
class ADNIE(InfoExtractor):
|
||||
IE_DESC = 'Anime Digital Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'md5': '0319c99885ff5547565cacb4f3f9348d',
|
||||
IE_DESC = 'Animation Digital Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
||||
'info_dict': {
|
||||
'id': '7778',
|
||||
'id': '9841',
|
||||
'ext': 'mp4',
|
||||
'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
|
||||
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
||||
'series': 'Blue Exorcist - Kyôto Saga',
|
||||
'duration': 1467,
|
||||
'release_date': '20170106',
|
||||
'title': 'Fruits Basket - Episode 1',
|
||||
'description': 'md5:14be2f72c3c96809b0ca424b0097d336',
|
||||
'series': 'Fruits Basket',
|
||||
'duration': 1437,
|
||||
'release_date': '20190405',
|
||||
'comment_count': int,
|
||||
'average_rating': float,
|
||||
'season_number': 2,
|
||||
'episode': 'Début des hostilités',
|
||||
'season_number': 1,
|
||||
'episode': 'À ce soir !',
|
||||
'episode_number': 1,
|
||||
}
|
||||
}
|
||||
},
|
||||
'skip': 'Only available in region (FR, ...)',
|
||||
}, {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_NETRC_MACHINE = 'animedigitalnetwork'
|
||||
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
|
||||
_NETRC_MACHINE = 'animationdigitalnetwork'
|
||||
_BASE = 'animationdigitalnetwork.fr'
|
||||
_API_BASE_URL = 'https://gw.api.' + _BASE + '/'
|
||||
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
@@ -78,14 +79,14 @@ class ADNIE(InfoExtractor):
|
||||
if subtitle_location:
|
||||
enc_subtitles = self._download_webpage(
|
||||
subtitle_location, video_id, 'Downloading subtitles data',
|
||||
fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
|
||||
fatal=False, headers={'Origin': 'https://' + self._BASE})
|
||||
if not enc_subtitles:
|
||||
return None
|
||||
|
||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
# http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
|
||||
compat_b64decode(enc_subtitles[24:]),
|
||||
binascii.unhexlify(self._K + 'ab9f52f5baae7c72'),
|
||||
binascii.unhexlify(self._K + '7fac1178830cfe0c'),
|
||||
compat_b64decode(enc_subtitles[:24])))
|
||||
subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False)
|
||||
if not subtitles_json:
|
||||
@@ -126,10 +127,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
}])
|
||||
return subtitles
|
||||
|
||||
def _real_initialize(self):
|
||||
username, password = self._get_login_info()
|
||||
if not username:
|
||||
return
|
||||
def _perform_login(self, username, password):
|
||||
try:
|
||||
access_token = (self._download_json(
|
||||
self._API_BASE_URL + 'authentication/login', None,
|
||||
@@ -170,7 +168,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
}, data=b'')['token']
|
||||
|
||||
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
|
||||
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
|
||||
self._K = ''.join(random.choices('0123456789abcdef', k=16))
|
||||
message = bytes_to_intlist(json.dumps({
|
||||
'k': self._K,
|
||||
't': token,
|
||||
@@ -237,7 +235,6 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'fr'
|
||||
formats.extend(m3u8_formats)
|
||||
self._sort_formats(formats)
|
||||
|
||||
video = (self._download_json(
|
||||
self._API_BASE_URL + 'video/%s' % video_id, video_id,
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
@@ -14,7 +11,7 @@ class AdobeConnectIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
|
||||
title = self._html_extract_title(webpage)
|
||||
qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
|
||||
is_live = qs.get('isLive', ['false'])[0] == 'true'
|
||||
formats = []
|
||||
|
@@ -1,26 +1,20 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import getpass
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import urllib.error
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_kwargs,
|
||||
compat_urlparse,
|
||||
compat_getpass
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
urlencode_postdata,
|
||||
unified_timestamp,
|
||||
ExtractorError,
|
||||
NO_DEFAULT,
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
MSO_INFO = {
|
||||
'DTV': {
|
||||
'name': 'DIRECTV',
|
||||
@@ -1345,10 +1339,20 @@ MSO_INFO = {
|
||||
'username_field': 'username',
|
||||
'password_field': 'password',
|
||||
},
|
||||
'Suddenlink': {
|
||||
'name': 'Suddenlink',
|
||||
'username_field': 'username',
|
||||
'password_field': 'password',
|
||||
},
|
||||
'AlticeOne': {
|
||||
'name': 'Optimum TV',
|
||||
'username_field': 'j_username',
|
||||
'password_field': 'j_password',
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class AdobePassIE(InfoExtractor):
|
||||
class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
|
||||
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
|
||||
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||
_MVPD_CACHE = 'ap-mvpd'
|
||||
@@ -1360,7 +1364,7 @@ class AdobePassIE(InfoExtractor):
|
||||
headers.update(kwargs.get('headers', {}))
|
||||
kwargs['headers'] = headers
|
||||
return super(AdobePassIE, self)._download_webpage_handle(
|
||||
*args, **compat_kwargs(kwargs))
|
||||
*args, **kwargs)
|
||||
|
||||
@staticmethod
|
||||
def _get_mvpd_resource(provider_id, title, guid, rating):
|
||||
@@ -1429,32 +1433,34 @@ class AdobePassIE(InfoExtractor):
|
||||
guid = xml_text(resource, 'guid') if '<' in resource else resource
|
||||
count = 0
|
||||
while count < 2:
|
||||
requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {}
|
||||
requestor_info = self.cache.load(self._MVPD_CACHE, requestor_id) or {}
|
||||
authn_token = requestor_info.get('authn_token')
|
||||
if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
|
||||
authn_token = None
|
||||
if not authn_token:
|
||||
# TODO add support for other TV Providers
|
||||
mso_id = self.get_param('ap_mso')
|
||||
if mso_id:
|
||||
username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
|
||||
if not username or not password:
|
||||
raise_mvpd_required()
|
||||
mso_info = MSO_INFO[mso_id]
|
||||
|
||||
provider_redirect_page_res = self._download_webpage_handle(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
|
||||
'Downloading Provider Redirect Page', query={
|
||||
'noflash': 'true',
|
||||
'mso_id': mso_id,
|
||||
'requestor_id': requestor_id,
|
||||
'no_iframe': 'false',
|
||||
'domain_name': 'adobe.com',
|
||||
'redirect_url': url,
|
||||
})
|
||||
elif not self._cookies_passed:
|
||||
raise_mvpd_required()
|
||||
|
||||
if not mso_id:
|
||||
raise_mvpd_required()
|
||||
username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
|
||||
if not username or not password:
|
||||
raise_mvpd_required()
|
||||
mso_info = MSO_INFO[mso_id]
|
||||
|
||||
provider_redirect_page_res = self._download_webpage_handle(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
|
||||
'Downloading Provider Redirect Page', query={
|
||||
'noflash': 'true',
|
||||
'mso_id': mso_id,
|
||||
'requestor_id': requestor_id,
|
||||
'no_iframe': 'false',
|
||||
'domain_name': 'adobe.com',
|
||||
'redirect_url': url,
|
||||
})
|
||||
|
||||
if mso_id == 'Comcast_SSO':
|
||||
pass
|
||||
elif mso_id == 'Comcast_SSO':
|
||||
# Comcast page flow varies by video site and whether you
|
||||
# are on Comcast's network.
|
||||
provider_redirect_page, urlh = provider_redirect_page_res
|
||||
@@ -1502,7 +1508,7 @@ class AdobePassIE(InfoExtractor):
|
||||
'send_confirm_link': False,
|
||||
'send_token': True
|
||||
}))
|
||||
philo_code = compat_getpass('Type auth code you have received [Return]: ')
|
||||
philo_code = getpass.getpass('Type auth code you have received [Return]: ')
|
||||
self._download_webpage(
|
||||
'https://idp.philo.com/auth/update/login_code', video_id, 'Submitting token', data=urlencode_postdata({
|
||||
'token': philo_code
|
||||
@@ -1635,6 +1641,58 @@ class AdobePassIE(InfoExtractor):
|
||||
urlh.geturl(), video_id, 'Sending final bookend',
|
||||
query=hidden_data)
|
||||
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
elif mso_id == 'Suddenlink':
|
||||
# Suddenlink is similar to SlingTV in using a tab history count and a meta refresh,
|
||||
# but they also do a dynmaic redirect using javascript that has to be followed as well
|
||||
first_bookend_page, urlh = post_form(
|
||||
provider_redirect_page_res, 'Pressing Continue...')
|
||||
|
||||
hidden_data = self._hidden_inputs(first_bookend_page)
|
||||
hidden_data['history_val'] = 1
|
||||
|
||||
provider_login_redirect_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending First Bookend',
|
||||
query=hidden_data)
|
||||
|
||||
provider_login_redirect_page, urlh = provider_login_redirect_page_res
|
||||
|
||||
# Some website partners seem to not have the extra ajaxurl redirect step, so we check if we already
|
||||
# have the login prompt or not
|
||||
if 'id="password" type="password" name="password"' in provider_login_redirect_page:
|
||||
provider_login_page_res = provider_login_redirect_page_res
|
||||
else:
|
||||
provider_tryauth_url = self._html_search_regex(
|
||||
r'url:\s*[\'"]([^\'"]+)', provider_login_redirect_page, 'ajaxurl')
|
||||
provider_tryauth_page = self._download_webpage(
|
||||
provider_tryauth_url, video_id, 'Submitting TryAuth',
|
||||
query=hidden_data)
|
||||
|
||||
provider_login_page_res = self._download_webpage_handle(
|
||||
f'https://authorize.suddenlink.net/saml/module.php/authSynacor/login.php?AuthState={provider_tryauth_page}',
|
||||
video_id, 'Getting Login Page',
|
||||
query=hidden_data)
|
||||
|
||||
provider_association_redirect, urlh = post_form(
|
||||
provider_login_page_res, 'Logging in', {
|
||||
mso_info['username_field']: username,
|
||||
mso_info['password_field']: password
|
||||
})
|
||||
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_association_redirect, url=urlh.geturl())
|
||||
|
||||
last_bookend_page, urlh = self._download_webpage_handle(
|
||||
provider_refresh_redirect_url, video_id,
|
||||
'Downloading Auth Association Redirect Page')
|
||||
|
||||
hidden_data = self._hidden_inputs(last_bookend_page)
|
||||
hidden_data['history_val'] = 3
|
||||
|
||||
mvpd_confirm_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending Final Bookend',
|
||||
query=hidden_data)
|
||||
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
else:
|
||||
# Some providers (e.g. DIRECTV NOW) have another meta refresh
|
||||
@@ -1652,25 +1710,30 @@ class AdobePassIE(InfoExtractor):
|
||||
mso_info.get('username_field', 'username'): username,
|
||||
mso_info.get('password_field', 'password'): password
|
||||
}
|
||||
if mso_id == 'Cablevision':
|
||||
if mso_id in ('Cablevision', 'AlticeOne'):
|
||||
form_data['_eventId_proceed'] = ''
|
||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', form_data)
|
||||
if mso_id != 'Rogers':
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
|
||||
session = self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
|
||||
'Retrieving Session', data=urlencode_postdata({
|
||||
'_method': 'GET',
|
||||
'requestor_id': requestor_id,
|
||||
}), headers=mvpd_headers)
|
||||
try:
|
||||
session = self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
|
||||
'Retrieving Session', data=urlencode_postdata({
|
||||
'_method': 'GET',
|
||||
'requestor_id': requestor_id,
|
||||
}), headers=mvpd_headers)
|
||||
except ExtractorError as e:
|
||||
if not mso_id and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
|
||||
raise_mvpd_required()
|
||||
raise
|
||||
if '<pendingLogout' in session:
|
||||
self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
|
||||
self.cache.store(self._MVPD_CACHE, requestor_id, {})
|
||||
count += 1
|
||||
continue
|
||||
authn_token = unescapeHTML(xml_text(session, 'authnToken'))
|
||||
requestor_info['authn_token'] = authn_token
|
||||
self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
|
||||
self.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
|
||||
|
||||
authz_token = requestor_info.get(guid)
|
||||
if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
|
||||
@@ -1686,14 +1749,14 @@ class AdobePassIE(InfoExtractor):
|
||||
'userMeta': '1',
|
||||
}), headers=mvpd_headers)
|
||||
if '<pendingLogout' in authorize:
|
||||
self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
|
||||
self.cache.store(self._MVPD_CACHE, requestor_id, {})
|
||||
count += 1
|
||||
continue
|
||||
if '<error' in authorize:
|
||||
raise ExtractorError(xml_text(authorize, 'details'), expected=True)
|
||||
authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
|
||||
requestor_info[guid] = authz_token
|
||||
self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
|
||||
self.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
|
||||
|
||||
mvpd_headers.update({
|
||||
'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
|
||||
@@ -1709,7 +1772,7 @@ class AdobePassIE(InfoExtractor):
|
||||
'hashed_guid': 'false',
|
||||
}), headers=mvpd_headers)
|
||||
if '<pendingLogout' in short_authorize:
|
||||
self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
|
||||
self.cache.store(self._MVPD_CACHE, requestor_id, {})
|
||||
count += 1
|
||||
continue
|
||||
return short_authorize
|
||||
|
@@ -1,5 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import re
|
||||
|
||||
@@ -72,7 +70,6 @@ class AdobeTVBaseIE(InfoExtractor):
|
||||
})
|
||||
s3_extracted = True
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -234,6 +231,7 @@ class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
|
||||
class AdobeTVVideoIE(AdobeTVBaseIE):
|
||||
IE_NAME = 'adobetv:video'
|
||||
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]']
|
||||
|
||||
_TEST = {
|
||||
# From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
|
||||
@@ -270,7 +268,6 @@ class AdobeTVVideoIE(AdobeTVBaseIE):
|
||||
'width': int_or_none(source.get('width') or None),
|
||||
'url': source_src,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
# For both metadata and downloaded files the duration varies among
|
||||
# formats. I just pick the max one
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
@@ -183,7 +180,6 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
info['subtitles'].setdefault('en', []).append({
|
||||
'url': asset_url,
|
||||
})
|
||||
self._sort_formats(info['formats'])
|
||||
|
||||
return info
|
||||
else:
|
||||
|
@@ -1,7 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -12,7 +8,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class AENetworksBaseIE(ThePlatformIE):
|
||||
class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
_BASE_URL_REGEX = r'''(?x)https?://
|
||||
(?:(?:www|play|watch)\.)?
|
||||
(?P<domain>
|
||||
@@ -32,14 +28,17 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
}
|
||||
|
||||
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
||||
query = {'mbr': 'true'}
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
|
||||
}
|
||||
if auth:
|
||||
query['auth'] = auth
|
||||
TP_SMIL_QUERY = [{
|
||||
'assetTypes': 'high_video_ak',
|
||||
'switch': 'hls_high_ak'
|
||||
'switch': 'hls_high_ak',
|
||||
}, {
|
||||
'assetTypes': 'high_video_s3'
|
||||
'assetTypes': 'high_video_s3',
|
||||
}, {
|
||||
'assetTypes': 'high_video_s3',
|
||||
'switch': 'hls_high_fastly',
|
||||
@@ -63,7 +62,6 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
if last_e and not formats:
|
||||
raise last_e
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
@@ -305,7 +303,6 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||
class HistoryPlayerIE(AENetworksBaseIE):
|
||||
IE_NAME = 'history:player'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, video_id = self._match_valid_url(url).groups()
|
||||
|
40
plugins/youtube_download/yt_dlp/extractor/aeonco.py
Normal file
40
plugins/youtube_download/yt_dlp/extractor/aeonco.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
|
||||
|
||||
class AeonCoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?aeon\.co/videos/(?P<id>[^/?]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://aeon.co/videos/raw-solar-storm-footage-is-the-punk-rock-antidote-to-sleek-james-webb-imagery',
|
||||
'md5': 'e5884d80552c9b6ea8d268a258753362',
|
||||
'info_dict': {
|
||||
'id': '1284717',
|
||||
'ext': 'mp4',
|
||||
'title': 'Brilliant Noise',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/21006315-1a1e49da8b07fd908384a982b4ba9ff0268c509a474576ebdf7b1392f4acae3b-d_960',
|
||||
'uploader': 'Semiconductor',
|
||||
'uploader_id': 'semiconductor',
|
||||
'uploader_url': 'https://vimeo.com/semiconductor',
|
||||
'duration': 348
|
||||
}
|
||||
}, {
|
||||
'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
|
||||
'md5': '4e5f3dad9dbda0dbfa2da41a851e631e',
|
||||
'info_dict': {
|
||||
'id': '728595228',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wrought',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1484618528-c91452611f9a4e4497735a533da60d45b2fe472deb0c880f0afaab0cd2efb22a-d_1280',
|
||||
'uploader': 'Biofilm Productions',
|
||||
'uploader_id': 'user140352216',
|
||||
'uploader_url': 'https://vimeo.com/user140352216',
|
||||
'duration': 1344
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
vimeo_id = self._search_regex(r'hosterId":\s*"(?P<id>[0-9]+)', webpage, 'vimeo id')
|
||||
vimeo_url = VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{vimeo_id}', 'https://aeon.co')
|
||||
return self.url_result(vimeo_url, VimeoIE)
|
@@ -1,14 +1,12 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_xpath
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
date_from_str,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
traverse_obj,
|
||||
@@ -32,7 +30,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||
/app/(?:index|read_ucc_bbs)\.cgi|
|
||||
/player/[Pp]layer\.(?:swf|html)
|
||||
)\?.*?\bnTitleNo=|
|
||||
vod\.afreecatv\.com/PLAYER/STATION/
|
||||
vod\.afreecatv\.com/(PLAYER/STATION|player)/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
@@ -170,6 +168,9 @@ class AfreecaTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vod.afreecatv.com/player/15055030',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -181,14 +182,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||
video_key['part'] = int(m.group('part'))
|
||||
return video_key
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_form = {
|
||||
'szWork': 'login',
|
||||
'szType': 'json',
|
||||
@@ -284,7 +278,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||
else:
|
||||
raise ExtractorError('Unable to download video info')
|
||||
|
||||
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
|
||||
video_element = video_xml.findall('./track/video')[-1]
|
||||
if video_element is None or video_element.text is None:
|
||||
raise ExtractorError(
|
||||
'Video %s does not exist' % video_id, expected=True)
|
||||
@@ -314,7 +308,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||
|
||||
if not video_url:
|
||||
entries = []
|
||||
file_elements = video_element.findall(compat_xpath('./file'))
|
||||
file_elements = video_element.findall('./file')
|
||||
one = len(file_elements) == 1
|
||||
for file_num, file_element in enumerate(file_elements, start=1):
|
||||
file_url = url_or_none(file_element.text)
|
||||
@@ -344,7 +338,6 @@ class AfreecaTVIE(InfoExtractor):
|
||||
}]
|
||||
if not formats and not self.get_param('ignore_no_formats'):
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
file_info = common_entry.copy()
|
||||
file_info.update({
|
||||
'id': format_id,
|
||||
@@ -386,7 +379,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||
return info
|
||||
|
||||
|
||||
class AfreecaTVLiveIE(AfreecaTVIE):
|
||||
class AfreecaTVLiveIE(AfreecaTVIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
IE_NAME = 'afreecatv:live'
|
||||
_VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
|
||||
@@ -416,26 +409,35 @@ class AfreecaTVLiveIE(AfreecaTVIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno')
|
||||
password = self.get_param('videopassword')
|
||||
|
||||
info = self._download_json(self._LIVE_API_URL, broadcaster_id, fatal=False,
|
||||
data=urlencode_postdata({'bid': broadcaster_id})) or {}
|
||||
channel_info = info.get('CHANNEL') or {}
|
||||
broadcaster_id = channel_info.get('BJID') or broadcaster_id
|
||||
broadcast_no = channel_info.get('BNO') or broadcast_no
|
||||
password_protected = channel_info.get('BPWD')
|
||||
if not broadcast_no:
|
||||
raise ExtractorError(f'Unable to extract broadcast number ({broadcaster_id} may not be live)', expected=True)
|
||||
if password_protected == 'Y' and password is None:
|
||||
raise ExtractorError(
|
||||
'This livestream is protected by a password, use the --video-password option',
|
||||
expected=True)
|
||||
|
||||
formats = []
|
||||
quality_key = qualities(self._QUALITIES)
|
||||
for quality_str in self._QUALITIES:
|
||||
params = {
|
||||
'bno': broadcast_no,
|
||||
'stream_type': 'common',
|
||||
'type': 'aid',
|
||||
'quality': quality_str,
|
||||
}
|
||||
if password is not None:
|
||||
params['pwd'] = password
|
||||
aid_response = self._download_json(
|
||||
self._LIVE_API_URL, broadcast_no, fatal=False,
|
||||
data=urlencode_postdata({
|
||||
'bno': broadcast_no,
|
||||
'stream_type': 'common',
|
||||
'type': 'aid',
|
||||
'quality': quality_str,
|
||||
}),
|
||||
data=urlencode_postdata(params),
|
||||
note=f'Downloading access token for {quality_str} stream',
|
||||
errnote=f'Unable to download access token for {quality_str} stream')
|
||||
aid = traverse_obj(aid_response, ('CHANNEL', 'AID'))
|
||||
@@ -461,8 +463,6 @@ class AfreecaTVLiveIE(AfreecaTVIE):
|
||||
'quality': quality_key(quality_str),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
station_info = self._download_json(
|
||||
'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
|
||||
query={'szBjId': broadcaster_id}, fatal=False,
|
||||
@@ -477,3 +477,57 @@ class AfreecaTVLiveIE(AfreecaTVIE):
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
||||
|
||||
|
||||
class AfreecaTVUserIE(InfoExtractor):
|
||||
IE_NAME = 'afreecatv:user'
|
||||
_VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'
|
||||
_TESTS = [{
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
'title': 'ryuryu24 - review',
|
||||
},
|
||||
'playlist_count': 218,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'parang1995',
|
||||
'title': 'parang1995 - highlight',
|
||||
},
|
||||
'playlist_count': 997,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
'title': 'ryuryu24 - all',
|
||||
},
|
||||
'playlist_count': 221,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
'title': 'ryuryu24 - balloonclip',
|
||||
},
|
||||
'playlist_count': 0,
|
||||
}]
|
||||
_PER_PAGE = 60
|
||||
|
||||
def _fetch_page(self, user_id, user_type, page):
|
||||
page += 1
|
||||
info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id,
|
||||
query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
|
||||
note=f'Downloading {user_type} video page {page}')
|
||||
for item in info['data']:
|
||||
yield self.url_result(
|
||||
f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id, user_type = self._match_valid_url(url).group('id', 'slug_type')
|
||||
user_type = user_type or 'all'
|
||||
entries = OnDemandPagedList(functools.partial(self._fetch_page, user_id, user_type), self._PER_PAGE)
|
||||
return self.playlist_result(entries, user_id, f'{user_id} - {user_type}')
|
||||
|
251
plugins/youtube_download/yt_dlp/extractor/agora.py
Normal file
251
plugins/youtube_download/yt_dlp/extractor/agora.py
Normal file
@@ -0,0 +1,251 @@
|
||||
import functools
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
int_or_none,
|
||||
month_by_name,
|
||||
parse_duration,
|
||||
try_call,
|
||||
)
|
||||
|
||||
|
||||
class WyborczaVideoIE(InfoExtractor):
|
||||
# this id is not an article id, it has to be extracted from the article
|
||||
_VALID_URL = r'(?:wyborcza:video:|https?://wyborcza\.pl/(?:api-)?video/)(?P<id>\d+)'
|
||||
IE_NAME = 'wyborcza:video'
|
||||
_TESTS = [{
|
||||
'url': 'wyborcza:video:26207634',
|
||||
'info_dict': {
|
||||
'id': '26207634',
|
||||
'ext': 'mp4',
|
||||
'title': '- Polska w 2020 r. jest innym państwem niż w 2015 r. Nie zmieniła się konstytucja, ale jest to już inny ustrój - mówi Adam Bodnar',
|
||||
'description': ' ',
|
||||
'uploader': 'Dorota Roman',
|
||||
'duration': 2474,
|
||||
'thumbnail': r're:https://.+\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://wyborcza.pl/video/26207634',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://wyborcza.pl/api-video/26207634',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
meta = self._download_json(f'https://wyborcza.pl/api-video/{video_id}', video_id)
|
||||
|
||||
formats = []
|
||||
base_url = meta['redirector'].replace('http://', 'https://') + meta['basePath']
|
||||
for quality in ('standard', 'high'):
|
||||
if not meta['files'].get(quality):
|
||||
continue
|
||||
formats.append({
|
||||
'url': base_url + meta['files'][quality],
|
||||
'height': int_or_none(
|
||||
self._search_regex(
|
||||
r'p(\d+)[a-z]+\.mp4$', meta['files'][quality],
|
||||
'mp4 video height', default=None)),
|
||||
'format_id': quality,
|
||||
})
|
||||
if meta['files'].get('dash'):
|
||||
formats.extend(self._extract_mpd_formats(base_url + meta['files']['dash'], video_id))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': meta.get('title'),
|
||||
'description': meta.get('lead'),
|
||||
'uploader': meta.get('signature'),
|
||||
'thumbnail': meta.get('imageUrl'),
|
||||
'duration': meta.get('duration'),
|
||||
}
|
||||
|
||||
|
||||
class WyborczaPodcastIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?(?:
|
||||
wyborcza\.pl/podcast(?:/0,172673\.html)?|
|
||||
wysokieobcasy\.pl/wysokie-obcasy/0,176631\.html
|
||||
)(?:\?(?:[^&#]+?&)*podcast=(?P<id>\d+))?
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://wyborcza.pl/podcast/0,172673.html?podcast=100720#S.main_topic-K.C-B.6-L.1.podcast',
|
||||
'info_dict': {
|
||||
'id': '100720',
|
||||
'ext': 'mp3',
|
||||
'title': 'Cyfrodziewczyny. Kim były pionierki polskiej informatyki ',
|
||||
'uploader': 'Michał Nogaś ',
|
||||
'upload_date': '20210117',
|
||||
'description': 'md5:49f0a06ffc4c1931210d3ab1416a651d',
|
||||
'duration': 3684.0,
|
||||
'thumbnail': r're:https://.+\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html?podcast=100673',
|
||||
'info_dict': {
|
||||
'id': '100673',
|
||||
'ext': 'mp3',
|
||||
'title': 'Czym jest ubóstwo menstruacyjne i dlaczego dotyczy każdej i każdego z nas?',
|
||||
'uploader': 'Agnieszka Urazińska ',
|
||||
'upload_date': '20210115',
|
||||
'description': 'md5:c161dc035f8dbb60077011fc41274899',
|
||||
'duration': 1803.0,
|
||||
'thumbnail': r're:https://.+\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://wyborcza.pl/podcast',
|
||||
'info_dict': {
|
||||
'id': '334',
|
||||
'title': 'Gościnnie: Wyborcza, 8:10',
|
||||
'series': 'Gościnnie: Wyborcza, 8:10',
|
||||
},
|
||||
'playlist_mincount': 370,
|
||||
}, {
|
||||
'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html',
|
||||
'info_dict': {
|
||||
'id': '395',
|
||||
'title': 'Gościnnie: Wysokie Obcasy',
|
||||
'series': 'Gościnnie: Wysokie Obcasy',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
podcast_id = self._match_id(url)
|
||||
|
||||
if not podcast_id: # playlist
|
||||
podcast_id = '395' if 'wysokieobcasy.pl/' in url else '334'
|
||||
return self.url_result(TokFMAuditionIE._create_url(podcast_id), TokFMAuditionIE, podcast_id)
|
||||
|
||||
meta = self._download_json('https://wyborcza.pl/api/podcast', podcast_id,
|
||||
query={'guid': podcast_id, 'type': 'wo' if 'wysokieobcasy.pl/' in url else None})
|
||||
|
||||
day, month, year = self._search_regex(r'^(\d\d?) (\w+) (\d{4})$', meta.get('publishedDate'),
|
||||
'upload date', group=(1, 2, 3), default=(None, None, None))
|
||||
return {
|
||||
'id': podcast_id,
|
||||
'url': meta['url'],
|
||||
'title': meta.get('title'),
|
||||
'description': meta.get('description'),
|
||||
'thumbnail': meta.get('imageUrl'),
|
||||
'duration': parse_duration(meta.get('duration')),
|
||||
'uploader': meta.get('author'),
|
||||
'upload_date': try_call(lambda: f'{year}{month_by_name(month, lang="pl"):0>2}{day:0>2}'),
|
||||
}
|
||||
|
||||
|
||||
class TokFMPodcastIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://audycje\.tokfm\.pl/podcast/|tokfm:podcast:)(?P<id>\d+),?'
|
||||
IE_NAME = 'tokfm:podcast'
|
||||
_TESTS = [{
|
||||
'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
|
||||
'info_dict': {
|
||||
'id': '91275',
|
||||
'ext': 'aac',
|
||||
'title': 'md5:a9b15488009065556900169fb8061cce',
|
||||
'episode': 'md5:a9b15488009065556900169fb8061cce',
|
||||
'series': 'Analizy',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
|
||||
# in case it breaks see this but it returns a lot of useless data
|
||||
# https://api.podcast.radioagora.pl/api4/getPodcasts?podcast_id=100091&with_guests=true&with_leaders_for_mobile=true
|
||||
metadata = self._download_json(
|
||||
f'https://audycje.tokfm.pl/getp/3{media_id}', media_id, 'Downloading podcast metadata')
|
||||
if not metadata:
|
||||
raise ExtractorError('No such podcast', expected=True)
|
||||
metadata = metadata[0]
|
||||
|
||||
formats = []
|
||||
for ext in ('aac', 'mp3'):
|
||||
url_data = self._download_json(
|
||||
f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}',
|
||||
media_id, 'Downloading podcast %s URL' % ext)
|
||||
# prevents inserting the mp3 (default) multiple times
|
||||
if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']:
|
||||
formats.append({
|
||||
'url': url_data['link_ssl'],
|
||||
'ext': ext,
|
||||
'vcodec': 'none',
|
||||
'acodec': ext,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'formats': formats,
|
||||
'title': metadata.get('podcast_name'),
|
||||
'series': metadata.get('series_name'),
|
||||
'episode': metadata.get('podcast_name'),
|
||||
}
|
||||
|
||||
|
||||
class TokFMAuditionIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://audycje\.tokfm\.pl/audycja/|tokfm:audition:)(?P<id>\d+),?'
|
||||
IE_NAME = 'tokfm:audition'
|
||||
_TESTS = [{
|
||||
'url': 'https://audycje.tokfm.pl/audycja/218,Analizy',
|
||||
'info_dict': {
|
||||
'id': '218',
|
||||
'title': 'Analizy',
|
||||
'series': 'Analizy',
|
||||
},
|
||||
'playlist_count': 1635,
|
||||
}]
|
||||
|
||||
_PAGE_SIZE = 30
|
||||
_HEADERS = {
|
||||
'User-Agent': 'Mozilla/5.0 (Linux; Android 9; Redmi 3S Build/PQ3A.190801.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.101 Mobile Safari/537.36',
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _create_url(id):
|
||||
return f'https://audycje.tokfm.pl/audycja/{id}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
audition_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
f'https://api.podcast.radioagora.pl/api4/getSeries?series_id={audition_id}',
|
||||
audition_id, 'Downloading audition metadata', headers=self._HEADERS)
|
||||
if not data:
|
||||
raise ExtractorError('No such audition', expected=True)
|
||||
data = data[0]
|
||||
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, audition_id, data), self._PAGE_SIZE)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': audition_id,
|
||||
'title': data.get('series_name'),
|
||||
'series': data.get('series_name'),
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
def _fetch_page(self, audition_id, data, page):
|
||||
for retry in self.RetryManager():
|
||||
podcast_page = self._download_json(
|
||||
f'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id={audition_id}&limit=30&offset={page}&with_guests=true&with_leaders_for_mobile=true',
|
||||
audition_id, f'Downloading podcast list page {page + 1}', headers=self._HEADERS)
|
||||
if not podcast_page:
|
||||
retry.error = ExtractorError('Agora returned empty page', expected=True)
|
||||
|
||||
for podcast in podcast_page:
|
||||
yield {
|
||||
'_type': 'url_transparent',
|
||||
'url': podcast['podcast_sharing_url'],
|
||||
'ie_key': TokFMPodcastIE.ie_key(),
|
||||
'title': podcast.get('podcast_name'),
|
||||
'episode': podcast.get('podcast_name'),
|
||||
'description': podcast.get('podcast_description'),
|
||||
'timestamp': int_or_none(podcast.get('podcast_timestamp')),
|
||||
'series': data.get('series_name'),
|
||||
}
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
96
plugins/youtube_download/yt_dlp/extractor/airtv.py
Normal file
96
plugins/youtube_download/yt_dlp/extractor/airtv.py
Normal file
@@ -0,0 +1,96 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
traverse_obj
|
||||
)
|
||||
|
||||
|
||||
class AirTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.air\.tv/watch\?v=(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
# without youtube_id
|
||||
'url': 'https://www.air.tv/watch?v=W87jcWleSn2hXZN47zJZsQ',
|
||||
'info_dict': {
|
||||
'id': 'W87jcWleSn2hXZN47zJZsQ',
|
||||
'ext': 'mp4',
|
||||
'release_date': '20221003',
|
||||
'release_timestamp': 1664792603,
|
||||
'channel_id': 'vgfManQlRQKgoFQ8i8peFQ',
|
||||
'title': 'md5:c12d49ed367c3dadaa67659aff43494c',
|
||||
'upload_date': '20221003',
|
||||
'duration': 151,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
|
||||
'timestamp': 1664792603,
|
||||
}
|
||||
}, {
|
||||
# with youtube_id
|
||||
'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
|
||||
'info_dict': {
|
||||
'id': '2ZTqmpee-bQ',
|
||||
'ext': 'mp4',
|
||||
'comment_count': int,
|
||||
'tags': 'count:11',
|
||||
'channel_follower_count': int,
|
||||
'like_count': int,
|
||||
'uploader': 'Newsflare',
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/2ZTqmpee-bQ/maxresdefault.webp',
|
||||
'availability': 'public',
|
||||
'title': 'Geese Chase Alligator Across Golf Course',
|
||||
'uploader_id': 'NewsflareBreaking',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCzSSoloGEz10HALUAbYhngQ',
|
||||
'description': 'md5:99b21d9cea59330149efbd9706e208f5',
|
||||
'age_limit': 0,
|
||||
'channel_id': 'UCzSSoloGEz10HALUAbYhngQ',
|
||||
'uploader_url': 'http://www.youtube.com/user/NewsflareBreaking',
|
||||
'view_count': int,
|
||||
'categories': ['News & Politics'],
|
||||
'live_status': 'not_live',
|
||||
'playable_in_embed': True,
|
||||
'channel': 'Newsflare',
|
||||
'duration': 37,
|
||||
'upload_date': '20180511',
|
||||
}
|
||||
}]
|
||||
|
||||
def _get_formats_and_subtitle(self, json_data, video_id):
|
||||
formats, subtitles = [], {}
|
||||
for source in traverse_obj(json_data, 'sources', 'sources_desktop', ...):
|
||||
ext = determine_ext(source.get('src'), mimetype2ext(source.get('type')))
|
||||
if ext == 'm3u8':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(source.get('src'), video_id)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
formats.append({'url': source.get('src'), 'ext': ext})
|
||||
return formats, subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['initialState']['videos'][display_id]
|
||||
if nextjs_json.get('youtube_id'):
|
||||
return self.url_result(
|
||||
f'https://www.youtube.com/watch?v={nextjs_json.get("youtube_id")}', YoutubeIE)
|
||||
|
||||
formats, subtitles = self._get_formats_and_subtitle(nextjs_json, display_id)
|
||||
return {
|
||||
'id': display_id,
|
||||
'title': nextjs_json.get('title') or self._html_search_meta('og:title', webpage),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'description': nextjs_json.get('description') or None,
|
||||
'duration': int_or_none(nextjs_json.get('duration')),
|
||||
'thumbnails': [
|
||||
{'url': thumbnail}
|
||||
for thumbnail in traverse_obj(nextjs_json, ('default_thumbnails', ...))],
|
||||
'channel_id': traverse_obj(nextjs_json, 'channel', 'channel_slug'),
|
||||
'timestamp': parse_iso8601(nextjs_json.get('created')),
|
||||
'release_timestamp': parse_iso8601(nextjs_json.get('published')),
|
||||
'view_count': int_or_none(nextjs_json.get('views')),
|
||||
}
|
60
plugins/youtube_download/yt_dlp/extractor/aitube.py
Normal file
60
plugins/youtube_download/yt_dlp/extractor/aitube.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, merge_dicts
|
||||
|
||||
|
||||
class AitubeKZVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://aitube\.kz/(?:video|embed/)\?(?:[^\?]+)?id=(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
# id paramater as first parameter
|
||||
'url': 'https://aitube.kz/video?id=9291d29b-c038-49a1-ad42-3da2051d353c&playlistId=d55b1f5f-ef2a-4f23-b646-2a86275b86b7&season=1',
|
||||
'info_dict': {
|
||||
'id': '9291d29b-c038-49a1-ad42-3da2051d353c',
|
||||
'ext': 'mp4',
|
||||
'duration': 2174.0,
|
||||
'channel_id': '94962f73-013b-432c-8853-1bd78ca860fe',
|
||||
'like_count': int,
|
||||
'channel': 'ASTANA TV',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'description': 'Смотреть любимые сериалы и видео, поделиться видео и сериалами с друзьями и близкими',
|
||||
'thumbnail': 'https://cdn.static02.aitube.kz/kz.aitudala.aitube.staticaccess/files/ddf2a2ff-bee3-409b-b5f2-2a8202bba75b',
|
||||
'upload_date': '20221102',
|
||||
'timestamp': 1667370519,
|
||||
'title': 'Ангел хранитель 1 серия',
|
||||
'channel_follower_count': int,
|
||||
}
|
||||
}, {
|
||||
# embed url
|
||||
'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# id parameter is not as first paramater
|
||||
'url': 'https://aitube.kz/video?season=1&id=9291d29b-c038-49a1-ad42-3da2051d353c&playlistId=d55b1f5f-ef2a-4f23-b646-2a86275b86b7',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
nextjs_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['videoInfo']
|
||||
json_ld_data = self._search_json_ld(webpage, video_id)
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
f'https://api-http.aitube.kz/kz.aitudala.aitube.staticaccess/video/{video_id}/video', video_id)
|
||||
|
||||
return merge_dicts({
|
||||
'id': video_id,
|
||||
'title': nextjs_data.get('title') or self._html_search_meta(['name', 'og:title'], webpage),
|
||||
'description': nextjs_data.get('description'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'view_count': (nextjs_data.get('viewCount')
|
||||
or int_or_none(self._html_search_meta('ya:ovs:views_total', webpage))),
|
||||
'like_count': nextjs_data.get('likeCount'),
|
||||
'channel': nextjs_data.get('channelTitle'),
|
||||
'channel_id': nextjs_data.get('channelId'),
|
||||
'thumbnail': nextjs_data.get('coverUrl'),
|
||||
'comment_count': nextjs_data.get('commentCount'),
|
||||
'channel_follower_count': int_or_none(nextjs_data.get('channelSubscriberCount')),
|
||||
}, json_ld_data)
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
@@ -18,7 +15,7 @@ class AliExpressLiveIE(InfoExtractor):
|
||||
'id': '2800002704436634',
|
||||
'ext': 'mp4',
|
||||
'title': 'CASIMA7.22',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'uploader': 'CASIMA Official Store',
|
||||
'timestamp': 1500717600,
|
||||
'upload_date': '20170722',
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -1,12 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
remove_end,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_basename,
|
||||
@@ -102,10 +100,7 @@ class AllocineIE(InfoExtractor):
|
||||
video_id = display_id
|
||||
media_data = self._download_json(
|
||||
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
|
||||
title = remove_end(
|
||||
self._html_search_regex(
|
||||
r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
|
||||
' - AlloCiné')
|
||||
title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné'))
|
||||
for key, value in media_data['video'].items():
|
||||
if not key.endswith('Path'):
|
||||
continue
|
||||
@@ -117,8 +112,6 @@ class AllocineIE(InfoExtractor):
|
||||
})
|
||||
duration, view_count, timestamp = [None] * 3
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
|
@@ -1,5 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
|
83
plugins/youtube_download/yt_dlp/extractor/alsace20tv.py
Normal file
83
plugins/youtube_download/yt_dlp/extractor/alsace20tv.py
Normal file
@@ -0,0 +1,83 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
dict_get,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class Alsace20TVBaseIE(InfoExtractor):
|
||||
def _extract_video(self, video_id, url=None):
|
||||
info = self._download_json(
|
||||
'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
|
||||
video_id) or {}
|
||||
title = info.get('titre')
|
||||
|
||||
formats = []
|
||||
for res, fmt_url in (info.get('files') or {}).items():
|
||||
formats.extend(
|
||||
self._extract_smil_formats(fmt_url, video_id, fatal=False)
|
||||
if '/smil:_' in fmt_url
|
||||
else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
|
||||
|
||||
webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
|
||||
thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
|
||||
upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
|
||||
upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': clean_html(get_element_by_class('wysiwyg', webpage)),
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': int_or_none(self._og_search_property('video:duration', webpage) if webpage else None),
|
||||
'view_count': int_or_none(info.get('nb_vues')),
|
||||
}
|
||||
|
||||
|
||||
class Alsace20TVIE(Alsace20TVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?alsace20\.tv/(?:[\w-]+/)+[\w-]+-(?P<id>[\w]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.alsace20.tv/VOD/Actu/JT/Votre-JT-jeudi-3-fevrier-lyNHCXpYJh.html',
|
||||
'info_dict': {
|
||||
'id': 'lyNHCXpYJh',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:fc0bc4a0692d3d2dba4524053de4c7b7',
|
||||
'title': 'Votre JT du jeudi 3 février',
|
||||
'upload_date': '20220203',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'duration': 1073,
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_video(video_id, url)
|
||||
|
||||
|
||||
class Alsace20TVEmbedIE(Alsace20TVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?alsace20\.tv/emb/(?P<id>[\w]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.alsace20.tv/emb/lyNHCXpYJh',
|
||||
# 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
|
||||
'info_dict': {
|
||||
'id': 'lyNHCXpYJh',
|
||||
'ext': 'mp4',
|
||||
'title': 'Votre JT du jeudi 3 février',
|
||||
'upload_date': '20220203',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_video(video_id)
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -66,22 +63,13 @@ class AluraIE(InfoExtractor):
|
||||
f['height'] = int('720' if m.group('res') == 'hd' else '480')
|
||||
formats.extend(video_format)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
"formats": formats
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
pass
|
||||
def _perform_login(self, username, password):
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login popup')
|
||||
@@ -123,7 +111,7 @@ class AluraIE(InfoExtractor):
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
class AluraCourseIE(AluraIE):
|
||||
class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
_VALID_URL = r'https?://(?:cursos\.)?alura\.com\.br/course/(?P<id>[^/]+)'
|
||||
_LOGIN_URL = 'https://cursos.alura.com.br/loginForm?urlAfterLogin=/loginForm'
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from .vimeo import VimeoIE
|
||||
|
@@ -1,6 +1,17 @@
|
||||
# coding: utf-8
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
float_or_none,
|
||||
get_element_by_attribute,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AmazonStoreIE(InfoExtractor):
|
||||
@@ -10,7 +21,7 @@ class AmazonStoreIE(InfoExtractor):
|
||||
'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/',
|
||||
'info_dict': {
|
||||
'id': 'B098XNCHLD',
|
||||
'title': 'md5:5f3194dbf75a8dcfc83079bd63a2abed',
|
||||
'title': str,
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
'playlist': [{
|
||||
@@ -19,28 +30,48 @@ class AmazonStoreIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'mcdodo usb c cable 100W 5a',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 34,
|
||||
},
|
||||
}]
|
||||
}],
|
||||
'expected_warnings': ['Unable to extract data'],
|
||||
}, {
|
||||
'url': 'https://www.amazon.in/Sony-WH-1000XM4-Cancelling-Headphones-Bluetooth/dp/B0863TXGM3',
|
||||
'info_dict': {
|
||||
'id': 'B0863TXGM3',
|
||||
'title': 'md5:b0bde4881d3cfd40d63af19f7898b8ff',
|
||||
'title': str,
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
'expected_warnings': ['Unable to extract data'],
|
||||
}, {
|
||||
'url': 'https://www.amazon.com/dp/B0845NXCXF/',
|
||||
'info_dict': {
|
||||
'id': 'B0845NXCXF',
|
||||
'title': 'md5:2145cd4e3c7782f1ee73649a3cff1171',
|
||||
'title': str,
|
||||
},
|
||||
'playlist-mincount': 1,
|
||||
'expected_warnings': ['Unable to extract data'],
|
||||
}, {
|
||||
'url': 'https://www.amazon.es/Samsung-Smartphone-s-AMOLED-Quad-c%C3%A1mara-espa%C3%B1ola/dp/B08WX337PQ',
|
||||
'info_dict': {
|
||||
'id': 'B08WX337PQ',
|
||||
'title': str,
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
'expected_warnings': ['Unable to extract data'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
data_json = self._parse_json(self._html_search_regex(r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'(.*)\'\)', webpage, 'data'), id)
|
||||
|
||||
for retry in self.RetryManager():
|
||||
webpage = self._download_webpage(url, id)
|
||||
try:
|
||||
data_json = self._search_json(
|
||||
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id,
|
||||
transform_source=js_to_json)
|
||||
except ExtractorError as e:
|
||||
retry.error = e
|
||||
|
||||
entries = [{
|
||||
'id': video['marketPlaceID'],
|
||||
'url': video['url'],
|
||||
@@ -50,4 +81,90 @@ class AmazonStoreIE(InfoExtractor):
|
||||
'height': int_or_none(video.get('videoHeight')),
|
||||
'width': int_or_none(video.get('videoWidth')),
|
||||
} for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')]
|
||||
return self.playlist_result(entries, playlist_id=id, playlist_title=data_json['title'])
|
||||
return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title'))
|
||||
|
||||
|
||||
class AmazonReviewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/gp/customer-reviews/(?P<id>[^/&#$?]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.amazon.com/gp/customer-reviews/R10VE9VUSY19L3/ref=cm_cr_arp_d_rvw_ttl',
|
||||
'info_dict': {
|
||||
'id': 'R10VE9VUSY19L3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Get squad #Suspicious',
|
||||
'description': 'md5:7012695052f440a1e064e402d87e0afb',
|
||||
'uploader': 'Kimberly Cronkright',
|
||||
'average_rating': 1.0,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'expected_warnings': ['Review body was not found in webpage'],
|
||||
}, {
|
||||
'url': 'https://www.amazon.com/gp/customer-reviews/R10VE9VUSY19L3/ref=cm_cr_arp_d_rvw_ttl?language=es_US',
|
||||
'info_dict': {
|
||||
'id': 'R10VE9VUSY19L3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Get squad #Suspicious',
|
||||
'description': 'md5:7012695052f440a1e064e402d87e0afb',
|
||||
'uploader': 'Kimberly Cronkright',
|
||||
'average_rating': 1.0,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'expected_warnings': ['Review body was not found in webpage'],
|
||||
}, {
|
||||
'url': 'https://www.amazon.in/gp/customer-reviews/RV1CO8JN5VGXV/',
|
||||
'info_dict': {
|
||||
'id': 'RV1CO8JN5VGXV',
|
||||
'ext': 'mp4',
|
||||
'title': 'Not sure about its durability',
|
||||
'description': 'md5:1a252c106357f0a3109ebf37d2e87494',
|
||||
'uploader': 'Shoaib Gulzar',
|
||||
'average_rating': 2.0,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'expected_warnings': ['Review body was not found in webpage'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
for retry in self.RetryManager():
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
review_body = get_element_by_attribute('data-hook', 'review-body', webpage)
|
||||
if not review_body:
|
||||
retry.error = ExtractorError('Review body was not found in webpage', expected=True)
|
||||
|
||||
formats, subtitles = [], {}
|
||||
|
||||
manifest_url = self._search_regex(
|
||||
r'data-video-url="([^"]+)"', review_body, 'm3u8 url', default=None)
|
||||
if url_or_none(manifest_url):
|
||||
fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
manifest_url, video_id, 'mp4', fatal=False)
|
||||
formats.extend(fmts)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'<input[^>]+\bvalue="([^"]+)"[^>]+\bclass="video-url"', review_body, 'mp4 url', default=None)
|
||||
if url_or_none(video_url):
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'format_id': 'http-mp4',
|
||||
})
|
||||
|
||||
if not formats:
|
||||
self.raise_no_formats('No video found for this customer review', expected=True)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': (clean_html(get_element_by_attribute('data-hook', 'review-title', webpage))
|
||||
or self._html_extract_title(webpage)),
|
||||
'description': clean_html(traverse_obj(re.findall(
|
||||
r'<span(?:\s+class="cr-original-review-content")?>(.+?)</span>', review_body), -1)),
|
||||
'uploader': clean_html(get_element_by_class('a-profile-name', webpage)),
|
||||
'average_rating': float_or_none(clean_html(get_element_by_attribute(
|
||||
'data-hook', 'review-star-rating', webpage) or '').partition(' ')[0]),
|
||||
'thumbnail': self._search_regex(
|
||||
r'data-thumbnail-url="([^"]+)"', review_body, 'thumbnail', default=None),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
290
plugins/youtube_download/yt_dlp/extractor/amazonminitv.py
Normal file
290
plugins/youtube_download/yt_dlp/extractor/amazonminitv.py
Normal file
@@ -0,0 +1,290 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, int_or_none, traverse_obj, try_get
|
||||
|
||||
|
||||
class AmazonMiniTVBaseIE(InfoExtractor):
|
||||
def _real_initialize(self):
|
||||
self._download_webpage(
|
||||
'https://www.amazon.in/minitv', None,
|
||||
note='Fetching guest session cookies')
|
||||
AmazonMiniTVBaseIE.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value
|
||||
|
||||
def _call_api(self, asin, data=None, note=None):
|
||||
device = {'clientId': 'ATVIN', 'deviceLocale': 'en_GB'}
|
||||
if data:
|
||||
data['variables'].update({
|
||||
'contentType': 'VOD',
|
||||
'sessionIdToken': self.session_id,
|
||||
**device,
|
||||
})
|
||||
|
||||
resp = self._download_json(
|
||||
f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
|
||||
asin, note=note, headers={'Content-Type': 'application/json'},
|
||||
data=json.dumps(data).encode() if data else None,
|
||||
query=None if data else {
|
||||
'deviceType': 'A1WMMUXPCUJL4N',
|
||||
'contentId': asin,
|
||||
**device,
|
||||
})
|
||||
|
||||
if resp.get('errors'):
|
||||
raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}')
|
||||
elif not data:
|
||||
return resp
|
||||
return resp['data'][data['operationName']]
|
||||
|
||||
|
||||
class AmazonMiniTVIE(AmazonMiniTVBaseIE):
|
||||
_VALID_URL = r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P<id>[a-f0-9-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
|
||||
'info_dict': {
|
||||
'id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
|
||||
'ext': 'mp4',
|
||||
'title': 'May I Kiss You?',
|
||||
'language': 'Hindi',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'description': 'md5:a549bfc747973e04feb707833474e59d',
|
||||
'release_timestamp': 1644710400,
|
||||
'release_date': '20220213',
|
||||
'duration': 846,
|
||||
'chapters': 'count:2',
|
||||
'series': 'Couple Goals',
|
||||
'series_id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
|
||||
'season': 'Season 3',
|
||||
'season_number': 3,
|
||||
'season_id': 'amzn1.dv.gti.20331016-d9b9-4968-b991-c89fa4927a36',
|
||||
'episode': 'May I Kiss You?',
|
||||
'episode_number': 2,
|
||||
'episode_id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
|
||||
'info_dict': {
|
||||
'id': 'amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
|
||||
'ext': 'mp4',
|
||||
'title': 'Jahaan',
|
||||
'language': 'Hindi',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'description': 'md5:05eb765a77bf703f322f120ec6867339',
|
||||
'release_timestamp': 1647475200,
|
||||
'release_date': '20220317',
|
||||
'duration': 783,
|
||||
'chapters': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'amazonminitv:amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'amazonminitv:280d2564-584f-452f-9c98-7baf906e01ab',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_GRAPHQL_QUERY_CONTENT = '''
|
||||
query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) {
|
||||
content(
|
||||
applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
|
||||
contentId: $contentId
|
||||
contentType: $contentType
|
||||
) {
|
||||
contentId
|
||||
name
|
||||
... on Episode {
|
||||
contentId
|
||||
vodType
|
||||
name
|
||||
images
|
||||
description {
|
||||
synopsis
|
||||
contentLengthInSeconds
|
||||
}
|
||||
publicReleaseDateUTC
|
||||
audioTracks
|
||||
seasonId
|
||||
seriesId
|
||||
seriesName
|
||||
seasonNumber
|
||||
episodeNumber
|
||||
timecode {
|
||||
endCreditsTime
|
||||
}
|
||||
}
|
||||
... on MovieContent {
|
||||
contentId
|
||||
vodType
|
||||
name
|
||||
description {
|
||||
synopsis
|
||||
contentLengthInSeconds
|
||||
}
|
||||
images
|
||||
publicReleaseDateUTC
|
||||
audioTracks
|
||||
}
|
||||
}
|
||||
}'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
asin = f'amzn1.dv.gti.{self._match_id(url)}'
|
||||
prs = self._call_api(asin, note='Downloading playback info')
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for type_, asset in prs['playbackAssets'].items():
|
||||
if not traverse_obj(asset, 'manifestUrl'):
|
||||
continue
|
||||
if type_ == 'hls':
|
||||
m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
|
||||
asset['manifestUrl'], asin, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=type_, fatal=False)
|
||||
formats.extend(m3u8_fmts)
|
||||
subtitles = self._merge_subtitles(subtitles, m3u8_subs)
|
||||
elif type_ == 'dash':
|
||||
mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles(
|
||||
asset['manifestUrl'], asin, mpd_id=type_, fatal=False)
|
||||
formats.extend(mpd_fmts)
|
||||
subtitles = self._merge_subtitles(subtitles, mpd_subs)
|
||||
else:
|
||||
self.report_warning(f'Unknown asset type: {type_}')
|
||||
|
||||
title_info = self._call_api(
|
||||
asin, note='Downloading title info', data={
|
||||
'operationName': 'content',
|
||||
'variables': {'contentId': asin},
|
||||
'query': self._GRAPHQL_QUERY_CONTENT,
|
||||
})
|
||||
credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000)
|
||||
is_episode = title_info.get('vodType') == 'EPISODE'
|
||||
|
||||
return {
|
||||
'id': asin,
|
||||
'title': title_info.get('name'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'language': traverse_obj(title_info, ('audioTracks', 0)),
|
||||
'thumbnails': [{
|
||||
'id': type_,
|
||||
'url': url,
|
||||
} for type_, url in (title_info.get('images') or {}).items()],
|
||||
'description': traverse_obj(title_info, ('description', 'synopsis')),
|
||||
'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)),
|
||||
'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')),
|
||||
'chapters': [{
|
||||
'start_time': credits_time,
|
||||
'title': 'End Credits',
|
||||
}] if credits_time else [],
|
||||
'series': title_info.get('seriesName'),
|
||||
'series_id': title_info.get('seriesId'),
|
||||
'season_number': title_info.get('seasonNumber'),
|
||||
'season_id': title_info.get('seasonId'),
|
||||
'episode': title_info.get('name') if is_episode else None,
|
||||
'episode_number': title_info.get('episodeNumber'),
|
||||
'episode_id': asin if is_episode else None,
|
||||
}
|
||||
|
||||
|
||||
class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE):
|
||||
IE_NAME = 'amazonminitv:season'
|
||||
_VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
|
||||
IE_DESC = 'Amazon MiniTV Series, "minitv:season:" prefix'
|
||||
_TESTS = [{
|
||||
'url': 'amazonminitv:season:amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
|
||||
'playlist_mincount': 6,
|
||||
'info_dict': {
|
||||
'id': 'amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
|
||||
},
|
||||
}, {
|
||||
'url': 'amazonminitv:season:0aa996eb-6a1b-4886-a342-387fbd2f1db0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_GRAPHQL_QUERY = '''
|
||||
query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) {
|
||||
getEpisodes(
|
||||
applicationContextInput: {sessionIdToken: $sessionIdToken, deviceLocale: $deviceLocale, clientId: $clientId}
|
||||
episodeOrSeasonId: $episodeOrSeasonId
|
||||
) {
|
||||
episodes {
|
||||
... on Episode {
|
||||
contentId
|
||||
name
|
||||
images
|
||||
seriesName
|
||||
seasonId
|
||||
seriesId
|
||||
seasonNumber
|
||||
episodeNumber
|
||||
description {
|
||||
synopsis
|
||||
contentLengthInSeconds
|
||||
}
|
||||
publicReleaseDateUTC
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
'''
|
||||
|
||||
def _entries(self, asin):
|
||||
season_info = self._call_api(
|
||||
asin, note='Downloading season info', data={
|
||||
'operationName': 'getEpisodes',
|
||||
'variables': {'episodeOrSeasonId': asin},
|
||||
'query': self._GRAPHQL_QUERY,
|
||||
})
|
||||
|
||||
for episode in season_info['episodes']:
|
||||
yield self.url_result(
|
||||
f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
asin = f'amzn1.dv.gti.{self._match_id(url)}'
|
||||
return self.playlist_result(self._entries(asin), asin)
|
||||
|
||||
|
||||
class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE):
|
||||
IE_NAME = 'amazonminitv:series'
|
||||
_VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
|
||||
'playlist_mincount': 3,
|
||||
'info_dict': {
|
||||
'id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
|
||||
},
|
||||
}, {
|
||||
'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_GRAPHQL_QUERY = '''
|
||||
query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) {
|
||||
getSeasons(
|
||||
applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
|
||||
episodeOrSeasonOrSeriesId: $episodeOrSeasonOrSeriesId
|
||||
) {
|
||||
seasons {
|
||||
seasonId
|
||||
}
|
||||
}
|
||||
}
|
||||
'''
|
||||
|
||||
def _entries(self, asin):
|
||||
season_info = self._call_api(
|
||||
asin, note='Downloading series info', data={
|
||||
'operationName': 'getSeasons',
|
||||
'variables': {'episodeOrSeasonOrSeriesId': asin},
|
||||
'query': self._GRAPHQL_QUERY,
|
||||
})
|
||||
|
||||
for season in season_info['seasons']:
|
||||
yield self.url_result(f'amazonminitv:season:{season["seasonId"]}', AmazonMiniTVSeasonIE, season['seasonId'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
asin = f'amzn1.dv.gti.{self._match_id(url)}'
|
||||
return self.playlist_result(self._entries(asin), asin)
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
@@ -12,7 +9,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class AMCNetworksIE(ThePlatformIE):
|
||||
class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
|
||||
@@ -109,7 +106,6 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
media_url = update_url_query(media_url, query)
|
||||
formats, subtitles = self._extract_theplatform_smil(
|
||||
media_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
thumbnail_urls = [properties.get('imageDesktop')]
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -14,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AmericasTestKitchenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
|
||||
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
||||
@@ -22,15 +19,20 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'id': '5b400b9ee338f922cb06450c',
|
||||
'title': 'Japanese Suppers',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'weeknight-japanese-suppers',
|
||||
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1523318400,
|
||||
'upload_date': '20180410',
|
||||
'release_date': '20180410',
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': 18,
|
||||
'timestamp': 1523304000,
|
||||
'upload_date': '20180409',
|
||||
'release_date': '20180409',
|
||||
'series': 'America\'s Test Kitchen',
|
||||
'season': 'Season 18',
|
||||
'episode': 'Japanese Suppers',
|
||||
'season_number': 18,
|
||||
'episode_number': 15,
|
||||
'duration': 1376,
|
||||
'thumbnail': r're:^https?://',
|
||||
'average_rating': 0,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -43,15 +45,20 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'id': '5fbe8c61bda2010001c6763b',
|
||||
'title': 'Simple Chicken Dinner',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'atktv_2103_simple-chicken-dinner_full-episode_web-mp4',
|
||||
'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1610755200,
|
||||
'upload_date': '20210116',
|
||||
'release_date': '20210116',
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': 21,
|
||||
'timestamp': 1610737200,
|
||||
'upload_date': '20210115',
|
||||
'release_date': '20210115',
|
||||
'series': 'America\'s Test Kitchen',
|
||||
'season': 'Season 21',
|
||||
'episode': 'Simple Chicken Dinner',
|
||||
'season_number': 21,
|
||||
'episode_number': 3,
|
||||
'duration': 1397,
|
||||
'thumbnail': r're:^https?://',
|
||||
'view_count': int,
|
||||
'average_rating': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -60,10 +67,10 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
|
||||
'url': 'https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
|
||||
'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -93,7 +100,7 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
|
||||
|
||||
class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com(?P<show>/cookscountry)?/episodes/browse/season_(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# ATK Season
|
||||
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
|
||||
@@ -104,7 +111,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
# Cooks Country Season
|
||||
'url': 'https://www.cookscountry.com/episodes/browse/season_12',
|
||||
'url': 'https://www.americastestkitchen.com/cookscountry/episodes/browse/season_12',
|
||||
'info_dict': {
|
||||
'id': 'season_12',
|
||||
'title': 'Season 12',
|
||||
@@ -113,17 +120,17 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_name, season_number = self._match_valid_url(url).groups()
|
||||
show_path, season_number = self._match_valid_url(url).group('show', 'id')
|
||||
season_number = int(season_number)
|
||||
|
||||
slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
|
||||
slug = 'cco' if show_path == '/cookscountry' else 'atk'
|
||||
|
||||
season = 'Season %d' % season_number
|
||||
|
||||
season_search = self._download_json(
|
||||
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
|
||||
season, headers={
|
||||
'Origin': 'https://www.%s.com' % show_name,
|
||||
'Origin': 'https://www.americastestkitchen.com',
|
||||
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
|
||||
'X-Algolia-Application-Id': 'Y1FNZXUI30',
|
||||
}, query={
|
||||
@@ -139,12 +146,12 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
|
||||
def entries():
|
||||
for episode in (season_search.get('hits') or []):
|
||||
search_url = episode.get('search_url')
|
||||
search_url = episode.get('search_url') # always formatted like '/episode/123-title-of-episode'
|
||||
if not search_url:
|
||||
continue
|
||||
yield {
|
||||
'_type': 'url',
|
||||
'url': 'https://www.%s.com%s' % (show_name, search_url),
|
||||
'url': f'https://www.americastestkitchen.com{show_path or ""}{search_url}',
|
||||
'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
|
||||
'title': episode.get('title'),
|
||||
'description': episode.get('description'),
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@@ -13,7 +10,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class AMPIE(InfoExtractor):
|
||||
class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
|
||||
# parse Akamai Adaptive Media Player feed
|
||||
def _extract_feed_info(self, url):
|
||||
feed = self._download_json(
|
||||
@@ -87,8 +84,6 @@ class AMPIE(InfoExtractor):
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
|
||||
|
||||
return {
|
||||
|
56
plugins/youtube_download/yt_dlp/extractor/angel.py
Normal file
56
plugins/youtube_download/yt_dlp/extractor/angel.py
Normal file
@@ -0,0 +1,56 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_or_none, merge_dicts
|
||||
|
||||
|
||||
class AngelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?angel\.com/watch/(?P<series>[^/?#]+)/episode/(?P<id>[\w-]+)/season-(?P<season_number>\d+)/episode-(?P<episode_number>\d+)/(?P<title>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.angel.com/watch/tuttle-twins/episode/2f3d0382-ea82-4cdc-958e-84fbadadc710/season-1/episode-1/when-laws-give-you-lemons',
|
||||
'md5': '4734e5cfdd64a568e837246aa3eaa524',
|
||||
'info_dict': {
|
||||
'id': '2f3d0382-ea82-4cdc-958e-84fbadadc710',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons',
|
||||
'description': 'md5:73b704897c20ab59c433a9c0a8202d5e',
|
||||
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
|
||||
'duration': 1359.0
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name',
|
||||
'md5': 'e4774bad0a5f0ad2e90d175cafdb797d',
|
||||
'info_dict': {
|
||||
'id': '8dfb714d-bca5-4812-8125-24fb9514cd10',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name',
|
||||
'description': 'md5:aadfb4827a94415de5ff6426e6dee3be',
|
||||
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
|
||||
'duration': 3276.0
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
json_ld = self._search_json_ld(webpage, video_id)
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
json_ld.pop('url'), video_id, note='Downloading HD m3u8 information')
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles
|
||||
}
|
||||
|
||||
# Angel uses cloudinary in the background and supports image transformations.
|
||||
# We remove these transformations and return the source file
|
||||
base_thumbnail_url = url_or_none(self._og_search_thumbnail(webpage)) or json_ld.pop('thumbnails')
|
||||
if base_thumbnail_url:
|
||||
info_dict['thumbnail'] = re.sub(r'(/upload)/.+(/angel-app/.+)$', r'\1\2', base_thumbnail_url)
|
||||
|
||||
return merge_dicts(info_dict, json_ld)
|
@@ -1,285 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
urlencode_postdata,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class AnimeLabBaseIE(InfoExtractor):
|
||||
_LOGIN_REQUIRED = True
|
||||
_LOGIN_URL = 'https://www.animelab.com/login'
|
||||
_NETRC_MACHINE = 'animelab'
|
||||
|
||||
def _login(self):
|
||||
def is_logged_in(login_webpage):
|
||||
return 'Sign In' not in login_webpage
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
# Check if already logged in
|
||||
if is_logged_in(login_page):
|
||||
return
|
||||
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None and self._LOGIN_REQUIRED:
|
||||
self.raise_login_required('Login is required to access any AnimeLab content')
|
||||
|
||||
login_form = {
|
||||
'email': username,
|
||||
'password': password,
|
||||
}
|
||||
|
||||
try:
|
||||
response = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Logging in', 'Wrong login info',
|
||||
data=urlencode_postdata(login_form),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
raise ExtractorError('Unable to log in (wrong credentials?)', expected=True)
|
||||
else:
|
||||
raise
|
||||
|
||||
# if login was successful
|
||||
if is_logged_in(response):
|
||||
return
|
||||
|
||||
raise ExtractorError('Unable to login (cannot verify if logged in)')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
|
||||
class AnimeLabIE(AnimeLabBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?animelab\.com/player/(?P<id>[^/]+)'
|
||||
|
||||
# the following tests require authentication, but a free account will suffice
|
||||
# just set 'usenetrc' to true in test/local_parameters.json if you use a .netrc file
|
||||
# or you can set 'username' and 'password' there
|
||||
# the tests also select a specific format so that the same video is downloaded
|
||||
# regardless of whether the user is premium or not (needs testing on a premium account)
|
||||
_TEST = {
|
||||
'url': 'https://www.animelab.com/player/fullmetal-alchemist-brotherhood-episode-42',
|
||||
'md5': '05bde4b91a5d1ff46ef5b94df05b0f7f',
|
||||
'info_dict': {
|
||||
'id': '383',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'fullmetal-alchemist-brotherhood-episode-42',
|
||||
'title': 'Fullmetal Alchemist: Brotherhood - Episode 42 - Signs of a Counteroffensive',
|
||||
'description': 'md5:103eb61dd0a56d3dfc5dbf748e5e83f4',
|
||||
'series': 'Fullmetal Alchemist: Brotherhood',
|
||||
'episode': 'Signs of a Counteroffensive',
|
||||
'episode_number': 42,
|
||||
'duration': 1469,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'season_id': '38',
|
||||
},
|
||||
'params': {
|
||||
'format': '[format_id=21711_yeshardsubbed_ja-JP][height=480]',
|
||||
},
|
||||
'skip': 'All AnimeLab content requires authentication',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
# unfortunately we can get different URLs for the same formats
|
||||
# e.g. if we are using a "free" account so no dubs available
|
||||
# (so _remove_duplicate_formats is not effective)
|
||||
# so we use a dictionary as a workaround
|
||||
formats = {}
|
||||
for language_option_url in ('https://www.animelab.com/player/%s/subtitles',
|
||||
'https://www.animelab.com/player/%s/dubbed'):
|
||||
actual_url = language_option_url % display_id
|
||||
webpage = self._download_webpage(actual_url, display_id, 'Downloading URL ' + actual_url)
|
||||
|
||||
video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id)
|
||||
position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position'))
|
||||
|
||||
raw_data = video_collection[position]['videoEntry']
|
||||
|
||||
video_id = str_or_none(raw_data['id'])
|
||||
|
||||
# create a title from many sources (while grabbing other info)
|
||||
# TODO use more fallback sources to get some of these
|
||||
series = raw_data.get('showTitle')
|
||||
video_type = raw_data.get('videoEntryType', {}).get('name')
|
||||
episode_number = raw_data.get('episodeNumber')
|
||||
episode_name = raw_data.get('name')
|
||||
|
||||
title_parts = (series, video_type, episode_number, episode_name)
|
||||
if None not in title_parts:
|
||||
title = '%s - %s %s - %s' % title_parts
|
||||
else:
|
||||
title = episode_name
|
||||
|
||||
description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None)
|
||||
|
||||
duration = int_or_none(raw_data.get('duration'))
|
||||
|
||||
thumbnail_data = raw_data.get('images', [])
|
||||
thumbnails = []
|
||||
for thumbnail in thumbnail_data:
|
||||
for instance in thumbnail['imageInstances']:
|
||||
image_data = instance.get('imageInfo', {})
|
||||
thumbnails.append({
|
||||
'id': str_or_none(image_data.get('id')),
|
||||
'url': image_data.get('fullPath'),
|
||||
'width': image_data.get('width'),
|
||||
'height': image_data.get('height'),
|
||||
})
|
||||
|
||||
season_data = raw_data.get('season', {}) or {}
|
||||
season = str_or_none(season_data.get('name'))
|
||||
season_number = int_or_none(season_data.get('seasonNumber'))
|
||||
season_id = str_or_none(season_data.get('id'))
|
||||
|
||||
for video_data in raw_data['videoList']:
|
||||
current_video_list = {}
|
||||
current_video_list['language'] = video_data.get('language', {}).get('languageCode')
|
||||
|
||||
is_hardsubbed = video_data.get('hardSubbed')
|
||||
|
||||
for video_instance in video_data['videoInstances']:
|
||||
httpurl = video_instance.get('httpUrl')
|
||||
url = httpurl if httpurl else video_instance.get('rtmpUrl')
|
||||
if url is None:
|
||||
# this video format is unavailable to the user (not premium etc.)
|
||||
continue
|
||||
|
||||
current_format = current_video_list.copy()
|
||||
|
||||
format_id_parts = []
|
||||
|
||||
format_id_parts.append(str_or_none(video_instance.get('id')))
|
||||
|
||||
if is_hardsubbed is not None:
|
||||
if is_hardsubbed:
|
||||
format_id_parts.append('yeshardsubbed')
|
||||
else:
|
||||
format_id_parts.append('nothardsubbed')
|
||||
|
||||
format_id_parts.append(current_format['language'])
|
||||
|
||||
format_id = '_'.join([x for x in format_id_parts if x is not None])
|
||||
|
||||
ext = determine_ext(url)
|
||||
if ext == 'm3u8':
|
||||
for format_ in self._extract_m3u8_formats(
|
||||
url, video_id, m3u8_id=format_id, fatal=False):
|
||||
formats[format_['format_id']] = format_
|
||||
continue
|
||||
elif ext == 'mpd':
|
||||
for format_ in self._extract_mpd_formats(
|
||||
url, video_id, mpd_id=format_id, fatal=False):
|
||||
formats[format_['format_id']] = format_
|
||||
continue
|
||||
|
||||
current_format['url'] = url
|
||||
quality_data = video_instance.get('videoQuality')
|
||||
if quality_data:
|
||||
quality = quality_data.get('name') or quality_data.get('description')
|
||||
else:
|
||||
quality = None
|
||||
|
||||
height = None
|
||||
if quality:
|
||||
height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None))
|
||||
|
||||
if height is None:
|
||||
self.report_warning('Could not get height of video')
|
||||
else:
|
||||
current_format['height'] = height
|
||||
current_format['format_id'] = format_id
|
||||
|
||||
formats[current_format['format_id']] = current_format
|
||||
|
||||
formats = list(formats.values())
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'series': series,
|
||||
'episode': episode_name,
|
||||
'episode_number': int_or_none(episode_number),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'season': season,
|
||||
'season_number': season_number,
|
||||
'season_id': season_id,
|
||||
}
|
||||
|
||||
|
||||
class AnimeLabShowsIE(AnimeLabBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?animelab\.com/shows/(?P<id>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://www.animelab.com/shows/attack-on-titan',
|
||||
'info_dict': {
|
||||
'id': '45',
|
||||
'title': 'Attack on Titan',
|
||||
'description': 'md5:989d95a2677e9309368d5cf39ba91469',
|
||||
},
|
||||
'playlist_count': 59,
|
||||
'skip': 'All AnimeLab content requires authentication',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
_BASE_URL = 'http://www.animelab.com'
|
||||
_SHOWS_API_URL = '/api/videoentries/show/videos/'
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id, 'Downloading requested URL')
|
||||
|
||||
show_data_str = self._search_regex(r'({"id":.*}),\svideoEntry', webpage, 'AnimeLab show data')
|
||||
show_data = self._parse_json(show_data_str, display_id)
|
||||
|
||||
show_id = str_or_none(show_data.get('id'))
|
||||
title = show_data.get('name')
|
||||
description = show_data.get('shortSynopsis') or show_data.get('longSynopsis')
|
||||
|
||||
entries = []
|
||||
for season in show_data['seasons']:
|
||||
season_id = season['id']
|
||||
get_data = urlencode_postdata({
|
||||
'seasonId': season_id,
|
||||
'limit': 1000,
|
||||
})
|
||||
# despite using urlencode_postdata, we are sending a GET request
|
||||
target_url = _BASE_URL + _SHOWS_API_URL + show_id + "?" + get_data.decode('utf-8')
|
||||
response = self._download_webpage(
|
||||
target_url,
|
||||
None, 'Season id %s' % season_id)
|
||||
|
||||
season_data = self._parse_json(response, display_id)
|
||||
|
||||
for video_data in season_data['list']:
|
||||
entries.append(self.url_result(
|
||||
_BASE_URL + '/player/' + video_data['slug'], 'AnimeLab',
|
||||
str_or_none(video_data.get('id')), video_data.get('name')
|
||||
))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': show_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
# TODO implement myqueue
|
@@ -1,291 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
join_nonempty,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class AnimeOnDemandIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?anime-on-demand\.de/anime/(?P<id>\d+)'
|
||||
_LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
|
||||
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
||||
_NETRC_MACHINE = 'animeondemand'
|
||||
# German-speaking countries of Europe
|
||||
_GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
|
||||
_TESTS = [{
|
||||
# jap, OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/161',
|
||||
'info_dict': {
|
||||
'id': '161',
|
||||
'title': 'Grimgar, Ashes and Illusions (OmU)',
|
||||
'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
# Film wording is used instead of Episode, ger/jap, Dub/OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/39',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Episodes without titles, jap, OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/162',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ger/jap, Dub/OmU, account required
|
||||
'url': 'https://www.anime-on-demand.de/anime/169',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Full length film, non-series, ger/jap, Dub/OmU, account required
|
||||
'url': 'https://www.anime-on-demand.de/anime/185',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Flash videos
|
||||
'url': 'https://www.anime-on-demand.de/anime/12',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
if '>Our licensing terms allow the distribution of animes only to German-speaking countries of Europe' in login_page:
|
||||
self.raise_geo_restricted(
|
||||
'%s is only available in German-speaking countries of Europe' % self.IE_NAME)
|
||||
|
||||
login_form = self._form_hidden_inputs('new_user', login_page)
|
||||
|
||||
login_form.update({
|
||||
'user[login]': username,
|
||||
'user[password]': password,
|
||||
})
|
||||
|
||||
post_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
|
||||
'post url', default=self._LOGIN_URL, group='url')
|
||||
|
||||
if not post_url.startswith('http'):
|
||||
post_url = urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
response = self._download_webpage(
|
||||
post_url, None, 'Logging in',
|
||||
data=urlencode_postdata(login_form), headers={
|
||||
'Referer': self._LOGIN_URL,
|
||||
})
|
||||
|
||||
if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
|
||||
error = self._search_regex(
|
||||
r'<p[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</p>',
|
||||
response, 'error', default=None, group='error')
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
anime_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, anime_id)
|
||||
|
||||
if 'data-playlist=' not in webpage:
|
||||
self._download_webpage(
|
||||
self._APPLY_HTML5_URL, anime_id,
|
||||
'Activating HTML5 beta', 'Unable to apply HTML5 beta')
|
||||
webpage = self._download_webpage(url, anime_id)
|
||||
|
||||
csrf_token = self._html_search_meta(
|
||||
'csrf-token', webpage, 'csrf token', fatal=True)
|
||||
|
||||
anime_title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]+itemprop="name"[^>]*>(.+?)</h1>',
|
||||
webpage, 'anime name')
|
||||
anime_description = self._html_search_regex(
|
||||
r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
|
||||
webpage, 'anime description', default=None)
|
||||
|
||||
def extract_info(html, video_id, num=None):
|
||||
title, description = [None] * 2
|
||||
formats = []
|
||||
|
||||
for input_ in re.findall(
|
||||
r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
|
||||
attributes = extract_attributes(input_)
|
||||
title = attributes.get('data-dialog-header')
|
||||
playlist_urls = []
|
||||
for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
|
||||
playlist_url = attributes.get(playlist_key)
|
||||
if isinstance(playlist_url, compat_str) and re.match(
|
||||
r'/?[\da-zA-Z]+', playlist_url):
|
||||
playlist_urls.append(attributes[playlist_key])
|
||||
if not playlist_urls:
|
||||
continue
|
||||
|
||||
lang = attributes.get('data-lang')
|
||||
lang_note = attributes.get('value')
|
||||
|
||||
for playlist_url in playlist_urls:
|
||||
kind = self._search_regex(
|
||||
r'videomaterialurl/\d+/([^/]+)/',
|
||||
playlist_url, 'media kind', default=None)
|
||||
format_id = join_nonempty(lang, kind) if lang or kind else str(num)
|
||||
format_note = join_nonempty(kind, lang_note, delim=', ')
|
||||
item_id_list = []
|
||||
if format_id:
|
||||
item_id_list.append(format_id)
|
||||
item_id_list.append('videomaterial')
|
||||
playlist = self._download_json(
|
||||
urljoin(url, playlist_url), video_id,
|
||||
'Downloading %s JSON' % ' '.join(item_id_list),
|
||||
headers={
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'X-CSRF-Token': csrf_token,
|
||||
'Referer': url,
|
||||
'Accept': 'application/json, text/javascript, */*; q=0.01',
|
||||
}, fatal=False)
|
||||
if not playlist:
|
||||
continue
|
||||
stream_url = url_or_none(playlist.get('streamurl'))
|
||||
if stream_url:
|
||||
rtmp = re.search(
|
||||
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
|
||||
stream_url)
|
||||
if rtmp:
|
||||
formats.append({
|
||||
'url': rtmp.group('url'),
|
||||
'app': rtmp.group('app'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
'page_url': url,
|
||||
'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
|
||||
'rtmp_real_time': True,
|
||||
'format_id': 'rtmp',
|
||||
'ext': 'flv',
|
||||
})
|
||||
continue
|
||||
start_video = playlist.get('startvideo', 0)
|
||||
playlist = playlist.get('playlist')
|
||||
if not playlist or not isinstance(playlist, list):
|
||||
continue
|
||||
playlist = playlist[start_video]
|
||||
title = playlist.get('title')
|
||||
if not title:
|
||||
continue
|
||||
description = playlist.get('description')
|
||||
for source in playlist.get('sources', []):
|
||||
file_ = source.get('file')
|
||||
if not file_:
|
||||
continue
|
||||
ext = determine_ext(file_)
|
||||
format_id = join_nonempty(
|
||||
lang, kind,
|
||||
'hls' if ext == 'm3u8' else None,
|
||||
'dash' if source.get('type') == 'video/dash' or ext == 'mpd' else None)
|
||||
if ext == 'm3u8':
|
||||
file_formats = self._extract_m3u8_formats(
|
||||
file_, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False)
|
||||
elif source.get('type') == 'video/dash' or ext == 'mpd':
|
||||
continue
|
||||
file_formats = self._extract_mpd_formats(
|
||||
file_, video_id, mpd_id=format_id, fatal=False)
|
||||
else:
|
||||
continue
|
||||
for f in file_formats:
|
||||
f.update({
|
||||
'language': lang,
|
||||
'format_note': format_note,
|
||||
})
|
||||
formats.extend(file_formats)
|
||||
|
||||
return {
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def extract_entries(html, video_id, common_info, num=None):
|
||||
info = extract_info(html, video_id, num)
|
||||
|
||||
if info['formats']:
|
||||
self._sort_formats(info['formats'])
|
||||
f = common_info.copy()
|
||||
f.update(info)
|
||||
yield f
|
||||
|
||||
# Extract teaser/trailer only when full episode is not available
|
||||
if not info['formats']:
|
||||
m = re.search(
|
||||
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<',
|
||||
html)
|
||||
if m:
|
||||
f = common_info.copy()
|
||||
f.update({
|
||||
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
|
||||
'title': m.group('title'),
|
||||
'url': urljoin(url, m.group('href')),
|
||||
})
|
||||
yield f
|
||||
|
||||
def extract_episodes(html):
|
||||
for num, episode_html in enumerate(re.findall(
|
||||
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1):
|
||||
episodebox_title = self._search_regex(
|
||||
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
||||
episode_html, 'episodebox title', default=None, group='title')
|
||||
if not episodebox_title:
|
||||
continue
|
||||
|
||||
episode_number = int(self._search_regex(
|
||||
r'(?:Episode|Film)\s*(\d+)',
|
||||
episodebox_title, 'episode number', default=num))
|
||||
episode_title = self._search_regex(
|
||||
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
||||
episodebox_title, 'episode title', default=None)
|
||||
|
||||
video_id = 'episode-%d' % episode_number
|
||||
|
||||
common_info = {
|
||||
'id': video_id,
|
||||
'series': anime_title,
|
||||
'episode': episode_title,
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
for e in extract_entries(episode_html, video_id, common_info):
|
||||
yield e
|
||||
|
||||
def extract_film(html, video_id):
|
||||
common_info = {
|
||||
'id': anime_id,
|
||||
'title': anime_title,
|
||||
'description': anime_description,
|
||||
}
|
||||
for e in extract_entries(html, video_id, common_info):
|
||||
yield e
|
||||
|
||||
def entries():
|
||||
has_episodes = False
|
||||
for e in extract_episodes(webpage):
|
||||
has_episodes = True
|
||||
yield e
|
||||
|
||||
if not has_episodes:
|
||||
for e in extract_film(webpage, anime_id):
|
||||
yield e
|
||||
|
||||
return self.playlist_result(
|
||||
entries(), anime_id, anime_title, anime_description)
|
128
plugins/youtube_download/yt_dlp/extractor/ant1newsgr.py
Normal file
128
plugins/youtube_download/yt_dlp/extractor/ant1newsgr.py
Normal file
@@ -0,0 +1,128 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
scale_thumbnails_to_max_format_width,
|
||||
)
|
||||
|
||||
|
||||
class Ant1NewsGrBaseIE(InfoExtractor):
|
||||
def _download_and_extract_api_data(self, video_id, netloc, cid=None):
|
||||
url = f'{self.http_scheme()}//{netloc}{self._API_PATH}'
|
||||
info = self._download_json(url, video_id, query={'cid': cid or video_id})
|
||||
try:
|
||||
source = info['url']
|
||||
except KeyError:
|
||||
raise ExtractorError('no source found for %s' % video_id)
|
||||
formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4')
|
||||
if determine_ext(source) == 'm3u8' else ([{'url': source}], {}))
|
||||
thumbnails = scale_thumbnails_to_max_format_width(
|
||||
formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+')
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.get('title'),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'subtitles': subs,
|
||||
}
|
||||
|
||||
|
||||
class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE):
|
||||
IE_NAME = 'ant1newsgr:watch'
|
||||
IE_DESC = 'ant1news.gr videos'
|
||||
_VALID_URL = r'https?://(?P<netloc>(?:www\.)?ant1news\.gr)/watch/(?P<id>\d+)/'
|
||||
_API_PATH = '/templates/data/player'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ant1news.gr/watch/1506168/ant1-news-09112021-stis-18-45',
|
||||
'md5': '95925e6b32106754235f2417e0d2dfab',
|
||||
'info_dict': {
|
||||
'id': '1506168',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:0ad00fa66ecf8aa233d26ab0dba7514a',
|
||||
'description': 'md5:18665af715a6dcfeac1d6153a44f16b0',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/26d46bf6-8158-4f02-b197-7096c714b2de.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, netloc = self._match_valid_url(url).group('id', 'netloc')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
info = self._download_and_extract_api_data(video_id, netloc)
|
||||
info['description'] = self._og_search_description(webpage)
|
||||
return info
|
||||
|
||||
|
||||
class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
|
||||
IE_NAME = 'ant1newsgr:article'
|
||||
IE_DESC = 'ant1news.gr articles'
|
||||
_VALID_URL = r'https?://(?:www\.)?ant1news\.gr/[^/]+/article/(?P<id>\d+)/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron',
|
||||
'md5': '294f18331bb516539d72d85a82887dcc',
|
||||
'info_dict': {
|
||||
'id': '_xvg/m_cmbatw=',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411',
|
||||
'timestamp': 1603092840,
|
||||
'upload_date': '20201019',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn',
|
||||
'info_dict': {
|
||||
'id': '620286',
|
||||
'title': 'md5:91fe569e952e4d146485740ae927662b',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle')
|
||||
embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage))
|
||||
if not embed_urls:
|
||||
raise ExtractorError('no videos found for %s' % video_id, expected=True)
|
||||
return self.playlist_from_matches(
|
||||
embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(),
|
||||
video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})
|
||||
|
||||
|
||||
class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
|
||||
IE_NAME = 'ant1newsgr:embed'
|
||||
IE_DESC = 'ant1news.gr embedded videos'
|
||||
_BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
|
||||
_VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
|
||||
_API_PATH = '/news/templates/data/jsonPlayer'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377',
|
||||
'md5': 'dfc58c3a11a5a9aad2ba316ed447def3',
|
||||
'info_dict': {
|
||||
'id': '3f_li_c_az_jw_y_u=',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:a30c93332455f53e1e84ae0724f0adf7',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/bbe31201-3f09-4a4e-87f5-8ad2159fffe2.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
canonical_url = self._request_webpage(
|
||||
HEADRequest(url), video_id,
|
||||
note='Resolve canonical player URL',
|
||||
errnote='Could not resolve canonical player URL').geturl()
|
||||
_, netloc, _, _, query, _ = urllib.parse.urlparse(canonical_url)
|
||||
cid = urllib.parse.parse_qs(query)['cid'][0]
|
||||
|
||||
return self._download_and_extract_api_data(video_id, netloc, cid=cid)
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import json
|
||||
@@ -10,38 +7,68 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_encrypt
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
determine_ext,
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
join_nonempty,
|
||||
smuggle_url,
|
||||
strip_jsonp,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
# This import causes a ModuleNotFoundError on some systems for unknown reason.
|
||||
# See issues:
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/35
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/27449
|
||||
# https://github.com/animelover1984/youtube-dl/issues/17
|
||||
try:
|
||||
from .anvato_token_generator import NFLTokenGenerator
|
||||
except ImportError:
|
||||
NFLTokenGenerator = None
|
||||
|
||||
|
||||
def md5_text(s):
|
||||
if not isinstance(s, compat_str):
|
||||
s = compat_str(s)
|
||||
return hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||
return hashlib.md5(str(s).encode()).hexdigest()
|
||||
|
||||
|
||||
class AnvatoIE(InfoExtractor):
|
||||
_VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)'
|
||||
|
||||
_API_BASE_URL = 'https://tkx.mp.lura.live/rest/v2'
|
||||
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
|
||||
|
||||
_TESTS = [{
|
||||
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
|
||||
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
|
||||
'md5': '921919dab3cd0b849ff3d624831ae3e2',
|
||||
'info_dict': {
|
||||
'id': '899441',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
|
||||
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
||||
'upload_date': '20201215',
|
||||
'timestamp': 1608009755,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': 'NFL',
|
||||
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
|
||||
'Player Highlights', 'Cleveland Browns', 'league'],
|
||||
'duration': 157,
|
||||
'categories': ['Entertainment', 'Game', 'Highlights'],
|
||||
},
|
||||
}, {
|
||||
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
|
||||
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
|
||||
'md5': '837718bcfb3a7778d022f857f7a9b19e',
|
||||
'info_dict': {
|
||||
'id': '8032455',
|
||||
'ext': 'mp4',
|
||||
'title': '99-year-old woman learns to fly plane in Torrance, checks off bucket list dream',
|
||||
'description': 'md5:0a12bab8159445e78f52a297a35c6609',
|
||||
'upload_date': '20220928',
|
||||
'timestamp': 1664408881,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': 'LIN',
|
||||
'tags': ['video', 'news', '5live'],
|
||||
'duration': 155,
|
||||
'categories': ['News'],
|
||||
},
|
||||
}]
|
||||
|
||||
# Copied from anvplayer.min.js
|
||||
_ANVACK_TABLE = {
|
||||
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
|
||||
@@ -214,86 +241,74 @@ class AnvatoIE(InfoExtractor):
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
|
||||
}
|
||||
|
||||
def _generate_nfl_token(self, anvack, mcp_id):
|
||||
reroute = self._download_json(
|
||||
'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
|
||||
headers={'X-Domain-Id': 100}, note='Fetching token info')
|
||||
token_type = reroute.get('token_type') or 'Bearer'
|
||||
auth_token = f'{token_type} {reroute["access_token"]}'
|
||||
response = self._download_json(
|
||||
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
|
||||
'query': '''{
|
||||
viewer {
|
||||
mediaToken(anvack: "%s", id: %s) {
|
||||
token
|
||||
}
|
||||
}
|
||||
}''' % (anvack, mcp_id),
|
||||
}).encode(), headers={
|
||||
'Authorization': auth_token,
|
||||
'Content-Type': 'application/json',
|
||||
}, note='Fetching NFL API token')
|
||||
return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
|
||||
|
||||
_TOKEN_GENERATORS = {
|
||||
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': NFLTokenGenerator,
|
||||
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
|
||||
}
|
||||
|
||||
_API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
|
||||
|
||||
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
|
||||
|
||||
_TESTS = [{
|
||||
# from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
|
||||
'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
|
||||
'info_dict': {
|
||||
'id': '4465496',
|
||||
'ext': 'mp4',
|
||||
'title': 'VIDEO: Humpback whale breaches right next to NH boat',
|
||||
'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
|
||||
'duration': 22,
|
||||
'timestamp': 1534855680,
|
||||
'upload_date': '20180821',
|
||||
'uploader': 'ANV',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
|
||||
'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(AnvatoIE, self).__init__(*args, **kwargs)
|
||||
self.__server_time = None
|
||||
|
||||
def _server_time(self, access_key, video_id):
|
||||
if self.__server_time is not None:
|
||||
return self.__server_time
|
||||
return int_or_none(traverse_obj(self._download_json(
|
||||
f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
|
||||
note='Fetching server time', fatal=False), 'server_time')) or int(time.time())
|
||||
|
||||
self.__server_time = int(self._download_json(
|
||||
self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id,
|
||||
note='Fetching server time')['server_time'])
|
||||
|
||||
return self.__server_time
|
||||
|
||||
def _api_prefix(self, access_key):
|
||||
return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage')
|
||||
|
||||
def _get_video_json(self, access_key, video_id):
|
||||
def _get_video_json(self, access_key, video_id, extracted_token):
|
||||
# See et() in anvplayer.min.js, which is an alias of getVideoJSON()
|
||||
video_data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key)
|
||||
video_data_url = f'{self._API_BASE_URL}/mcp/video/{video_id}?anvack={access_key}'
|
||||
server_time = self._server_time(access_key, video_id)
|
||||
input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time))
|
||||
input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}'
|
||||
|
||||
auth_secret = intlist_to_bytes(aes_encrypt(
|
||||
bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))
|
||||
|
||||
video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii')
|
||||
query = {
|
||||
'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'),
|
||||
'rtyp': 'fp',
|
||||
}
|
||||
anvrid = md5_text(time.time() * 1000 * random.random())[:30]
|
||||
api = {
|
||||
'anvrid': anvrid,
|
||||
'anvts': server_time,
|
||||
}
|
||||
if self._TOKEN_GENERATORS.get(access_key) is not None:
|
||||
api['anvstk2'] = self._TOKEN_GENERATORS[access_key].generate(self, access_key, video_id)
|
||||
if extracted_token is not None:
|
||||
api['anvstk2'] = extracted_token
|
||||
elif self._TOKEN_GENERATORS.get(access_key) is not None:
|
||||
api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
|
||||
elif self._ANVACK_TABLE.get(access_key) is not None:
|
||||
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
|
||||
else:
|
||||
api['anvstk'] = md5_text('%s|%s|%d|%s' % (
|
||||
access_key, anvrid, server_time,
|
||||
self._ANVACK_TABLE.get(access_key, self._API_KEY)))
|
||||
api['anvstk2'] = 'default'
|
||||
|
||||
return self._download_json(
|
||||
video_data_url, video_id, transform_source=strip_jsonp,
|
||||
data=json.dumps({'api': api}).encode('utf-8'))
|
||||
video_data_url, video_id, transform_source=strip_jsonp, query=query,
|
||||
data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8'))
|
||||
|
||||
def _get_anvato_videos(self, access_key, video_id):
|
||||
video_data = self._get_video_json(access_key, video_id)
|
||||
def _get_anvato_videos(self, access_key, video_id, token):
|
||||
video_data = self._get_video_json(access_key, video_id, token)
|
||||
|
||||
formats = []
|
||||
for published_url in video_data['published_urls']:
|
||||
video_url = published_url['embed_url']
|
||||
video_url = published_url.get('embed_url')
|
||||
if not video_url:
|
||||
continue
|
||||
media_format = published_url.get('format')
|
||||
ext = determine_ext(video_url)
|
||||
|
||||
@@ -308,15 +323,27 @@ class AnvatoIE(InfoExtractor):
|
||||
'tbr': tbr or None,
|
||||
}
|
||||
|
||||
if media_format == 'm3u8' and tbr is not None:
|
||||
vtt_subs, hls_subs = {}, {}
|
||||
if media_format == 'vtt':
|
||||
_, vtt_subs = self._extract_m3u8_formats_and_subtitles(
|
||||
video_url, video_id, m3u8_id='vtt', fatal=False)
|
||||
continue
|
||||
elif media_format == 'm3u8' and tbr is not None:
|
||||
a_format.update({
|
||||
'format_id': join_nonempty('hls', tbr),
|
||||
'ext': 'mp4',
|
||||
})
|
||||
elif media_format == 'm3u8-variant' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
# For some videos the initial m3u8 URL returns JSON instead
|
||||
manifest_json = self._download_json(
|
||||
video_url, video_id, note='Downloading manifest JSON', errnote=False)
|
||||
if manifest_json:
|
||||
video_url = manifest_json.get('master_m3u8')
|
||||
if not video_url:
|
||||
continue
|
||||
hls_fmts, hls_subs = self._extract_m3u8_formats_and_subtitles(
|
||||
video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False)
|
||||
formats.extend(hls_fmts)
|
||||
continue
|
||||
elif ext == 'mp3' or media_format == 'mp3':
|
||||
a_format['vcodec'] = 'none'
|
||||
@@ -327,8 +354,6 @@ class AnvatoIE(InfoExtractor):
|
||||
})
|
||||
formats.append(a_format)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for caption in video_data.get('captions', []):
|
||||
a_caption = {
|
||||
@@ -336,6 +361,7 @@ class AnvatoIE(InfoExtractor):
|
||||
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
|
||||
}
|
||||
subtitles.setdefault(caption['language'], []).append(a_caption)
|
||||
subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -352,30 +378,19 @@ class AnvatoIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(ie, webpage, video_id):
|
||||
entries = []
|
||||
for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage):
|
||||
anvplayer_data = ie._parse_json(
|
||||
mobj.group('anvp'), video_id, transform_source=unescapeHTML,
|
||||
fatal=False)
|
||||
if not anvplayer_data:
|
||||
continue
|
||||
video = anvplayer_data.get('video')
|
||||
if not isinstance(video, compat_str) or not video.isdigit():
|
||||
continue
|
||||
access_key = anvplayer_data.get('accessKey')
|
||||
if not access_key:
|
||||
mcp = anvplayer_data.get('mcp')
|
||||
if mcp:
|
||||
access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get(
|
||||
mcp.lower())
|
||||
@classmethod
|
||||
def _extract_from_webpage(cls, url, webpage):
|
||||
for mobj in re.finditer(cls._ANVP_RE, webpage):
|
||||
anvplayer_data = unescapeHTML(json.loads(mobj.group('anvp'))) or {}
|
||||
video_id, access_key = anvplayer_data.get('video'), anvplayer_data.get('accessKey')
|
||||
if not access_key:
|
||||
access_key = cls._MCP_TO_ACCESS_KEY_TABLE.get((anvplayer_data.get('mcp') or '').lower())
|
||||
if not (video_id or '').isdigit() or not access_key:
|
||||
continue
|
||||
entries.append(ie.url_result(
|
||||
'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(),
|
||||
video_id=video))
|
||||
return entries
|
||||
url = f'anvato:{access_key}:{video_id}'
|
||||
if anvplayer_data.get('token'):
|
||||
url = smuggle_url(url, {'token': anvplayer_data['token']})
|
||||
yield cls.url_result(url, AnvatoIE, video_id)
|
||||
|
||||
def _extract_anvato_videos(self, webpage, video_id):
|
||||
anvplayer_data = self._parse_json(
|
||||
@@ -383,7 +398,7 @@ class AnvatoIE(InfoExtractor):
|
||||
self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
|
||||
video_id)
|
||||
return self._get_anvato_videos(
|
||||
anvplayer_data['accessKey'], anvplayer_data['video'])
|
||||
anvplayer_data['accessKey'], anvplayer_data['video'], 'default') # cbslocal token = 'default'
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
@@ -391,9 +406,7 @@ class AnvatoIE(InfoExtractor):
|
||||
'countries': smuggled_data.get('geo_countries'),
|
||||
})
|
||||
|
||||
mobj = self._match_valid_url(url)
|
||||
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
|
||||
access_key, video_id = self._match_valid_url(url).group('access_key_or_mcp', 'id')
|
||||
if access_key not in self._ANVACK_TABLE:
|
||||
access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
|
||||
access_key) or access_key
|
||||
return self._get_anvato_videos(access_key, video_id)
|
||||
access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(access_key) or access_key
|
||||
return self._get_anvato_videos(access_key, video_id, smuggled_data.get('token'))
|
||||
|
@@ -1,7 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .nfl import NFLTokenGenerator
|
||||
|
||||
__all__ = [
|
||||
'NFLTokenGenerator',
|
||||
]
|
@@ -1,6 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
class TokenGenerator:
|
||||
def generate(self, anvack, mcp_id):
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
@@ -1,30 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import TokenGenerator
|
||||
|
||||
|
||||
class NFLTokenGenerator(TokenGenerator):
|
||||
_AUTHORIZATION = None
|
||||
|
||||
def generate(ie, anvack, mcp_id):
|
||||
if not NFLTokenGenerator._AUTHORIZATION:
|
||||
reroute = ie._download_json(
|
||||
'https://api.nfl.com/v1/reroute', mcp_id,
|
||||
data=b'grant_type=client_credentials',
|
||||
headers={'X-Domain-Id': 100})
|
||||
NFLTokenGenerator._AUTHORIZATION = '%s %s' % (reroute.get('token_type') or 'Bearer', reroute['access_token'])
|
||||
return ie._download_json(
|
||||
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
|
||||
'query': '''{
|
||||
viewer {
|
||||
mediaToken(anvack: "%s", id: %s) {
|
||||
token
|
||||
}
|
||||
}
|
||||
}''' % (anvack, mcp_id),
|
||||
}).encode(), headers={
|
||||
'Authorization': NFLTokenGenerator._AUTHORIZATION,
|
||||
'Content-Type': 'application/json',
|
||||
})['data']['viewer']['mediaToken']['token']
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .yahoo import YahooIE
|
||||
@@ -12,7 +9,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class AolIE(YahooIE):
|
||||
class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
|
||||
IE_NAME = 'aol.com'
|
||||
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
|
||||
|
||||
@@ -122,7 +119,6 @@ class AolIE(YahooIE):
|
||||
'height': int_or_none(qs.get('h', [None])[0]),
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,8 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@@ -13,6 +8,7 @@ from ..utils import (
|
||||
|
||||
class APAIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<base_url>https?://[^/]+\.apa\.at)/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1']
|
||||
_TESTS = [{
|
||||
'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
||||
'md5': '2b12292faeb0a7d930c778c7a5b4759b',
|
||||
@@ -33,14 +29,6 @@ class APAIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id, base_url = mobj.group('id', 'base_url')
|
||||
@@ -84,7 +72,6 @@ class APAIE(InfoExtractor):
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
@@ -13,6 +10,7 @@ from ..utils import (
|
||||
|
||||
class AparatIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
|
||||
_EMBED_REGEX = [r'<iframe .*?src="(?P<url>http://www\.aparat\.com/video/[^"]+)"']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.aparat.com/v/wP8On',
|
||||
@@ -75,7 +73,6 @@ class AparatIE(InfoExtractor):
|
||||
r'(\d+)[pP]', label or '', 'height',
|
||||
default=None)),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
str_to_int,
|
||||
|
@@ -1,9 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
@@ -14,16 +13,17 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'md5': 'df02e6acb11c10e844946a39e7222b08',
|
||||
'md5': '41dc31cd650143e530d9423b6b5a344f',
|
||||
'info_dict': {
|
||||
'id': '1000482637777',
|
||||
'ext': 'mp3',
|
||||
'title': '207 - Whitney Webb Returns',
|
||||
'description': 'md5:13a73bade02d2e43737751e3987e1399',
|
||||
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
|
||||
'upload_date': '20200705',
|
||||
'timestamp': 1593921600,
|
||||
'duration': 6425,
|
||||
'timestamp': 1593932400,
|
||||
'duration': 6454,
|
||||
'series': 'The Tim Dillon Show',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
@@ -39,24 +39,47 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id)
|
||||
ember_data = ember_data.get(episode_id) or ember_data
|
||||
episode = ember_data['data']['attributes']
|
||||
episode_data = {}
|
||||
ember_data = {}
|
||||
# new page type 2021-11
|
||||
amp_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
|
||||
amp_data = try_get(amp_data,
|
||||
lambda a: self._parse_json(
|
||||
next(a[x] for x in iter(a) if episode_id in x),
|
||||
episode_id),
|
||||
dict) or {}
|
||||
amp_data = amp_data.get('d') or []
|
||||
episode_data = try_get(
|
||||
amp_data,
|
||||
lambda a: next(x for x in a
|
||||
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
|
||||
dict)
|
||||
if not episode_data:
|
||||
# try pre 2021-11 page type: TODO: consider deleting if no longer used
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id) or {}
|
||||
ember_data = ember_data.get(episode_id) or ember_data
|
||||
episode_data = try_get(ember_data, lambda x: x['data'], dict)
|
||||
episode = episode_data['attributes']
|
||||
description = episode.get('description') or {}
|
||||
|
||||
series = None
|
||||
for inc in (ember_data.get('included') or []):
|
||||
for inc in (amp_data or ember_data.get('included') or []):
|
||||
if inc.get('type') == 'media/podcast':
|
||||
series = try_get(inc, lambda x: x['attributes']['name'])
|
||||
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'title': episode['name'],
|
||||
'title': episode.get('name'),
|
||||
'url': clean_podcast_url(episode['assetUrl']),
|
||||
'description': description.get('standard') or description.get('short'),
|
||||
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
||||
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
||||
'series': series,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'vcodec': 'none',
|
||||
}
|
||||
|
@@ -1,5 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
@@ -122,7 +120,6 @@ class AppleTrailersIE(InfoExtractor):
|
||||
'height': int_or_none(size_data.get('height')),
|
||||
'language': version[:2],
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
|
||||
@@ -187,8 +184,6 @@ class AppleTrailersIE(InfoExtractor):
|
||||
'height': int_or_none(format['height']),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
playlist.append({
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
|
@@ -1,39 +1,37 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE, YoutubeBaseInfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
compat_HTTPError
|
||||
)
|
||||
from .naver import NaverBaseIE
|
||||
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
|
||||
from ..compat import compat_HTTPError, compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
dict_get,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
get_element_by_id,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
KNOWN_EXTENSIONS,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
parse_qs,
|
||||
str_to_int,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
url_or_none
|
||||
)
|
||||
|
||||
|
||||
@@ -54,6 +52,11 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'upload_date': '20100315',
|
||||
'creator': 'SRI International',
|
||||
'uploader': 'laura@archive.org',
|
||||
'thumbnail': r're:https://archive\.org/download/.*\.jpg',
|
||||
'release_year': 1968,
|
||||
'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr',
|
||||
'track': 'XD300-23 68HighlightsAResearchCntAugHumanIntellect',
|
||||
|
||||
},
|
||||
}, {
|
||||
'url': 'https://archive.org/details/Cops1922',
|
||||
@@ -62,33 +65,43 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'id': 'Cops1922',
|
||||
'ext': 'mp4',
|
||||
'title': 'Buster Keaton\'s "Cops" (1922)',
|
||||
'description': 'md5:43a603fd6c5b4b90d12a96b921212b9c',
|
||||
'description': 'md5:cd6f9910c35aedd5fc237dbc3957e2ca',
|
||||
'uploader': 'yorkmba99@hotmail.com',
|
||||
'timestamp': 1387699629,
|
||||
'upload_date': '20131222',
|
||||
'display_id': 'Cops-v2.mp4',
|
||||
'thumbnail': r're:https://archive\.org/download/.*\.jpg',
|
||||
'duration': 1091.96,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://archive.org/details/Election_Ads',
|
||||
'md5': '284180e857160cf866358700bab668a3',
|
||||
'md5': 'eec5cddebd4793c6a653b69c3b11f2e6',
|
||||
'info_dict': {
|
||||
'id': 'Election_Ads/Commercial-JFK1960ElectionAdCampaignJingle.mpg',
|
||||
'title': 'Commercial-JFK1960ElectionAdCampaignJingle.mpg',
|
||||
'ext': 'mp4',
|
||||
'ext': 'mpg',
|
||||
'thumbnail': r're:https://archive\.org/download/.*\.jpg',
|
||||
'duration': 59.77,
|
||||
'display_id': 'Commercial-JFK1960ElectionAdCampaignJingle.mpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://archive.org/details/Election_Ads/Commercial-Nixon1960ElectionAdToughonDefense.mpg',
|
||||
'md5': '7915213ef02559b5501fe630e1a53f59',
|
||||
'md5': 'ea1eed8234e7d4165f38c8c769edef38',
|
||||
'info_dict': {
|
||||
'id': 'Election_Ads/Commercial-Nixon1960ElectionAdToughonDefense.mpg',
|
||||
'title': 'Commercial-Nixon1960ElectionAdToughonDefense.mpg',
|
||||
'ext': 'mp4',
|
||||
'ext': 'mpg',
|
||||
'timestamp': 1205588045,
|
||||
'uploader': 'mikedavisstripmaster@yahoo.com',
|
||||
'description': '1960 Presidential Campaign Election Commercials John F Kennedy, Richard M Nixon',
|
||||
'upload_date': '20080315',
|
||||
'display_id': 'Commercial-Nixon1960ElectionAdToughonDefense.mpg',
|
||||
'duration': 59.51,
|
||||
'license': 'http://creativecommons.org/licenses/publicdomain/',
|
||||
'thumbnail': r're:https://archive\.org/download/.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://archive.org/details/gd1977-05-08.shure57.stevenson.29303.flac16',
|
||||
@@ -97,6 +110,12 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'id': 'gd1977-05-08.shure57.stevenson.29303.flac16/gd1977-05-08d01t01.flac',
|
||||
'title': 'Turning',
|
||||
'ext': 'flac',
|
||||
'track': 'Turning',
|
||||
'creator': 'Grateful Dead',
|
||||
'display_id': 'gd1977-05-08d01t01.flac',
|
||||
'track_number': 1,
|
||||
'album': '1977-05-08 - Barton Hall - Cornell University',
|
||||
'duration': 39.8,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://archive.org/details/gd1977-05-08.shure57.stevenson.29303.flac16/gd1977-05-08d01t07.flac',
|
||||
@@ -107,11 +126,20 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'ext': 'flac',
|
||||
'timestamp': 1205895624,
|
||||
'uploader': 'mvernon54@yahoo.com',
|
||||
'description': 'md5:6a31f1996db0aa0fc9da6d6e708a1bb0',
|
||||
'description': 'md5:6c921464414814720c6593810a5c7e3d',
|
||||
'upload_date': '20080319',
|
||||
'location': 'Barton Hall - Cornell University',
|
||||
'duration': 438.68,
|
||||
'track': 'Deal',
|
||||
'creator': 'Grateful Dead',
|
||||
'album': '1977-05-08 - Barton Hall - Cornell University',
|
||||
'release_date': '19770508',
|
||||
'display_id': 'gd1977-05-08d01t07.flac',
|
||||
'release_year': 1977,
|
||||
'track_number': 7,
|
||||
},
|
||||
}, {
|
||||
# FIXME: give a better error message than just IndexError when all available formats are restricted
|
||||
'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik',
|
||||
'md5': '7cb019baa9b332e82ea7c10403acd180',
|
||||
'info_dict': {
|
||||
@@ -119,6 +147,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'title': 'Bells Of Rostov',
|
||||
'ext': 'mp3',
|
||||
},
|
||||
'skip': 'restricted'
|
||||
}, {
|
||||
'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3',
|
||||
'md5': '1d0aabe03edca83ca58d9ed3b493a3c3',
|
||||
@@ -131,6 +160,52 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'description': 'md5:012b2d668ae753be36896f343d12a236',
|
||||
'upload_date': '20190928',
|
||||
},
|
||||
'skip': 'restricted'
|
||||
}, {
|
||||
# Original formats are private
|
||||
'url': 'https://archive.org/details/irelandthemakingofarepublic',
|
||||
'info_dict': {
|
||||
'id': 'irelandthemakingofarepublic',
|
||||
'title': 'Ireland: The Making of a Republic',
|
||||
'upload_date': '20160610',
|
||||
'description': 'md5:f70956a156645a658a0dc9513d9e78b7',
|
||||
'uploader': 'dimitrios@archive.org',
|
||||
'creator': ['British Broadcasting Corporation', 'Time-Life Films'],
|
||||
'timestamp': 1465594947,
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '0b211261b26590d49df968f71b90690d',
|
||||
'info_dict': {
|
||||
'id': 'irelandthemakingofarepublic/irelandthemakingofarepublicreel1_01.mov',
|
||||
'ext': 'mp4',
|
||||
'title': 'irelandthemakingofarepublicreel1_01.mov',
|
||||
'duration': 130.46,
|
||||
'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_01_000117.jpg',
|
||||
'display_id': 'irelandthemakingofarepublicreel1_01.mov',
|
||||
},
|
||||
}, {
|
||||
'md5': '67335ee3b23a0da930841981c1e79b02',
|
||||
'info_dict': {
|
||||
'id': 'irelandthemakingofarepublic/irelandthemakingofarepublicreel1_02.mov',
|
||||
'ext': 'mp4',
|
||||
'duration': 1395.13,
|
||||
'title': 'irelandthemakingofarepublicreel1_02.mov',
|
||||
'display_id': 'irelandthemakingofarepublicreel1_02.mov',
|
||||
'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_02_001374.jpg',
|
||||
},
|
||||
}, {
|
||||
'md5': 'e470e86787893603f4a341a16c281eb5',
|
||||
'info_dict': {
|
||||
'id': 'irelandthemakingofarepublic/irelandthemakingofarepublicreel2.mov',
|
||||
'ext': 'mp4',
|
||||
'duration': 1602.67,
|
||||
'title': 'irelandthemakingofarepublicreel2.mov',
|
||||
'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg',
|
||||
'display_id': 'irelandthemakingofarepublicreel2.mov',
|
||||
},
|
||||
}
|
||||
]
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -146,7 +221,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
return json.loads(extract_attributes(element)['value'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = compat_urllib_parse_unquote_plus(self._match_id(url))
|
||||
video_id = urllib.parse.unquote_plus(self._match_id(url))
|
||||
identifier, entry_id = (video_id.split('/', 1) + [None])[:2]
|
||||
|
||||
# Archive.org metadata API doesn't clearly demarcate playlist entries
|
||||
@@ -221,17 +296,25 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'filesize': int_or_none(f.get('size'))})
|
||||
|
||||
extension = (f['name'].rsplit('.', 1) + [None])[1]
|
||||
if extension in KNOWN_EXTENSIONS:
|
||||
|
||||
# We don't want to skip private formats if the user has access to them,
|
||||
# however without access to an account with such privileges we can't implement/test this.
|
||||
# For now to be safe, we will only skip them if there is no user logged in.
|
||||
is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
|
||||
if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
|
||||
entry['formats'].append({
|
||||
'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
|
||||
'format': f.get('format'),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'filesize': int_or_none(f.get('size')),
|
||||
'protocol': 'https'})
|
||||
'protocol': 'https',
|
||||
'source_preference': 0 if f.get('source') == 'original' else -1,
|
||||
'format_note': f.get('source')
|
||||
})
|
||||
|
||||
for entry in entries.values():
|
||||
self._sort_formats(entry['formats'])
|
||||
entry['_format_sort_fields'] = ('source', )
|
||||
|
||||
if len(entries) == 1:
|
||||
# If there's only one item, use it as the main info dict
|
||||
@@ -287,7 +370,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'channel_id': 'UCukCyHaD-bK3in_pKpfH9Eg',
|
||||
'duration': 32,
|
||||
'uploader_id': 'Zeurel',
|
||||
'uploader_url': 'http://www.youtube.com/user/Zeurel'
|
||||
'uploader_url': 'https://www.youtube.com/user/Zeurel',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg',
|
||||
}
|
||||
}, {
|
||||
# Internal link
|
||||
@@ -302,7 +387,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'channel_id': 'UCHnyfMqiRRG1u-2MsSQLbXA',
|
||||
'duration': 771,
|
||||
'uploader_id': '1veritasium',
|
||||
'uploader_url': 'http://www.youtube.com/user/1veritasium'
|
||||
'uploader_url': 'https://www.youtube.com/user/1veritasium',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA',
|
||||
}
|
||||
}, {
|
||||
# Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description.
|
||||
@@ -316,7 +403,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'duration': 398,
|
||||
'description': 'md5:ff4de6a7980cb65d951c2f6966a4f2f3',
|
||||
'uploader_id': 'machinima',
|
||||
'uploader_url': 'http://www.youtube.com/user/machinima'
|
||||
'uploader_url': 'https://www.youtube.com/user/machinima',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'machinima'
|
||||
}
|
||||
}, {
|
||||
# FLV video. Video file URL does not provide itag information
|
||||
@@ -330,7 +419,10 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'duration': 19,
|
||||
'description': 'md5:10436b12e07ac43ff8df65287a56efb4',
|
||||
'uploader_id': 'jawed',
|
||||
'uploader_url': 'http://www.youtube.com/user/jawed'
|
||||
'uploader_url': 'https://www.youtube.com/user/jawed',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'jawed',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
|
||||
@@ -344,7 +436,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'duration': 204,
|
||||
'description': 'md5:f7535343b6eda34a314eff8b85444680',
|
||||
'uploader_id': 'itsmadeon',
|
||||
'uploader_url': 'http://www.youtube.com/user/itsmadeon'
|
||||
'uploader_url': 'https://www.youtube.com/user/itsmadeon',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
}
|
||||
}, {
|
||||
# First capture is of dead video, second is the oldest from CDX response.
|
||||
@@ -355,10 +449,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'title': 'Fake Teen Doctor Strikes AGAIN! - Weekly Weird News',
|
||||
'upload_date': '20160218',
|
||||
'channel_id': 'UCdIaNUarhzLSXGoItz7BHVA',
|
||||
'duration': 1236,
|
||||
'duration': 1235,
|
||||
'description': 'md5:21032bae736421e89c2edf36d1936947',
|
||||
'uploader_id': 'MachinimaETC',
|
||||
'uploader_url': 'http://www.youtube.com/user/MachinimaETC'
|
||||
'uploader_url': 'https://www.youtube.com/user/MachinimaETC',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'ETC News',
|
||||
}
|
||||
}, {
|
||||
# First capture of dead video, capture date in link links to dead capture.
|
||||
@@ -369,10 +466,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'title': 'WTF: Video Games Still Launch BROKEN?! - T.U.G.S.',
|
||||
'upload_date': '20160219',
|
||||
'channel_id': 'UCdIaNUarhzLSXGoItz7BHVA',
|
||||
'duration': 798,
|
||||
'duration': 797,
|
||||
'description': 'md5:a1dbf12d9a3bd7cb4c5e33b27d77ffe7',
|
||||
'uploader_id': 'MachinimaETC',
|
||||
'uploader_url': 'http://www.youtube.com/user/MachinimaETC'
|
||||
'uploader_url': 'https://www.youtube.com/user/MachinimaETC',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'ETC News',
|
||||
},
|
||||
'expected_warnings': [
|
||||
r'unable to download capture webpage \(it may not be archived\)'
|
||||
@@ -392,12 +492,11 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'title': 'It\'s Bootleg AirPods Time.',
|
||||
'upload_date': '20211021',
|
||||
'channel_id': 'UC7Jwj9fkrf1adN4fMmTkpug',
|
||||
'channel_url': 'http://www.youtube.com/channel/UC7Jwj9fkrf1adN4fMmTkpug',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC7Jwj9fkrf1adN4fMmTkpug',
|
||||
'duration': 810,
|
||||
'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'DankPods',
|
||||
'uploader_id': 'UC7Jwj9fkrf1adN4fMmTkpug',
|
||||
'uploader_url': 'http://www.youtube.com/channel/UC7Jwj9fkrf1adN4fMmTkpug'
|
||||
}
|
||||
}, {
|
||||
# player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093
|
||||
@@ -408,12 +507,135 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'title': 'bitch lasagna',
|
||||
'upload_date': '20181005',
|
||||
'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
|
||||
'channel_url': 'http://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
|
||||
'duration': 135,
|
||||
'description': 'md5:2dbe4051feeff2dab5f41f82bb6d11d0',
|
||||
'uploader': 'PewDiePie',
|
||||
'uploader_id': 'PewDiePie',
|
||||
'uploader_url': 'http://www.youtube.com/user/PewDiePie'
|
||||
'uploader_url': 'https://www.youtube.com/user/PewDiePie',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
}
|
||||
}, {
|
||||
# ~June 2010 Capture. swfconfig
|
||||
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y',
|
||||
'info_dict': {
|
||||
'id': '8XeW5ilk-9Y',
|
||||
'ext': 'flv',
|
||||
'title': 'Story of Stuff, The Critique Part 4 of 4',
|
||||
'duration': 541,
|
||||
'description': 'md5:28157da06f2c5e94c97f7f3072509972',
|
||||
'uploader': 'HowTheWorldWorks',
|
||||
'uploader_id': 'HowTheWorldWorks',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks',
|
||||
'upload_date': '20090520',
|
||||
}
|
||||
}, {
|
||||
# Jan 2011: watch-video-date/eow-date surrounded by whitespace
|
||||
'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc',
|
||||
'info_dict': {
|
||||
'id': 'Q_yjX80U7Yc',
|
||||
'ext': 'flv',
|
||||
'title': 'Spray Paint Art by Clay Butler: Purple Fantasy Forest',
|
||||
'uploader_id': 'claybutlermusic',
|
||||
'description': 'md5:4595264559e3d0a0ceb3f011f6334543',
|
||||
'upload_date': '20090803',
|
||||
'uploader': 'claybutlermusic',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'duration': 132,
|
||||
'uploader_url': 'https://www.youtube.com/user/claybutlermusic',
|
||||
}
|
||||
}, {
|
||||
# ~May 2009 swfArgs. ytcfg is spread out over various vars
|
||||
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY',
|
||||
'info_dict': {
|
||||
'id': 'c5uJgG05xUY',
|
||||
'ext': 'webm',
|
||||
'title': 'Story of Stuff, The Critique Part 1 of 4',
|
||||
'uploader_id': 'HowTheWorldWorks',
|
||||
'uploader': 'HowTheWorldWorks',
|
||||
'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks',
|
||||
'upload_date': '20090513',
|
||||
'description': 'md5:4ca77d79538064e41e4cc464e93f44f0',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'duration': 754,
|
||||
}
|
||||
}, {
|
||||
# ~June 2012. Upload date is in another lang so cannot extract.
|
||||
'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA',
|
||||
'info_dict': {
|
||||
'id': 'xWTLLl-dQaA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Black Nerd eHarmony Video Bio Parody (SPOOF)',
|
||||
'uploader_url': 'https://www.youtube.com/user/BlackNerdComedy',
|
||||
'description': 'md5:e25f0133aaf9e6793fb81c18021d193e',
|
||||
'uploader_id': 'BlackNerdComedy',
|
||||
'uploader': 'BlackNerdComedy',
|
||||
'duration': 182,
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
}
|
||||
}, {
|
||||
# ~July 2013
|
||||
'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM',
|
||||
'info_dict': {
|
||||
'id': '9eO1aasHyTM',
|
||||
'ext': 'mp4',
|
||||
'title': 'Polar-oid',
|
||||
'description': 'Cameras and bears are dangerous!',
|
||||
'uploader_url': 'https://www.youtube.com/user/punkybird',
|
||||
'uploader_id': 'punkybird',
|
||||
'duration': 202,
|
||||
'channel_id': 'UC62R2cBezNBOqxSerfb1nMQ',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ',
|
||||
'upload_date': '20060428',
|
||||
'uploader': 'punkybird',
|
||||
}
|
||||
}, {
|
||||
# April 2020: Player response in player config
|
||||
'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en',
|
||||
'info_dict': {
|
||||
'id': 'Cf7vS8jc7dY',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Dramatic Pool Story (by Jamie Spicer-Lewis) - Game Grumps Animated',
|
||||
'duration': 64,
|
||||
'upload_date': '20200408',
|
||||
'uploader_id': 'GameGrumps',
|
||||
'uploader': 'GameGrumps',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC9CuvdOVfMPvKCiwdGKL3cQ',
|
||||
'channel_id': 'UC9CuvdOVfMPvKCiwdGKL3cQ',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'description': 'md5:c625bb3c02c4f5fb4205971e468fa341',
|
||||
'uploader_url': 'https://www.youtube.com/user/GameGrumps',
|
||||
}
|
||||
}, {
|
||||
# watch7-user-header with yt-user-info
|
||||
'url': 'ytarchive:kbh4T_b4Ixw:20160307085057',
|
||||
'info_dict': {
|
||||
'id': 'kbh4T_b4Ixw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Shovel Knight OST - Strike the Earth! Plains of Passage 16 bit SNES style remake / remix',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCnTaGvsHmMy792DWeT6HbGA',
|
||||
'uploader': 'Nelward music',
|
||||
'duration': 213,
|
||||
'description': 'md5:804b4a9ce37b050a5fefdbb23aeba54d',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'upload_date': '20150503',
|
||||
'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA',
|
||||
}
|
||||
}, {
|
||||
# April 2012
|
||||
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU',
|
||||
'info_dict': {
|
||||
'id': 'SOm7mPoPskU',
|
||||
'ext': 'mp4',
|
||||
'title': 'Boyfriend - Justin Bieber Parody',
|
||||
'uploader_url': 'https://www.youtube.com/user/thecomputernerd01',
|
||||
'uploader': 'thecomputernerd01',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'description': 'md5:dd7fa635519c2a5b4d566beaecad7491',
|
||||
'duration': 200,
|
||||
'upload_date': '20120407',
|
||||
'uploader_id': 'thecomputernerd01',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
|
||||
@@ -445,9 +667,11 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'only_matching': True
|
||||
},
|
||||
]
|
||||
_YT_INITIAL_DATA_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
|
||||
_YT_INITIAL_PLAYER_RESPONSE_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*({.+?})[)\s]*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE
|
||||
_YT_INITIAL_BOUNDARY_RE = r'(?:(?:var\s+meta|</script|\n)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_BOUNDARY_RE
|
||||
_YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
|
||||
_YT_INITIAL_PLAYER_RESPONSE_RE = fr'''(?x:
|
||||
(?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*|
|
||||
{YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE}
|
||||
)'''
|
||||
|
||||
_YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers
|
||||
_YT_ALL_THUMB_SERVERS = orderedSet(
|
||||
@@ -457,7 +681,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
_OLDEST_CAPTURE_DATE = 20050214000000
|
||||
_NEWEST_CAPTURE_DATE = 20500101000000
|
||||
|
||||
def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note='Downloading CDX API JSON'):
|
||||
def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False):
|
||||
# CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
|
||||
query = {
|
||||
'url': url,
|
||||
@@ -468,21 +692,17 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'collapse': collapse or [],
|
||||
**(query or {})
|
||||
}
|
||||
res = self._download_json('https://web.archive.org/cdx/search/cdx', item_id, note, query=query)
|
||||
res = self._download_json(
|
||||
'https://web.archive.org/cdx/search/cdx', item_id,
|
||||
note or 'Downloading CDX API JSON', query=query, fatal=fatal)
|
||||
if isinstance(res, list) and len(res) >= 2:
|
||||
# format response to make it easier to use
|
||||
return list(dict(zip(res[0], v)) for v in res[1:])
|
||||
elif not isinstance(res, list) or len(res) != 0:
|
||||
self.report_warning('Error while parsing CDX API response' + bug_reports_message())
|
||||
|
||||
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
|
||||
return self._parse_json(self._search_regex(
|
||||
(r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
|
||||
regex), webpage, name, default='{}'), video_id, fatal=False)
|
||||
|
||||
def _extract_webpage_title(self, webpage):
|
||||
page_title = self._html_search_regex(
|
||||
r'<title>([^<]*)</title>', webpage, 'title', default='')
|
||||
page_title = self._html_extract_title(webpage, default='')
|
||||
# YouTube video pages appear to always have either 'YouTube -' as prefix or '- YouTube' as suffix.
|
||||
return self._html_search_regex(
|
||||
r'(?:YouTube\s*-\s*(.*)$)|(?:(.*)\s*-\s*YouTube$)',
|
||||
@@ -490,10 +710,32 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
|
||||
def _extract_metadata(self, video_id, webpage):
|
||||
search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None)) if webpage else (lambda x: None))
|
||||
player_response = self._extract_yt_initial_variable(
|
||||
webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE, video_id, 'initial player response') or {}
|
||||
initial_data = self._extract_yt_initial_variable(
|
||||
webpage, self._YT_INITIAL_DATA_RE, video_id, 'initial player response') or {}
|
||||
player_response = self._search_json(
|
||||
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response',
|
||||
video_id, default={})
|
||||
initial_data = self._search_json(
|
||||
self._YT_INITIAL_DATA_RE, webpage, 'initial data', video_id, default={})
|
||||
|
||||
ytcfg = {}
|
||||
for j in re.findall(r'yt\.setConfig\(\s*(?P<json>{\s*(?s:.+?)\s*})\s*\);', webpage): # ~June 2010
|
||||
ytcfg.update(self._parse_json(j, video_id, fatal=False, ignore_extra=True, transform_source=js_to_json, errnote='') or {})
|
||||
|
||||
# XXX: this also may contain a 'ptchn' key
|
||||
player_config = (
|
||||
self._search_json(
|
||||
r'(?:yt\.playerConfig|ytplayer\.config|swfConfig)\s*=',
|
||||
webpage, 'player config', video_id, default=None)
|
||||
or ytcfg.get('PLAYER_CONFIG') or {})
|
||||
|
||||
# XXX: this may also contain a 'creator' key.
|
||||
swf_args = self._search_json(r'swfArgs\s*=', webpage, 'swf config', video_id, default={})
|
||||
if swf_args and not traverse_obj(player_config, ('args',)):
|
||||
player_config['args'] = swf_args
|
||||
|
||||
if not player_response:
|
||||
# April 2020
|
||||
player_response = self._parse_json(
|
||||
traverse_obj(player_config, ('args', 'player_response')) or '{}', video_id, fatal=False)
|
||||
|
||||
initial_data_video = traverse_obj(
|
||||
initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'videoPrimaryInfoRenderer'),
|
||||
@@ -509,21 +751,64 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
video_details.get('title')
|
||||
or YoutubeBaseInfoExtractor._get_text(microformats, 'title')
|
||||
or YoutubeBaseInfoExtractor._get_text(initial_data_video, 'title')
|
||||
or traverse_obj(player_config, ('args', 'title'))
|
||||
or self._extract_webpage_title(webpage)
|
||||
or search_meta(['og:title', 'twitter:title', 'title']))
|
||||
|
||||
def id_from_url(url, type_):
|
||||
return self._search_regex(
|
||||
rf'(?:{type_})/([^/#&?]+)', url or '', f'{type_} id', default=None)
|
||||
|
||||
# XXX: would the get_elements_by_... functions be better suited here?
|
||||
_CHANNEL_URL_HREF_RE = r'href="[^"]*(?P<url>https?://www\.youtube\.com/(?:user|channel)/[^"]+)"'
|
||||
uploader_or_channel_url = self._search_regex(
|
||||
[fr'<(?:link\s*itemprop=\"url\"|a\s*id=\"watch-username\").*?\b{_CHANNEL_URL_HREF_RE}>', # @fd05024
|
||||
fr'<div\s*id=\"(?:watch-channel-stats|watch-headline-user-info)\"[^>]*>\s*<a[^>]*\b{_CHANNEL_URL_HREF_RE}'], # ~ May 2009, ~June 2012
|
||||
webpage, 'uploader or channel url', default=None)
|
||||
|
||||
owner_profile_url = url_or_none(microformats.get('ownerProfileUrl')) # @a6211d2
|
||||
|
||||
# Uploader refers to the /user/ id ONLY
|
||||
uploader_id = (
|
||||
id_from_url(owner_profile_url, 'user')
|
||||
or id_from_url(uploader_or_channel_url, 'user')
|
||||
or ytcfg.get('VIDEO_USERNAME'))
|
||||
uploader_url = f'https://www.youtube.com/user/{uploader_id}' if uploader_id else None
|
||||
|
||||
# XXX: do we want to differentiate uploader and channel?
|
||||
uploader = (
|
||||
self._search_regex(
|
||||
[r'<a\s*id="watch-username"[^>]*>\s*<strong>([^<]+)</strong>', # June 2010
|
||||
r'var\s*watchUsername\s*=\s*\'(.+?)\';', # ~May 2009
|
||||
r'<div\s*\bid=\"watch-channel-stats"[^>]*>\s*<a[^>]*>\s*(.+?)\s*</a', # ~May 2009
|
||||
r'<a\s*id="watch-userbanner"[^>]*title="\s*(.+?)\s*"'], # ~June 2012
|
||||
webpage, 'uploader', default=None)
|
||||
or self._html_search_regex(
|
||||
[r'(?s)<div\s*class="yt-user-info".*?<a[^>]*[^>]*>\s*(.*?)\s*</a', # March 2016
|
||||
r'(?s)<a[^>]*yt-user-name[^>]*>\s*(.*?)\s*</a'], # july 2013
|
||||
get_element_by_id('watch7-user-header', webpage), 'uploader', default=None)
|
||||
or self._html_search_regex(
|
||||
r'<button\s*href="/user/[^>]*>\s*<span[^>]*>\s*(.+?)\s*<', # April 2012
|
||||
get_element_by_id('watch-headline-user-info', webpage), 'uploader', default=None)
|
||||
or traverse_obj(player_config, ('args', 'creator'))
|
||||
or video_details.get('author'))
|
||||
|
||||
channel_id = str_or_none(
|
||||
video_details.get('channelId')
|
||||
or microformats.get('externalChannelId')
|
||||
or search_meta('channelId')
|
||||
or self._search_regex(
|
||||
r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1', # @b45a9e6
|
||||
webpage, 'channel id', default=None, group='id'))
|
||||
channel_url = f'http://www.youtube.com/channel/{channel_id}' if channel_id else None
|
||||
webpage, 'channel id', default=None, group='id')
|
||||
or id_from_url(owner_profile_url, 'channel')
|
||||
or id_from_url(uploader_or_channel_url, 'channel')
|
||||
or traverse_obj(player_config, ('args', 'ucid')))
|
||||
|
||||
channel_url = f'https://www.youtube.com/channel/{channel_id}' if channel_id else None
|
||||
duration = int_or_none(
|
||||
video_details.get('lengthSeconds')
|
||||
or microformats.get('lengthSeconds')
|
||||
or traverse_obj(player_config, ('args', ('length_seconds', 'l')), get_all=False)
|
||||
or parse_duration(search_meta('duration')))
|
||||
description = (
|
||||
video_details.get('shortDescription')
|
||||
@@ -531,26 +816,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
or clean_html(get_element_by_id('eow-description', webpage)) # @9e6dd23
|
||||
or search_meta(['description', 'og:description', 'twitter:description']))
|
||||
|
||||
uploader = video_details.get('author')
|
||||
|
||||
# Uploader ID and URL
|
||||
uploader_mobj = re.search(
|
||||
r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">', # @fd05024
|
||||
webpage)
|
||||
if uploader_mobj is not None:
|
||||
uploader_id, uploader_url = uploader_mobj.group('uploader_id'), uploader_mobj.group('uploader_url')
|
||||
else:
|
||||
# @a6211d2
|
||||
uploader_url = url_or_none(microformats.get('ownerProfileUrl'))
|
||||
uploader_id = self._search_regex(
|
||||
r'(?:user|channel)/([^/]+)', uploader_url or '', 'uploader id', default=None)
|
||||
|
||||
upload_date = unified_strdate(
|
||||
dict_get(microformats, ('uploadDate', 'publishDate'))
|
||||
or search_meta(['uploadDate', 'datePublished'])
|
||||
or self._search_regex(
|
||||
[r'(?s)id="eow-date.*?>(.*?)</span>',
|
||||
r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'], # @7998520
|
||||
[r'(?s)id="eow-date.*?>\s*(.*?)\s*</span>',
|
||||
r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']', # @7998520
|
||||
r'class\s*=\s*"(?:watch-video-date|watch-video-added post-date)"[^>]*>\s*([^<]+?)\s*<'], # ~June 2010, ~Jan 2009 (respectively)
|
||||
webpage, 'upload date', default=None))
|
||||
|
||||
return {
|
||||
@@ -596,7 +868,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
response = self._call_cdx_api(
|
||||
video_id, f'https://www.youtube.com/watch?v={video_id}',
|
||||
filters=['mimetype:text/html'], collapse=['timestamp:6', 'digest'], query={'matchType': 'prefix'}) or []
|
||||
all_captures = sorted([int_or_none(r['timestamp']) for r in response if int_or_none(r['timestamp']) is not None])
|
||||
all_captures = sorted(int_or_none(r['timestamp']) for r in response if int_or_none(r['timestamp']) is not None)
|
||||
|
||||
# Prefer the new polymer UI captures as we support extracting more metadata from them
|
||||
# WBM captures seem to all switch to this layout ~July 2020
|
||||
@@ -619,18 +891,22 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
url_date = url_date or url_date_2
|
||||
|
||||
urlh = None
|
||||
try:
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id),
|
||||
video_id, note='Fetching archived video file url', expected_status=True)
|
||||
except ExtractorError as e:
|
||||
# HTTP Error 404 is expected if the video is not saved.
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
self.raise_no_formats(
|
||||
'The requested video is not archived, indexed, or there is an issue with web.archive.org',
|
||||
expected=True)
|
||||
else:
|
||||
raise
|
||||
retry_manager = self.RetryManager(fatal=False)
|
||||
for retry in retry_manager:
|
||||
try:
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id),
|
||||
video_id, note='Fetching archived video file url', expected_status=True)
|
||||
except ExtractorError as e:
|
||||
# HTTP Error 404 is expected if the video is not saved.
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
self.raise_no_formats(
|
||||
'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True)
|
||||
else:
|
||||
retry.error = e
|
||||
|
||||
if retry_manager.error:
|
||||
self.raise_no_formats(retry_manager.error, expected=True, video_id=video_id)
|
||||
|
||||
capture_dates = self._get_capture_dates(video_id, int_or_none(url_date))
|
||||
self.write_debug('Captures to try: ' + join_nonempty(*capture_dates, delim=', '))
|
||||
@@ -671,3 +947,237 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
if not info.get('title'):
|
||||
info['title'] = video_id
|
||||
return info
|
||||
|
||||
|
||||
class VLiveWebArchiveIE(InfoExtractor):
|
||||
IE_NAME = 'web.archive:vlive'
|
||||
IE_DESC = 'web.archive.org saved vlive videos'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://)?web\.archive\.org/
|
||||
(?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)? # /web and the version index is optional
|
||||
(?:https?(?::|%3[Aa])//)?(?:
|
||||
(?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P<id>[0-9]+) # VLive URL
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326',
|
||||
'md5': 'cc7314812855ce56de70a06a27314983',
|
||||
'info_dict': {
|
||||
'id': '1326',
|
||||
'ext': 'mp4',
|
||||
'title': "Girl's Day's Broadcast",
|
||||
'creator': "Girl's Day",
|
||||
'view_count': int,
|
||||
'uploader_id': 'muploader_a',
|
||||
'uploader_url': None,
|
||||
'uploader': None,
|
||||
'upload_date': '20150817',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': 1439816449,
|
||||
'like_count': int,
|
||||
'channel': 'Girl\'s Day',
|
||||
'channel_id': 'FDF27',
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1439818140,
|
||||
'release_date': '20150817',
|
||||
'duration': 1014,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937',
|
||||
'info_dict': {
|
||||
'id': '16937',
|
||||
'ext': 'mp4',
|
||||
'title': '첸백시 걍방',
|
||||
'creator': 'EXO',
|
||||
'view_count': int,
|
||||
'subtitles': 'mincount:12',
|
||||
'uploader_id': 'muploader_j',
|
||||
'uploader_url': 'http://vlive.tv',
|
||||
'uploader': None,
|
||||
'upload_date': '20161112',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': 1478923074,
|
||||
'like_count': int,
|
||||
'channel': 'EXO',
|
||||
'channel_id': 'F94BD',
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1478924280,
|
||||
'release_date': '20161112',
|
||||
'duration': 906,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870',
|
||||
'info_dict': {
|
||||
'id': '101870',
|
||||
'ext': 'mp4',
|
||||
'title': '[ⓓ xV] “레벨이들 매력에 반해? 안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)',
|
||||
'creator': 'Dispatch',
|
||||
'view_count': int,
|
||||
'subtitles': 'mincount:6',
|
||||
'uploader_id': 'V__FRA08071',
|
||||
'uploader_url': 'http://vlive.tv',
|
||||
'uploader': None,
|
||||
'upload_date': '20181130',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': 1543601327,
|
||||
'like_count': int,
|
||||
'channel': 'Dispatch',
|
||||
'channel_id': 'C796F3',
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1543601040,
|
||||
'release_date': '20181130',
|
||||
'duration': 279,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
# The wayback machine has special timestamp and "mode" values:
|
||||
# timestamp:
|
||||
# 1 = the first capture
|
||||
# 2 = the last capture
|
||||
# mode:
|
||||
# id_ = Identity - perform no alterations of the original resource, return it as it was archived.
|
||||
_WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'
|
||||
|
||||
def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs):
|
||||
for retry in self.RetryManager():
|
||||
try:
|
||||
return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
|
||||
raise ExtractorError('Page was not archived', expected=True)
|
||||
retry.error = e
|
||||
continue
|
||||
|
||||
def _download_archived_json(self, url, video_id, **kwargs):
|
||||
page = self._download_archived_page(url, video_id, **kwargs)
|
||||
if not page:
|
||||
raise ExtractorError('Page was not archived', expected=True)
|
||||
else:
|
||||
return self._parse_json(page, video_id)
|
||||
|
||||
def _extract_formats_from_m3u8(self, m3u8_url, params, video_id):
|
||||
m3u8_doc = self._download_archived_page(m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False)
|
||||
if not m3u8_doc:
|
||||
return
|
||||
|
||||
# M3U8 document should be changed to archive domain
|
||||
m3u8_doc = m3u8_doc.splitlines()
|
||||
url_base = m3u8_url.rsplit('/', 1)[0]
|
||||
first_segment = None
|
||||
for i, line in enumerate(m3u8_doc):
|
||||
if not line.startswith('#'):
|
||||
m3u8_doc[i] = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{urllib.parse.urlencode(params)}'
|
||||
first_segment = first_segment or m3u8_doc[i]
|
||||
|
||||
# Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870
|
||||
urlh = self._request_webpage(HEADRequest(first_segment), video_id, errnote=False,
|
||||
fatal=False, note='Check first segment availablity')
|
||||
if urlh:
|
||||
formats, subtitles = self._parse_m3u8_formats_and_subtitles('\n'.join(m3u8_doc), ext='mp4', video_id=video_id)
|
||||
if subtitles:
|
||||
self._report_ignoring_subs('m3u8')
|
||||
return formats
|
||||
|
||||
# Closely follows the logic of the ArchiveTeam grab script
|
||||
# See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua
|
||||
def _real_extract(self, url):
|
||||
video_id, url_date = self._match_valid_url(url).group('id', 'date')
|
||||
|
||||
webpage = self._download_archived_page(f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date)
|
||||
|
||||
player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id)
|
||||
user_country = traverse_obj(player_info, ('common', 'userCountry'))
|
||||
|
||||
main_script_url = self._search_regex(r'<script\s+src="([^"]+/js/main\.[^"]+\.js)"', webpage, 'main script url')
|
||||
main_script = self._download_archived_page(main_script_url, video_id, note='Downloading main script')
|
||||
app_id = self._search_regex(r'appId\s*=\s*"([^"]+)"', main_script, 'app id')
|
||||
|
||||
inkey = self._download_archived_json(
|
||||
f'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/{video_id}/inkey', video_id, note='Fetching inkey', query={
|
||||
'appId': app_id,
|
||||
'platformType': 'PC',
|
||||
'gcc': user_country,
|
||||
'locale': 'en_US',
|
||||
}, fatal=False)
|
||||
|
||||
vod_id = traverse_obj(player_info, ('postDetail', 'post', 'officialVideo', 'vodId'))
|
||||
|
||||
vod_data = self._download_archived_json(
|
||||
f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{vod_id}', video_id, note='Fetching vod data', query={
|
||||
'key': inkey.get('inkey'),
|
||||
'pid': 'rmcPlayer_16692457559726800', # partially unix time and partially random. Fixed value used by archiveteam project
|
||||
'sid': '2024',
|
||||
'ver': '2.0',
|
||||
'devt': 'html5_pc',
|
||||
'doct': 'json',
|
||||
'ptc': 'https',
|
||||
'sptc': 'https',
|
||||
'cpt': 'vtt',
|
||||
'ctls': '%7B%22visible%22%3A%7B%22fullscreen%22%3Atrue%2C%22logo%22%3Afalse%2C%22playbackRate%22%3Afalse%2C%22scrap%22%3Afalse%2C%22playCount%22%3Atrue%2C%22commentCount%22%3Atrue%2C%22title%22%3Atrue%2C%22writer%22%3Atrue%2C%22expand%22%3Afalse%2C%22subtitles%22%3Atrue%2C%22thumbnails%22%3Atrue%2C%22quality%22%3Atrue%2C%22setting%22%3Atrue%2C%22script%22%3Afalse%2C%22logoDimmed%22%3Atrue%2C%22badge%22%3Atrue%2C%22seekingTime%22%3Atrue%2C%22muted%22%3Atrue%2C%22muteButton%22%3Afalse%2C%22viewerNotice%22%3Afalse%2C%22linkCount%22%3Afalse%2C%22createTime%22%3Afalse%2C%22thumbnail%22%3Atrue%7D%2C%22clicked%22%3A%7B%22expand%22%3Afalse%2C%22subtitles%22%3Afalse%7D%7D',
|
||||
'pv': '4.26.9',
|
||||
'dr': '1920x1080',
|
||||
'cpl': 'en_US',
|
||||
'lc': 'en_US',
|
||||
'adi': '%5B%7B%22type%22%3A%22pre%22%2C%22exposure%22%3Afalse%2C%22replayExposure%22%3Afalse%7D%5D',
|
||||
'adu': '%2F',
|
||||
'videoId': vod_id,
|
||||
'cc': user_country,
|
||||
})
|
||||
|
||||
formats = []
|
||||
|
||||
streams = traverse_obj(vod_data, ('streams', ...))
|
||||
if len(streams) > 1:
|
||||
self.report_warning('Multiple streams found. Only the first stream will be downloaded.')
|
||||
stream = streams[0]
|
||||
|
||||
max_stream = max(
|
||||
stream.get('videos') or [],
|
||||
key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
|
||||
if max_stream is not None:
|
||||
params = {arg.get('name'): arg.get('value') for arg in stream.get('keys', []) if arg.get('type') == 'param'}
|
||||
formats = self._extract_formats_from_m3u8(max_stream.get('source'), params, video_id) or []
|
||||
|
||||
# For parts of the project MP4 files were archived
|
||||
max_video = max(
|
||||
traverse_obj(vod_data, ('videos', 'list', ...)),
|
||||
key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
|
||||
if max_video is not None:
|
||||
video_url = self._WAYBACK_BASE_URL + max_video.get('source')
|
||||
urlh = self._request_webpage(HEADRequest(video_url), video_id, errnote=False,
|
||||
fatal=False, note='Check video availablity')
|
||||
if urlh:
|
||||
formats.append({'url': video_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(player_info, ('postDetail', 'post', {
|
||||
'title': ('officialVideo', 'title', {str}),
|
||||
'creator': ('author', 'nickname', {str}),
|
||||
'channel': ('channel', 'channelName', {str}),
|
||||
'channel_id': ('channel', 'channelCode', {str}),
|
||||
'duration': ('officialVideo', 'playTime', {int_or_none}),
|
||||
'view_count': ('officialVideo', 'playCount', {int_or_none}),
|
||||
'like_count': ('officialVideo', 'likeCount', {int_or_none}),
|
||||
'comment_count': ('officialVideo', 'commentCount', {int_or_none}),
|
||||
'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}),
|
||||
'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}),
|
||||
})),
|
||||
**traverse_obj(vod_data, ('meta', {
|
||||
'uploader_id': ('user', 'id', {str}),
|
||||
'uploader': ('user', 'name', {str}),
|
||||
'uploader_url': ('user', 'url', {url_or_none}),
|
||||
'thumbnail': ('cover', 'source', {url_or_none}),
|
||||
}), expected_type=lambda x: x or None),
|
||||
**NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]),
|
||||
}
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -73,8 +70,8 @@ class ArcPublishingIE(InfoExtractor):
|
||||
], 'video-api-cdn.%s.arcpublishing.com/api'),
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
@classmethod
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
entries = []
|
||||
# https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
|
||||
for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
|
||||
@@ -124,8 +121,7 @@ class ArcPublishingIE(InfoExtractor):
|
||||
formats.extend(smil_formats)
|
||||
elif stream_type in ('ts', 'hls'):
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
s_url, uuid, 'mp4', 'm3u8' if is_live else 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
|
||||
if all([f.get('acodec') == 'none' for f in m3u8_formats]):
|
||||
continue
|
||||
for f in m3u8_formats:
|
||||
@@ -148,7 +144,6 @@ class ArcPublishingIE(InfoExtractor):
|
||||
'url': s_url,
|
||||
'quality': -10,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for subtitle in (try_get(video, lambda x: x['subtitles']['urls'], list) or []):
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
@@ -43,14 +40,15 @@ class ARDMediathekBaseIE(InfoExtractor):
|
||||
'This video is not available due to geoblocking',
|
||||
countries=self._GEO_COUNTRIES, metadata_available=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
subtitle_url = media_info.get('_subtitleUrl')
|
||||
if subtitle_url:
|
||||
subtitles['de'] = [{
|
||||
'ext': 'ttml',
|
||||
'url': subtitle_url,
|
||||
}, {
|
||||
'ext': 'vtt',
|
||||
'url': subtitle_url.replace('/ebutt/', '/webvtt/') + '.vtt',
|
||||
}]
|
||||
|
||||
return {
|
||||
@@ -265,7 +263,6 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
'format_id': fid,
|
||||
'url': furl,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
info = {
|
||||
'formats': formats,
|
||||
}
|
||||
@@ -292,16 +289,16 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
|
||||
_TESTS = [{
|
||||
# available till 7.01.2022
|
||||
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
|
||||
'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
|
||||
# available till 7.12.2023
|
||||
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
|
||||
'md5': 'a438f671e87a7eba04000336a119ccc4',
|
||||
'info_dict': {
|
||||
'id': 'maischberger-die-woche-video100',
|
||||
'display_id': 'maischberger-die-woche-video100',
|
||||
'id': 'maischberger-video-424',
|
||||
'display_id': 'maischberger-video-424',
|
||||
'ext': 'mp4',
|
||||
'duration': 3687.0,
|
||||
'title': 'maischberger. die woche vom 7. Januar 2021',
|
||||
'upload_date': '20210107',
|
||||
'duration': 4452.0,
|
||||
'title': 'maischberger am 07.12.2022',
|
||||
'upload_date': '20221207',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
@@ -374,7 +371,6 @@ class ARDIE(InfoExtractor):
|
||||
continue
|
||||
f['url'] = format_url
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
_SUB_FORMATS = (
|
||||
('./dataTimedText', 'ttml'),
|
||||
@@ -407,8 +403,9 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:(?P<client>[^/]+)/)?
|
||||
(?:player|live|video|(?P<playlist>sendung|sammlung))/
|
||||
(?:(?P<display_id>[^?#]+)/)?
|
||||
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)'''
|
||||
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
|
||||
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
|
||||
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
||||
@@ -436,6 +433,13 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
|
||||
'upload_date': '20211108',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/sendung/beforeigners/beforeigners/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw/1',
|
||||
'playlist_count': 6,
|
||||
'info_dict': {
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw',
|
||||
'title': 'beforeigners/beforeigners/staffel-1',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'only_matching': True,
|
||||
@@ -561,14 +565,15 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
break
|
||||
pageNumber = pageNumber + 1
|
||||
|
||||
return self.playlist_result(entries, playlist_title=display_id)
|
||||
return self.playlist_result(entries, playlist_id, playlist_title=display_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id, playlist_type, client = self._match_valid_url(url).group(
|
||||
'id', 'display_id', 'playlist', 'client')
|
||||
video_id, display_id, playlist_type, client, season_number = self._match_valid_url(url).group(
|
||||
'id', 'display_id', 'playlist', 'client', 'season')
|
||||
display_id, client = display_id or video_id, client or 'ard'
|
||||
|
||||
if playlist_type:
|
||||
# TODO: Extract only specified season
|
||||
return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type)
|
||||
|
||||
player_page = self._download_json(
|
||||
|
@@ -1,8 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -22,6 +17,8 @@ class ArkenaIE(InfoExtractor):
|
||||
play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)
|
||||
)
|
||||
'''
|
||||
# See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1']
|
||||
_TESTS = [{
|
||||
'url': 'https://video.qbrick.com/play2/embed/player?accountId=1034090&mediaId=d8ab4607-00090107-aab86310',
|
||||
'md5': '97f117754e5f3c020f5f26da4a44ebaf',
|
||||
@@ -53,15 +50,6 @@ class ArkenaIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
# See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
@@ -148,7 +136,6 @@ class ArkenaIE(InfoExtractor):
|
||||
elif mime_type == 'application/vnd.ms-sstr+xml':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
href, video_id, ism_id='mss', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
@@ -76,7 +73,6 @@ class ArnesIE(InfoExtractor):
|
||||
'width': int_or_none(media.get('width')),
|
||||
'height': int_or_none(media.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
channel = video.get('channel') or {}
|
||||
channel_id = channel.get('url')
|
||||
@@ -93,7 +89,7 @@ class ArnesIE(InfoExtractor):
|
||||
'timestamp': parse_iso8601(video.get('creationTime')),
|
||||
'channel': channel.get('name'),
|
||||
'channel_id': channel_id,
|
||||
'channel_url': format_field(channel_id, template=f'{self._BASE_URL}/?channel=%s'),
|
||||
'channel_url': format_field(channel_id, None, f'{self._BASE_URL}/?channel=%s'),
|
||||
'duration': float_or_none(video.get('duration'), 1000),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'tags': video.get('hashtags'),
|
||||
|
@@ -1,188 +1,232 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
qualities,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ArteTVBaseIE(InfoExtractor):
|
||||
_ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
|
||||
_API_BASE = 'https://api.arte.tv/api/player/v1'
|
||||
_API_BASE = 'https://api.arte.tv/api/player/v2'
|
||||
|
||||
|
||||
class ArteTVIE(ArteTVBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:https?://
|
||||
(?:
|
||||
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
|
||||
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
|
||||
)
|
||||
/(?P<id>\d{6}-\d{3}-[AF])
|
||||
|arte://program)
|
||||
/(?P<id>\d{6}-\d{3}-[AF]|LIVE)
|
||||
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||
'info_dict': {
|
||||
'id': '088501-000-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mexico: Stealing Petrol to Survive',
|
||||
'upload_date': '20190628',
|
||||
},
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': '100103-000-A',
|
||||
'title': 'USA: Dyskryminacja na porodówce',
|
||||
'description': 'md5:242017b7cce59ffae340a54baefcafb1',
|
||||
'alt_title': 'ARTE Reportage',
|
||||
'upload_date': '20201103',
|
||||
'duration': 554,
|
||||
'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530',
|
||||
'timestamp': 1604417980,
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'}
|
||||
}, {
|
||||
'note': 'No alt_title',
|
||||
'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
|
||||
'info_dict': {
|
||||
'id': '110371-000-A',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20220718',
|
||||
'duration': 154,
|
||||
'timestamp': 1658162460,
|
||||
'description': 'md5:5890f36fe7dccfadb8b7c0891de54786',
|
||||
'title': 'La chaleur, supplice des arbres de rue',
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/CPE2sQDtD8GLQgt8DuYHLf/940x530',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'}
|
||||
}, {
|
||||
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://api.arte.tv/api/player/v2/config/de/LIVE',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
|
||||
'info_dict': {
|
||||
'id': '110203-006-A',
|
||||
'chapters': 'count:16',
|
||||
'description': 'md5:cf592f1df52fe52007e3f8eac813c084',
|
||||
'alt_title': 'Zaz',
|
||||
'title': 'Baloise Session 2022',
|
||||
'timestamp': 1668445200,
|
||||
'duration': 4054,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/ubQjmVCGyRx3hmBuZEK9QZ/940x530',
|
||||
'upload_date': '20221114',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'expected_warnings': ['geo restricted']
|
||||
}]
|
||||
|
||||
_GEO_BYPASS = True
|
||||
|
||||
_LANG_MAP = { # ISO639 -> French abbreviations
|
||||
'fr': 'F',
|
||||
'de': 'A',
|
||||
'en': 'E[ANG]',
|
||||
'es': 'E[ESP]',
|
||||
'it': 'E[ITA]',
|
||||
'pl': 'E[POL]',
|
||||
# XXX: probably means mixed; <https://www.arte.tv/en/videos/107710-029-A/dispatches-from-ukraine-local-journalists-report/>
|
||||
# uses this code for audio that happens to be in Ukrainian, but the manifest uses the ISO code 'mul' (mixed)
|
||||
'mul': 'EU',
|
||||
}
|
||||
|
||||
_VERSION_CODE_RE = re.compile(r'''(?x)
|
||||
V
|
||||
(?P<original_voice>O?)
|
||||
(?P<vlang>[FA]|E\[[A-Z]+\]|EU)?
|
||||
(?P<audio_desc>AUD|)
|
||||
(?:
|
||||
(?P<has_sub>-ST)
|
||||
(?P<sdh_sub>M?)
|
||||
(?P<sub_lang>[FA]|E\[[A-Z]+\]|EU)
|
||||
)?
|
||||
''')
|
||||
|
||||
# all obtained by exhaustive testing
|
||||
_COUNTRIES_MAP = {
|
||||
'DE_FR': (
|
||||
'BL', 'DE', 'FR', 'GF', 'GP', 'MF', 'MQ', 'NC',
|
||||
'PF', 'PM', 'RE', 'WF', 'YT',
|
||||
),
|
||||
# with both of the below 'BE' sometimes works, sometimes doesn't
|
||||
'EUR_DE_FR': (
|
||||
'AT', 'BL', 'CH', 'DE', 'FR', 'GF', 'GP', 'LI',
|
||||
'MC', 'MF', 'MQ', 'NC', 'PF', 'PM', 'RE', 'WF',
|
||||
'YT',
|
||||
),
|
||||
'SAT': (
|
||||
'AD', 'AT', 'AX', 'BG', 'BL', 'CH', 'CY', 'CZ',
|
||||
'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GF',
|
||||
'GR', 'HR', 'HU', 'IE', 'IS', 'IT', 'KN', 'LI',
|
||||
'LT', 'LU', 'LV', 'MC', 'MF', 'MQ', 'MT', 'NC',
|
||||
'NL', 'NO', 'PF', 'PL', 'PM', 'PT', 'RE', 'RO',
|
||||
'SE', 'SI', 'SK', 'SM', 'VA', 'WF', 'YT',
|
||||
),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
lang = mobj.group('lang') or mobj.group('lang_2')
|
||||
langauge_code = self._LANG_MAP.get(lang)
|
||||
|
||||
info = self._download_json(
|
||||
'%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
|
||||
player_info = info['videoJsonPlayer']
|
||||
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id)
|
||||
|
||||
vsr = try_get(player_info, lambda x: x['VSR'], dict)
|
||||
if not vsr:
|
||||
error = None
|
||||
if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
|
||||
error = try_get(
|
||||
player_info, lambda x: x['custom_msg']['msg'], compat_str)
|
||||
if not error:
|
||||
error = 'Video %s is not available' % player_info.get('VID') or video_id
|
||||
raise ExtractorError(error, expected=True)
|
||||
geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {}
|
||||
if geoblocking.get('restrictedArea'):
|
||||
raise GeoRestrictedError(f'Video restricted to {geoblocking["code"]!r}',
|
||||
countries=self._COUNTRIES_MAP.get(geoblocking['code'], ('DE', 'FR')))
|
||||
|
||||
upload_date_str = player_info.get('shootingDate')
|
||||
if not upload_date_str:
|
||||
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
||||
if not traverse_obj(config, ('data', 'attributes', 'rights')):
|
||||
# Eg: https://www.arte.tv/de/videos/097407-215-A/28-minuten
|
||||
# Eg: https://www.arte.tv/es/videos/104351-002-A/serviteur-du-peuple-1-23
|
||||
raise ExtractorError(
|
||||
'Video is not available in this language edition of Arte or broadcast rights expired', expected=True)
|
||||
|
||||
title = (player_info.get('VTI') or player_info['VID']).strip()
|
||||
subtitle = player_info.get('VSU', '').strip()
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
formats, subtitles = [], {}
|
||||
secondary_formats = []
|
||||
for stream in config['data']['attributes']['streams']:
|
||||
# official player contains code like `e.get("versions")[0].eStat.ml5`
|
||||
stream_version = stream['versions'][0]
|
||||
stream_version_code = stream_version['eStat']['ml5']
|
||||
|
||||
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
|
||||
lang_pref = -1
|
||||
m = self._VERSION_CODE_RE.match(stream_version_code)
|
||||
if m:
|
||||
lang_pref = int(''.join('01'[x] for x in (
|
||||
m.group('vlang') == langauge_code, # we prefer voice in the requested language
|
||||
not m.group('audio_desc'), # and not the audio description version
|
||||
bool(m.group('original_voice')), # but if voice is not in the requested language, at least choose the original voice
|
||||
m.group('sub_lang') == langauge_code, # if subtitles are present, we prefer them in the requested language
|
||||
not m.group('has_sub'), # but we prefer no subtitles otherwise
|
||||
not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles
|
||||
)))
|
||||
|
||||
LANGS = {
|
||||
'fr': 'F',
|
||||
'de': 'A',
|
||||
'en': 'E[ANG]',
|
||||
'es': 'E[ESP]',
|
||||
'it': 'E[ITA]',
|
||||
'pl': 'E[POL]',
|
||||
}
|
||||
short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?')
|
||||
if stream['protocol'].startswith('HLS'):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False)
|
||||
for fmt in fmts:
|
||||
fmt.update({
|
||||
'format_note': f'{stream_version.get("label", "unknown")} [{short_label}]',
|
||||
'language_preference': lang_pref,
|
||||
})
|
||||
if any(map(short_label.startswith, ('cc', 'OGsub'))):
|
||||
secondary_formats.extend(fmts)
|
||||
else:
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
langcode = LANGS.get(lang, lang)
|
||||
elif stream['protocol'] in ('HTTPS', 'RTMP'):
|
||||
formats.append({
|
||||
'format_id': f'{stream["protocol"]}-{stream_version_code}',
|
||||
'url': stream['url'],
|
||||
'format_note': f'{stream_version.get("label", "unknown")} [{short_label}]',
|
||||
'language_preference': lang_pref,
|
||||
# 'ext': 'mp4', # XXX: may or may not be necessary, at least for HTTPS
|
||||
})
|
||||
|
||||
formats = []
|
||||
for format_id, format_dict in vsr.items():
|
||||
f = dict(format_dict)
|
||||
format_url = url_or_none(f.get('url'))
|
||||
streamer = f.get('streamer')
|
||||
if not format_url and not streamer:
|
||||
continue
|
||||
versionCode = f.get('versionCode')
|
||||
l = re.escape(langcode)
|
||||
|
||||
# Language preference from most to least priority
|
||||
# Reference: section 6.8 of
|
||||
# https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf
|
||||
PREFERENCES = (
|
||||
# original version in requested language, without subtitles
|
||||
r'VO{0}$'.format(l),
|
||||
# original version in requested language, with partial subtitles in requested language
|
||||
r'VO{0}-ST{0}$'.format(l),
|
||||
# original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
|
||||
r'VO{0}-STM{0}$'.format(l),
|
||||
# non-original (dubbed) version in requested language, without subtitles
|
||||
r'V{0}$'.format(l),
|
||||
# non-original (dubbed) version in requested language, with subtitles partial subtitles in requested language
|
||||
r'V{0}-ST{0}$'.format(l),
|
||||
# non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
|
||||
r'V{0}-STM{0}$'.format(l),
|
||||
# original version in requested language, with partial subtitles in different language
|
||||
r'VO{0}-ST(?!{0}).+?$'.format(l),
|
||||
# original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
|
||||
r'VO{0}-STM(?!{0}).+?$'.format(l),
|
||||
# original version in different language, with partial subtitles in requested language
|
||||
r'VO(?:(?!{0}).+?)?-ST{0}$'.format(l),
|
||||
# original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
|
||||
r'VO(?:(?!{0}).+?)?-STM{0}$'.format(l),
|
||||
# original version in different language, without subtitles
|
||||
r'VO(?:(?!{0}))?$'.format(l),
|
||||
# original version in different language, with partial subtitles in different language
|
||||
r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(l),
|
||||
# original version in different language, with subtitles for the deaf and hard-of-hearing in different language
|
||||
r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(l),
|
||||
)
|
||||
|
||||
for pref, p in enumerate(PREFERENCES):
|
||||
if re.match(p, versionCode):
|
||||
lang_pref = len(PREFERENCES) - pref
|
||||
break
|
||||
else:
|
||||
lang_pref = -1
|
||||
self.report_warning(f'Skipping stream with unknown protocol {stream["protocol"]}')
|
||||
|
||||
media_type = f.get('mediaType')
|
||||
if media_type == 'hls':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
for m3u8_format in m3u8_formats:
|
||||
m3u8_format['language_preference'] = lang_pref
|
||||
formats.extend(m3u8_formats)
|
||||
continue
|
||||
formats.extend(secondary_formats)
|
||||
self._remove_duplicate_formats(formats)
|
||||
|
||||
format = {
|
||||
'format_id': format_id,
|
||||
'language_preference': lang_pref,
|
||||
'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'tbr': int_or_none(f.get('bitrate')),
|
||||
'quality': qfunc(f.get('quality')),
|
||||
}
|
||||
|
||||
if media_type == 'rtmp':
|
||||
format['url'] = f['streamer']
|
||||
format['play_path'] = 'mp4:' + f['url']
|
||||
format['ext'] = 'flv'
|
||||
else:
|
||||
format['url'] = f['url']
|
||||
|
||||
formats.append(format)
|
||||
|
||||
# For this extractor, quality only represents the relative quality
|
||||
# with respect to other formats with the same resolution
|
||||
self._sort_formats(formats, ('res', 'quality'))
|
||||
metadata = config['data']['attributes']['metadata']
|
||||
|
||||
return {
|
||||
'id': player_info.get('VID') or video_id,
|
||||
'title': title,
|
||||
'description': player_info.get('VDE') or player_info.get('V7T'),
|
||||
'upload_date': unified_strdate(upload_date_str),
|
||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||
'id': metadata['providerId'],
|
||||
'webpage_url': traverse_obj(metadata, ('link', 'url')),
|
||||
'title': traverse_obj(metadata, 'subtitle', 'title'),
|
||||
'alt_title': metadata.get('subtitle') and metadata.get('title'),
|
||||
'description': metadata.get('description'),
|
||||
'duration': traverse_obj(metadata, ('duration', 'seconds')),
|
||||
'language': metadata.get('language'),
|
||||
'timestamp': traverse_obj(config, ('data', 'attributes', 'rights', 'begin'), expected_type=parse_iso8601),
|
||||
'is_live': config['data']['attributes'].get('live', False),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': [
|
||||
{'url': image['url'], 'id': image.get('caption')}
|
||||
for image in metadata.get('images') or [] if url_or_none(image.get('url'))
|
||||
],
|
||||
# TODO: chapters may also be in stream['segments']?
|
||||
'chapters': traverse_obj(config, ('data', 'attributes', 'chapters', 'elements', ..., {
|
||||
'start_time': 'startTime',
|
||||
'title': 'title',
|
||||
})) or None,
|
||||
}
|
||||
|
||||
|
||||
class ArteTVEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
|
||||
_EMBED_REGEX = [r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
|
||||
'info_dict': {
|
||||
@@ -192,17 +236,12 @@ class ArteTVEmbedIE(InfoExtractor):
|
||||
'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
|
||||
'upload_date': '20201116',
|
||||
},
|
||||
'skip': 'No video available'
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [url for _, url in re.findall(
|
||||
r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
qs = parse_qs(url)
|
||||
json_url = qs['json_url'][0]
|
||||
@@ -215,41 +254,71 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
||||
'info_dict': {
|
||||
'id': 'RC-016954',
|
||||
'title': 'Earn a Living',
|
||||
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
|
||||
'only_matching': True,
|
||||
'playlist_mincount': 100,
|
||||
'info_dict': {
|
||||
'description': 'md5:84e7bf1feda248bc325ebfac818c476e',
|
||||
'id': 'RC-014123',
|
||||
'title': 'ARTE Reportage - najlepsze reportaże',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, playlist_id = self._match_valid_url(url).group('lang', 'id')
|
||||
playlist = self._download_json(
|
||||
f'{self._API_BASE}/playlist/{lang}/{playlist_id}', playlist_id)['data']['attributes']
|
||||
|
||||
entries = [{
|
||||
'_type': 'url_transparent',
|
||||
'url': video['config']['url'],
|
||||
'ie_key': ArteTVIE.ie_key(),
|
||||
'id': video.get('providerId'),
|
||||
'title': video.get('title'),
|
||||
'alt_title': video.get('subtitle'),
|
||||
'thumbnail': url_or_none(traverse_obj(video, ('mainImage', 'url'))),
|
||||
'duration': int_or_none(traverse_obj(video, ('duration', 'seconds'))),
|
||||
} for video in traverse_obj(playlist, ('items', lambda _, v: v['config']['url']))]
|
||||
|
||||
return self.playlist_result(entries, playlist_id,
|
||||
traverse_obj(playlist, ('metadata', 'title')),
|
||||
traverse_obj(playlist, ('metadata', 'description')))
|
||||
|
||||
|
||||
class ArteTVCategoryIE(ArteTVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/politics-and-society/',
|
||||
'info_dict': {
|
||||
'id': 'politics-and-society',
|
||||
'title': 'Politics and society',
|
||||
'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (
|
||||
not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
|
||||
and super().suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, playlist_id = self._match_valid_url(url).groups()
|
||||
collection = self._download_json(
|
||||
'%s/collectionData/%s/%s?source=videos'
|
||||
% (self._API_BASE, lang, playlist_id), playlist_id)
|
||||
entries = []
|
||||
for video in collection['videos']:
|
||||
if not isinstance(video, dict):
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
items = []
|
||||
for video in re.finditer(
|
||||
r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
|
||||
webpage):
|
||||
video = video.group('url')
|
||||
if video == url:
|
||||
continue
|
||||
video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
|
||||
if not video_url:
|
||||
continue
|
||||
video_id = video.get('programId')
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': video_url,
|
||||
'id': video_id,
|
||||
'title': video.get('title'),
|
||||
'alt_title': video.get('subtitle'),
|
||||
'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
|
||||
'duration': int_or_none(video.get('durationSeconds')),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'ie_key': ArteTVIE.ie_key(),
|
||||
})
|
||||
title = collection.get('title')
|
||||
description = collection.get('shortDescription') or collection.get('teaserText')
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
|
||||
items.append(video)
|
||||
|
||||
title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None
|
||||
|
||||
return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title,
|
||||
description=self._og_search_description(webpage, default=None))
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import re
|
||||
|
||||
@@ -181,8 +178,7 @@ class AsianCrushPlaylistIE(AsianCrushBaseIE):
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
|
||||
default=None) or self._html_extract_title(webpage)
|
||||
if title:
|
||||
title = re.sub(r'\s*\|\s*.+?$', '', title)
|
||||
|
||||
|
@@ -1,7 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
@@ -37,9 +33,6 @@ class AtresPlayerIE(InfoExtractor):
|
||||
]
|
||||
_API_BASE = 'https://api.atresplayer.com/'
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _handle_error(self, e, code):
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
|
||||
error = self._parse_json(e.cause.read(), None)
|
||||
@@ -48,11 +41,7 @@ class AtresPlayerIE(InfoExtractor):
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
raise
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._request_webpage(
|
||||
self._API_BASE + 'login', None, 'Downloading login page')
|
||||
|
||||
@@ -95,7 +84,6 @@ class AtresPlayerIE(InfoExtractor):
|
||||
elif src_type == 'application/dash+xml':
|
||||
formats, subtitles = self._extract_mpd_formats(
|
||||
src, video_id, mpd_id='dash', fatal=False)
|
||||
self._sort_formats(formats)
|
||||
|
||||
heartbeat = episode.get('heartbeat') or {}
|
||||
omniture = episode.get('omniture') or {}
|
||||
|
34
plugins/youtube_download/yt_dlp/extractor/atscaleconf.py
Normal file
34
plugins/youtube_download/yt_dlp/extractor/atscaleconf.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class AtScaleConfEventIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?atscaleconference\.com/events/(?P<id>[^/&$?]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://atscaleconference.com/events/data-scale-spring-2022/',
|
||||
'playlist_mincount': 13,
|
||||
'info_dict': {
|
||||
'id': 'data-scale-spring-2022',
|
||||
'title': 'Data @Scale Spring 2022',
|
||||
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
|
||||
},
|
||||
}, {
|
||||
'url': 'https://atscaleconference.com/events/video-scale-2021/',
|
||||
'playlist_mincount': 14,
|
||||
'info_dict': {
|
||||
'id': 'video-scale-2021',
|
||||
'title': 'Video @Scale 2021',
|
||||
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
|
||||
return self.playlist_from_matches(
|
||||
re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage),
|
||||
ie='Generic', playlist_id=id,
|
||||
title=self._og_search_title(webpage), description=self._og_search_description(webpage))
|
@@ -1,5 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -8,6 +5,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
jwt_encode_hs256,
|
||||
try_get,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -51,7 +49,6 @@ class ATVAtIE(InfoExtractor):
|
||||
'url': source_url,
|
||||
'format_id': protocol,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': clip_id,
|
||||
@@ -94,6 +91,11 @@ class ATVAtIE(InfoExtractor):
|
||||
})
|
||||
|
||||
video_id, videos_data = list(videos['data'].items())[0]
|
||||
error_msg = try_get(videos_data, lambda x: x['error']['title'])
|
||||
if error_msg == 'Geo check failed':
|
||||
self.raise_geo_restricted(error_msg)
|
||||
elif error_msg:
|
||||
raise ExtractorError(error_msg)
|
||||
entries = [
|
||||
self._extract_video_info(url, contentResource[video['id']], video)
|
||||
for video in videos_data]
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@@ -79,7 +76,6 @@ class AudiMediaIE(InfoExtractor):
|
||||
'format_id': 'http-%s' % bitrate,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,27 +1,33 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
)
|
||||
from ..utils import clean_html, float_or_none, traverse_obj, unescapeHTML
|
||||
|
||||
|
||||
class AudioBoomIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://audioboom.com/posts/7398103-asim-chaudhry',
|
||||
'md5': '7b00192e593ff227e6a315486979a42d',
|
||||
'md5': '4d68be11c9f9daf3dab0778ad1e010c3',
|
||||
'info_dict': {
|
||||
'id': '7398103',
|
||||
'ext': 'mp3',
|
||||
'title': 'Asim Chaudhry',
|
||||
'description': 'md5:2f3fef17dacc2595b5362e1d7d3602fc',
|
||||
'description': 'md5:0ed714ae0e81e5d9119cac2f618ad679',
|
||||
'duration': 4000.99,
|
||||
'uploader': 'Sue Perkins: An hour or so with...',
|
||||
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
|
||||
}
|
||||
}, { # Direct mp3-file link
|
||||
'url': 'https://audioboom.com/posts/8128496.mp3',
|
||||
'md5': 'e329edf304d450def95c7f86a9165ee1',
|
||||
'info_dict': {
|
||||
'id': '8128496',
|
||||
'ext': 'mp3',
|
||||
'title': 'TCRNo8 / DAILY 03 - In Control',
|
||||
'description': 'md5:44665f142db74858dfa21c5b34787948',
|
||||
'duration': 1689.7,
|
||||
'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race',
|
||||
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
|
||||
'only_matching': True,
|
||||
@@ -29,45 +35,23 @@ class AudioBoomIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(f'https://audioboom.com/posts/{video_id}', video_id)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
clip = None
|
||||
|
||||
clip_store = self._parse_json(
|
||||
self._html_search_regex(
|
||||
r'data-new-clip-store=(["\'])(?P<json>{.+?})\1',
|
||||
webpage, 'clip store', default='{}', group='json'),
|
||||
video_id, fatal=False)
|
||||
if clip_store:
|
||||
clips = clip_store.get('clips')
|
||||
if clips and isinstance(clips, list) and isinstance(clips[0], dict):
|
||||
clip = clips[0]
|
||||
|
||||
def from_clip(field):
|
||||
if clip:
|
||||
return clip.get(field)
|
||||
|
||||
audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
|
||||
'audio', webpage, 'audio url')
|
||||
title = from_clip('title') or self._html_search_meta(
|
||||
['og:title', 'og:audio:title', 'audio_title'], webpage)
|
||||
description = from_clip('description') or clean_html(from_clip('formattedDescription')) or self._og_search_description(webpage)
|
||||
|
||||
duration = float_or_none(from_clip('duration') or self._html_search_meta(
|
||||
'weibo:audio:duration', webpage))
|
||||
|
||||
uploader = from_clip('author') or self._html_search_meta(
|
||||
['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader')
|
||||
uploader_url = from_clip('author_url') or self._html_search_meta(
|
||||
'audioboo:channel', webpage, 'uploader url')
|
||||
clip_store = self._search_json(
|
||||
r'data-react-class="V5DetailPagePlayer"\s*data-react-props=["\']',
|
||||
webpage, 'clip store', video_id, fatal=False, transform_source=unescapeHTML)
|
||||
clip = traverse_obj(clip_store, ('clips', 0), expected_type=dict) or {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': audio_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_url': uploader_url,
|
||||
'url': clip.get('clipURLPriorToLoading') or self._og_search_property('audio', webpage, 'audio url'),
|
||||
'title': clip.get('title') or self._html_search_meta(['og:title', 'og:audio:title', 'audio_title'], webpage),
|
||||
'description': (clip.get('description') or clean_html(clip.get('formattedDescription'))
|
||||
or self._og_search_description(webpage)),
|
||||
'duration': float_or_none(clip.get('duration') or self._html_search_meta('weibo:audio:duration', webpage)),
|
||||
'uploader': clip.get('author') or self._html_search_meta(
|
||||
['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader'),
|
||||
'uploader_url': clip.get('author_url') or self._html_search_regex(
|
||||
r'<div class="avatar flex-shrink-0">\s*<a href="(?P<uploader_url>http[^"]+)"',
|
||||
webpage, 'uploader url', fatal=False),
|
||||
}
|
||||
|
93
plugins/youtube_download/yt_dlp/extractor/audiodraft.py
Normal file
93
plugins/youtube_download/yt_dlp/extractor/audiodraft.py
Normal file
@@ -0,0 +1,93 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class AudiodraftBaseIE(InfoExtractor):
|
||||
def _audiodraft_extract_from_id(self, player_entry_id):
|
||||
data_json = self._download_json(
|
||||
'https://www.audiodraft.com/scripts/general/player/getPlayerInfoNew.php', player_entry_id,
|
||||
headers={
|
||||
'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
}, data=f'id={player_entry_id}'.encode('utf-8'))
|
||||
|
||||
return {
|
||||
'id': str(data_json['entry_id']),
|
||||
'title': data_json.get('entry_title'),
|
||||
'url': data_json['path'],
|
||||
'vcodec': 'none',
|
||||
'ext': 'mp3',
|
||||
'uploader': data_json.get('designer_name'),
|
||||
'uploader_id': data_json.get('designer_id'),
|
||||
'webpage_url': data_json.get('entry_url'),
|
||||
'like_count': int_or_none(data_json.get('entry_likes')),
|
||||
'average_rating': int_or_none(data_json.get('entry_rating')),
|
||||
}
|
||||
|
||||
|
||||
class AudiodraftCustomIE(AudiodraftBaseIE):
|
||||
IE_NAME = 'Audiodraft:custom'
|
||||
_VALID_URL = r'https?://(?:[-\w]+)\.audiodraft\.com/entry/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://nokiatune.audiodraft.com/entry/5874',
|
||||
'info_dict': {
|
||||
'id': '9485',
|
||||
'ext': 'mp3',
|
||||
'title': 'Hula Hula Calls',
|
||||
'uploader': 'unclemaki',
|
||||
'uploader_id': '13512',
|
||||
'average_rating': 5,
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vikinggrace.audiodraft.com/entry/501',
|
||||
'info_dict': {
|
||||
'id': '22241',
|
||||
'ext': 'mp3',
|
||||
'title': 'MVG Happy',
|
||||
'uploader': 'frog',
|
||||
'uploader_id': '19142',
|
||||
'average_rating': 5,
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://timferriss.audiodraft.com/entry/765',
|
||||
'info_dict': {
|
||||
'id': '19710',
|
||||
'ext': 'mp3',
|
||||
'title': 'ferris03',
|
||||
'uploader': 'malex',
|
||||
'uploader_id': '17335',
|
||||
'average_rating': 5,
|
||||
'like_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
player_entry_id = self._search_regex(r'playAudio\(\'(player_entry_\d+)\'\);', webpage, id, 'play entry id')
|
||||
return self._audiodraft_extract_from_id(player_entry_id)
|
||||
|
||||
|
||||
class AudiodraftGenericIE(AudiodraftBaseIE):
|
||||
IE_NAME = 'Audiodraft:generic'
|
||||
_VALID_URL = r'https?://www\.audiodraft\.com/contests/[^/#]+#entries&eid=(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.audiodraft.com/contests/570-Score-A-Video-Surprise-Us#entries&eid=30138',
|
||||
'info_dict': {
|
||||
'id': '30138',
|
||||
'ext': 'mp3',
|
||||
'title': 'DROP in sound_V2',
|
||||
'uploader': 'TiagoSilva',
|
||||
'uploader_id': '19452',
|
||||
'average_rating': 4,
|
||||
'like_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
return self._audiodraft_extract_from_id(f'player_entry_{id}')
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import time
|
||||
|
||||
@@ -29,6 +26,7 @@ class AudiomackIE(InfoExtractor):
|
||||
}
|
||||
},
|
||||
# audiomack wrapper around soundcloud song
|
||||
# Needs new test URL.
|
||||
{
|
||||
'add_ie': ['Soundcloud'],
|
||||
'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
|
||||
|
@@ -1,11 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, try_get, compat_str, str_or_none
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..compat import compat_str, compat_urllib_parse_unquote
|
||||
from ..utils import ExtractorError, str_or_none, try_get
|
||||
|
||||
|
||||
class AudiusBaseIE(InfoExtractor):
|
||||
@@ -171,7 +168,7 @@ class AudiusIE(AudiusBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class AudiusTrackIE(AudiusIE):
|
||||
class AudiusTrackIE(AudiusIE): # XXX: Do not subclass from concrete IE
|
||||
_VALID_URL = r'''(?x)(?:audius:)(?:https?://(?:www\.)?.+/v1/tracks/)?(?P<track_id>\w+)'''
|
||||
IE_NAME = 'audius:track'
|
||||
IE_DESC = 'Audius track ID or API link. Prepend with "audius:"'
|
||||
@@ -246,7 +243,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
|
||||
playlist_data.get('description'))
|
||||
|
||||
|
||||
class AudiusProfileIE(AudiusPlaylistIE):
|
||||
class AudiusProfileIE(AudiusPlaylistIE): # XXX: Do not subclass from concrete IE
|
||||
IE_NAME = 'audius:artist'
|
||||
IE_DESC = 'Audius.co profile/artist pages'
|
||||
_VALID_URL = r'https?://(?:www)?audius\.co/(?P<id>[^\/]+)/?(?:[?#]|$)'
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -44,7 +41,7 @@ class AWAANBaseIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('description_en') or video_data.get('description_ar'),
|
||||
'thumbnail': format_field(img, template='http://admin.mangomolo.com/analytics/%s'),
|
||||
'thumbnail': format_field(img, None, 'http://admin.mangomolo.com/analytics/%s'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
|
||||
'is_live': is_live,
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import hashlib
|
||||
import hmac
|
||||
@@ -9,7 +6,7 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class AWSIE(InfoExtractor):
|
||||
class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
|
||||
_AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
|
||||
_AWS_REGION = 'us-east-1'
|
||||
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -11,11 +8,12 @@ class AZMedienIE(InfoExtractor):
|
||||
IE_DESC = 'AZ Medien videos'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:www\.|tv\.)?
|
||||
(?P<host>
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
telem1\.ch|
|
||||
tvo-online\.ch
|
||||
)/
|
||||
[^/]+/
|
||||
(?P<id>
|
||||
@@ -30,7 +28,7 @@ class AZMedienIE(InfoExtractor):
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
|
||||
'url': 'https://tv.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
|
||||
'info_dict': {
|
||||
'id': '1_anruz3wy',
|
||||
'ext': 'mp4',
|
||||
@@ -38,6 +36,9 @@ class AZMedienIE(InfoExtractor):
|
||||
'uploader_id': 'TVOnline',
|
||||
'upload_date': '20180930',
|
||||
'timestamp': 1538328802,
|
||||
'view_count': int,
|
||||
'thumbnail': 'http://cfvod.kaltura.com/p/1719221/sp/171922100/thumbnail/entry_id/1_anruz3wy/version/100031',
|
||||
'duration': 1930
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
@@ -1,7 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unescapeHTML
|
||||
|
||||
|
148
plugins/youtube_download/yt_dlp/extractor/banbye.py
Normal file
148
plugins/youtube_download/yt_dlp/extractor/banbye.py
Normal file
@@ -0,0 +1,148 @@
|
||||
import math
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_parse_qs,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
InAdvancePagedList,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class BanByeBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://api.banbye.com'
|
||||
_CDN_BASE = 'https://cdn.banbye.com'
|
||||
_VIDEO_BASE = 'https://banbye.com/watch'
|
||||
|
||||
@staticmethod
|
||||
def _extract_playlist_id(url, param='playlist'):
|
||||
return compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(url).query).get(param, [None])[0]
|
||||
|
||||
def _extract_playlist(self, playlist_id):
|
||||
data = self._download_json(f'{self._API_BASE}/playlists/{playlist_id}', playlist_id)
|
||||
return self.playlist_result([
|
||||
self.url_result(f'{self._VIDEO_BASE}/{video_id}', BanByeIE)
|
||||
for video_id in data['videoIds']], playlist_id, data.get('name'))
|
||||
|
||||
|
||||
class BanByeIE(BanByeBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
|
||||
'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
|
||||
'info_dict': {
|
||||
'id': 'v_ytfmvkVYLE8T',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:5ec098f88a0d796f987648de6322ba0f',
|
||||
'description': 'md5:4d94836e73396bc18ef1fa0f43e5a63a',
|
||||
'uploader': 'wRealu24',
|
||||
'channel_id': 'ch_wrealu24',
|
||||
'channel_url': 'https://banbye.com/channel/ch_wrealu24',
|
||||
'timestamp': 1647604800,
|
||||
'upload_date': '20220318',
|
||||
'duration': 1931,
|
||||
'thumbnail': r're:https?://.*\.webp',
|
||||
'tags': 'count:5',
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://banbye.com/watch/v_2JjQtqjKUE_F?playlistId=p_Ld82N6gBw_OJ',
|
||||
'info_dict': {
|
||||
'title': 'Krzysztof Karoń',
|
||||
'id': 'p_Ld82N6gBw_OJ',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
playlist_id = self._extract_playlist_id(url, 'playlistId')
|
||||
|
||||
if self._yes_playlist(playlist_id, video_id):
|
||||
return self._extract_playlist(playlist_id)
|
||||
|
||||
data = self._download_json(f'{self._API_BASE}/videos/{video_id}', video_id)
|
||||
thumbnails = [{
|
||||
'id': f'{quality}p',
|
||||
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.webp',
|
||||
} for quality in [48, 96, 144, 240, 512, 1080]]
|
||||
formats = [{
|
||||
'format_id': f'http-{quality}p',
|
||||
'quality': quality,
|
||||
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4',
|
||||
} for quality in data['quality']]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': data.get('title'),
|
||||
'description': data.get('desc'),
|
||||
'uploader': traverse_obj(data, ('channel', 'name')),
|
||||
'channel_id': data.get('channelId'),
|
||||
'channel_url': format_field(data, 'channelId', 'https://banbye.com/channel/%s'),
|
||||
'timestamp': unified_timestamp(data.get('publishedAt')),
|
||||
'duration': data.get('duration'),
|
||||
'tags': data.get('tags'),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'like_count': data.get('likes'),
|
||||
'dislike_count': data.get('dislikes'),
|
||||
'view_count': data.get('views'),
|
||||
'comment_count': data.get('commentCount'),
|
||||
}
|
||||
|
||||
|
||||
class BanByeChannelIE(BanByeBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?channel/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://banbye.com/channel/ch_wrealu24',
|
||||
'info_dict': {
|
||||
'title': 'wRealu24',
|
||||
'id': 'ch_wrealu24',
|
||||
'description': 'md5:da54e48416b74dfdde20a04867c0c2f6',
|
||||
},
|
||||
'playlist_mincount': 791,
|
||||
}, {
|
||||
'url': 'https://banbye.com/channel/ch_wrealu24?playlist=p_Ld82N6gBw_OJ',
|
||||
'info_dict': {
|
||||
'title': 'Krzysztof Karoń',
|
||||
'id': 'p_Ld82N6gBw_OJ',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
playlist_id = self._extract_playlist_id(url)
|
||||
|
||||
if playlist_id:
|
||||
return self._extract_playlist(playlist_id)
|
||||
|
||||
def page_func(page_num):
|
||||
data = self._download_json(f'{self._API_BASE}/videos', channel_id, query={
|
||||
'channelId': channel_id,
|
||||
'sort': 'new',
|
||||
'limit': self._PAGE_SIZE,
|
||||
'offset': page_num * self._PAGE_SIZE,
|
||||
}, note=f'Downloading page {page_num+1}')
|
||||
return [
|
||||
self.url_result(f"{self._VIDEO_BASE}/{video['_id']}", BanByeIE)
|
||||
for video in data['items']
|
||||
]
|
||||
|
||||
channel_data = self._download_json(f'{self._API_BASE}/channels/{channel_id}', channel_id)
|
||||
entries = InAdvancePagedList(
|
||||
page_func,
|
||||
math.ceil(channel_data['videoCount'] / self._PAGE_SIZE),
|
||||
self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, channel_id, channel_data.get('name'), channel_data.get('description'))
|
@@ -1,11 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .brightcove import BrightcoveNewBaseIE
|
||||
from ..utils import extract_attributes
|
||||
|
||||
|
||||
class BandaiChannelIE(BrightcoveNewIE):
|
||||
class BandaiChannelIE(BrightcoveNewBaseIE):
|
||||
IE_NAME = 'bandaichannel'
|
||||
_VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P<id>\d+/\d+)'
|
||||
_TESTS = [{
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
@@ -8,34 +5,42 @@ import time
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
KNOWN_EXTENSIONS,
|
||||
parse_filesize,
|
||||
str_or_none,
|
||||
try_get,
|
||||
update_url_query,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?P<uploader>[^/]+)\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
|
||||
_EMBED_REGEX = [r'<meta property="og:url"[^>]*?content="(?P<url>.*?bandcamp\.com.*?)"']
|
||||
_TESTS = [{
|
||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||
'md5': 'c557841d5e50261777a6585648adf439',
|
||||
'info_dict': {
|
||||
'id': '1812978515',
|
||||
'ext': 'mp3',
|
||||
'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭",
|
||||
'title': 'youtube-dl "\'/\\ä↭ - youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭',
|
||||
'duration': 9.8485,
|
||||
'uploader': 'youtube-dl "\'/\\ä↭',
|
||||
'uploader': 'youtube-dl "\'/\\ä↭',
|
||||
'upload_date': '20121129',
|
||||
'timestamp': 1354224127,
|
||||
'track': 'youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭',
|
||||
'album_artist': 'youtube-dl "\'/\\ä↭',
|
||||
'track_id': '1812978515',
|
||||
'artist': 'youtube-dl "\'/\\ä↭',
|
||||
'uploader_url': 'https://youtube-dl.bandcamp.com',
|
||||
'uploader_id': 'youtube-dl',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
|
||||
},
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}, {
|
||||
@@ -43,7 +48,8 @@ class BandcampIE(InfoExtractor):
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
'info_dict': {
|
||||
'id': '2650410135',
|
||||
'ext': 'aiff',
|
||||
'ext': 'm4a',
|
||||
'acodec': r're:[fa]lac',
|
||||
'title': 'Ben Prunty - Lanius (Battle)',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Ben Prunty',
|
||||
@@ -56,7 +62,10 @@ class BandcampIE(InfoExtractor):
|
||||
'track_number': 1,
|
||||
'track_id': '2650410135',
|
||||
'artist': 'Ben Prunty',
|
||||
'album_artist': 'Ben Prunty',
|
||||
'album': 'FTL: Advanced Edition Soundtrack',
|
||||
'uploader_url': 'https://benprunty.bandcamp.com',
|
||||
'uploader_id': 'benprunty',
|
||||
},
|
||||
}, {
|
||||
# no free download, mp3 128
|
||||
@@ -77,7 +86,34 @@ class BandcampIE(InfoExtractor):
|
||||
'track_number': 5,
|
||||
'track_id': '2584466013',
|
||||
'artist': 'Mastodon',
|
||||
'album_artist': 'Mastodon',
|
||||
'album': 'Call of the Mastodon',
|
||||
'uploader_url': 'https://relapsealumni.bandcamp.com',
|
||||
'uploader_id': 'relapsealumni',
|
||||
},
|
||||
}, {
|
||||
# track from compilation album (artist/album_artist difference)
|
||||
'url': 'https://diskotopia.bandcamp.com/track/safehouse',
|
||||
'md5': '19c5337bca1428afa54129f86a2f6a69',
|
||||
'info_dict': {
|
||||
'id': '1978174799',
|
||||
'ext': 'mp3',
|
||||
'title': 'submerse - submerse - Safehouse',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'submerse',
|
||||
'timestamp': 1480779297,
|
||||
'upload_date': '20161203',
|
||||
'release_timestamp': 1481068800,
|
||||
'release_date': '20161207',
|
||||
'duration': 154.066,
|
||||
'track': 'submerse - Safehouse',
|
||||
'track_number': 3,
|
||||
'track_id': '1978174799',
|
||||
'artist': 'submerse',
|
||||
'album_artist': 'Diskotopia',
|
||||
'album': 'DSK F/W 2016-2017 Free Compilation',
|
||||
'uploader_url': 'https://diskotopia.bandcamp.com',
|
||||
'uploader_id': 'diskotopia',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -87,7 +123,7 @@ class BandcampIE(InfoExtractor):
|
||||
attr + ' data', group=2), video_id, fatal=fatal)
|
||||
|
||||
def _real_extract(self, url):
|
||||
title = self._match_id(url)
|
||||
title, uploader = self._match_valid_url(url).group('id', 'uploader')
|
||||
webpage = self._download_webpage(url, title)
|
||||
tralbum = self._extract_data_attr(webpage, title)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
@@ -123,6 +159,9 @@ class BandcampIE(InfoExtractor):
|
||||
embed = self._extract_data_attr(webpage, title, 'embed', False)
|
||||
current = tralbum.get('current') or {}
|
||||
artist = embed.get('artist') or current.get('artist') or tralbum.get('artist')
|
||||
album_artist = self._html_search_regex(
|
||||
r'<h3 class="albumTitle">[\S\s]*?by\s*<span>\s*<a href="[^>]+">\s*([^>]+?)\s*</a>',
|
||||
webpage, 'album artist', fatal=False)
|
||||
timestamp = unified_timestamp(
|
||||
current.get('publish_date') or tralbum.get('album_publish_date'))
|
||||
|
||||
@@ -183,10 +222,9 @@ class BandcampIE(InfoExtractor):
|
||||
'format_note': f.get('description'),
|
||||
'filesize': parse_filesize(f.get('size_mb')),
|
||||
'vcodec': 'none',
|
||||
'acodec': format_id.split('-')[0],
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = '%s - %s' % (artist, track) if artist else track
|
||||
|
||||
if not duration:
|
||||
@@ -198,6 +236,8 @@ class BandcampIE(InfoExtractor):
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': artist,
|
||||
'uploader_id': uploader,
|
||||
'uploader_url': f'https://{uploader}.bandcamp.com',
|
||||
'timestamp': timestamp,
|
||||
'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
|
||||
'duration': duration,
|
||||
@@ -206,13 +246,14 @@ class BandcampIE(InfoExtractor):
|
||||
'track_id': track_id,
|
||||
'artist': artist,
|
||||
'album': embed.get('album_title'),
|
||||
'album_artist': album_artist,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class BandcampAlbumIE(BandcampIE):
|
||||
class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
IE_NAME = 'Bandcamp:album'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?!/music)(?:/album/(?P<id>[^/?#&]+))?'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com/album/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||
@@ -257,14 +298,6 @@ class BandcampAlbumIE(BandcampIE):
|
||||
'id': 'hierophany-of-the-open-grave',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
'url': 'http://dotscale.bandcamp.com',
|
||||
'info_dict': {
|
||||
'title': 'Loom',
|
||||
'id': 'dotscale',
|
||||
'uploader_id': 'dotscale',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}, {
|
||||
# with escaped quote in title
|
||||
'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
|
||||
@@ -321,7 +354,7 @@ class BandcampAlbumIE(BandcampIE):
|
||||
}
|
||||
|
||||
|
||||
class BandcampWeeklyIE(BandcampIE):
|
||||
class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
IE_NAME = 'Bandcamp:weekly'
|
||||
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
@@ -370,7 +403,6 @@ class BandcampWeeklyIE(BandcampIE):
|
||||
'ext': ext,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = show.get('audio_title') or 'Bandcamp Weekly'
|
||||
subtitle = show.get('subtitle')
|
||||
@@ -391,41 +423,63 @@ class BandcampWeeklyIE(BandcampIE):
|
||||
}
|
||||
|
||||
|
||||
class BandcampMusicIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<id>[^/]+)\.bandcamp\.com/music'
|
||||
class BandcampUserIE(InfoExtractor):
|
||||
IE_NAME = 'Bandcamp:user'
|
||||
_VALID_URL = r'https?://(?!www\.)(?P<id>[^.]+)\.bandcamp\.com(?:/music)?/?(?:[#?]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
# Type 1 Bandcamp user page.
|
||||
'url': 'https://adrianvonziegler.bandcamp.com',
|
||||
'info_dict': {
|
||||
'id': 'adrianvonziegler',
|
||||
'title': 'Discography of adrianvonziegler',
|
||||
},
|
||||
'playlist_mincount': 23,
|
||||
}, {
|
||||
# Bandcamp user page with only one album
|
||||
'url': 'http://dotscale.bandcamp.com',
|
||||
'info_dict': {
|
||||
'id': 'dotscale',
|
||||
'title': 'Discography of dotscale'
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
# Type 2 Bandcamp user page.
|
||||
'url': 'https://nightcallofficial.bandcamp.com',
|
||||
'info_dict': {
|
||||
'id': 'nightcallofficial',
|
||||
'title': 'Discography of nightcallofficial',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}, {
|
||||
'url': 'https://steviasphere.bandcamp.com/music',
|
||||
'playlist_mincount': 47,
|
||||
'info_dict': {
|
||||
'id': 'steviasphere',
|
||||
'title': 'Discography of steviasphere',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://coldworldofficial.bandcamp.com/music',
|
||||
'playlist_mincount': 10,
|
||||
'info_dict': {
|
||||
'id': 'coldworldofficial',
|
||||
'title': 'Discography of coldworldofficial',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://nuclearwarnowproductions.bandcamp.com/music',
|
||||
'playlist_mincount': 399,
|
||||
'info_dict': {
|
||||
'id': 'nuclearwarnowproductions',
|
||||
'title': 'Discography of nuclearwarnowproductions',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
_TYPE_IE_DICT = {
|
||||
'album': BandcampAlbumIE.ie_key(),
|
||||
'track': BandcampIE.ie_key()
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
items = re.findall(r'href\=\"\/(?P<path>(?P<type>album|track)+/[^\"]+)', webpage)
|
||||
entries = [
|
||||
self.url_result(
|
||||
f'https://{id}.bandcamp.com/{item[0]}',
|
||||
ie=self._TYPE_IE_DICT[item[1]])
|
||||
for item in items]
|
||||
return self.playlist_result(entries, id)
|
||||
uploader = self._match_id(url)
|
||||
webpage = self._download_webpage(url, uploader)
|
||||
|
||||
discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
|
||||
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
|
||||
|
||||
return self.playlist_from_matches(
|
||||
discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))
|
||||
|
@@ -1,5 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -137,7 +135,6 @@ query GetCommentReplies($id: String!) {
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_info.get('streamUrl'), video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', live=True))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,18 +1,12 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import urllib.error
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_Element,
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_HTTPError, compat_str, compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
@@ -38,7 +32,7 @@ from ..utils import (
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
|
||||
_ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?bbc\.co\.uk/
|
||||
@@ -52,6 +46,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
)
|
||||
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||
''' % _ID_REGEX
|
||||
_EMBED_REGEX = [r'setPlaylist\("(?P<url>https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)']
|
||||
|
||||
_LOGIN_URL = 'https://account.bbc.com/signin'
|
||||
_NETRC_MACHINE = 'bbc'
|
||||
@@ -263,11 +258,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading signin page')
|
||||
|
||||
@@ -293,9 +284,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
class MediaSelectionError(Exception):
|
||||
def __init__(self, id):
|
||||
self.id = id
|
||||
@@ -324,7 +312,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
continue
|
||||
captions = self._download_xml(
|
||||
cc_url, programme_id, 'Downloading captions', fatal=False)
|
||||
if not isinstance(captions, compat_etree_Element):
|
||||
if not isinstance(captions, xml.etree.ElementTree.Element):
|
||||
continue
|
||||
subtitles['en'] = [
|
||||
{
|
||||
@@ -394,9 +382,17 @@ class BBCCoUkIE(InfoExtractor):
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
href, programme_id, mpd_id=format_id, fatal=False))
|
||||
elif transfer_format == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
# TODO: let expected_status be passed into _extract_xxx_formats() instead
|
||||
try:
|
||||
fmts = self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
except ExtractorError as e:
|
||||
if not (isinstance(e.exc_info[1], urllib.error.HTTPError)
|
||||
and e.exc_info[1].code in (403, 404)):
|
||||
raise
|
||||
fmts = []
|
||||
formats.extend(fmts)
|
||||
elif transfer_format == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, programme_id, f4m_id=format_id, fatal=False))
|
||||
@@ -579,8 +575,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
else:
|
||||
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
@@ -592,10 +586,15 @@ class BBCCoUkIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class BBCIE(BBCCoUkIE):
|
||||
class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
IE_NAME = 'bbc'
|
||||
IE_DESC = 'BBC'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?(?:
|
||||
bbc\.(?:com|co\.uk)|
|
||||
bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd\.onion|
|
||||
bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad\.onion
|
||||
)/(?:[^/]+/)+(?P<id>[^/#?]+)'''
|
||||
|
||||
_MEDIA_SETS = [
|
||||
'pc',
|
||||
@@ -784,21 +783,33 @@ class BBCIE(BBCCoUkIE):
|
||||
'timestamp': 1437785037,
|
||||
'upload_date': '20150725',
|
||||
},
|
||||
}, {
|
||||
# video with window.__INITIAL_DATA__ and value as JSON string
|
||||
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
|
||||
'info_dict': {
|
||||
'id': 'p0b71qth',
|
||||
'ext': 'mp4',
|
||||
'title': 'Why France is making this woman a national hero',
|
||||
'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1638230731,
|
||||
'upload_date': '20211130',
|
||||
},
|
||||
}, {
|
||||
# single video article embedded with data-media-vpid
|
||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# bbcthreeConfig
|
||||
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
|
||||
'info_dict': {
|
||||
'id': 'p06556y7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
|
||||
'title': 'Things Not To Say to people that live on council estates',
|
||||
'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
|
||||
'duration': 360,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# window.__PRELOADED_STATE__
|
||||
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
|
||||
@@ -833,6 +844,12 @@ class BBCIE(BBCCoUkIE):
|
||||
'upload_date': '20190604',
|
||||
'categories': ['Psychology'],
|
||||
},
|
||||
}, { # onion routes
|
||||
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad.onion/sport/av/football/63195681',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -871,7 +888,6 @@ class BBCIE(BBCCoUkIE):
|
||||
def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
|
||||
programme_id, title, description, duration, formats, subtitles = \
|
||||
self._process_legacy_playlist_url(url, playlist_id)
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
@@ -890,13 +906,8 @@ class BBCIE(BBCCoUkIE):
|
||||
json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
|
||||
timestamp = json_ld_info.get('timestamp')
|
||||
|
||||
playlist_title = json_ld_info.get('title')
|
||||
if not playlist_title:
|
||||
playlist_title = self._og_search_title(
|
||||
webpage, default=None) or self._html_search_regex(
|
||||
r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
|
||||
if playlist_title:
|
||||
playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
|
||||
playlist_title = json_ld_info.get('title') or re.sub(
|
||||
r'(.+)\s*-\s*BBC.*?$', r'\1', self._generic_title('', webpage, default='')).strip() or None
|
||||
|
||||
playlist_description = json_ld_info.get(
|
||||
'description') or self._og_search_description(webpage, default=None)
|
||||
@@ -940,7 +951,6 @@ class BBCIE(BBCCoUkIE):
|
||||
duration = int_or_none(items[0].get('duration'))
|
||||
programme_id = items[0].get('vpid')
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
entries.append({
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
@@ -977,7 +987,6 @@ class BBCIE(BBCCoUkIE):
|
||||
continue
|
||||
raise
|
||||
if entry:
|
||||
self._sort_formats(entry['formats'])
|
||||
entries.append(entry)
|
||||
|
||||
if entries:
|
||||
@@ -1001,7 +1010,6 @@ class BBCIE(BBCCoUkIE):
|
||||
|
||||
if programme_id:
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
# digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
|
||||
digital_data = self._parse_json(
|
||||
self._search_regex(
|
||||
@@ -1033,7 +1041,6 @@ class BBCIE(BBCCoUkIE):
|
||||
if version_id:
|
||||
title = smp_data['title']
|
||||
formats, subtitles = self._download_media_selector(version_id)
|
||||
self._sort_formats(formats)
|
||||
image_url = smp_data.get('holdingImageURL')
|
||||
display_date = init_data.get('displayDate')
|
||||
topic_title = init_data.get('topicTitle')
|
||||
@@ -1075,7 +1082,6 @@ class BBCIE(BBCCoUkIE):
|
||||
continue
|
||||
title = lead_media.get('title') or self._og_search_title(webpage)
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
description = lead_media.get('summary')
|
||||
uploader = lead_media.get('masterBrand')
|
||||
uploader_id = lead_media.get('mid')
|
||||
@@ -1104,7 +1110,6 @@ class BBCIE(BBCCoUkIE):
|
||||
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
synopses = current_programme.get('synopses') or {}
|
||||
network = current_programme.get('network') or {}
|
||||
duration = int_or_none(
|
||||
@@ -1137,7 +1142,6 @@ class BBCIE(BBCCoUkIE):
|
||||
clip_title = clip.get('title')
|
||||
if clip_vpid and clip_title:
|
||||
formats, subtitles = self._download_media_selector(clip_vpid)
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': clip_vpid,
|
||||
'title': clip_title,
|
||||
@@ -1159,7 +1163,6 @@ class BBCIE(BBCCoUkIE):
|
||||
if not programme_id:
|
||||
continue
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
entries.append({
|
||||
'id': programme_id,
|
||||
'title': playlist_title,
|
||||
@@ -1171,9 +1174,16 @@ class BBCIE(BBCCoUkIE):
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
initial_data = self._parse_json(self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), playlist_id, fatal=False)
|
||||
initial_data = self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
|
||||
'quoted preload state', default=None)
|
||||
if initial_data is None:
|
||||
initial_data = self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
|
||||
'preload state', default={})
|
||||
else:
|
||||
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
||||
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
||||
if initial_data:
|
||||
def parse_media(media):
|
||||
if not media:
|
||||
@@ -1184,7 +1194,6 @@ class BBCIE(BBCCoUkIE):
|
||||
if not (item_id and item_title):
|
||||
continue
|
||||
formats, subtitles = self._download_media_selector(item_id)
|
||||
self._sort_formats(formats)
|
||||
item_desc = None
|
||||
blocks = try_get(media, lambda x: x['summary']['blocks'], list)
|
||||
if blocks:
|
||||
@@ -1214,8 +1223,11 @@ class BBCIE(BBCCoUkIE):
|
||||
if name == 'media-experience':
|
||||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||
elif name == 'article':
|
||||
for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
|
||||
if block.get('type') != 'media':
|
||||
for block in (try_get(resp,
|
||||
(lambda x: x['data']['blocks'],
|
||||
lambda x: x['data']['content']['model']['blocks'],),
|
||||
list) or []):
|
||||
if block.get('type') not in ['media', 'video']:
|
||||
continue
|
||||
parse_media(block.get('model'))
|
||||
return self.playlist_result(
|
||||
@@ -1282,7 +1294,6 @@ class BBCIE(BBCCoUkIE):
|
||||
formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
|
||||
if not formats and not self.get_param('ignore_no_formats'):
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_id = media_meta.get('externalId')
|
||||
if not video_id:
|
||||
|
101
plugins/youtube_download/yt_dlp/extractor/beatbump.py
Normal file
101
plugins/youtube_download/yt_dlp/extractor/beatbump.py
Normal file
@@ -0,0 +1,101 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE, YoutubeTabIE
|
||||
|
||||
|
||||
class BeatBumpVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https://beatbump\.ml/listen\?id=(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://beatbump.ml/listen?id=MgNrAu2pzNs',
|
||||
'md5': '5ff3fff41d3935b9810a9731e485fe66',
|
||||
'info_dict': {
|
||||
'id': 'MgNrAu2pzNs',
|
||||
'ext': 'mp4',
|
||||
'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
|
||||
'artist': 'Stephen',
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
|
||||
'upload_date': '20190312',
|
||||
'categories': ['Music'],
|
||||
'playable_in_embed': True,
|
||||
'duration': 169,
|
||||
'like_count': int,
|
||||
'alt_title': 'Voyeur Girl',
|
||||
'view_count': int,
|
||||
'track': 'Voyeur Girl',
|
||||
'uploader': 'Stephen - Topic',
|
||||
'title': 'Voyeur Girl',
|
||||
'channel_follower_count': int,
|
||||
'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
|
||||
'age_limit': 0,
|
||||
'availability': 'public',
|
||||
'live_status': 'not_live',
|
||||
'album': 'it\'s too much love to know my dear',
|
||||
'channel': 'Stephen',
|
||||
'comment_count': int,
|
||||
'description': 'md5:7ae382a65843d6df2685993e90a8628f',
|
||||
'tags': 'count:11',
|
||||
'creator': 'Stephen',
|
||||
'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id_ = self._match_id(url)
|
||||
return self.url_result(f'https://music.youtube.com/watch?v={id_}', YoutubeIE, id_)
|
||||
|
||||
|
||||
class BeatBumpPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https://beatbump\.ml/(?:release\?id=|artist/|playlist/)(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://beatbump.ml/release?id=MPREb_gTAcphH99wE',
|
||||
'playlist_count': 50,
|
||||
'info_dict': {
|
||||
'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
|
||||
'availability': 'unlisted',
|
||||
'view_count': int,
|
||||
'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
|
||||
'description': '',
|
||||
'tags': [],
|
||||
'modified_date': '20221223',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://beatbump.ml/artist/UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
'playlist_mincount': 1,
|
||||
'params': {'flatplaylist': True},
|
||||
'info_dict': {
|
||||
'id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
'uploader_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
'channel_follower_count': int,
|
||||
'title': 'NoCopyrightSounds - Videos',
|
||||
'uploader': 'NoCopyrightSounds',
|
||||
'description': 'md5:cd4fd53d81d363d05eee6c1b478b491a',
|
||||
'channel': 'NoCopyrightSounds',
|
||||
'tags': 'count:12',
|
||||
'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beatbump.ml/playlist/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
|
||||
'playlist_mincount': 1,
|
||||
'params': {'flatplaylist': True},
|
||||
'info_dict': {
|
||||
'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
|
||||
'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
|
||||
'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
|
||||
'view_count': int,
|
||||
'channel_url': 'https://www.youtube.com/@NoCopyrightSounds',
|
||||
'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
'title': 'NCS : All Releases 💿',
|
||||
'uploader': 'NoCopyrightSounds',
|
||||
'availability': 'public',
|
||||
'channel': 'NoCopyrightSounds',
|
||||
'tags': [],
|
||||
'modified_date': '20221225',
|
||||
'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id_ = self._match_id(url)
|
||||
return self.url_result(f'https://music.youtube.com/browse/{id_}', YoutubeTabIE, id_)
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -77,7 +74,6 @@ class BeatportIE(InfoExtractor):
|
||||
fmt['abr'] = 96
|
||||
fmt['asr'] = 44100
|
||||
formats.append(fmt)
|
||||
self._sort_formats(formats)
|
||||
|
||||
images = []
|
||||
for name, info in track['images'].items():
|
||||
|
@@ -1,32 +1,43 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class BeegIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?beeg\.(?:com(?:/video)?)/-?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# api/v6 v1
|
||||
'url': 'http://beeg.com/5416503',
|
||||
'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
|
||||
'url': 'https://beeg.com/-0983946056129650',
|
||||
'md5': '51d235147c4627cfce884f844293ff88',
|
||||
'info_dict': {
|
||||
'id': '5416503',
|
||||
'id': '0983946056129650',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sultry Striptease',
|
||||
'description': 'md5:d22219c09da287c14bed3d6c37ce4bc2',
|
||||
'timestamp': 1391813355,
|
||||
'upload_date': '20140207',
|
||||
'duration': 383,
|
||||
'title': 'sucked cock and fucked in a private plane',
|
||||
'duration': 927,
|
||||
'tags': list,
|
||||
'age_limit': 18,
|
||||
'upload_date': '20220131',
|
||||
'timestamp': 1643656455,
|
||||
'display_id': 2540839,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://beeg.com/-0599050563103750?t=4-861',
|
||||
'md5': 'bd8b5ea75134f7f07fad63008db2060e',
|
||||
'info_dict': {
|
||||
'id': '0599050563103750',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bad Relatives',
|
||||
'duration': 2060,
|
||||
'tags': list,
|
||||
'age_limit': 18,
|
||||
'description': 'md5:b4fc879a58ae6c604f8f259155b7e3b9',
|
||||
'timestamp': 1643623200,
|
||||
'display_id': 2569965,
|
||||
'upload_date': '20220131',
|
||||
}
|
||||
}, {
|
||||
# api/v6 v2
|
||||
@@ -36,12 +47,6 @@ class BeegIE(InfoExtractor):
|
||||
# api/v6 v2 w/o t
|
||||
'url': 'https://beeg.com/1277207756',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://beeg.porn/video/5416503',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://beeg.porn/5416503',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -49,68 +54,36 @@ class BeegIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
beeg_version = self._search_regex(
|
||||
r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
|
||||
default='1546225636701')
|
||||
video = self._download_json(
|
||||
'https://store.externulls.com/facts/file/%s' % video_id,
|
||||
video_id, 'Downloading JSON for %s' % video_id)
|
||||
|
||||
if len(video_id) >= 10:
|
||||
query = {
|
||||
'v': 2,
|
||||
}
|
||||
qs = parse_qs(url)
|
||||
t = qs.get('t', [''])[0].split('-')
|
||||
if len(t) > 1:
|
||||
query.update({
|
||||
's': t[0],
|
||||
'e': t[1],
|
||||
})
|
||||
else:
|
||||
query = {'v': 1}
|
||||
fc_facts = video.get('fc_facts')
|
||||
first_fact = {}
|
||||
for fact in fc_facts:
|
||||
if not first_fact or try_get(fact, lambda x: x['id'] < first_fact['id']):
|
||||
first_fact = fact
|
||||
|
||||
for api_path in ('', 'api.'):
|
||||
video = self._download_json(
|
||||
'https://%sbeeg.com/api/v6/%s/video/%s'
|
||||
% (api_path, beeg_version, video_id), video_id,
|
||||
fatal=api_path == 'api.', query=query)
|
||||
if video:
|
||||
break
|
||||
resources = traverse_obj(video, ('file', 'hls_resources')) or first_fact.get('hls_resources')
|
||||
|
||||
formats = []
|
||||
for format_id, video_url in video.items():
|
||||
if not video_url:
|
||||
for format_id, video_uri in resources.items():
|
||||
if not video_uri:
|
||||
continue
|
||||
height = self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)
|
||||
if not height:
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(
|
||||
video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),
|
||||
'format_id': format_id,
|
||||
'height': int(height),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video['title']
|
||||
video_id = compat_str(video.get('id') or video_id)
|
||||
display_id = video.get('code')
|
||||
description = video.get('desc')
|
||||
series = video.get('ps_name')
|
||||
|
||||
timestamp = unified_timestamp(video.get('date'))
|
||||
duration = int_or_none(video.get('duration'))
|
||||
|
||||
tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
|
||||
height = int_or_none(self._search_regex(r'fl_cdn_(\d+)', format_id, 'height', default=None))
|
||||
current_formats = self._extract_m3u8_formats(f'https://video.beeg.com/{video_uri}', video_id, ext='mp4', m3u8_id=str(height))
|
||||
for f in current_formats:
|
||||
f['height'] = height
|
||||
formats.extend(current_formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'series': series,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'tags': tags,
|
||||
'display_id': first_fact.get('id'),
|
||||
'title': traverse_obj(video, ('file', 'stuff', 'sf_name')),
|
||||
'description': traverse_obj(video, ('file', 'stuff', 'sf_story')),
|
||||
'timestamp': unified_timestamp(first_fact.get('fc_created')),
|
||||
'duration': int_or_none(traverse_obj(video, ('file', 'fl_duration'))),
|
||||
'tags': traverse_obj(video, ('tags', ..., 'tg_name')),
|
||||
'formats': formats,
|
||||
'age_limit': self._rta_search(webpage),
|
||||
}
|
||||
|
@@ -1,7 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_basename
|
||||
|
||||
|
@@ -1,7 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@@ -28,7 +24,7 @@ class BellMediaIE(InfoExtractor):
|
||||
)/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
|
||||
'md5': '36d3ef559cfe8af8efe15922cd3ce950',
|
||||
'md5': '3e5b8e38370741d5089da79161646635',
|
||||
'info_dict': {
|
||||
'id': '1403070',
|
||||
'ext': 'flv',
|
||||
@@ -36,6 +32,14 @@ class BellMediaIE(InfoExtractor):
|
||||
'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
|
||||
'upload_date': '20180525',
|
||||
'timestamp': 1527288600,
|
||||
'season_id': 73997,
|
||||
'season': '2018',
|
||||
'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg',
|
||||
'tags': [],
|
||||
'categories': ['ETFs'],
|
||||
'season_number': 8,
|
||||
'duration': 272.038,
|
||||
'series': 'Market Call Tonight',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
|
||||
|
70
plugins/youtube_download/yt_dlp/extractor/berufetv.py
Normal file
70
plugins/youtube_download/yt_dlp/extractor/berufetv.py
Normal file
@@ -0,0 +1,70 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none, mimetype2ext, traverse_obj
|
||||
|
||||
|
||||
class BerufeTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?web\.arbeitsagentur\.de/berufetv/[^?#]+/film;filmId=(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://web.arbeitsagentur.de/berufetv/studienberufe/wirtschaftswissenschaften/wirtschaftswissenschaften-volkswirtschaft/film;filmId=DvKC3DUpMKvUZ_6fEnfg3u',
|
||||
'md5': '041b6432ec8e6838f84a5c30f31cc795',
|
||||
'info_dict': {
|
||||
'id': 'DvKC3DUpMKvUZ_6fEnfg3u',
|
||||
'ext': 'mp4',
|
||||
'title': 'Volkswirtschaftslehre',
|
||||
'description': 'md5:6bd87d0c63163480a6489a37526ee1c1',
|
||||
'categories': ['Studien­beruf'],
|
||||
'tags': ['Studienfilm'],
|
||||
'duration': 602.440,
|
||||
'thumbnail': r're:^https://asset-out-cdn\.video-cdn\.net/private/videos/DvKC3DUpMKvUZ_6fEnfg3u/thumbnails/793063\?quality=thumbnail&__token__=[^\s]+$',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
movie_metadata = self._download_json(
|
||||
'https://rest.arbeitsagentur.de/infosysbub/berufetv/pc/v1/film-metadata',
|
||||
video_id, 'Downloading JSON metadata',
|
||||
headers={'X-API-Key': '79089773-4892-4386-86e6-e8503669f426'}, fatal=False)
|
||||
|
||||
meta = traverse_obj(
|
||||
movie_metadata, ('metadaten', lambda _, i: video_id == i['miId']),
|
||||
get_all=False, default={})
|
||||
|
||||
video = self._download_json(
|
||||
f'https://d.video-cdn.net/play/player/8YRzUk6pTzmBdrsLe9Y88W/video/{video_id}',
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for key, source in video['videoSources']['html'].items():
|
||||
if key == 'auto':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(source[0]['source'], video_id)
|
||||
formats += fmts
|
||||
subtitles = subs
|
||||
else:
|
||||
formats.append({
|
||||
'url': source[0]['source'],
|
||||
'ext': mimetype2ext(source[0]['mimeType']),
|
||||
'format_id': key,
|
||||
})
|
||||
|
||||
for track in video.get('videoTracks') or []:
|
||||
if track.get('type') != 'SUBTITLES':
|
||||
continue
|
||||
subtitles.setdefault(track['language'], []).append({
|
||||
'url': track['source'],
|
||||
'name': track.get('label'),
|
||||
'ext': 'vtt'
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': meta.get('titel') or traverse_obj(video, ('videoMetaData', 'title')),
|
||||
'description': meta.get('beschreibung'),
|
||||
'thumbnail': meta.get('thumbnail') or f'https://asset-out-cdn.video-cdn.net/private/videos/{video_id}/thumbnails/active',
|
||||
'duration': float_or_none(video.get('duration'), scale=1000),
|
||||
'categories': [meta['kategorie']] if meta.get('kategorie') else None,
|
||||
'tags': meta.get('themengebiete'),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
@@ -1,5 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -8,7 +5,7 @@ from ..utils import extract_attributes
|
||||
|
||||
|
||||
class BFMTVBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?bfmtv\.com/'
|
||||
_VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/'
|
||||
_VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
|
||||
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block"[^>]*>)'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
@@ -34,6 +31,9 @@ class BFMTVIE(BFMTVBaseIE):
|
||||
'uploader_id': '876450610001',
|
||||
'upload_date': '20201002',
|
||||
'timestamp': 1601629620,
|
||||
'duration': 44.757,
|
||||
'tags': ['bfmactu', 'politique'],
|
||||
'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876450610001/5041f4c1-bc48-4af8-a256-1b8300ad8ef0/cf2f9114-e8e2-4494-82b4-ab794ea4bc7d/1920x1080/match/image.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -45,7 +45,7 @@ class BFMTVIE(BFMTVBaseIE):
|
||||
return self._brightcove_url_result(video_block['videoid'], video_block)
|
||||
|
||||
|
||||
class BFMTVLiveIE(BFMTVIE):
|
||||
class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
|
||||
IE_NAME = 'bfmtv:live'
|
||||
_VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
|
||||
_TESTS = [{
|
||||
@@ -84,6 +84,20 @@ class BFMTVArticleIE(BFMTVBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.bfmtv.com/sante/covid-19-oui-le-vaccin-de-pfizer-distribue-en-france-a-bien-ete-teste-sur-des-personnes-agees_AN-202101060275.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://rmc.bfmtv.com/actualites/societe/transports/ce-n-est-plus-tout-rentable-le-bioethanol-e85-depasse-1eu-le-litre-des-automobilistes-regrettent_AV-202301100268.html',
|
||||
'info_dict': {
|
||||
'id': '6318445464112',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le plein de bioéthanol fait de plus en plus mal à la pompe',
|
||||
'description': None,
|
||||
'uploader_id': '876630703001',
|
||||
'upload_date': '20230110',
|
||||
'timestamp': 1673341692,
|
||||
'duration': 109.269,
|
||||
'tags': ['rmc', 'show', 'apolline de malherbe', 'info', 'talk', 'matinale', 'radio'],
|
||||
'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg'
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -66,8 +63,6 @@ class BigflixIE(InfoExtractor):
|
||||
'url': decode_url(file_url),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = self._html_search_meta('description', webpage)
|
||||
|
||||
return {
|
||||
|
56
plugins/youtube_download/yt_dlp/extractor/bigo.py
Normal file
56
plugins/youtube_download/yt_dlp/extractor/bigo.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, urlencode_postdata
|
||||
|
||||
|
||||
class BigoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bigo\.tv/(?:[a-z]{2,}/)?(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bigo.tv/ja/221338632',
|
||||
'info_dict': {
|
||||
'id': '6576287577575737440',
|
||||
'title': '土よ〜💁♂️ 休憩室/REST room',
|
||||
'thumbnail': r're:https?://.+',
|
||||
'uploader': '✨Shin💫',
|
||||
'uploader_id': '221338632',
|
||||
'is_live': True,
|
||||
},
|
||||
'skip': 'livestream',
|
||||
}, {
|
||||
'url': 'https://www.bigo.tv/th/Tarlerm1304',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://bigo.tv/115976881',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
|
||||
info_raw = self._download_json(
|
||||
'https://ta.bigo.tv/official_website/studio/getInternalStudioInfo',
|
||||
user_id, data=urlencode_postdata({'siteId': user_id}))
|
||||
|
||||
if not isinstance(info_raw, dict):
|
||||
raise ExtractorError('Received invalid JSON data')
|
||||
if info_raw.get('code'):
|
||||
raise ExtractorError(
|
||||
'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True)
|
||||
info = info_raw.get('data') or {}
|
||||
|
||||
if not info.get('alive'):
|
||||
raise ExtractorError('This user is offline.', expected=True)
|
||||
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
info.get('hls_src'), user_id, 'mp4', 'm3u8')
|
||||
|
||||
return {
|
||||
'id': info.get('roomId') or user_id,
|
||||
'title': info.get('roomTopic') or info.get('nick_name') or user_id,
|
||||
'formats': formats,
|
||||
'subtitles': subs,
|
||||
'thumbnail': info.get('snapshot'),
|
||||
'uploader': info.get('nick_name'),
|
||||
'uploader_id': user_id,
|
||||
'is_live': True,
|
||||
}
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
|
@@ -1,29 +1,27 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vk import VKIE
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
)
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class BIQLEIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
|
||||
_TESTS = [{
|
||||
# Youtube embed
|
||||
'url': 'https://biqle.ru/watch/-115995369_456239081',
|
||||
'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
|
||||
'url': 'https://biqle.ru/watch/-2000421746_85421746',
|
||||
'md5': 'ae6ef4f04d19ac84e4658046d02c151c',
|
||||
'info_dict': {
|
||||
'id': '8v4f-avW-VI',
|
||||
'id': '-2000421746_85421746',
|
||||
'ext': 'mp4',
|
||||
'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer",
|
||||
'description': 'Passe-Partout',
|
||||
'uploader_id': 'mrsimpsonstef3',
|
||||
'uploader': 'Phanolito',
|
||||
'upload_date': '20120822',
|
||||
'title': 'Forsaken By Hope Studio Clip',
|
||||
'description': 'Forsaken By Hope Studio Clip — Смотреть онлайн',
|
||||
'upload_date': '19700101',
|
||||
'thumbnail': r're:https://[^/]+/impf/7vN3ACwSTgChP96OdOfzFjUCzFR6ZglDQgWsIw/KPaACiVJJxM\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=b48ea459c4d33dbcba5e26d63574b1cb&type=video_thumb',
|
||||
'timestamp': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://biqle.org/watch/-44781847_168547604',
|
||||
@@ -32,54 +30,62 @@ class BIQLEIE(InfoExtractor):
|
||||
'id': '-44781847_168547604',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ребенок в шоке от автоматической мойки',
|
||||
'description': 'Ребенок в шоке от автоматической мойки — Смотреть онлайн',
|
||||
'timestamp': 1396633454,
|
||||
'uploader': 'Dmitry Kotov',
|
||||
'upload_date': '20140404',
|
||||
'uploader_id': '47850140',
|
||||
'thumbnail': r're:https://[^/]+/c535507/u190034692/video/l_b84df002\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
embed_url = self._proto_relative_url(self._search_regex(
|
||||
r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>',
|
||||
webpage, 'embed url'))
|
||||
|
||||
title = self._html_search_meta('name', webpage, 'Title', fatal=False)
|
||||
timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None))
|
||||
description = self._html_search_meta('description', webpage, 'Description', default=None)
|
||||
|
||||
global_embed_url = self._search_regex(
|
||||
r'<script[^<]+?window.globEmbedUrl\s*=\s*\'((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^\']+)\'',
|
||||
webpage, 'global Embed url')
|
||||
hash = self._search_regex(
|
||||
r'<script id="data-embed-video[^<]+?hash: "([^"]+)"[^<]*</script>', webpage, 'Hash')
|
||||
|
||||
embed_url = global_embed_url + hash
|
||||
|
||||
if VKIE.suitable(embed_url):
|
||||
return self.url_result(embed_url, VKIE.ie_key(), video_id)
|
||||
|
||||
embed_page = self._download_webpage(
|
||||
embed_url, video_id, headers={'Referer': url})
|
||||
video_ext = self._get_cookies(embed_url).get('video_ext')
|
||||
if video_ext:
|
||||
video_ext = compat_urllib_parse_unquote(video_ext.value)
|
||||
if not video_ext:
|
||||
video_ext = compat_b64decode(self._search_regex(
|
||||
r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
|
||||
embed_page, 'video_ext')).decode()
|
||||
video_id, sig, _, access_token = video_ext.split(':')
|
||||
embed_url, video_id, 'Downloading embed webpage', headers={'Referer': url})
|
||||
|
||||
glob_params = self._parse_json(self._search_regex(
|
||||
r'<script id="globParams">[^<]*window.globParams = ([^;]+);[^<]+</script>',
|
||||
embed_page, 'Global Parameters'), video_id, transform_source=js_to_json)
|
||||
host_name = compat_b64decode(glob_params['server'][::-1]).decode()
|
||||
|
||||
item = self._download_json(
|
||||
'https://api.vk.com/method/video.get', video_id,
|
||||
headers={'User-Agent': 'okhttp/3.4.1'}, query={
|
||||
'access_token': access_token,
|
||||
'sig': sig,
|
||||
'v': 5.44,
|
||||
f'https://{host_name}/method/video.get/{video_id}', video_id,
|
||||
headers={'Referer': url}, query={
|
||||
'token': glob_params['video']['access_token'],
|
||||
'videos': video_id,
|
||||
'ckey': glob_params['c_key'],
|
||||
'credentials': glob_params['video']['credentials'],
|
||||
})['response']['items'][0]
|
||||
title = item['title']
|
||||
|
||||
formats = []
|
||||
for f_id, f_url in item.get('files', {}).items():
|
||||
if f_id == 'external':
|
||||
return self.url_result(f_url)
|
||||
ext, height = f_id.split('_')
|
||||
formats.append({
|
||||
'format_id': height + 'p',
|
||||
'url': f_url,
|
||||
'height': int_or_none(height),
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
height_extra_key = traverse_obj(glob_params, ('video', 'partial', 'quality', height))
|
||||
if height_extra_key:
|
||||
formats.append({
|
||||
'format_id': f'{height}p',
|
||||
'url': f'https://{host_name}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}',
|
||||
'height': int_or_none(height),
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for k, v in item.items():
|
||||
@@ -96,10 +102,9 @@ class BIQLEIE(InfoExtractor):
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'comment_count': int_or_none(item.get('comments')),
|
||||
'description': item.get('description'),
|
||||
'description': description,
|
||||
'duration': int_or_none(item.get('duration')),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': int_or_none(item.get('date')),
|
||||
'uploader': item.get('owner_id'),
|
||||
'timestamp': timestamp,
|
||||
'view_count': int_or_none(item.get('views')),
|
||||
}
|
||||
|
@@ -1,14 +1,20 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
HEADRequest,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
get_elements_html_by_class,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_count,
|
||||
parse_duration,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
)
|
||||
@@ -16,11 +22,12 @@ from ..utils import (
|
||||
|
||||
class BitChuteIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
|
||||
_EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
|
||||
'md5': '7e427d7ed7af5a75b5855705ec750e2b',
|
||||
'info_dict': {
|
||||
'id': 'szoMrox2JEI',
|
||||
'id': 'UGlrF9o9b-Q',
|
||||
'ext': 'mp4',
|
||||
'title': 'This is the first video on #BitChute !',
|
||||
'description': 'md5:a0337e7b1fe39e32336974af8173a034',
|
||||
@@ -28,6 +35,31 @@ class BitChuteIE(InfoExtractor):
|
||||
'uploader': 'BitChute',
|
||||
'upload_date': '20170103',
|
||||
},
|
||||
}, {
|
||||
# video not downloadable in browser, but we can recover it
|
||||
'url': 'https://www.bitchute.com/video/2s6B3nZjAk7R/',
|
||||
'md5': '05c12397d5354bf24494885b08d24ed1',
|
||||
'info_dict': {
|
||||
'id': '2s6B3nZjAk7R',
|
||||
'ext': 'mp4',
|
||||
'filesize': 71537926,
|
||||
'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control',
|
||||
'description': 'md5:228ee93bd840a24938f536aeac9cf749',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'BitChute',
|
||||
'upload_date': '20181113',
|
||||
},
|
||||
'params': {'check_formats': None},
|
||||
}, {
|
||||
# restricted video
|
||||
'url': 'https://www.bitchute.com/video/WEnQU7XGcTdl/',
|
||||
'info_dict': {
|
||||
'id': 'WEnQU7XGcTdl',
|
||||
'ext': 'mp4',
|
||||
'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'Georestricted in DE',
|
||||
}, {
|
||||
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
|
||||
'only_matching': True,
|
||||
@@ -35,124 +67,167 @@ class BitChuteIE(InfoExtractor):
|
||||
'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>%s)' % BitChuteIE._VALID_URL,
|
||||
webpage)]
|
||||
_HEADERS = {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
|
||||
'Referer': 'https://www.bitchute.com/',
|
||||
}
|
||||
|
||||
def _check_format(self, video_url, video_id):
|
||||
urls = orderedSet(
|
||||
re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url)
|
||||
for host in (r'\g<2>', 'seed150', 'seed151', 'seed152', 'seed153'))
|
||||
for url in urls:
|
||||
try:
|
||||
response = self._request_webpage(
|
||||
HEADRequest(url), video_id=video_id, note=f'Checking {url}', headers=self._HEADERS)
|
||||
except ExtractorError as e:
|
||||
self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}')
|
||||
continue
|
||||
return {
|
||||
'url': url,
|
||||
'filesize': int_or_none(response.headers.get('Content-Length'))
|
||||
}
|
||||
|
||||
def _raise_if_restricted(self, webpage):
|
||||
page_title = clean_html(get_element_by_class('page-title', webpage)) or ''
|
||||
if re.fullmatch(r'(?:Channel|Video) Restricted', page_title):
|
||||
reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
|
||||
self.raise_geo_restricted(reason)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
|
||||
})
|
||||
f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
|
||||
webpage, 'title', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'title',
|
||||
default=None) or self._og_search_description(webpage)
|
||||
self._raise_if_restricted(webpage)
|
||||
publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||
|
||||
format_urls = []
|
||||
for mobj in re.finditer(
|
||||
r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
|
||||
format_urls.append(mobj.group('url'))
|
||||
format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
|
||||
|
||||
formats = [
|
||||
{'url': format_url}
|
||||
for format_url in orderedSet(format_urls)]
|
||||
formats = []
|
||||
for format_ in traverse_obj(entries, (0, 'formats', ...)):
|
||||
if self.get_param('check_formats') is not False:
|
||||
format_.update(self._check_format(format_.pop('url'), video_id) or {})
|
||||
if 'url' not in format_:
|
||||
continue
|
||||
formats.append(format_)
|
||||
|
||||
if not formats:
|
||||
entries = self._parse_html5_media_entries(
|
||||
url, webpage, video_id)
|
||||
if not entries:
|
||||
error = self._html_search_regex(r'<h1 class="page-title">([^<]+)</h1>', webpage, 'error', default='Cannot find video')
|
||||
if error == 'Video Unavailable':
|
||||
raise GeoRestrictedError(error)
|
||||
raise ExtractorError(error)
|
||||
formats = entries[0]['formats']
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
|
||||
webpage, 'description', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:image:src', webpage, 'thumbnail')
|
||||
uploader = self._html_search_regex(
|
||||
(r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
|
||||
r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.',
|
||||
webpage, 'upload date', fatal=False))
|
||||
self.raise_no_formats(
|
||||
'Video is unavailable. Please make sure this video is playable in the browser '
|
||||
'before reporting this issue.', expected=True, video_id=video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'uploader': clean_html(get_element_by_class('owner', webpage)),
|
||||
'upload_date': unified_strdate(self._search_regex(
|
||||
r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class BitChuteChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.bitchute.com/channel/victoriaxrave/',
|
||||
'playlist_mincount': 185,
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bitchute.com/channel/bitchute/',
|
||||
'info_dict': {
|
||||
'id': 'victoriaxrave',
|
||||
'id': 'bitchute',
|
||||
'title': 'BitChute',
|
||||
'description': 'md5:5329fb3866125afa9446835594a9b138',
|
||||
},
|
||||
}
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '7e427d7ed7af5a75b5855705ec750e2b',
|
||||
'info_dict': {
|
||||
'id': 'UGlrF9o9b-Q',
|
||||
'ext': 'mp4',
|
||||
'filesize': None,
|
||||
'title': 'This is the first video on #BitChute !',
|
||||
'description': 'md5:a0337e7b1fe39e32336974af8173a034',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'BitChute',
|
||||
'upload_date': '20170103',
|
||||
'duration': 16,
|
||||
'view_count': int,
|
||||
},
|
||||
}
|
||||
],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'playlist_items': '-1',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bitchute.com/playlist/wV9Imujxasw9/',
|
||||
'playlist_mincount': 20,
|
||||
'info_dict': {
|
||||
'id': 'wV9Imujxasw9',
|
||||
'title': 'Bruce MacDonald and "The Light of Darkness"',
|
||||
'description': 'md5:04913227d2714af1d36d804aa2ab6b1e',
|
||||
}
|
||||
}]
|
||||
|
||||
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
|
||||
PAGE_SIZE = 25
|
||||
HTML_CLASS_NAMES = {
|
||||
'channel': {
|
||||
'container': 'channel-videos-container',
|
||||
'title': 'channel-videos-title',
|
||||
'description': 'channel-videos-text',
|
||||
},
|
||||
'playlist': {
|
||||
'container': 'playlist-video',
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
}
|
||||
|
||||
def _entries(self, channel_id):
|
||||
channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
|
||||
offset = 0
|
||||
for page_num in itertools.count(1):
|
||||
data = self._download_json(
|
||||
'%sextend/' % channel_url, channel_id,
|
||||
'Downloading channel page %d' % page_num,
|
||||
data=urlencode_postdata({
|
||||
'csrfmiddlewaretoken': self._TOKEN,
|
||||
'name': '',
|
||||
'offset': offset,
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'Referer': channel_url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Cookie': 'csrftoken=%s' % self._TOKEN,
|
||||
})
|
||||
if data.get('success') is False:
|
||||
break
|
||||
html = data.get('html')
|
||||
if not html:
|
||||
break
|
||||
video_ids = re.findall(
|
||||
r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
|
||||
html)
|
||||
if not video_ids:
|
||||
break
|
||||
offset += len(video_ids)
|
||||
for video_id in video_ids:
|
||||
yield self.url_result(
|
||||
'https://www.bitchute.com/video/%s' % video_id,
|
||||
ie=BitChuteIE.ie_key(), video_id=video_id)
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _make_url(playlist_id, playlist_type):
|
||||
return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/'
|
||||
|
||||
def _fetch_page(self, playlist_id, playlist_type, page_num):
|
||||
playlist_url = self._make_url(playlist_id, playlist_type)
|
||||
data = self._download_json(
|
||||
f'{playlist_url}extend/', playlist_id, f'Downloading page {page_num}',
|
||||
data=urlencode_postdata({
|
||||
'csrfmiddlewaretoken': self._TOKEN,
|
||||
'name': '',
|
||||
'offset': page_num * self.PAGE_SIZE,
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'Referer': playlist_url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Cookie': f'csrftoken={self._TOKEN}',
|
||||
})
|
||||
if not data.get('success'):
|
||||
return
|
||||
classes = self.HTML_CLASS_NAMES[playlist_type]
|
||||
for video_html in get_elements_html_by_class(classes['container'], data.get('html')):
|
||||
video_id = self._search_regex(
|
||||
r'<a\s[^>]*\bhref=["\']/video/([^"\'/]+)', video_html, 'video id', default=None)
|
||||
if not video_id:
|
||||
continue
|
||||
yield self.url_result(
|
||||
f'https://www.bitchute.com/video/{video_id}', BitChuteIE, video_id, url_transparent=True,
|
||||
title=clean_html(get_element_by_class(classes['title'], video_html)),
|
||||
description=clean_html(get_element_by_class(classes['description'], video_html)),
|
||||
duration=parse_duration(get_element_by_class('video-duration', video_html)),
|
||||
view_count=parse_count(clean_html(get_element_by_class('video-views', video_html))))
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
playlist_type, playlist_id = self._match_valid_url(url).group('type', 'id')
|
||||
webpage = self._download_webpage(self._make_url(playlist_id, playlist_type), playlist_id)
|
||||
|
||||
page_func = functools.partial(self._fetch_page, playlist_id, playlist_type)
|
||||
return self.playlist_result(
|
||||
self._entries(channel_id), playlist_id=channel_id)
|
||||
OnDemandPagedList(page_func, self.PAGE_SIZE), playlist_id,
|
||||
title=self._html_extract_title(webpage, default=None),
|
||||
description=self._html_search_meta(
|
||||
('description', 'og:description', 'twitter:description'), webpage, default=None),
|
||||
playlist_count=int_or_none(self._html_search_regex(
|
||||
r'<span>(\d+)\s+videos?</span>', webpage, 'playlist count', default=None)))
|
||||
|
@@ -1,5 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@@ -47,7 +45,6 @@ class BitwaveStreamIE(InfoExtractor):
|
||||
formats = self._extract_m3u8_formats(
|
||||
channel['data']['url'], username,
|
||||
'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': username,
|
||||
|
@@ -1,7 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .amp import AMPIE
|
||||
from ..utils import (
|
||||
|
@@ -1,86 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
remove_start,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BlinkxIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
|
||||
IE_NAME = 'blinkx'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
|
||||
'md5': '337cf7a344663ec79bf93a526a2e06c7',
|
||||
'info_dict': {
|
||||
'id': 'Da0Gw3xc',
|
||||
'ext': 'mp4',
|
||||
'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
|
||||
'uploader': 'IGN News',
|
||||
'upload_date': '20150217',
|
||||
'timestamp': 1424215740,
|
||||
'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
|
||||
'duration': 47.743333,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = video_id[:8]
|
||||
|
||||
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&'
|
||||
+ 'video=%s' % video_id)
|
||||
data_json = self._download_webpage(api_url, display_id)
|
||||
data = json.loads(data_json)['api']['results'][0]
|
||||
duration = None
|
||||
thumbnails = []
|
||||
formats = []
|
||||
for m in data['media']:
|
||||
if m['type'] == 'jpg':
|
||||
thumbnails.append({
|
||||
'url': m['link'],
|
||||
'width': int(m['w']),
|
||||
'height': int(m['h']),
|
||||
})
|
||||
elif m['type'] == 'original':
|
||||
duration = float(m['d'])
|
||||
elif m['type'] == 'youtube':
|
||||
yt_id = m['link']
|
||||
self.to_screen('Youtube video detected: %s' % yt_id)
|
||||
return self.url_result(yt_id, 'Youtube', video_id=yt_id)
|
||||
elif m['type'] in ('flv', 'mp4'):
|
||||
vcodec = remove_start(m['vcodec'], 'ff')
|
||||
acodec = remove_start(m['acodec'], 'ff')
|
||||
vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
|
||||
abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
|
||||
tbr = vbr + abr if vbr and abr else None
|
||||
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': m['link'],
|
||||
'vcodec': vcodec,
|
||||
'acodec': acodec,
|
||||
'abr': abr,
|
||||
'vbr': vbr,
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(m.get('w')),
|
||||
'height': int_or_none(m.get('h')),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'fullid': video_id,
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
'uploader': data.get('channel_name'),
|
||||
'timestamp': data.get('pubdate_epoch'),
|
||||
'description': data.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
}
|
@@ -1,8 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_duration,
|
||||
@@ -16,7 +11,7 @@ from .common import InfoExtractor
|
||||
class BloggerIE(InfoExtractor):
|
||||
IE_NAME = 'blogger.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?blogger\.com/video\.g\?token=(?P<id>.+)'
|
||||
_VALID_EMBED = r'''<iframe[^>]+src=["']((?:https?:)?//(?:www\.)?blogger\.com/video\.g\?token=[^"']+)["']'''
|
||||
_EMBED_REGEX = [r'''<iframe[^>]+src=["'](?P<url>(?:https?:)?//(?:www\.)?blogger\.com/video\.g\?token=[^"']+)["']''']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.blogger.com/video.g?token=AD6v5dzEe9hfcARr5Hlq1WTkYy6t-fXH3BBahVhGvVHe5szdEUBEloSEDSTA8-b111089KbfWuBvTN7fnbxMtymsHhXAXwVvyzHH4Qch2cfLQdGxKQrrEuFpC1amSl_9GuLWODjPgw',
|
||||
'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac',
|
||||
@@ -29,10 +24,6 @@ class BloggerIE(InfoExtractor):
|
||||
}
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(BloggerIE._VALID_EMBED, webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
token_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, token_id)
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -10,13 +7,11 @@ class BloombergIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
|
||||
# The md5 checksum changes
|
||||
'url': 'https://www.bloomberg.com/news/videos/2021-09-14/apple-unveils-the-new-iphone-13-stock-doesn-t-move-much-video',
|
||||
'info_dict': {
|
||||
'id': 'qurhIVlJSB6hzkVi229d8g',
|
||||
'id': 'V8cFcYMxTHaMcEiiYVr39A',
|
||||
'ext': 'flv',
|
||||
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
|
||||
'description': 'md5:a8ba0302912d03d246979735c17d2761',
|
||||
'title': 'Apple Unveils the New IPhone 13, Stock Doesn\'t Move Much',
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[format_id^=hds]',
|
||||
@@ -60,7 +55,7 @@ class BloombergIE(InfoExtractor):
|
||||
title = re.sub(': Video$', '', self._og_search_title(webpage))
|
||||
|
||||
embed_info = self._download_json(
|
||||
'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
|
||||
'http://www.bloomberg.com/multimedia/api/embed?id=%s' % video_id, video_id)
|
||||
formats = []
|
||||
for stream in embed_info['streams']:
|
||||
stream_url = stream.get('url')
|
||||
@@ -72,7 +67,6 @@ class BloombergIE(InfoExtractor):
|
||||
else:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream_url, video_id, f4m_id='hds', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,7 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import ExtractorError
|
||||
@@ -25,8 +21,6 @@ class BokeCCBaseIE(InfoExtractor):
|
||||
'quality': int(quality.attrib['value']),
|
||||
} for quality in info_xml.findall('./video/quality')]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return formats
|
||||
|
||||
|
||||
|
@@ -1,6 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
@@ -11,13 +8,28 @@ from ..utils import (
|
||||
|
||||
|
||||
class BongaCamsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
|
||||
_VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.(?:com|net))/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://de.bongacams.com/azumi-8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://cn.bongacams.com/azumi-8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://de.bongacams.net/claireashton',
|
||||
'info_dict': {
|
||||
'id': 'claireashton',
|
||||
'ext': 'mp4',
|
||||
'title': r're:ClaireAshton \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
|
||||
'age_limit': 18,
|
||||
'uploader_id': 'ClaireAshton',
|
||||
'uploader': 'ClaireAshton',
|
||||
'like_count': int,
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -45,7 +57,6 @@ class BongaCamsIE(InfoExtractor):
|
||||
formats = self._extract_m3u8_formats(
|
||||
'%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id),
|
||||
channel_id, 'mp4', m3u8_id='hls', live=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': channel_id,
|
||||
|
86
plugins/youtube_download/yt_dlp/extractor/booyah.py
Normal file
86
plugins/youtube_download/yt_dlp/extractor/booyah.py
Normal file
@@ -0,0 +1,86 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, str_or_none, traverse_obj
|
||||
|
||||
|
||||
class BooyahBaseIE(InfoExtractor):
|
||||
_BOOYAH_SESSION_KEY = None
|
||||
|
||||
def _real_initialize(self):
|
||||
BooyahBaseIE._BOOYAH_SESSION_KEY = self._request_webpage(
|
||||
'https://booyah.live/api/v3/auths/sessions', None, data=b'').getheader('booyah-session-key')
|
||||
|
||||
def _get_comments(self, video_id):
|
||||
comment_json = self._download_json(
|
||||
f'https://booyah.live/api/v3/playbacks/{video_id}/comments/tops', video_id,
|
||||
headers={'Booyah-Session-Key': self._BOOYAH_SESSION_KEY}, fatal=False) or {}
|
||||
|
||||
return [{
|
||||
'id': comment.get('comment_id'),
|
||||
'author': comment.get('from_nickname'),
|
||||
'author_id': comment.get('from_uid'),
|
||||
'author_thumbnail': comment.get('from_thumbnail'),
|
||||
'text': comment.get('content'),
|
||||
'timestamp': comment.get('create_time'),
|
||||
'like_count': comment.get('like_cnt'),
|
||||
} for comment in comment_json.get('comment_list') or ()]
|
||||
|
||||
|
||||
class BooyahClipsIE(BooyahBaseIE):
|
||||
_VALID_URL = r'https?://booyah.live/clips/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://booyah.live/clips/13887261322952306617',
|
||||
'info_dict': {
|
||||
'id': '13887261322952306617',
|
||||
'ext': 'mp4',
|
||||
'view_count': int,
|
||||
'duration': 30,
|
||||
'channel_id': 90565760,
|
||||
'like_count': int,
|
||||
'title': 'Cayendo con estilo 😎',
|
||||
'uploader': '♡LɪꜱGΛMER',
|
||||
'comment_count': int,
|
||||
'uploader_id': '90565760',
|
||||
'thumbnail': 'https://resmambet-a.akamaihd.net/mambet-storage/Clip/90565760/90565760-27204374-fba0-409d-9d7b-63a48b5c0e75.jpg',
|
||||
'upload_date': '20220617',
|
||||
'timestamp': 1655490556,
|
||||
'modified_timestamp': 1655490556,
|
||||
'modified_date': '20220617',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
json_data = self._download_json(
|
||||
f'https://booyah.live/api/v3/playbacks/{video_id}', video_id,
|
||||
headers={'Booyah-Session-key': self._BOOYAH_SESSION_KEY})
|
||||
|
||||
formats = []
|
||||
for video_data in json_data['playback']['endpoint_list']:
|
||||
formats.extend(({
|
||||
'url': video_data.get('stream_url'),
|
||||
'ext': 'mp4',
|
||||
'height': video_data.get('resolution'),
|
||||
}, {
|
||||
'url': video_data.get('download_url'),
|
||||
'ext': 'mp4',
|
||||
'format_note': 'Watermarked',
|
||||
'height': video_data.get('resolution'),
|
||||
'preference': -10,
|
||||
}))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': traverse_obj(json_data, ('playback', 'name')),
|
||||
'thumbnail': traverse_obj(json_data, ('playback', 'thumbnail_url')),
|
||||
'formats': formats,
|
||||
'view_count': traverse_obj(json_data, ('playback', 'views')),
|
||||
'like_count': traverse_obj(json_data, ('playback', 'likes')),
|
||||
'duration': traverse_obj(json_data, ('playback', 'duration')),
|
||||
'comment_count': traverse_obj(json_data, ('playback', 'comment_cnt')),
|
||||
'channel_id': traverse_obj(json_data, ('playback', 'channel_id')),
|
||||
'uploader': traverse_obj(json_data, ('user', 'nickname')),
|
||||
'uploader_id': str_or_none(traverse_obj(json_data, ('user', 'uid'))),
|
||||
'modified_timestamp': int_or_none(traverse_obj(json_data, ('playback', 'update_time_ms')), 1000),
|
||||
'timestamp': int_or_none(traverse_obj(json_data, ('playback', 'create_time_ms')), 1000),
|
||||
'__post_extractor': self.extract_comments(video_id, self._get_comments(video_id)),
|
||||
}
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -1,6 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -82,8 +79,6 @@ class BoxIE(InfoExtractor):
|
||||
'url': update_url_query(authenticated_download_url, query),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
creator = f.get('created_by') or {}
|
||||
|
||||
return {
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user