Upgrade yt_dlp and download script

This commit is contained in:
2025-05-02 16:11:08 -05:00
parent 3a2e8eeb08
commit d68d9ce4f9
1194 changed files with 60099 additions and 44436 deletions

View File

@@ -3,15 +3,12 @@ import json
import os
import random
import re
import urllib.parse
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import (
dict_get,
ExtractorError,
dict_get,
float_or_none,
int_or_none,
parse_duration,
@@ -27,7 +24,7 @@ from ..utils import (
class PluralsightBaseIE(InfoExtractor):
_API_BASE = 'https://app.pluralsight.com'
_GRAPHQL_EP = '%s/player/api/graphql' % _API_BASE
_GRAPHQL_EP = f'{_API_BASE}/player/api/graphql'
_GRAPHQL_HEADERS = {
'Content-Type': 'application/json;charset=UTF-8',
}
@@ -95,8 +92,8 @@ query BootstrapPlayer {
response = self._download_json(
self._GRAPHQL_EP, display_id, data=json.dumps({
'query': self._GRAPHQL_COURSE_TMPL % course_id,
'variables': {}
}).encode('utf-8'), headers=self._GRAPHQL_HEADERS)
'variables': {},
}).encode(), headers=self._GRAPHQL_HEADERS)
course = try_get(
response, lambda x: x['data']['rpc']['bootstrapPlayer']['course'],
@@ -105,7 +102,7 @@ query BootstrapPlayer {
return course
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, response['error']['message']),
'{} said: {}'.format(self.IE_NAME, response['error']['message']),
expected=True)
@@ -176,7 +173,7 @@ query viewClip {
'post url', default=self._LOGIN_URL, group='url')
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url)
response = self._download_webpage(
post_url, None, 'Logging in',
@@ -187,7 +184,7 @@ query viewClip {
r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>',
response, 'error message', default=None)
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError(f'Unable to login: {error}', expected=True)
if all(not re.search(p, response) for p in (
r'__INITIAL_STATE__', r'["\']currentUser["\']',
@@ -196,13 +193,12 @@ query viewClip {
BLOCKED = 'Your account has been blocked due to suspicious activity'
if BLOCKED in response:
raise ExtractorError(
'Unable to login: %s' % BLOCKED, expected=True)
f'Unable to login: {BLOCKED}', expected=True)
MUST_AGREE = 'To continue using Pluralsight, you must agree to'
if any(p in response for p in (MUST_AGREE, '>Disagree<', '>Agree<')):
raise ExtractorError(
'Unable to login: %s some documents. Go to pluralsight.com, '
'log in and agree with what Pluralsight requires.'
% MUST_AGREE, expected=True)
f'Unable to login: {MUST_AGREE} some documents. Go to pluralsight.com, '
'log in and agree with what Pluralsight requires.', expected=True)
raise ExtractorError('Unable to log in')
@@ -210,8 +206,7 @@ query viewClip {
captions = None
if clip_id:
captions = self._download_json(
'%s/transcript/api/v1/caption/json/%s/%s'
% (self._API_BASE, clip_id, lang), video_id,
f'{self._API_BASE}/transcript/api/v1/caption/json/{clip_id}/{lang}', video_id,
'Downloading captions JSON', 'Unable to download captions JSON',
fatal=False)
if not captions:
@@ -222,9 +217,9 @@ query viewClip {
'm': name,
}
captions = self._download_json(
'%s/player/retrieve-captions' % self._API_BASE, video_id,
f'{self._API_BASE}/player/retrieve-captions', video_id,
'Downloading captions JSON', 'Unable to download captions JSON',
fatal=False, data=json.dumps(captions_post).encode('utf-8'),
fatal=False, data=json.dumps(captions_post).encode(),
headers={'Content-Type': 'application/json;charset=utf-8'})
if captions:
return {
@@ -234,7 +229,7 @@ query viewClip {
}, {
'ext': 'srt',
'data': self._convert_subtitles(duration, captions),
}]
}],
}
@staticmethod
@@ -255,10 +250,8 @@ query viewClip {
continue
srt += os.linesep.join(
(
'%d' % num,
'%s --> %s' % (
srt_subtitles_timecode(start),
srt_subtitles_timecode(end)),
f'{num}',
f'{srt_subtitles_timecode(start)} --> {srt_subtitles_timecode(end)}',
text,
os.linesep,
))
@@ -272,10 +265,10 @@ query viewClip {
clip_idx = qs.get('clip', [None])[0]
course_name = qs.get('course', [None])[0]
if any(not f for f in (author, name, clip_idx, course_name,)):
if any(not f for f in (author, name, clip_idx, course_name)):
raise ExtractorError('Invalid URL', expected=True)
display_id = '%s-%s' % (name, clip_idx)
display_id = f'{name}-{clip_idx}'
course = self._download_course(course_name, url, display_id)
@@ -291,7 +284,7 @@ query viewClip {
clip_index = clip_.get('index')
if clip_index is None:
continue
if compat_str(clip_index) == clip_idx:
if str(clip_index) == clip_idx:
clip = clip_
break
@@ -308,14 +301,14 @@ query viewClip {
'high-widescreen': {'width': 1280, 'height': 720},
}
QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen',)
QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen')
quality_key = qualities(QUALITIES_PREFERENCE)
AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])
ALLOWED_QUALITIES = (
AllowedQuality('webm', ['high', ]),
AllowedQuality('mp4', ['low', 'medium', 'high', ]),
AllowedQuality('webm', ['high']),
AllowedQuality('mp4', ['low', 'medium', 'high']),
)
# Some courses also offer widescreen resolution for high quality (see
@@ -359,23 +352,23 @@ query viewClip {
'mediaType': ext,
'quality': '%dx%d' % (f['width'], f['height']),
}
format_id = '%s-%s' % (ext, quality)
format_id = f'{ext}-{quality}'
try:
viewclip = self._download_json(
self._GRAPHQL_EP, display_id,
'Downloading %s viewclip graphql' % format_id,
f'Downloading {format_id} viewclip graphql',
data=json.dumps({
'query': self.GRAPHQL_VIEWCLIP_TMPL % clip_post,
'variables': {}
}).encode('utf-8'),
'variables': {},
}).encode(),
headers=self._GRAPHQL_HEADERS)['data']['viewClip']
except ExtractorError:
# Still works but most likely will go soon
viewclip = self._download_json(
'%s/video/clips/viewclip' % self._API_BASE, display_id,
'Downloading %s viewclip JSON' % format_id, fatal=False,
data=json.dumps(clip_post).encode('utf-8'),
f'{self._API_BASE}/video/clips/viewclip', display_id,
f'Downloading {format_id} viewclip JSON', fatal=False,
data=json.dumps(clip_post).encode(),
headers={'Content-Type': 'application/json;charset=utf-8'})
# Pluralsight tracks multiple sequential calls to ViewClip API and start
@@ -404,7 +397,7 @@ query viewClip {
clip_f.update({
'url': clip_url,
'ext': ext,
'format_id': '%s-%s' % (format_id, cdn) if cdn else format_id,
'format_id': f'{format_id}-{cdn}' if cdn else format_id,
'quality': quality_key(quality),
'source_preference': int_or_none(clip_url_data.get('rank')),
})
@@ -472,7 +465,7 @@ class PluralsightCourseIE(PluralsightBaseIE):
if clip_index is None:
continue
clip_url = update_url_query(
'%s/player' % self._API_BASE, query={
f'{self._API_BASE}/player', query={
'mode': 'live',
'course': course_name,
'author': author,