Upgrade yt_dlp and download script

2025-05-02 16:11:08 -05:00
parent 3a2e8eeb08
commit d68d9ce4f9
1194 changed files with 60099 additions and 44436 deletions
--- a/plugins/youtube_download/yt_dlp/extractor/cda.py
+++ b/plugins/youtube_download/yt_dlp/extractor/cda.py
@@ -1,27 +1,29 @@
 import base64
 import codecs
-import datetime
+import datetime as dt
 import hashlib
 import hmac
 import json
 import random
 import re
+import urllib.parse

 from .common import InfoExtractor
-from ..compat import compat_ord, compat_urllib_parse_unquote
+from ..compat import compat_ord
 from ..utils import (
    ExtractorError,
+    OnDemandPagedList,
+    determine_ext,
    float_or_none,
    int_or_none,
    merge_dicts,
    multipart_encode,
    parse_duration,
-    random_birthday,
-    traverse_obj,
    try_call,
-    try_get,
+    url_or_none,
    urljoin,
 )
+from ..utils.traversal import traverse_obj


 class CDAIE(InfoExtractor):
@@ -52,7 +54,7 @@ class CDAIE(InfoExtractor):
            'age_limit': 0,
            'upload_date': '20160221',
            'timestamp': 1456078244,
-        }
+        },
    }, {
        'url': 'http://www.cda.pl/video/57413289',
        'md5': 'a88828770a8310fc00be6c95faf7f4d5',
@@ -63,26 +65,47 @@ class CDAIE(InfoExtractor):
            'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'crash404',
-            'view_count': int,
            'average_rating': float,
            'duration': 137,
            'age_limit': 0,
-        }
+            'upload_date': '20160220',
+            'timestamp': 1455968218,
+        },
    }, {
-        # Age-restricted
-        'url': 'http://www.cda.pl/video/1273454c4',
+        # Age-restricted with vfilm redirection
+        'url': 'https://www.cda.pl/video/8753244c4',
+        'md5': 'd8eeb83d63611289507010d3df3bb8b3',
        'info_dict': {
-            'id': '1273454c4',
+            'id': '8753244c4',
            'ext': 'mp4',
-            'title': 'Bronson (2008) napisy HD 1080p',
-            'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
+            'title': '[18+] Bez Filtra: Rezerwowe Psy czyli...  najwulgarniejsza polska gra?',
+            'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e',
            'height': 1080,
-            'uploader': 'boniek61',
+            'uploader': 'arhn eu',
            'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 5554,
+            'duration': 991,
            'age_limit': 18,
-            'view_count': int,
            'average_rating': float,
+            'timestamp': 1633888264,
+            'upload_date': '20211010',
+        },
+    }, {
+        # Age-restricted without vfilm redirection
+        'url': 'https://www.cda.pl/video/17028157b8',
+        'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992',
+        'info_dict': {
+            'id': '17028157b8',
+            'ext': 'mp4',
+            'title': 'STENDUPY MICHAŁ OGIŃSKI',
+            'description': 'md5:5851f3272bfc31f762d616040a1d609a',
+            'height': 480,
+            'uploader': 'oginski',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 18855,
+            'age_limit': 18,
+            'average_rating': float,
+            'timestamp': 1699705901,
+            'upload_date': '20231111',
        },
    }, {
        'url': 'http://ebd.cda.pl/0x0/5749950c',
@@ -90,21 +113,16 @@ class CDAIE(InfoExtractor):
    }]

    def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
-        form_data = random_birthday('rok', 'miesiac', 'dzien')
-        form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
-        data, content_type = multipart_encode(form_data)
+        data, content_type = multipart_encode({'age_confirm': ''})
        return self._download_webpage(
-            urljoin(url, '/a/validatebirth'), video_id, *args,
+            url, video_id, *args,
            data=data, headers={
                'Referer': url,
                'Content-Type': content_type,
            }, **kwargs)

    def _perform_login(self, username, password):
-        app_version = random.choice((
-            '1.2.88 build 15306',
-            '1.2.174 build 18469',
-        ))
+        app_version = '1.2.255 build 21541'
        android_version = random.randrange(8, 14)
        phone_model = random.choice((
            # x-kom.pl top selling Android smartphones, as of 2022-12-26
@@ -134,7 +152,7 @@ class CDAIE(InfoExtractor):
        self._API_HEADERS['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'

        cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {}
-        if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5:
+        if cached_bearer.get('valid_until', 0) > dt.datetime.now().timestamp() + 5:
            self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}'
            return

@@ -154,7 +172,7 @@ class CDAIE(InfoExtractor):
            })
        self.cache.store(self._BEARER_CACHE, username, {
            'token': token_res['access_token'],
-            'valid_until': token_res['expires_in'] + datetime.datetime.now().timestamp(),
+            'valid_until': token_res['expires_in'] + dt.datetime.now().timestamp(),
        })
        self._API_HEADERS['Authorization'] = f'Bearer {token_res["access_token"]}'

@@ -164,13 +182,13 @@ class CDAIE(InfoExtractor):
        if 'Authorization' in self._API_HEADERS:
            return self._api_extract(video_id)
        else:
-            return self._web_extract(video_id, url)
+            return self._web_extract(video_id)

    def _api_extract(self, video_id):
        meta = self._download_json(
            f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video']

-        uploader = traverse_obj(meta, 'author', 'login')
+        uploader = traverse_obj(meta, ('author', 'login', {str}))

        formats = [{
            'url': quality['file'],
@@ -197,9 +215,9 @@ class CDAIE(InfoExtractor):
            'view_count': meta.get('views'),
        }

-    def _web_extract(self, video_id, url):
+    def _web_extract(self, video_id):
        self._set_cookie('cda.pl', 'cda.player', 'html5')
-        webpage = self._download_webpage(
+        webpage, urlh = self._download_webpage_handle(
            f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)

        if 'Ten film jest dostępny dla użytkowników premium' in webpage:
@@ -209,10 +227,10 @@ class CDAIE(InfoExtractor):
            self.raise_geo_restricted()

        need_confirm_age = False
-        if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
+        if self._html_search_regex(r'(<button[^>]+name="[^"]*age_confirm[^"]*")',
                                   webpage, 'birthday validate form', default=None):
            webpage = self._download_age_confirm_page(
-                url, video_id, note='Confirming age')
+                urlh.url, video_id, note='Confirming age')
            need_confirm_age = True

        formats = []
@@ -222,9 +240,6 @@ class CDAIE(InfoExtractor):
            (?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
            <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
        ''', webpage, 'uploader', default=None, group='uploader')
-        view_count = self._search_regex(
-            r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
-            'view_count', default=None)
        average_rating = self._search_regex(
            (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
             r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
@@ -235,7 +250,6 @@ class CDAIE(InfoExtractor):
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'uploader': uploader,
-            'view_count': int_or_none(view_count),
            'average_rating': float_or_none(average_rating),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
@@ -249,7 +263,7 @@ class CDAIE(InfoExtractor):
        def decrypt_file(a):
            for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
                a = a.replace(p, '')
-            a = compat_urllib_parse_unquote(a)
+            a = urllib.parse.unquote(a)
            b = []
            for c in a:
                f = compat_ord(c)
@@ -266,49 +280,62 @@ class CDAIE(InfoExtractor):
        def extract_format(page, version):
            json_str = self._html_search_regex(
                r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
-                '%s player_json' % version, fatal=False, group='player_data')
+                f'{version} player_json', fatal=False, group='player_data')
            if not json_str:
                return
            player_data = self._parse_json(
-                json_str, '%s player_data' % version, fatal=False)
+                json_str, f'{version} player_data', fatal=False)
            if not player_data:
                return
            video = player_data.get('video')
            if not video or 'file' not in video:
-                self.report_warning('Unable to extract %s version information' % version)
+                self.report_warning(f'Unable to extract {version} version information')
                return
-            if video['file'].startswith('uggc'):
-                video['file'] = codecs.decode(video['file'], 'rot_13')
-                if video['file'].endswith('adc.mp4'):
-                    video['file'] = video['file'].replace('adc.mp4', '.mp4')
-            elif not video['file'].startswith('http'):
-                video['file'] = decrypt_file(video['file'])
            video_quality = video.get('quality')
            qualities = video.get('qualities', {})
            video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality)
-            info_dict['formats'].append({
-                'url': video['file'],
-                'format_id': video_quality,
-                'height': int_or_none(video_quality[:-1]),
-            })
+            if video.get('file'):
+                if video['file'].startswith('uggc'):
+                    video['file'] = codecs.decode(video['file'], 'rot_13')
+                    if video['file'].endswith('adc.mp4'):
+                        video['file'] = video['file'].replace('adc.mp4', '.mp4')
+                elif not video['file'].startswith('http'):
+                    video['file'] = decrypt_file(video['file'])
+                info_dict['formats'].append({
+                    'url': video['file'],
+                    'format_id': video_quality,
+                    'height': int_or_none(video_quality[:-1]),
+                })
            for quality, cda_quality in qualities.items():
                if quality == video_quality:
                    continue
                data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
                        'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]}
-                data = json.dumps(data).encode('utf-8')
-                video_url = self._download_json(
+                data = json.dumps(data).encode()
+                response = self._download_json(
                    f'https://www.cda.pl/video/{video_id}', video_id, headers={
                        'Content-Type': 'application/json',
-                        'X-Requested-With': 'XMLHttpRequest'
+                        'X-Requested-With': 'XMLHttpRequest',
                    }, data=data, note=f'Fetching {quality} url',
                    errnote=f'Failed to fetch {quality} url', fatal=False)
-                if try_get(video_url, lambda x: x['result']['status']) == 'ok':
-                    video_url = try_get(video_url, lambda x: x['result']['resp'])
+                if (
+                    traverse_obj(response, ('result', 'status')) != 'ok'
+                    or not traverse_obj(response, ('result', 'resp', {url_or_none}))
+                ):
+                    continue
+                video_url = response['result']['resp']
+                ext = determine_ext(video_url)
+                if ext == 'mpd':
+                    info_dict['formats'].extend(self._extract_mpd_formats(
+                        video_url, video_id, mpd_id='dash', fatal=False))
+                elif ext == 'm3u8':
+                    info_dict['formats'].extend(self._extract_m3u8_formats(
+                        video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+                else:
                    info_dict['formats'].append({
                        'url': video_url,
                        'format_id': quality,
-                        'height': int_or_none(quality[:-1])
+                        'height': int_or_none(quality[:-1]),
                    })

            if not info_dict['duration']:
@@ -326,13 +353,63 @@ class CDAIE(InfoExtractor):

            webpage = handler(
                urljoin(self._BASE_URL, href), video_id,
-                'Downloading %s version information' % resolution, fatal=False)
+                f'Downloading {resolution} version information', fatal=False)
            if not webpage:
                # Manually report warning because empty page is returned when
                # invalid version is requested.
-                self.report_warning('Unable to download %s version information' % resolution)
+                self.report_warning(f'Unable to download {resolution} version information')
                continue

            extract_format(webpage, resolution)

        return merge_dicts(info_dict, info)
+
+
+class CDAFolderIE(InfoExtractor):
+    _MAX_PAGE_SIZE = 36
+    _VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>[\w-]+)/folder/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'https://www.cda.pl/domino264/folder/31188385',
+            'info_dict': {
+                'id': '31188385',
+                'title': 'SERIA DRUGA',
+            },
+            'playlist_mincount': 13,
+        },
+        {
+            'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm',
+            'info_dict': {
+                'id': '2664592',
+                'title': 'VideoDowcipy - wszystkie odcinki',
+            },
+            'playlist_mincount': 71,
+        },
+        {
+            'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm',
+            'info_dict': {
+                'id': '19129979',
+                'title': 'TESTY KOSMETYKÓW',
+            },
+            'playlist_mincount': 139,
+        }, {
+            'url': 'https://www.cda.pl/FILMY-SERIALE-ANIME-KRESKOWKI-BAJKI/folder/18493422',
+            'only_matching': True,
+        }]
+
+    def _real_extract(self, url):
+        folder_id, channel = self._match_valid_url(url).group('id', 'channel')
+
+        webpage = self._download_webpage(url, folder_id)
+
+        def extract_page_entries(page):
+            webpage = self._download_webpage(
+                f'https://www.cda.pl/{channel}/folder/{folder_id}/vfilm/{page + 1}', folder_id,
+                f'Downloading page {page + 1}', expected_status=404)
+            items = re.findall(r'<a[^>]+href="/video/([0-9a-z]+)"', webpage)
+            for video_id in items:
+                yield self.url_result(f'https://www.cda.pl/video/{video_id}', CDAIE, video_id)
+
+        return self.playlist_result(
+            OnDemandPagedList(extract_page_entries, self._MAX_PAGE_SIZE),
+            folder_id, self._og_search_title(webpage))