Plugin cleanup and tweaks

This commit is contained in:
itdominator 2023-02-20 19:18:45 -06:00
parent 372e4ff3dc
commit 3ad9e1c7bb
1138 changed files with 48878 additions and 40445 deletions

View File

@ -1,8 +1,5 @@
# Python imports
import os
import threading
import subprocess
import inspect
import shlex
# Lib imports
@ -14,28 +11,16 @@ from gi.repository import Gtk
from plugins.plugin_base import PluginBase
# NOTE: Threads WILL NOT die with parent's destruction.
def threaded(fn):
def wrapper(*args, **kwargs):
threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=False).start()
return wrapper
# NOTE: Threads WILL die with parent's destruction.
def daemon_threaded(fn):
def wrapper(*args, **kwargs):
threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=True).start()
return wrapper
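For illustration, the practical difference between the two decorators shows up at interpreter exit (a minimal sketch; the job functions are hypothetical and not part of the plugin code):

import time

@threaded
def extract_archive():       # hypothetical long-running job
    time.sleep(5)
    print("done")            # still runs: a non-daemon thread blocks process exit

@daemon_threaded
def poll_status():           # hypothetical background poll
    while True:
        time.sleep(1)        # killed silently once the main thread exits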
class Plugin(PluginBase):
def __init__(self):
super().__init__()
self.path = os.path.dirname(os.path.realpath(__file__))
self._GLADE_FILE = f"{self.path}/archiver.glade"
self.name = "Archiver" # NOTE: Need to remove after establishing private bidirectional 1-1 message bus
# where self.name should not be needed for message comms
self.path = os.path.dirname(os.path.realpath(__file__))
self._GLADE_FILE = f"{self.path}/archiver.glade"
self._archiver_dialogue = None
self._arc_command_buffer = None
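The recurring NOTE above refers to the current event system, which addresses plugins by their display name. A private bidirectional 1-1 message bus would instead hand each plugin its own channel at load time, making self.name unnecessary for messaging. A minimal sketch of the idea, assuming a hypothetical PluginChannel type that is not part of this codebase:

import queue

class PluginChannel:
    """Hypothetical duplex channel handed to exactly one plugin."""
    def __init__(self):
        self._to_plugin = queue.Queue()
        self._to_host   = queue.Queue()

    def send(self, message):            # plugin -> host
        self._to_host.put(message)

    def receive(self, timeout=None):    # host -> plugin
        return self._to_plugin.get(timeout=timeout)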
@ -67,20 +52,9 @@ class Plugin(PluginBase):
def generate_reference_ui_element(self):
self._builder = Gtk.Builder()
self._builder.add_from_file(self._GLADE_FILE)
classes = [self]
handlers = {}
for c in classes:
methods = None
try:
methods = inspect.getmembers(c, predicate=inspect.ismethod)
handlers.update(methods)
except Exception as e:
print(repr(e))
self._builder.connect_signals(handlers)
self._connect_builder_signals(self, self._builder)
self._archiver_dialogue = self._builder.get_object("archiver_dialogue")
self._arc_command_buffer = self._builder.get_object("arc_command_buffer")
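Each plugin used to carry its own copy of the inspect-based handler loop shown in the removed lines above; this commit replaces it with a single call to self._connect_builder_signals(...). The diff does not show PluginBase, but the helper presumably centralizes exactly the loop that was deleted, along these lines:

import inspect

# Sketch of the assumed PluginBase helper (not shown in this diff):
def _connect_builder_signals(self, caller, builder):
    handlers = {}
    try:
        # Collect every bound method on the caller as a potential signal handler
        handlers.update(inspect.getmembers(caller, predicate=inspect.ismethod))
    except Exception as e:
        print(repr(e))
    builder.connect_signals(handlers)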

View File

@ -2,7 +2,6 @@
import os
import subprocess
import time
import inspect
# Lib imports
import gi
@ -29,20 +28,9 @@ class Plugin(PluginBase):
def run(self):
self._builder = Gtk.Builder()
self._builder.add_from_file(self._GLADE_FILE)
classes = [self]
handlers = {}
for c in classes:
methods = None
try:
methods = inspect.getmembers(c, predicate=inspect.ismethod)
handlers.update(methods)
except Exception as e:
print(repr(e))
self._builder.connect_signals(handlers)
self._connect_builder_signals(self, self._builder)
self._du_dialog = self._builder.get_object("du_dialog")
self._du_store = self._builder.get_object("du_store")

View File

@ -1,6 +1,5 @@
# Python imports
import os
import inspect
import json
# Lib imports
@ -31,20 +30,9 @@ class Plugin(PluginBase):
def run(self):
self._builder = Gtk.Builder()
self._builder.add_from_file(self._GLADE_FILE)
classes = [self]
handlers = {}
for c in classes:
methods = None
try:
methods = inspect.getmembers(c, predicate=inspect.ismethod)
handlers.update(methods)
except Exception as e:
print(repr(e))
self._builder.connect_signals(handlers)
self._connect_builder_signals(self, self._builder)
self._favorites_dialog = self._builder.get_object("favorites_dialog")
self._favorites_store = self._builder.get_object("favorites_store")

View File

@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Generated with glade 3.38.2 -->
<!-- Generated with glade 3.40.0 -->
<interface>
<requires lib="gtk+" version="3.16"/>
<object class="GtkDialog" id="file_properties_dialog">
@ -14,7 +14,6 @@
<property name="skip-taskbar-hint">True</property>
<property name="skip-pager-hint">True</property>
<property name="gravity">center</property>
<signal name="response" handler="on_filePropertiesDlg_response" swapped="no"/>
<child internal-child="vbox">
<object class="GtkBox" id="dialog_vbox">
<property name="visible">True</property>

View File

@ -24,12 +24,6 @@ def threaded(fn):
threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=False).start()
return wrapper
# NOTE: Threads WILL die with parent's destruction.
def daemon_threaded(fn):
def wrapper(*args, **kwargs):
threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=True).start()
return wrapper
@ -51,10 +45,10 @@ class Plugin(PluginBase):
def __init__(self):
super().__init__()
self.path = os.path.dirname(os.path.realpath(__file__))
self._GLADE_FILE = f"{self.path}/file_properties.glade"
self.name = "Properties" # NOTE: Need to remove after establishing private bidirectional 1-1 message bus
# where self.name should not be needed for message comms
self.path = os.path.dirname(os.path.realpath(__file__))
self._GLADE_FILE = f"{self.path}/file_properties.glade"
self._properties_dialog = None
self._file_name = None
@ -91,8 +85,9 @@ class Plugin(PluginBase):
def run(self):
self._builder = Gtk.Builder()
self._builder.add_from_file(self._GLADE_FILE)
self._connect_builder_signals(self, self._builder)
self._properties_dialog = self._builder.get_object("file_properties_dialog")
self._file_name = self._builder.get_object("file_name")

View File

@ -1,5 +1,8 @@
# Python imports
import os, threading, subprocess, time
import os
import threading
import subprocess
import time
# Lib imports
import gi
@ -16,12 +19,6 @@ def threaded(fn):
threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=False).start()
return wrapper
# NOTE: Threads WILL die with parent's destruction.
def daemon_threaded(fn):
def wrapper(*args, **kwargs):
threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=True).start()
return wrapper
@ -29,9 +26,9 @@ class Plugin(PluginBase):
def __init__(self):
super().__init__()
self.path = os.path.dirname(os.path.realpath(__file__))
self.name = "Git Clone" # NOTE: Need to remove after establishing private bidirectional 1-1 message bus
# where self.name should not be needed for message comms
self.path = os.path.dirname(os.path.realpath(__file__))
def generate_reference_ui_element(self):
button = Gtk.Button(label=self.name)

View File

@ -2,7 +2,6 @@
import os
import threading
import subprocess
import inspect
import requests
import shutil
@ -38,9 +37,9 @@ class Plugin(PluginBase):
def __init__(self):
super().__init__()
self.path = os.path.dirname(os.path.realpath(__file__))
self.name = "Movie/TV Info" # NOTE: Need to remove after establishing private bidirectional 1-1 message bus
# where self.name should not be needed for message comms
self.path = os.path.dirname(os.path.realpath(__file__))
self._GLADE_FILE = f"{self.path}/movie_tv_info.glade"
self._dialog = None
@ -53,20 +52,9 @@ class Plugin(PluginBase):
def run(self):
self._builder = Gtk.Builder()
self._builder.add_from_file(self._GLADE_FILE)
classes = [self]
handlers = {}
for c in classes:
methods = None
try:
methods = inspect.getmembers(c, predicate=inspect.ismethod)
handlers.update(methods)
except Exception as e:
print(repr(e))
self._builder.connect_signals(handlers)
self._connect_builder_signals(self, self._builder)
self._thumbnailer_dialog = self._builder.get_object("info_dialog")
self._overview = self._builder.get_object("textbuffer")

View File

@ -1,7 +1,5 @@
# Python imports
import os
import threading
import inspect
import time
# Lib imports
@ -18,28 +16,14 @@ from .utils.ipc_server import IPCServer
# NOTE: Threads WILL NOT die with parent's destruction.
def threaded(fn):
def wrapper(*args, **kwargs):
threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=False).start()
return wrapper
# NOTE: Threads WILL die with parent's destruction.
def daemon_threaded(fn):
def wrapper(*args, **kwargs):
threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=True).start()
return wrapper
class Plugin(IPCServer, FileSearchMixin, GrepSearchMixin, PluginBase):
def __init__(self):
super().__init__()
self.path = os.path.dirname(os.path.realpath(__file__))
self.name = "Search" # NOTE: Need to remove after establishing private bidirectional 1-1 message bus
# where self.name should not be needed for message comms
self.path = os.path.dirname(os.path.realpath(__file__))
self._GLADE_FILE = f"{self.path}/search_dialog.glade"
self.update_list_ui_buffer = ()
@ -59,20 +43,9 @@ class Plugin(IPCServer, FileSearchMixin, GrepSearchMixin, PluginBase):
def run(self):
self._builder = Gtk.Builder()
self._builder.add_from_file(self._GLADE_FILE)
classes = [self]
handlers = {}
for c in classes:
methods = None
try:
methods = inspect.getmembers(c, predicate=inspect.ismethod)
handlers.update(methods)
except Exception as e:
print(repr(e))
self._builder.connect_signals(handlers)
self._connect_builder_signals(self, self._builder)
self._search_dialog = self._builder.get_object("search_dialog")
self.fsearch = self._builder.get_object("fsearch")

View File

@ -2,7 +2,6 @@
import os
import threading
import subprocess
import time
# Lib imports
import gi
@ -32,8 +31,10 @@ class Plugin(PluginBase):
def __init__(self):
super().__init__()
self.name = "Example Plugin" # NOTE: Need to remove after establishing private bidirectional 1-1 message bus
# where self.name should not be needed for message comms
# self.path = os.path.dirname(os.path.realpath(__file__))
# self._GLADE_FILE = f"{self.path}/glade_file.glade"
def generate_reference_ui_element(self):
@ -42,6 +43,9 @@ class Plugin(PluginBase):
return button
def run(self):
# self._builder = Gtk.Builder()
# self._builder.add_from_file(self._GLADE_FILE)
# self._connect_builder_signals(self, self._builder)
...
def send_message(self, widget=None, eve=None):

View File

@ -0,0 +1,3 @@
"""
Plugin Module
"""

View File

@ -0,0 +1,3 @@
"""
Plugin Package
"""

View File

@ -0,0 +1,12 @@
{
"manifest": {
"name": "Translate",
"author": "ITDominator",
"version": "0.0.1",
"support": "",
"requests": {
"ui_target": "plugin_control_list",
"pass_fm_events": "true"
}
}
}
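The manifest declares where the plugin's launcher is placed (ui_target) and whether it receives file-manager events; note that pass_fm_events is the string "true", not a JSON boolean. A sketch of how a loader might consume it (function and variable names are illustrative, not the actual loader API):

import json

def read_manifest(path):
    with open(path, encoding="utf-8") as f:
        manifest = json.load(f)["manifest"]

    requests = manifest.get("requests", {})
    ui_target    = requests.get("ui_target")                  # e.g. "plugin_control_list"
    wants_events = requests.get("pass_fm_events") == "true"   # string, not bool
    return manifest["name"], ui_target, wants_events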

plugins/translate/plugin.py (new file, 134 lines)
View File

@ -0,0 +1,134 @@
# Python imports
import os
import time
import threading
import requests
# Lib imports
import gi
gi.require_version('Gtk', '3.0')
from gi.repository import Gtk
from gi.repository import GLib
# Application imports
from plugins.plugin_base import PluginBase
# NOTE: Threads WILL NOT die with parent's destruction.
def threaded(fn):
def wrapper(*args, **kwargs):
threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=False).start()
return wrapper
# NOTE: Threads WILL die with parent's destruction.
def daemon_threaded(fn):
def wrapper(*args, **kwargs):
threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=True).start()
return wrapper
class Plugin(PluginBase):
def __init__(self):
super().__init__()
self.path = os.path.dirname(os.path.realpath(__file__))
self.name = "Translate" # NOTE: Need to remove after establishing private bidirectional 1-1 message bus
# where self.name should not be needed for message comms
self._GLADE_FILE = f"{self.path}/translate.glade"
self._link = "https://duckduckgo.com/translation.js?vqd=4-79469202070473384659389009732578528471&query=translate&to=en"
self._headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0',
'Accept': '*/*',
'Accept-Language': 'en-US,en;q=0.5',
'Accept-Encoding': 'gzip, deflate, br',
'Referer': 'https://duckduckgo.com/',
'Content-Type': 'text/plain',
'X-Requested-With': 'XMLHttpRequest',
'Origin': 'https://duckduckgo.com',
'DNT': '1',
'Connection': 'keep-alive',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'Pragma': 'no-cache',
'Cache-Control': 'no-cache'
}
self._queue_translate = False
self._watcher_running = False
def generate_reference_ui_element(self):
button = Gtk.Button(label=self.name)
button.connect("button-release-event", self._show_translate_page)
return button
def run(self):
self._builder = Gtk.Builder()
self._builder.add_from_file(self._GLADE_FILE)
self._connect_builder_signals(self, self._builder)
self._translate_dialog = self._builder.get_object("translate_dialog")
self._translate_from = self._builder.get_object("translate_from")
self._translate_to = self._builder.get_object("translate_to")
self._translate_from_buffer = self._builder.get_object("translate_from_buffer")
self._translate_to_buffer = self._builder.get_object("translate_to_buffer")
self._detected_language_lbl = self._builder.get_object("detected_language_lbl")
@threaded
def _show_translate_page(self, widget=None, eve=None):
event_system.emit("get_current_state")
state = self._fm_state
self._event_message = None
GLib.idle_add(self._show_ui, (state))
def _show_ui(self, state):
if state.uris and len(state.uris) == 1:
file_name = state.uris[0].split("/")[-1]
self._translate_from_buffer.set_text(file_name)
response = self._translate_dialog.run()
if response in [Gtk.ResponseType.CLOSE, Gtk.ResponseType.CANCEL, Gtk.ResponseType.DELETE_EVENT]:
self._translate_dialog.hide()
self._translate_dialog.hide()
def _pre_translate(self, widget=None, eve=None):
self._queue_translate = True
if not self._watcher_running:
self._watcher_running = True
self.run_translate_watcher()
@daemon_threaded
def run_translate_watcher(self):
while True:
if self._queue_translate:
self._queue_translate = False
time.sleep(1)
# NOTE: Hold call to translate if we're still typing/updating...
if self._queue_translate:
continue
GLib.idle_add(self._translate)
self._watcher_running = False
break
def _translate(self):
start_itr, end_itr = self._translate_from_buffer.get_bounds()
from_translate = self._translate_from_buffer.get_text(start_itr, end_itr, True).encode('utf-8')
if from_translate in ("", None) or self._queue_translate:
return
response = requests.post(self._link, headers=self._headers, data=from_translate)
if response.status_code == 200:
data = response.json()
self._translate_to_buffer.set_text(data["translated"])
if "detected_language" in data.keys():
self._detected_language_lbl.set_label(f"Detected Language: {data['detected_language']}")
else:
msg = f"Could not translate... Response Code: {response.status_code}"
self._translate_to_buffer.set_text(msg)
self._detected_language_lbl.set_label("Detected Language:")
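One caveat: the hard-coded vqd value in self._link is almost certainly session-bound, so the request will eventually start failing. A hedged sketch of refreshing it, assuming DuckDuckGo embeds the token in its search page (the regex and flow are illustrative and may break without notice):

import re
import requests

def fresh_vqd():
    page = requests.get("https://duckduckgo.com/", params={"q": "translate"}).text
    match = re.search(r"vqd=([\d-]+)", page)
    return match.group(1) if match else None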

View File

@ -0,0 +1,210 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Generated with glade 3.40.0 -->
<interface>
<requires lib="gtk+" version="3.22"/>
<object class="GtkTextBuffer" id="translate_from_buffer">
<signal name="changed" handler="_pre_translate" swapped="no"/>
</object>
<object class="GtkTextBuffer" id="translate_to_buffer"/>
<object class="GtkDialog" id="translate_dialog">
<property name="can-focus">False</property>
<property name="border-width">6</property>
<property name="title" translatable="yes">Translate</property>
<property name="resizable">False</property>
<property name="modal">True</property>
<property name="window-position">center-on-parent</property>
<property name="default-width">620</property>
<property name="default-height">320</property>
<property name="destroy-with-parent">True</property>
<property name="type-hint">dialog</property>
<property name="skip-taskbar-hint">True</property>
<property name="skip-pager-hint">True</property>
<property name="deletable">False</property>
<property name="gravity">center</property>
<child internal-child="vbox">
<object class="GtkBox" id="dialog_vbox">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="spacing">12</property>
<child internal-child="action_area">
<object class="GtkButtonBox" id="dialog_action_area">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="layout-style">end</property>
<child>
<object class="GtkButton" id="cancel_button">
<property name="label">gtk-cancel</property>
<property name="visible">True</property>
<property name="can-focus">True</property>
<property name="can-default">True</property>
<property name="receives-default">False</property>
<property name="use-stock">True</property>
</object>
<packing>
<property name="expand">True</property>
<property name="fill">True</property>
<property name="position">0</property>
</packing>
</child>
<child>
<object class="GtkButton" id="ok_button">
<property name="label">gtk-close</property>
<property name="visible">True</property>
<property name="can-focus">True</property>
<property name="can-default">True</property>
<property name="receives-default">False</property>
<property name="use-stock">True</property>
</object>
<packing>
<property name="expand">True</property>
<property name="fill">True</property>
<property name="position">1</property>
</packing>
</child>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">False</property>
<property name="pack-type">end</property>
<property name="position">0</property>
</packing>
</child>
<child>
<object class="GtkBox">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="orientation">vertical</property>
<child>
<object class="GtkLabel" id="detected_language_lbl">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="label" translatable="yes">Detected Language:</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">0</property>
</packing>
</child>
<child>
<object class="GtkBox">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="spacing">15</property>
<property name="homogeneous">True</property>
<child>
<object class="GtkBox">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="orientation">vertical</property>
<child>
<object class="GtkLabel">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="label" translatable="yes">From:</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">0</property>
</packing>
</child>
<child>
<object class="GtkScrolledWindow">
<property name="visible">True</property>
<property name="can-focus">True</property>
<property name="hscrollbar-policy">never</property>
<property name="shadow-type">in</property>
<child>
<object class="GtkTextView" id="translate_from">
<property name="visible">True</property>
<property name="can-focus">True</property>
<property name="wrap-mode">word-char</property>
<property name="buffer">translate_from_buffer</property>
<property name="monospace">True</property>
</object>
</child>
</object>
<packing>
<property name="expand">True</property>
<property name="fill">True</property>
<property name="position">1</property>
</packing>
</child>
</object>
<packing>
<property name="expand">True</property>
<property name="fill">True</property>
<property name="position">0</property>
</packing>
</child>
<child>
<object class="GtkBox">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="orientation">vertical</property>
<child>
<object class="GtkLabel">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="label" translatable="yes">To:</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">0</property>
</packing>
</child>
<child>
<object class="GtkScrolledWindow">
<property name="visible">True</property>
<property name="can-focus">True</property>
<property name="hscrollbar-policy">never</property>
<property name="shadow-type">in</property>
<child>
<object class="GtkTextView" id="translate_to">
<property name="visible">True</property>
<property name="can-focus">True</property>
<property name="editable">False</property>
<property name="wrap-mode">word-char</property>
<property name="cursor-visible">False</property>
<property name="buffer">translate_to_buffer</property>
<property name="monospace">True</property>
</object>
</child>
</object>
<packing>
<property name="expand">True</property>
<property name="fill">True</property>
<property name="position">1</property>
</packing>
</child>
</object>
<packing>
<property name="expand">True</property>
<property name="fill">True</property>
<property name="position">1</property>
</packing>
</child>
</object>
<packing>
<property name="expand">True</property>
<property name="fill">True</property>
<property name="position">1</property>
</packing>
</child>
</object>
<packing>
<property name="expand">True</property>
<property name="fill">True</property>
<property name="position">1</property>
</packing>
</child>
</object>
</child>
<action-widgets>
<action-widget response="-6">cancel_button</action-widget>
<action-widget response="-7">ok_button</action-widget>
</action-widgets>
</object>
</interface>

View File

@ -1,8 +1,5 @@
# Python imports
import os
import threading
import subprocess
import inspect
# Lib imports
import gi
@ -16,19 +13,6 @@ from plugins.plugin_base import PluginBase
from .xdgtrash import XDGTrash
# NOTE: Threads WILL NOT die with parent's destruction.
def threaded(fn):
def wrapper(*args, **kwargs):
threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=False).start()
return wrapper
# NOTE: Threads WILL die with parent's destruction.
def daemon_threaded(fn):
def wrapper(*args, **kwargs):
threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=True).start()
return wrapper
class Plugin(PluginBase):

View File

@ -3,7 +3,6 @@ import os
import threading
import subprocess
import time
import inspect
import hashlib
from datetime import datetime
@ -26,12 +25,6 @@ def threaded(fn):
threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=False).start()
return wrapper
# NOTE: Threads WILL die with parent's destruction.
def daemon_threaded(fn):
def wrapper(*args, **kwargs):
threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=True).start()
return wrapper
@ -53,20 +46,9 @@ class Plugin(PluginBase):
def run(self):
self._builder = Gtk.Builder()
self._builder.add_from_file(self._GLADE_FILE)
classes = [self]
handlers = {}
for c in classes:
methods = None
try:
methods = inspect.getmembers(c, predicate=inspect.ismethod)
handlers.update(methods)
except Exception as e:
print(repr(e))
self._builder.connect_signals(handlers)
self._connect_builder_signals(self, self._builder)
self._thumbnailer_dialog = self._builder.get_object("thumbnailer_dialog")
self._scrub_step = self._builder.get_object("scrub_step")

View File

@ -1,5 +1,8 @@
# Python imports
import os, threading, subprocess, time
import os
import threading
import subprocess
import time
# Lib imports
import gi
@ -16,12 +19,6 @@ def threaded(fn):
threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=False).start()
return wrapper
# NOTE: Threads WILL die with parent's destruction.
def daemon_threaded(fn):
def wrapper(*args, **kwargs):
threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=True).start()
return wrapper

View File

@ -1,24 +0,0 @@
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,6 +0,0 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.37.1)
Root-Is-Purelib: true
Tag: py2-none-any
Tag: py3-none-any

View File

@ -1,3 +0,0 @@
[console_scripts]
yt-dlp = yt_dlp:main

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,13 +1,11 @@
#!/usr/bin/env python3
from __future__ import unicode_literals
# Execute with
# $ python yt_dlp/__main__.py (2.6+)
# $ python -m yt_dlp (2.7+)
# $ python -m yt_dlp
import sys
if __package__ is None and not hasattr(sys, 'frozen'):
if __package__ is None and not getattr(sys, 'frozen', False):
# direct call of __main__.py
import os.path
path = os.path.realpath(os.path.abspath(__file__))

View File

@ -0,0 +1,5 @@
import os
def get_hook_dirs():
return [os.path.dirname(__file__)]

View File

@ -0,0 +1,57 @@
import ast
import os
import sys
from pathlib import Path
from PyInstaller.utils.hooks import collect_submodules
def find_attribute_accesses(node, name, path=()):
if isinstance(node, ast.Attribute):
path = [*path, node.attr]
if isinstance(node.value, ast.Name) and node.value.id == name:
yield path[::-1]
for child in ast.iter_child_nodes(node):
yield from find_attribute_accesses(child, name, path)
def collect_used_submodules(name, level):
for dirpath, _, filenames in os.walk(Path(__file__).parent.parent):
for filename in filenames:
if not filename.endswith('.py'):
continue
with open(Path(dirpath) / filename, encoding='utf8') as f:
for submodule in find_attribute_accesses(ast.parse(f.read()), name):
yield '.'.join(submodule[:level])
def pycryptodome_module():
try:
import Cryptodome # noqa: F401
except ImportError:
try:
import Crypto # noqa: F401
print('WARNING: Using Crypto since Cryptodome is not available. '
'Install with: pip install pycryptodomex', file=sys.stderr)
return 'Crypto'
except ImportError:
pass
return 'Cryptodome'
def get_hidden_imports():
yield 'yt_dlp.compat._legacy'
yield from collect_submodules('websockets')
crypto = pycryptodome_module()
for sm in set(collect_used_submodules('Cryptodome', 2)):
yield f'{crypto}.{sm}'
# These are auto-detected, but explicitly add them just in case
yield from ('mutagen', 'brotli', 'certifi')
hiddenimports = list(get_hidden_imports())
print(f'Adding imports: {hiddenimports}')
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts']

View File

@ -1,26 +1,18 @@
from __future__ import unicode_literals
import base64
from math import ceil
from .compat import (
compat_b64decode,
compat_ord,
compat_pycrypto_AES,
)
from .utils import (
bytes_to_intlist,
intlist_to_bytes,
)
from .compat import compat_ord
from .dependencies import Cryptodome
from .utils import bytes_to_intlist, intlist_to_bytes
if compat_pycrypto_AES:
if Cryptodome:
def aes_cbc_decrypt_bytes(data, key, iv):
""" Decrypt bytes with AES-CBC using pycryptodome """
return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_CBC, iv).decrypt(data)
return Cryptodome.Cipher.AES.new(key, Cryptodome.Cipher.AES.MODE_CBC, iv).decrypt(data)
def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
""" Decrypt bytes with AES-GCM using pycryptodome """
return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_GCM, nonce).decrypt_and_verify(data, tag)
return Cryptodome.Cipher.AES.new(key, Cryptodome.Cipher.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag)
else:
def aes_cbc_decrypt_bytes(data, key, iv):
@ -32,16 +24,59 @@ else:
return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce))))
def unpad_pkcs7(data):
return data[:-compat_ord(data[-1])]
def aes_cbc_encrypt_bytes(data, key, iv, **kwargs):
return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs))
BLOCK_SIZE_BYTES = 16
def unpad_pkcs7(data):
return data[:-compat_ord(data[-1])]
def pkcs7_padding(data):
"""
PKCS#7 padding
@param {int[]} data cleartext
@returns {int[]} padding data
"""
remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES
return data + [remaining_length] * remaining_length
def pad_block(block, padding_mode):
"""
Pad a block with the given padding mode
@param {int[]} block block to pad
@param padding_mode padding mode
"""
padding_size = BLOCK_SIZE_BYTES - len(block)
PADDING_BYTE = {
'pkcs7': padding_size,
'iso7816': 0x0,
'whitespace': 0x20,
'zero': 0x0,
}
if padding_size < 0:
raise ValueError('Block size exceeded')
elif padding_mode not in PADDING_BYTE:
raise NotImplementedError(f'Padding mode {padding_mode} is not implemented')
if padding_mode == 'iso7816' and padding_size:
block = block + [0x80] # NB: += mutates list
padding_size -= 1
return block + [PADDING_BYTE[padding_mode]] * padding_size
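Worked examples of pad_block on a 12-byte block (BLOCK_SIZE_BYTES = 16, so padding_size = 4):

# pad_block([0x01] * 12, 'pkcs7')    -> [0x01]*12 + [0x04, 0x04, 0x04, 0x04]
# pad_block([0x01] * 12, 'iso7816')  -> [0x01]*12 + [0x80, 0x00, 0x00, 0x00]
# pad_block([0x01] * 12, 'zero')     -> [0x01]*12 + [0x00, 0x00, 0x00, 0x00]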
def aes_ecb_encrypt(data, key, iv=None):
"""
Encrypt with aes in ECB mode
Encrypt with aes in ECB mode. Using PKCS#7 padding
@param {int[]} data cleartext
@param {int[]} key 16/24/32-Byte cipher key
@ -54,8 +89,7 @@ def aes_ecb_encrypt(data, key, iv=None):
encrypted_data = []
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
encrypted_data += aes_encrypt(block, expanded_key)
encrypted_data = encrypted_data[:len(data)]
encrypted_data += aes_encrypt(pkcs7_padding(block), expanded_key)
return encrypted_data
@ -145,13 +179,14 @@ def aes_cbc_decrypt(data, key, iv):
return decrypted_data
def aes_cbc_encrypt(data, key, iv):
def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'):
"""
Encrypt with aes in CBC mode. Using PKCS#7 padding
Encrypt with aes in CBC mode
@param {int[]} data cleartext
@param {int[]} key 16/24/32-Byte cipher key
@param {int[]} iv 16-Byte IV
@param padding_mode Padding mode to use
@returns {int[]} encrypted data
"""
expanded_key = key_expansion(key)
@ -161,8 +196,8 @@ def aes_cbc_encrypt(data, key, iv):
previous_cipher_block = iv
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
remaining_length = BLOCK_SIZE_BYTES - len(block)
block += [remaining_length] * remaining_length
block = pad_block(block, padding_mode)
mixed_block = xor(block, previous_cipher_block)
encrypted_block = aes_encrypt(mixed_block, expanded_key)
@ -273,8 +308,8 @@ def aes_decrypt_text(data, password, key_size_bytes):
"""
NONCE_LENGTH_BYTES = 8
data = bytes_to_intlist(compat_b64decode(data))
password = bytes_to_intlist(password.encode('utf-8'))
data = bytes_to_intlist(base64.b64decode(data))
password = bytes_to_intlist(password.encode())
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
@ -503,20 +538,30 @@ def ghash(subkey, data):
last_y = [0] * BLOCK_SIZE_BYTES
for i in range(0, len(data), BLOCK_SIZE_BYTES):
block = data[i : i + BLOCK_SIZE_BYTES] # noqa: E203
block = data[i: i + BLOCK_SIZE_BYTES]
last_y = block_product(xor(last_y, block), subkey)
return last_y
__all__ = [
'aes_ctr_decrypt',
'aes_cbc_decrypt',
'aes_cbc_decrypt_bytes',
'aes_ctr_decrypt',
'aes_decrypt_text',
'aes_encrypt',
'aes_decrypt',
'aes_ecb_decrypt',
'aes_gcm_decrypt_and_verify',
'aes_gcm_decrypt_and_verify_bytes',
'aes_cbc_encrypt',
'aes_cbc_encrypt_bytes',
'aes_ctr_encrypt',
'aes_ecb_encrypt',
'aes_encrypt',
'key_expansion',
'pad_block',
'pkcs7_padding',
'unpad_pkcs7',
]

View File

@ -1,37 +1,31 @@
from __future__ import unicode_literals
import contextlib
import errno
import io
import json
import os
import re
import shutil
import traceback
import urllib.parse
from .compat import compat_getenv
from .utils import (
expand_path,
write_json_file,
)
from .utils import expand_path, traverse_obj, version_tuple, write_json_file
from .version import __version__
class Cache(object):
class Cache:
def __init__(self, ydl):
self._ydl = ydl
def _get_root_dir(self):
res = self._ydl.params.get('cachedir')
if res is None:
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
cache_root = os.getenv('XDG_CACHE_HOME', '~/.cache')
res = os.path.join(cache_root, 'yt-dlp')
return expand_path(res)
def _get_cache_fn(self, section, key, dtype):
assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
'invalid section %r' % section
assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
return os.path.join(
self._get_root_dir(), section, '%s.%s' % (key, dtype))
assert re.match(r'^[\w.-]+$', section), f'invalid section {section!r}'
key = urllib.parse.quote(key, safe='').replace('%', ',') # encode non-ascii characters
return os.path.join(self._get_root_dir(), section, f'{key}.{dtype}')
@property
def enabled(self):
@ -51,33 +45,37 @@ class Cache(object):
if ose.errno != errno.EEXIST:
raise
self._ydl.write_debug(f'Saving {section}.{key} to cache')
write_json_file(data, fn)
write_json_file({'yt-dlp_version': __version__, 'data': data}, fn)
except Exception:
tb = traceback.format_exc()
self._ydl.report_warning(
'Writing cache to %r failed: %s' % (fn, tb))
self._ydl.report_warning(f'Writing cache to {fn!r} failed: {tb}')
def load(self, section, key, dtype='json', default=None):
def _validate(self, data, min_ver):
version = traverse_obj(data, 'yt-dlp_version')
if not version: # Backward compatibility
data, version = {'data': data}, '2022.08.19'
if not min_ver or version_tuple(version) >= version_tuple(min_ver):
return data['data']
self._ydl.write_debug(f'Discarding old cache from version {version} (needs {min_ver})')
def load(self, section, key, dtype='json', default=None, *, min_ver=None):
assert dtype in ('json',)
if not self.enabled:
return default
cache_fn = self._get_cache_fn(section, key, dtype)
try:
with contextlib.suppress(OSError):
try:
with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
with open(cache_fn, encoding='utf-8') as cachef:
self._ydl.write_debug(f'Loading {section}.{key} from cache')
return json.load(cachef)
except ValueError:
return self._validate(json.load(cachef), min_ver)
except (ValueError, KeyError):
try:
file_size = os.path.getsize(cache_fn)
except (OSError, IOError) as oe:
except OSError as oe:
file_size = str(oe)
self._ydl.report_warning(
'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
except IOError:
pass # No cache available
self._ydl.report_warning(f'Cache retrieval from {cache_fn} failed ({file_size})')
return default
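A sketch of the versioned round-trip this hunk introduces (section and key names are illustrative, and the saving method is assumed to be Cache.store, whose body is shown above):

# Saving now wraps the payload: {'yt-dlp_version': __version__, 'data': payload}
cache.store('youtube-sigfuncs', 'js_abc123', payload)

# Loading can demand a minimum writer version; pre-wrapper entries are treated
# as version 2022.08.19, and anything older than min_ver is discarded.
payload = cache.load('youtube-sigfuncs', 'js_abc123', min_ver='2022.08.19')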

View File

@ -1,311 +0,0 @@
# coding: utf-8
import asyncio
import base64
import collections
import ctypes
import getpass
import html
import html.parser
import http
import http.client
import http.cookiejar
import http.cookies
import http.server
import itertools
import optparse
import os
import re
import shlex
import shutil
import socket
import struct
import subprocess
import sys
import tokenize
import urllib
import xml.etree.ElementTree as etree
from subprocess import DEVNULL
# HTMLParseError has been deprecated in Python 3.3 and removed in
# Python 3.5. Introducing a dummy exception for Python >3.5 for compatibility
# and uniform cross-version exception handling
class compat_HTMLParseError(Exception):
pass
# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE
# will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
return ctypes.WINFUNCTYPE(*args, **kwargs)
class _TreeBuilder(etree.TreeBuilder):
def doctype(self, name, pubid, system):
pass
def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
compat_os_name = os._name if os.name == 'java' else os.name
if compat_os_name == 'nt':
def compat_shlex_quote(s):
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
else:
from shlex import quote as compat_shlex_quote
def compat_ord(c):
if type(c) is int:
return c
else:
return ord(c)
def compat_setenv(key, value, env=os.environ):
env[key] = value
if compat_os_name == 'nt' and sys.version_info < (3, 8):
# os.path.realpath on Windows does not follow symbolic links
# prior to Python 3.8 (see https://bugs.python.org/issue9949)
def compat_realpath(path):
while os.path.islink(path):
path = os.path.abspath(os.readlink(path))
return path
else:
compat_realpath = os.path.realpath
def compat_print(s):
assert isinstance(s, compat_str)
print(s)
# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
op = optparse.OptionParser()
og = optparse.OptionGroup(op, 'foo')
try:
og.add_option('-t')
except TypeError:
real_add_option = optparse.OptionGroup.add_option
def _compat_add_option(self, *args, **kwargs):
enc = lambda v: (
v.encode('ascii', 'replace') if isinstance(v, compat_str)
else v)
bargs = [enc(a) for a in args]
bkwargs = dict(
(k, enc(v)) for k, v in kwargs.items())
return real_add_option(self, *bargs, **bkwargs)
optparse.OptionGroup.add_option = _compat_add_option
try:
compat_Pattern = re.Pattern
except AttributeError:
compat_Pattern = type(re.compile(''))
try:
compat_Match = re.Match
except AttributeError:
compat_Match = type(re.compile('').match(''))
try:
compat_asyncio_run = asyncio.run # >= 3.7
except AttributeError:
def compat_asyncio_run(coro):
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
loop.run_until_complete(coro)
asyncio.run = compat_asyncio_run
# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl
# See https://github.com/yt-dlp/yt-dlp/issues/792
# https://docs.python.org/3/library/os.path.html#os.path.expanduser
if compat_os_name in ('nt', 'ce') and 'HOME' in os.environ:
_userhome = os.environ['HOME']
def compat_expanduser(path):
if not path.startswith('~'):
return path
i = path.replace('\\', '/', 1).find('/') # ~user
if i < 0:
i = len(path)
userhome = os.path.join(os.path.dirname(_userhome), path[1:i]) if i > 1 else _userhome
return userhome + path[i:]
else:
compat_expanduser = os.path.expanduser
try:
from Cryptodome.Cipher import AES as compat_pycrypto_AES
except ImportError:
try:
from Crypto.Cipher import AES as compat_pycrypto_AES
except ImportError:
compat_pycrypto_AES = None
WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075
if compat_os_name != 'nt':
return
global WINDOWS_VT_MODE
startupinfo = subprocess.STARTUPINFO()
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
try:
subprocess.Popen('', shell=True, startupinfo=startupinfo)
WINDOWS_VT_MODE = True
except Exception:
pass
# Deprecated
compat_basestring = str
compat_chr = chr
compat_filter = filter
compat_input = input
compat_integer_types = (int, )
compat_kwargs = lambda kwargs: kwargs
compat_map = map
compat_numeric_types = (int, float, complex)
compat_str = str
compat_xpath = lambda xpath: xpath
compat_zip = zip
compat_collections_abc = collections.abc
compat_HTMLParser = html.parser.HTMLParser
compat_HTTPError = urllib.error.HTTPError
compat_Struct = struct.Struct
compat_b64decode = base64.b64decode
compat_cookiejar = http.cookiejar
compat_cookiejar_Cookie = compat_cookiejar.Cookie
compat_cookies = http.cookies
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
compat_etree_Element = etree.Element
compat_etree_register_namespace = etree.register_namespace
compat_get_terminal_size = shutil.get_terminal_size
compat_getenv = os.getenv
compat_getpass = getpass.getpass
compat_html_entities = html.entities
compat_html_entities_html5 = compat_html_entities.html5
compat_http_client = http.client
compat_http_server = http.server
compat_itertools_count = itertools.count
compat_parse_qs = urllib.parse.parse_qs
compat_shlex_split = shlex.split
compat_socket_create_connection = socket.create_connection
compat_struct_pack = struct.pack
compat_struct_unpack = struct.unpack
compat_subprocess_get_DEVNULL = lambda: DEVNULL
compat_tokenize_tokenize = tokenize.tokenize
compat_urllib_error = urllib.error
compat_urllib_parse = urllib.parse
compat_urllib_parse_quote = urllib.parse.quote
compat_urllib_parse_quote_plus = urllib.parse.quote_plus
compat_urllib_parse_unquote = urllib.parse.unquote
compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes
compat_urllib_parse_urlencode = urllib.parse.urlencode
compat_urllib_parse_urlparse = urllib.parse.urlparse
compat_urllib_parse_urlunparse = urllib.parse.urlunparse
compat_urllib_request = urllib.request
compat_urllib_request_DataHandler = urllib.request.DataHandler
compat_urllib_response = urllib.response
compat_urlparse = urllib.parse
compat_urlretrieve = urllib.request.urlretrieve
compat_xml_parse_error = etree.ParseError
# Set public objects
__all__ = [
'WINDOWS_VT_MODE',
'compat_HTMLParseError',
'compat_HTMLParser',
'compat_HTTPError',
'compat_Match',
'compat_Pattern',
'compat_Struct',
'compat_asyncio_run',
'compat_b64decode',
'compat_basestring',
'compat_chr',
'compat_collections_abc',
'compat_cookiejar',
'compat_cookiejar_Cookie',
'compat_cookies',
'compat_cookies_SimpleCookie',
'compat_ctypes_WINFUNCTYPE',
'compat_etree_Element',
'compat_etree_fromstring',
'compat_etree_register_namespace',
'compat_expanduser',
'compat_filter',
'compat_get_terminal_size',
'compat_getenv',
'compat_getpass',
'compat_html_entities',
'compat_html_entities_html5',
'compat_http_client',
'compat_http_server',
'compat_input',
'compat_integer_types',
'compat_itertools_count',
'compat_kwargs',
'compat_map',
'compat_numeric_types',
'compat_ord',
'compat_os_name',
'compat_parse_qs',
'compat_print',
'compat_pycrypto_AES',
'compat_realpath',
'compat_setenv',
'compat_shlex_quote',
'compat_shlex_split',
'compat_socket_create_connection',
'compat_str',
'compat_struct_pack',
'compat_struct_unpack',
'compat_subprocess_get_DEVNULL',
'compat_tokenize_tokenize',
'compat_urllib_error',
'compat_urllib_parse',
'compat_urllib_parse_quote',
'compat_urllib_parse_quote_plus',
'compat_urllib_parse_unquote',
'compat_urllib_parse_unquote_plus',
'compat_urllib_parse_unquote_to_bytes',
'compat_urllib_parse_urlencode',
'compat_urllib_parse_urlparse',
'compat_urllib_parse_urlunparse',
'compat_urllib_request',
'compat_urllib_request_DataHandler',
'compat_urllib_response',
'compat_urlparse',
'compat_urlretrieve',
'compat_xml_parse_error',
'compat_xpath',
'compat_zip',
'windows_enable_vt_mode',
'workaround_optparse_bug9161',
]

View File

@ -0,0 +1,72 @@
import os
import sys
import warnings
import xml.etree.ElementTree as etree
from ._deprecated import * # noqa: F401, F403
from .compat_utils import passthrough_module
# XXX: Implement this the same way as other DeprecationWarnings without circular import
passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn(
DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=5))
# HTMLParseError has been deprecated in Python 3.3 and removed in
# Python 3.5. Introducing a dummy exception for Python >3.5 for compatibility
# and uniform cross-version exception handling
class compat_HTMLParseError(ValueError):
pass
class _TreeBuilder(etree.TreeBuilder):
def doctype(self, name, pubid, system):
pass
def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
compat_os_name = os._name if os.name == 'java' else os.name
if compat_os_name == 'nt':
def compat_shlex_quote(s):
import re
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
else:
from shlex import quote as compat_shlex_quote # noqa: F401
def compat_ord(c):
return c if isinstance(c, int) else ord(c)
if compat_os_name == 'nt' and sys.version_info < (3, 8):
# os.path.realpath on Windows does not follow symbolic links
# prior to Python 3.8 (see https://bugs.python.org/issue9949)
def compat_realpath(path):
while os.path.islink(path):
path = os.path.abspath(os.readlink(path))
return os.path.realpath(path)
else:
compat_realpath = os.path.realpath
# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl
# See https://github.com/yt-dlp/yt-dlp/issues/792
# https://docs.python.org/3/library/os.path.html#os.path.expanduser
if compat_os_name in ('nt', 'ce'):
def compat_expanduser(path):
HOME = os.environ.get('HOME')
if not HOME:
return os.path.expanduser(path)
elif not path.startswith('~'):
return path
i = path.replace('\\', '/', 1).find('/') # ~user
if i < 0:
i = len(path)
userhome = os.path.join(os.path.dirname(HOME), path[1:i]) if i > 1 else HOME
return userhome + path[i:]
else:
compat_expanduser = os.path.expanduser

View File

@ -0,0 +1,16 @@
"""Deprecated - New code should avoid these"""
import base64
import urllib.error
import urllib.parse
compat_str = str
compat_b64decode = base64.b64decode
compat_HTTPError = urllib.error.HTTPError
compat_urlparse = urllib.parse
compat_parse_qs = urllib.parse.parse_qs
compat_urllib_parse_unquote = urllib.parse.unquote
compat_urllib_parse_urlencode = urllib.parse.urlencode
compat_urllib_parse_urlparse = urllib.parse.urlparse

View File

@ -0,0 +1,97 @@
""" Do not use! """
import collections
import ctypes
import getpass
import html.entities
import html.parser
import http.client
import http.cookiejar
import http.cookies
import http.server
import itertools
import os
import shlex
import shutil
import socket
import struct
import tokenize
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree as etree
from subprocess import DEVNULL
# isort: split
import asyncio # noqa: F401
import re # noqa: F401
from asyncio import run as compat_asyncio_run # noqa: F401
from re import Pattern as compat_Pattern # noqa: F401
from re import match as compat_Match # noqa: F401
from .compat_utils import passthrough_module
from ..dependencies import Cryptodome_AES as compat_pycrypto_AES # noqa: F401
from ..dependencies import brotli as compat_brotli # noqa: F401
from ..dependencies import websockets as compat_websockets # noqa: F401
passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE
# will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
return ctypes.WINFUNCTYPE(*args, **kwargs)
def compat_setenv(key, value, env=os.environ):
env[key] = value
compat_basestring = str
compat_casefold = str.casefold
compat_chr = chr
compat_collections_abc = collections.abc
compat_cookiejar = http.cookiejar
compat_cookiejar_Cookie = http.cookiejar.Cookie
compat_cookies = http.cookies
compat_cookies_SimpleCookie = http.cookies.SimpleCookie
compat_etree_Element = etree.Element
compat_etree_register_namespace = etree.register_namespace
compat_filter = filter
compat_get_terminal_size = shutil.get_terminal_size
compat_getenv = os.getenv
compat_getpass = getpass.getpass
compat_html_entities = html.entities
compat_html_entities_html5 = html.entities.html5
compat_HTMLParser = html.parser.HTMLParser
compat_http_client = http.client
compat_http_server = http.server
compat_input = input
compat_integer_types = (int, )
compat_itertools_count = itertools.count
compat_kwargs = lambda kwargs: kwargs
compat_map = map
compat_numeric_types = (int, float, complex)
compat_print = print
compat_shlex_split = shlex.split
compat_socket_create_connection = socket.create_connection
compat_Struct = struct.Struct
compat_struct_pack = struct.pack
compat_struct_unpack = struct.unpack
compat_subprocess_get_DEVNULL = lambda: DEVNULL
compat_tokenize_tokenize = tokenize.tokenize
compat_urllib_error = urllib.error
compat_urllib_parse = urllib.parse
compat_urllib_parse_quote = urllib.parse.quote
compat_urllib_parse_quote_plus = urllib.parse.quote_plus
compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes
compat_urllib_parse_urlunparse = urllib.parse.urlunparse
compat_urllib_request = urllib.request
compat_urllib_request_DataHandler = urllib.request.DataHandler
compat_urllib_response = urllib.response
compat_urlretrieve = urllib.request.urlretrieve
compat_xml_parse_error = etree.ParseError
compat_xpath = lambda xpath: xpath
compat_zip = zip
workaround_optparse_bug9161 = lambda: None

View File

@ -0,0 +1,83 @@
import collections
import contextlib
import functools
import importlib
import sys
import types
_NO_ATTRIBUTE = object()
_Package = collections.namedtuple('Package', ('name', 'version'))
def get_package_info(module):
return _Package(
name=getattr(module, '_yt_dlp__identifier', module.__name__),
version=str(next(filter(None, (
getattr(module, attr, None)
for attr in ('__version__', 'version_string', 'version')
)), None)))
def _is_package(module):
return '__path__' in vars(module)
def _is_dunder(name):
return name.startswith('__') and name.endswith('__')
class EnhancedModule(types.ModuleType):
def __bool__(self):
return vars(self).get('__bool__', lambda: True)()
def __getattribute__(self, attr):
try:
ret = super().__getattribute__(attr)
except AttributeError:
if _is_dunder(attr):
raise
getter = getattr(self, '__getattr__', None)
if not getter:
raise
ret = getter(attr)
return ret.fget() if isinstance(ret, property) else ret
def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=lambda _: None):
"""Passthrough parent module into a child module, creating the parent if necessary"""
def __getattr__(attr):
if _is_package(parent):
with contextlib.suppress(ImportError):
return importlib.import_module(f'.{attr}', parent.__name__)
ret = from_child(attr)
if ret is _NO_ATTRIBUTE:
raise AttributeError(f'module {parent.__name__} has no attribute {attr}')
callback(attr)
return ret
@functools.lru_cache(maxsize=None)
def from_child(attr):
nonlocal child
if attr not in allowed_attributes:
if ... not in allowed_attributes or _is_dunder(attr):
return _NO_ATTRIBUTE
if isinstance(child, str):
child = importlib.import_module(child, parent.__name__)
if _is_package(child):
with contextlib.suppress(ImportError):
return passthrough_module(f'{parent.__name__}.{attr}',
importlib.import_module(f'.{attr}', child.__name__))
with contextlib.suppress(AttributeError):
return getattr(child, attr)
return _NO_ATTRIBUTE
parent = sys.modules.get(parent, types.ModuleType(parent))
parent.__class__ = EnhancedModule
parent.__getattr__ = __getattr__
return parent

View File

@ -0,0 +1,26 @@
# flake8: noqa: F405
from functools import * # noqa: F403
from .compat_utils import passthrough_module
passthrough_module(__name__, 'functools')
del passthrough_module
try:
cache # >= 3.9
except NameError:
cache = lru_cache(maxsize=None)
try:
cached_property # >= 3.8
except NameError:
class cached_property:
def __init__(self, func):
update_wrapper(self, func)
self.func = func
def __get__(self, instance, _):
if instance is None:
return self
setattr(instance, self.func.__name__, self.func(instance))
return getattr(instance, self.func.__name__)
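Usage of the two backports (the import path is assumed from the new file's location, yt_dlp/compat/functools.py):

from yt_dlp.compat.functools import cache, cached_property

@cache                        # falls back to lru_cache(maxsize=None) before 3.9
def square(n):
    return n * n

class Thing:
    @cached_property          # computed once, then stored on the instance
    def answer(self):
        return square(6) + 6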

View File

@ -0,0 +1,16 @@
tests = {
'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP',
'png': lambda h: h[:8] == b'\211PNG\r\n\032\n',
'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'),
'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'),
}
def what(file=None, h=None):
"""Detect format of image (Currently supports jpeg, png, webp, gif only)
Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py
"""
if h is None:
with open(file, 'rb') as f:
h = f.read(12)
return next((type_ for type_, test in tests.items() if test(h)), None)
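A short usage sketch (the filename is illustrative); twelve bytes cover every signature the table checks:

with open('thumbnail.bin', 'rb') as f:
    kind = what(h=f.read(12))        # -> 'jpeg' | 'png' | 'webp' | 'gif' | None

kind = what('thumbnail.bin')         # or let the helper read the header itself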

View File

@ -0,0 +1,30 @@
# flake8: noqa: F405
from shutil import * # noqa: F403
from .compat_utils import passthrough_module
passthrough_module(__name__, 'shutil')
del passthrough_module
import sys
if sys.platform.startswith('freebsd'):
import errno
import os
import shutil
# Workaround for PermissionError when using restricted ACL mode on FreeBSD
def copy2(src, dst, *args, **kwargs):
if os.path.isdir(dst):
dst = os.path.join(dst, os.path.basename(src))
shutil.copyfile(src, dst, *args, **kwargs)
try:
shutil.copystat(src, dst, *args, **kwargs)
except PermissionError as e:
if e.errno != getattr(errno, 'EPERM', None):
raise
return dst
def move(*args, copy_function=copy2, **kwargs):
return shutil.move(*args, copy_function=copy_function, **kwargs)

View File

@ -1,12 +1,16 @@
import base64
import contextlib
import ctypes
import http.cookiejar
import http.cookies
import json
import os
import re
import shutil
import struct
import subprocess
import sys
import tempfile
import time
from datetime import datetime, timedelta, timezone
from enum import Enum, auto
from hashlib import pbkdf2_hmac
@ -16,38 +20,21 @@ from .aes import (
aes_gcm_decrypt_and_verify_bytes,
unpad_pkcs7,
)
from .compat import (
compat_b64decode,
compat_cookiejar_Cookie,
from .dependencies import (
_SECRETSTORAGE_UNAVAILABLE_REASON,
secretstorage,
sqlite3,
)
from .minicurses import MultilinePrinter, QuietMultilinePrinter
from .utils import (
expand_path,
Popen,
YoutubeDLCookieJar,
error_to_str,
expand_path,
is_path_like,
try_call,
)
try:
import sqlite3
SQLITE_AVAILABLE = True
except ImportError:
# although sqlite3 is part of the standard library, it is possible to compile python without
# sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
SQLITE_AVAILABLE = False
try:
import secretstorage
SECRETSTORAGE_AVAILABLE = True
except ImportError:
SECRETSTORAGE_AVAILABLE = False
SECRETSTORAGE_UNAVAILABLE_REASON = (
'as the `secretstorage` module is not installed. '
'Please install by running `python3 -m pip install secretstorage`.')
except Exception as _err:
SECRETSTORAGE_AVAILABLE = False
SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}'
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
@ -72,37 +59,72 @@ class YDLLogger:
if self._ydl:
self._ydl.report_error(message)
class ProgressBar(MultilinePrinter):
_DELAY, _timer = 0.1, 0
def print(self, message):
if time.time() - self._timer > self._DELAY:
self.print_at_line(f'[Cookies] {message}', 0)
self._timer = time.time()
def progress_bar(self):
"""Return a context manager with a print method. (Optional)"""
# Do not print to files/pipes, loggers, or when --no-progress is used
if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
return
file = self._ydl._out_files.error
try:
if not file.isatty():
return
except BaseException:
return
return self.ProgressBar(file, preserve_output=False)
def _create_progress_bar(logger):
if hasattr(logger, 'progress_bar'):
printer = logger.progress_bar()
if printer:
return printer
printer = QuietMultilinePrinter()
printer.print = lambda _: None
return printer
def load_cookies(cookie_file, browser_specification, ydl):
cookie_jars = []
if browser_specification is not None:
browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring))
browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
cookie_jars.append(
extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container))
if cookie_file is not None:
cookie_file = expand_path(cookie_file)
is_filename = is_path_like(cookie_file)
if is_filename:
cookie_file = expand_path(cookie_file)
jar = YoutubeDLCookieJar(cookie_file)
if os.access(cookie_file, os.R_OK):
if not is_filename or os.access(cookie_file, os.R_OK):
jar.load(ignore_discard=True, ignore_expires=True)
cookie_jars.append(jar)
return _merge_cookie_jars(cookie_jars)
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
if browser_name == 'firefox':
return _extract_firefox_cookies(profile, logger)
return _extract_firefox_cookies(profile, container, logger)
elif browser_name == 'safari':
return _extract_safari_cookies(profile, logger)
elif browser_name in CHROMIUM_BASED_BROWSERS:
return _extract_chrome_cookies(browser_name, profile, keyring, logger)
else:
raise ValueError('unknown browser: {}'.format(browser_name))
raise ValueError(f'unknown browser: {browser_name}')
def _extract_firefox_cookies(profile, logger):
def _extract_firefox_cookies(profile, container, logger):
logger.info('Extracting cookies from firefox')
if not SQLITE_AVAILABLE:
if not sqlite3:
logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
'Please use a python interpreter compiled with sqlite3 support')
return YoutubeDLCookieJar()
@ -114,25 +136,54 @@ def _extract_firefox_cookies(profile, logger):
else:
search_root = os.path.join(_firefox_browser_dir(), profile)
cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite')
cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
if cookie_database_path is None:
raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root))
logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path))
raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
container_id = None
if container not in (None, 'none'):
containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
raise FileNotFoundError(f'could not read containers.json in {search_root}')
with open(containers_path) as containers:
identities = json.load(containers).get('identities', [])
container_id = next((context.get('userContextId') for context in identities if container in (
context.get('name'),
try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group())
)), None)
if not isinstance(container_id, int):
raise ValueError(f'could not find firefox container "{container}" in containers.json')
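# For reference, a containers.json identity entry has roughly this shape
# (a sketch limited to the fields read above; everything else is omitted):
#
#     {"identities": [
#         {"userContextId": 1, "name": "Personal",
#          "l10nID": "userContextPersonal.label"},
#         ...
#     ]}
#
# so a container is matched either by its display name or by its full
# `userContext<...>.label` l10n identifier.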
with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
cursor = None
try:
cursor = _open_database_copy(cookie_database_path, tmpdir)
cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
if isinstance(container_id, int):
logger.debug(
f'Only loading cookies from firefox container "{container}", ID {container_id}')
cursor.execute(
'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
(f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
elif container == 'none':
logger.debug('Only loading cookies not belonging to any container')
cursor.execute(
'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,"userContextId=")')
else:
cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
jar = YoutubeDLCookieJar()
for host, name, value, path, expiry, is_secure in cursor.fetchall():
cookie = compat_cookiejar_Cookie(
version=0, name=name, value=value, port=None, port_specified=False,
domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
comment=None, comment_url=None, rest={})
jar.set_cookie(cookie)
logger.info('Extracted {} cookies from firefox'.format(len(jar)))
with _create_progress_bar(logger) as progress_bar:
table = cursor.fetchall()
total_cookie_count = len(table)
for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
cookie = http.cookiejar.Cookie(
version=0, name=name, value=value, port=None, port_specified=False,
domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
comment=None, comment_url=None, rest={})
jar.set_cookie(cookie)
logger.info(f'Extracted {len(jar)} cookies from firefox')
return jar
finally:
if cursor is not None:
@ -140,39 +191,25 @@ def _extract_firefox_cookies(profile, logger):
def _firefox_browser_dir():
if sys.platform in ('linux', 'linux2'):
return os.path.expanduser('~/.mozilla/firefox')
elif sys.platform == 'win32':
return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles')
if sys.platform in ('cygwin', 'win32'):
return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
elif sys.platform == 'darwin':
return os.path.expanduser('~/Library/Application Support/Firefox')
else:
raise ValueError('unsupported platform: {}'.format(sys.platform))
return os.path.expanduser('~/.mozilla/firefox')
def _get_chromium_based_browser_settings(browser_name):
# https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
if sys.platform in ('linux', 'linux2'):
config = _config_home()
browser_dir = {
'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
'chrome': os.path.join(config, 'google-chrome'),
'chromium': os.path.join(config, 'chromium'),
'edge': os.path.join(config, 'microsoft-edge'),
'opera': os.path.join(config, 'opera'),
'vivaldi': os.path.join(config, 'vivaldi'),
}[browser_name]
elif sys.platform == 'win32':
if sys.platform in ('cygwin', 'win32'):
appdata_local = os.path.expandvars('%LOCALAPPDATA%')
appdata_roaming = os.path.expandvars('%APPDATA%')
browser_dir = {
'brave': os.path.join(appdata_local, r'BraveSoftware\Brave-Browser\User Data'),
'chrome': os.path.join(appdata_local, r'Google\Chrome\User Data'),
'chromium': os.path.join(appdata_local, r'Chromium\User Data'),
'edge': os.path.join(appdata_local, r'Microsoft\Edge\User Data'),
'opera': os.path.join(appdata_roaming, r'Opera Software\Opera Stable'),
'vivaldi': os.path.join(appdata_local, r'Vivaldi\User Data'),
'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
'chrome': os.path.join(appdata_local, R'Google\Chrome\User Data'),
'chromium': os.path.join(appdata_local, R'Chromium\User Data'),
'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
}[browser_name]
elif sys.platform == 'darwin':
@ -187,7 +224,15 @@ def _get_chromium_based_browser_settings(browser_name):
}[browser_name]
else:
raise ValueError('unsupported platform: {}'.format(sys.platform))
config = _config_home()
browser_dir = {
'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
'chrome': os.path.join(config, 'google-chrome'),
'chromium': os.path.join(config, 'chromium'),
'edge': os.path.join(config, 'microsoft-edge'),
'opera': os.path.join(config, 'opera'),
'vivaldi': os.path.join(config, 'vivaldi'),
}[browser_name]
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
# dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
@ -210,11 +255,11 @@ def _get_chromium_based_browser_settings(browser_name):
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
logger.info('Extracting cookies from {}'.format(browser_name))
logger.info(f'Extracting cookies from {browser_name}')
if not SQLITE_AVAILABLE:
logger.warning(('Cannot extract cookies from {} without sqlite3 support. '
'Please use a python interpreter compiled with sqlite3 support').format(browser_name))
if not sqlite3:
logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
'Please use a python interpreter compiled with sqlite3 support')
return YoutubeDLCookieJar()
config = _get_chromium_based_browser_settings(browser_name)
@ -228,13 +273,13 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
if config['supports_profiles']:
search_root = os.path.join(config['browser_dir'], profile)
else:
logger.error('{} does not support profiles'.format(browser_name))
logger.error(f'{browser_name} does not support profiles')
search_root = config['browser_dir']
cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies')
cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
if cookie_database_path is None:
raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root))
logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path))
raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)
@ -245,45 +290,55 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
cursor.connection.text_factory = bytes
column_names = _get_column_names(cursor, 'cookies')
secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
cursor.execute('SELECT host_key, name, value, encrypted_value, path, '
'expires_utc, {} FROM cookies'.format(secure_column))
cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
jar = YoutubeDLCookieJar()
failed_cookies = 0
unencrypted_cookies = 0
for host_key, name, value, encrypted_value, path, expires_utc, is_secure in cursor.fetchall():
host_key = host_key.decode('utf-8')
name = name.decode('utf-8')
value = value.decode('utf-8')
path = path.decode('utf-8')
if not value and encrypted_value:
value = decryptor.decrypt(encrypted_value)
if value is None:
with _create_progress_bar(logger) as progress_bar:
table = cursor.fetchall()
total_cookie_count = len(table)
for i, line in enumerate(table):
progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
is_encrypted, cookie = _process_chrome_cookie(decryptor, *line)
if not cookie:
failed_cookies += 1
continue
else:
unencrypted_cookies += 1
cookie = compat_cookiejar_Cookie(
version=0, name=name, value=value, port=None, port_specified=False,
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
comment=None, comment_url=None, rest={})
jar.set_cookie(cookie)
elif not is_encrypted:
unencrypted_cookies += 1
jar.set_cookie(cookie)
if failed_cookies > 0:
failed_message = ' ({} could not be decrypted)'.format(failed_cookies)
failed_message = f' ({failed_cookies} could not be decrypted)'
else:
failed_message = ''
logger.info('Extracted {} cookies from {}{}'.format(len(jar), browser_name, failed_message))
counts = decryptor.cookie_counts.copy()
logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
counts = decryptor._cookie_counts.copy()
counts['unencrypted'] = unencrypted_cookies
logger.debug('cookie version breakdown: {}'.format(counts))
logger.debug(f'cookie version breakdown: {counts}')
return jar
finally:
if cursor is not None:
cursor.connection.close()
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
host_key = host_key.decode()
name = name.decode()
value = value.decode()
path = path.decode()
is_encrypted = not value and encrypted_value
if is_encrypted:
value = decryptor.decrypt(encrypted_value)
if value is None:
return is_encrypted, None
return is_encrypted, http.cookiejar.Cookie(
version=0, name=name, value=value, port=None, port_specified=False,
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
comment=None, comment_url=None, rest={})
class ChromeCookieDecryptor:
"""
Overview:
@ -310,24 +365,18 @@ class ChromeCookieDecryptor:
- KeyStorageLinux::CreateService
"""
def decrypt(self, encrypted_value):
raise NotImplementedError
_cookie_counts = {}
@property
def cookie_counts(self):
raise NotImplementedError
def decrypt(self, encrypted_value):
raise NotImplementedError('Must be implemented by sub classes')
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
if sys.platform in ('linux', 'linux2'):
return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
elif sys.platform == 'darwin':
if sys.platform == 'darwin':
return MacChromeCookieDecryptor(browser_keyring_name, logger)
elif sys.platform == 'win32':
elif sys.platform in ('win32', 'cygwin'):
return WindowsChromeCookieDecryptor(browser_root, logger)
else:
raise NotImplementedError('Chrome cookie decryption is not supported '
'on this platform: {}'.format(sys.platform))
return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
@ -344,10 +393,6 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)
@property
def cookie_counts(self):
return self._cookie_counts
def decrypt(self, encrypted_value):
version = encrypted_value[:3]
ciphertext = encrypted_value[3:]
@ -381,10 +426,6 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)
@property
def cookie_counts(self):
return self._cookie_counts
def decrypt(self, encrypted_value):
version = encrypted_value[:3]
ciphertext = encrypted_value[3:]
@ -410,10 +451,6 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
self._v10_key = _get_windows_v10_key(browser_root, logger)
self._cookie_counts = {'v10': 0, 'other': 0}
@property
def cookie_counts(self):
return self._cookie_counts
def decrypt(self, encrypted_value):
version = encrypted_value[:3]
ciphertext = encrypted_value[3:]
@ -442,25 +479,28 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
self._cookie_counts['other'] += 1
# any other prefix means the data is DPAPI encrypted
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8')
return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()
def _extract_safari_cookies(profile, logger):
if profile is not None:
logger.error('safari does not support profiles')
if sys.platform != 'darwin':
raise ValueError('unsupported platform: {}'.format(sys.platform))
raise ValueError(f'unsupported platform: {sys.platform}')
cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
if not os.path.isfile(cookies_path):
raise FileNotFoundError('could not find safari cookies database')
logger.debug('Trying secondary cookie location')
cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
if not os.path.isfile(cookies_path):
raise FileNotFoundError('could not find safari cookies database')
with open(cookies_path, 'rb') as f:
cookies_data = f.read()
jar = parse_safari_cookies(cookies_data, logger=logger)
logger.info('Extracted {} cookies from safari'.format(len(jar)))
logger.info(f'Extracted {len(jar)} cookies from safari')
return jar
@ -476,7 +516,7 @@ class DataParser:
def read_bytes(self, num_bytes):
if num_bytes < 0:
raise ParserError('invalid read of {} bytes'.format(num_bytes))
raise ParserError(f'invalid read of {num_bytes} bytes')
end = self.cursor + num_bytes
if end > len(self._data):
raise ParserError('reached end of input')
@ -487,7 +527,7 @@ class DataParser:
def expect_bytes(self, expected_value, message):
value = self.read_bytes(len(expected_value))
if value != expected_value:
raise ParserError('unexpected value: {} != {} ({})'.format(value, expected_value, message))
raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')
def read_uint(self, big_endian=False):
data_format = '>I' if big_endian else '<I'
@ -502,16 +542,15 @@ class DataParser:
while True:
c = self.read_bytes(1)
if c == b'\x00':
return b''.join(buffer).decode('utf-8')
return b''.join(buffer).decode()
else:
buffer.append(c)
def skip(self, num_bytes, description='unknown'):
if num_bytes > 0:
self._logger.debug('skipping {} bytes ({}): {}'.format(
num_bytes, description, self.read_bytes(num_bytes)))
self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
elif num_bytes < 0:
raise ParserError('invalid skip of {} bytes'.format(num_bytes))
raise ParserError(f'invalid skip of {num_bytes} bytes')
def skip_to(self, offset, description='unknown'):
self.skip(offset - self.cursor, description)
@ -538,15 +577,17 @@ def _parse_safari_cookies_page(data, jar, logger):
number_of_cookies = p.read_uint()
record_offsets = [p.read_uint() for _ in range(number_of_cookies)]
if number_of_cookies == 0:
logger.debug('a cookies page of size {} has no cookies'.format(len(data)))
logger.debug(f'a cookies page of size {len(data)} has no cookies')
return
p.skip_to(record_offsets[0], 'unknown page header field')
for record_offset in record_offsets:
p.skip_to(record_offset, 'space between records')
record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
p.read_bytes(record_length)
with _create_progress_bar(logger) as progress_bar:
for i, record_offset in enumerate(record_offsets):
progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}')
p.skip_to(record_offset, 'space between records')
record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
p.read_bytes(record_length)
p.skip_to_end('space in between pages')
@ -583,7 +624,7 @@ def _parse_safari_cookies_record(data, jar, logger):
p.skip_to(record_size, 'space at the end of the record')
cookie = compat_cookiejar_Cookie(
cookie = http.cookiejar.Cookie(
version=0, name=name, value=value, port=None, port_specified=False,
domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
@ -682,7 +723,7 @@ def _choose_linux_keyring(logger):
SelectBackend
"""
desktop_environment = _get_linux_desktop_environment(os.environ)
logger.debug('detected desktop environment: {}'.format(desktop_environment.name))
logger.debug(f'detected desktop environment: {desktop_environment.name}')
if desktop_environment == _LinuxDesktopEnvironment.KDE:
linux_keyring = _LinuxKeyring.KWALLET
elif desktop_environment == _LinuxDesktopEnvironment.OTHER:
@ -703,23 +744,21 @@ def _get_kwallet_network_wallet(logger):
"""
default_wallet = 'kdewallet'
try:
proc = Popen([
stdout, _, returncode = Popen.run([
'dbus-send', '--session', '--print-reply=literal',
'--dest=org.kde.kwalletd5',
'/modules/kwalletd5',
'org.kde.KWallet.networkWallet'
], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
stdout, stderr = proc.communicate_or_kill()
if proc.returncode != 0:
if returncode:
logger.warning('failed to read NetworkWallet')
return default_wallet
else:
network_wallet = stdout.decode('utf-8').strip()
logger.debug('NetworkWallet = "{}"'.format(network_wallet))
return network_wallet
except BaseException as e:
logger.warning('exception while obtaining NetworkWallet: {}'.format(e))
logger.debug(f'NetworkWallet = "{stdout.strip()}"')
return stdout.strip()
except Exception as e:
logger.warning(f'exception while obtaining NetworkWallet: {e}')
return default_wallet
@ -735,17 +774,16 @@ def _get_kwallet_password(browser_keyring_name, logger):
network_wallet = _get_kwallet_network_wallet(logger)
try:
proc = Popen([
stdout, _, returncode = Popen.run([
'kwallet-query',
'--read-password', '{} Safe Storage'.format(browser_keyring_name),
'--folder', '{} Keys'.format(browser_keyring_name),
'--read-password', f'{browser_keyring_name} Safe Storage',
'--folder', f'{browser_keyring_name} Keys',
network_wallet
], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
stdout, stderr = proc.communicate_or_kill()
if proc.returncode != 0:
logger.error('kwallet-query failed with return code {}. Please consult '
'the kwallet-query man page for details'.format(proc.returncode))
if returncode:
logger.error(f'kwallet-query failed with return code {returncode}. '
'Please consult the kwallet-query man page for details')
return b''
else:
if stdout.lower().startswith(b'failed to read'):
@ -760,17 +798,15 @@ def _get_kwallet_password(browser_keyring_name, logger):
return b''
else:
logger.debug('password found')
if stdout[-1:] == b'\n':
stdout = stdout[:-1]
return stdout
except BaseException as e:
logger.warning(f'exception running kwallet-query: {type(e).__name__}({e})')
return stdout.rstrip(b'\n')
except Exception as e:
logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
return b''
def _get_gnome_keyring_password(browser_keyring_name, logger):
if not SECRETSTORAGE_AVAILABLE:
logger.error('secretstorage not available {}'.format(SECRETSTORAGE_UNAVAILABLE_REASON))
if not secretstorage:
logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
return b''
# the Gnome keyring does not seem to organise keys in the same way as KWallet;
# using `dbus-monitor` during startup, it can be observed that chromium lists all keys
@ -779,7 +815,7 @@ def _get_gnome_keyring_password(browser_keyring_name, logger):
with contextlib.closing(secretstorage.dbus_init()) as con:
col = secretstorage.get_default_collection(con)
for item in col.get_all_items():
if item.get_label() == '{} Safe Storage'.format(browser_keyring_name):
if item.get_label() == f'{browser_keyring_name} Safe Storage':
return item.get_secret()
else:
logger.error('failed to read from keyring')
@ -809,35 +845,35 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
def _get_mac_keyring_password(browser_keyring_name, logger):
logger.debug('using find-generic-password to obtain password from OSX keychain')
try:
proc = Popen(
stdout, _, returncode = Popen.run(
['security', 'find-generic-password',
'-w', # write password to stdout
'-a', browser_keyring_name, # match 'account'
'-s', '{} Safe Storage'.format(browser_keyring_name)], # match 'service'
'-s', f'{browser_keyring_name} Safe Storage'], # match 'service'
stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
stdout, stderr = proc.communicate_or_kill()
if stdout[-1:] == b'\n':
stdout = stdout[:-1]
return stdout
except BaseException as e:
logger.warning(f'exception running find-generic-password: {type(e).__name__}({e})')
if returncode:
logger.warning('find-generic-password failed')
return None
return stdout.rstrip(b'\n')
except Exception as e:
logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
return None
def _get_windows_v10_key(browser_root, logger):
path = _find_most_recently_used_file(browser_root, 'Local State')
path = _find_most_recently_used_file(browser_root, 'Local State', logger)
if path is None:
logger.error('could not find local state file')
return None
with open(path, 'r', encoding='utf8') as f:
logger.debug(f'Found local state file at "{path}"')
with open(path, encoding='utf8') as f:
data = json.load(f)
try:
base64_key = data['os_crypt']['encrypted_key']
except KeyError:
logger.error('no encrypted key in Local State')
return None
encrypted_key = compat_b64decode(base64_key)
encrypted_key = base64.b64decode(base64_key)
prefix = b'DPAPI'
if not encrypted_key.startswith(prefix):
logger.error('invalid key')
@ -852,7 +888,7 @@ def pbkdf2_sha1(password, salt, iterations, key_length):
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
try:
return plaintext.decode('utf-8')
return plaintext.decode()
except UnicodeDecodeError:
logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
return None
@ -866,7 +902,7 @@ def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
return None
try:
return plaintext.decode('utf-8')
return plaintext.decode()
except UnicodeDecodeError:
logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
return None
@ -877,10 +913,12 @@ def _decrypt_windows_dpapi(ciphertext, logger):
References:
- https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
"""
from ctypes.wintypes import DWORD
import ctypes
import ctypes.wintypes
class DATA_BLOB(ctypes.Structure):
_fields_ = [('cbData', DWORD),
_fields_ = [('cbData', ctypes.wintypes.DWORD),
('pbData', ctypes.POINTER(ctypes.c_char))]
buffer = ctypes.create_string_buffer(ciphertext)
@ -917,17 +955,20 @@ def _open_database_copy(database_path, tmpdir):
def _get_column_names(cursor, table_name):
table_info = cursor.execute('PRAGMA table_info({})'.format(table_name)).fetchall()
return [row[1].decode('utf-8') for row in table_info]
table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
return [row[1].decode() for row in table_info]
def _find_most_recently_used_file(root, filename):
def _find_most_recently_used_file(root, filename, logger):
# if there are multiple browser profiles, take the most recently used one
paths = []
for root, dirs, files in os.walk(root):
for file in files:
if file == filename:
paths.append(os.path.join(root, file))
i, paths = 0, []
with _create_progress_bar(logger) as progress_bar:
for curr_root, dirs, files in os.walk(root):
for file in files:
i += 1
progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
if file == filename:
paths.append(os.path.join(curr_root, file))
return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
@ -945,11 +986,102 @@ def _is_path(value):
return os.path.sep in value
def _parse_browser_specification(browser_name, profile=None, keyring=None):
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
if browser_name not in SUPPORTED_BROWSERS:
raise ValueError(f'unsupported browser: "{browser_name}"')
if keyring not in (None, *SUPPORTED_KEYRINGS):
raise ValueError(f'unsupported keyring: "{keyring}"')
if profile is not None and _is_path(profile):
profile = os.path.expanduser(profile)
return browser_name, profile, keyring
if profile is not None and _is_path(expand_path(profile)):
profile = expand_path(profile)
return browser_name, profile, keyring, container
class LenientSimpleCookie(http.cookies.SimpleCookie):
"""More lenient version of http.cookies.SimpleCookie"""
# From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
# We use Morsel's legal key chars to avoid errors on setting values
_LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
_LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
_RESERVED = {
"expires",
"path",
"comment",
"domain",
"max-age",
"secure",
"httponly",
"version",
"samesite",
}
_FLAGS = {"secure", "httponly"}
# Added 'bad' group to catch the remaining value
_COOKIE_PATTERN = re.compile(r"""
\s* # Optional whitespace at start of cookie
(?P<key> # Start of group 'key'
[""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
) # End of group 'key'
( # Optional group: there may not be a value.
\s*=\s* # Equal Sign
( # Start of potential value
(?P<val> # Start of group 'val'
"(?:[^\\"]|\\.)*" # Any doublequoted string
| # or
\w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
| # or
[""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string
) # End of group 'val'
| # or
(?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
) # End of potential value
)? # End of optional value group
\s* # Any number of spaces.
(\s+|;|$) # Ending either at space, semicolon, or EOS.
""", re.ASCII | re.VERBOSE)
def load(self, data):
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
if not isinstance(data, str):
return super().load(data)
morsel = None
for match in self._COOKIE_PATTERN.finditer(data):
if match.group('bad'):
morsel = None
continue
key, value = match.group('key', 'val')
is_attribute = False
if key.startswith('$'):
key = key[1:]
is_attribute = True
lower_key = key.lower()
if lower_key in self._RESERVED:
if morsel is None:
continue
if value is None:
if lower_key not in self._FLAGS:
morsel = None
continue
value = True
else:
value, _ = self.value_decode(value)
morsel[key] = value
elif is_attribute:
morsel = None
elif value is not None:
morsel = self.get(key, http.cookies.Morsel())
real_value, coded_value = self.value_decode(value)
morsel.set(key, real_value, coded_value)
self[key] = morsel
else:
morsel = None
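# A rough illustration of the leniency (values made up): given
#
#     first=1; bad="unclosed; second=2
#
# the stray quote makes the standard parser stop, losing `second`, whereas here
# the unparsable fragment lands in the 'bad' group, only the current morsel is
# discarded, and parsing resumes, so both `first` and `second` survive.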


@ -0,0 +1,30 @@
import types
from ..compat import functools
from ..compat.compat_utils import passthrough_module
try:
import Cryptodome as _parent
except ImportError:
try:
import Crypto as _parent
except (ImportError, SyntaxError): # Old Crypto gives SyntaxError in newer Python
_parent = types.ModuleType('no_Cryptodome')
__bool__ = lambda: False
passthrough_module(__name__, _parent, (..., '__version__'))
del passthrough_module
@property
@functools.cache
def _yt_dlp__identifier():
if _parent.__name__ == 'Crypto':
from Crypto.Cipher import AES
try:
# In pycrypto, mode defaults to ECB. See:
# https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode
AES.new(b'abcdefghijklmnop')
except TypeError:
return 'pycrypto'
return _parent.__name__


@ -0,0 +1,83 @@
# flake8: noqa: F401
"""Imports all optional dependencies for the project.
An attribute "_yt_dlp__identifier" may be inserted into the module if it uses an ambiguous namespace"""
try:
import brotlicffi as brotli
except ImportError:
try:
import brotli
except ImportError:
brotli = None
try:
import certifi
except ImportError:
certifi = None
else:
from os.path import exists as _path_exists
# The certificate may not be bundled in the executable
if not _path_exists(certifi.where()):
certifi = None
try:
import mutagen
except ImportError:
mutagen = None
secretstorage = None
try:
import secretstorage
_SECRETSTORAGE_UNAVAILABLE_REASON = None
except ImportError:
_SECRETSTORAGE_UNAVAILABLE_REASON = (
'as the `secretstorage` module is not installed. '
'Please install by running `python3 -m pip install secretstorage`')
except Exception as _err:
_SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}'
try:
import sqlite3
except ImportError:
# although sqlite3 is part of the standard library, it is possible to compile python without
# sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
sqlite3 = None
try:
import websockets
except (ImportError, SyntaxError):
# websockets 3.10 on python 3.6 causes SyntaxError
# See https://github.com/yt-dlp/yt-dlp/issues/2633
websockets = None
try:
import xattr # xattr or pyxattr
except ImportError:
xattr = None
else:
if hasattr(xattr, 'set'): # pyxattr
xattr._yt_dlp__identifier = 'pyxattr'
from . import Cryptodome
all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')}
available_dependencies = {k: v for k, v in all_dependencies.items() if v}
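# This lets call sites probe availability with a plain truthiness test, as
# cookies.py does above. For example (import path shown from outside the package):
#
#     from yt_dlp.dependencies import sqlite3
#     if not sqlite3:
#         ...  # warn and fall back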
# Deprecated
Cryptodome_AES = Cryptodome.Cipher.AES if Cryptodome else None
__all__ = [
'all_dependencies',
'available_dependencies',
*all_dependencies.keys(),
]


@ -1,10 +1,4 @@
from __future__ import unicode_literals
from ..compat import compat_str
from ..utils import (
determine_protocol,
NO_DEFAULT
)
from ..utils import NO_DEFAULT, determine_protocol
def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False):
@ -29,20 +23,18 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
# Some of these require get_suitable_downloader
from .common import FileDownloader
from .dash import DashSegmentsFD
from .external import FFmpegFD, get_external_downloader
from .f4m import F4mFD
from .fc2 import FC2LiveFD
from .hls import HlsFD
from .http import HttpFD
from .rtmp import RtmpFD
from .rtsp import RtspFD
from .ism import IsmFD
from .mhtml import MhtmlFD
from .niconico import NiconicoDmcFD
from .rtmp import RtmpFD
from .rtsp import RtspFD
from .websocket import WebSocketFragmentFD
from .youtube_live_chat import YoutubeLiveChatFD
from .external import (
get_external_downloader,
FFmpegFD,
)
PROTOCOL_MAP = {
'rtmp': RtmpFD,
@ -58,6 +50,7 @@ PROTOCOL_MAP = {
'ism': IsmFD,
'mhtml': MhtmlFD,
'niconico_dmc': NiconicoDmcFD,
'fc2_live': FC2LiveFD,
'websocket_frag': WebSocketFragmentFD,
'youtube_live_chat': YoutubeLiveChatFD,
'youtube_live_chat_replay': YoutubeLiveChatFD,
@ -66,10 +59,11 @@ PROTOCOL_MAP = {
def shorten_protocol_name(proto, simplify=False):
short_protocol_names = {
'm3u8_native': 'm3u8_n',
'rtmp_ffmpeg': 'rtmp_f',
'm3u8_native': 'm3u8',
'm3u8': 'm3u8F',
'rtmp_ffmpeg': 'rtmpF',
'http_dash_segments': 'dash',
'http_dash_segments_generator': 'dash_g',
'http_dash_segments_generator': 'dashG',
'niconico_dmc': 'dmc',
'websocket_frag': 'WSfrag',
}
@ -77,6 +71,7 @@ def shorten_protocol_name(proto, simplify=False):
short_protocol_names.update({
'https': 'http',
'ftps': 'ftp',
'm3u8': 'm3u8', # Reverse above m3u8 mapping
'm3u8_native': 'm3u8',
'http_dash_segments_generator': 'dash',
'rtmp_ffmpeg': 'rtmp',
@ -91,13 +86,13 @@ def _get_suitable_downloader(info_dict, protocol, params, default):
if default is NO_DEFAULT:
default = HttpFD
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
# return FFmpegFD
if (info_dict.get('section_start') or info_dict.get('section_end')) and FFmpegFD.can_download(info_dict):
return FFmpegFD
info_dict['protocol'] = protocol
downloaders = params.get('external_downloader')
external_downloader = (
downloaders if isinstance(downloaders, compat_str) or downloaders is None
downloaders if isinstance(downloaders, str) or downloaders is None
else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default')))
if external_downloader is None:
@ -117,7 +112,7 @@ def _get_suitable_downloader(info_dict, protocol, params, default):
return FFmpegFD
elif (external_downloader or '').lower() == 'native':
return HlsFD
elif get_suitable_downloader(
elif protocol == 'm3u8_native' and get_suitable_downloader(
info_dict, params, None, protocol='m3u8_frag_urls', to_stdout=info_dict['to_stdout']):
return HlsFD
elif params.get('hls_prefer_native') is True:


@ -1,30 +1,40 @@
from __future__ import division, unicode_literals
import contextlib
import errno
import functools
import os
import random
import re
import time
import random
import errno
from ..minicurses import (
BreaklineStatusPrinter,
MultilineLogger,
MultilinePrinter,
QuietMultilinePrinter,
)
from ..utils import (
IDENTITY,
NO_DEFAULT,
LockingUnsupportedError,
Namespace,
RetryManager,
classproperty,
decodeArgument,
deprecation_warning,
encodeFilename,
error_to_compat_str,
format_bytes,
join_nonempty,
parse_bytes,
remove_start,
sanitize_open,
shell_quote,
timeconvert,
timetuple_from_msec,
)
from ..minicurses import (
MultilineLogger,
MultilinePrinter,
QuietMultilinePrinter,
BreaklineStatusPrinter
try_call,
)
class FileDownloader(object):
class FileDownloader:
"""File Downloader class.
File downloader objects are the ones responsible for downloading the
@ -39,6 +49,7 @@ class FileDownloader(object):
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
ratelimit: Download speed limit, in bytes/sec.
continuedl: Attempt to continue downloads if possible
throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
retries: Number of times to retry for HTTP error 5xx
file_access_retries: Number of times to retry on file access error
@ -62,6 +73,7 @@ class FileDownloader(object):
useful for bypassing bandwidth throttling imposed by
a webserver (experimental)
progress_template: See YoutubeDL.py
retry_sleep_functions: See YoutubeDL.py
Subclasses of this one must re-define the real_download method.
"""
@ -71,21 +83,51 @@ class FileDownloader(object):
def __init__(self, ydl, params):
"""Create a FileDownloader object with the given options."""
self.ydl = ydl
self._set_ydl(ydl)
self._progress_hooks = []
self.params = params
self._prepare_multiline_status()
self.add_progress_hook(self.report_progress)
def _set_ydl(self, ydl):
self.ydl = ydl
for func in (
'deprecation_warning',
'deprecated_feature',
'report_error',
'report_file_already_downloaded',
'report_warning',
'to_console_title',
'to_stderr',
'trouble',
'write_debug',
):
if not hasattr(self, func):
setattr(self, func, getattr(ydl, func))
def to_screen(self, *args, **kargs):
self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
__to_screen = to_screen
@classproperty
def FD_NAME(cls):
return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', cls.__name__[:-2]).lower()
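# e.g. HttpFD -> 'http', DashSegmentsFD -> 'dash_segments'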
@staticmethod
def format_seconds(seconds):
if seconds is None:
return ' Unknown'
time = timetuple_from_msec(seconds * 1000)
if time.hours > 99:
return '--:--:--'
if not time.hours:
return '%02d:%02d' % time[1:-1]
return '%02d:%02d:%02d' % time[:-1]
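# e.g. format_seconds(61) -> '01:01', format_seconds(3661) -> '01:01:01';
# format_eta below additionally strips a leading '00:' and right-aligns to 8 chars.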
@classmethod
def format_eta(cls, seconds):
return f'{remove_start(cls.format_seconds(seconds), "00:"):>8s}'
@staticmethod
def calc_percent(byte_counter, data_len):
if data_len is None:
@ -94,11 +136,7 @@ class FileDownloader(object):
@staticmethod
def format_percent(percent):
if percent is None:
return '---.-%'
elif percent == 100:
return '100%'
return '%6s' % ('%3.1f%%' % percent)
return ' N/A%' if percent is None else f'{percent:>5.1f}%'
@staticmethod
def calc_eta(start, now, total, current):
@ -112,12 +150,6 @@ class FileDownloader(object):
rate = float(current) / dif
return int((float(total) - float(current)) / rate)
@staticmethod
def format_eta(eta):
if eta is None:
return '--:--'
return FileDownloader.format_seconds(eta)
@staticmethod
def calc_speed(start, now, bytes):
dif = now - start
@ -127,13 +159,11 @@ class FileDownloader(object):
@staticmethod
def format_speed(speed):
if speed is None:
return '%10s' % '---b/s'
return '%10s' % ('%s/s' % format_bytes(speed))
return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s'
@staticmethod
def format_retries(retries):
return 'inf' if retries == float('inf') else '%.0f' % retries
return 'inf' if retries == float('inf') else int(retries)
@staticmethod
def best_block_size(elapsed_time, bytes):
@ -151,33 +181,9 @@ class FileDownloader(object):
@staticmethod
def parse_bytes(bytestr):
"""Parse a string indicating a byte quantity into an integer."""
matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
if matchobj is None:
return None
number = float(matchobj.group(1))
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
return int(round(number * multiplier))
def to_screen(self, *args, **kargs):
self.ydl.to_stdout(*args, quiet=self.params.get('quiet'), **kargs)
def to_stderr(self, message):
self.ydl.to_stderr(message)
def to_console_title(self, message):
self.ydl.to_console_title(message)
def trouble(self, *args, **kargs):
self.ydl.trouble(*args, **kargs)
def report_warning(self, *args, **kargs):
self.ydl.report_warning(*args, **kargs)
def report_error(self, *args, **kargs):
self.ydl.report_error(*args, **kargs)
def write_debug(self, *args, **kargs):
self.ydl.write_debug(*args, **kargs)
deprecation_warning('yt_dlp.FileDownloader.parse_bytes is deprecated and '
'may be removed in the future. Use yt_dlp.utils.parse_bytes instead')
return parse_bytes(bytestr)
def slow_down(self, start_time, now, byte_counter):
"""Sleep if the download speed is over the rate limit."""
@ -210,28 +216,42 @@ class FileDownloader(object):
def ytdl_filename(self, filename):
return filename + '.ytdl'
def sanitize_open(self, filename, open_mode):
file_access_retries = self.params.get('file_access_retries', 10)
retry = 0
while True:
try:
return sanitize_open(filename, open_mode)
except (IOError, OSError) as err:
retry = retry + 1
if retry > file_access_retries or err.errno not in (errno.EACCES,):
raise
self.to_screen(
'[download] Got file access error. Retrying (attempt %d of %s) ...'
% (retry, self.format_retries(file_access_retries)))
time.sleep(0.01)
def wrap_file_access(action, *, fatal=False):
def error_callback(err, count, retries, *, fd):
return RetryManager.report_retry(
err, count, retries, info=fd.__to_screen,
warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')),
error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'),
sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))
def wrapper(self, func, *args, **kwargs):
for retry in RetryManager(self.params.get('file_access_retries'), error_callback, fd=self):
try:
return func(self, *args, **kwargs)
except OSError as err:
if err.errno in (errno.EACCES, errno.EINVAL):
retry.error = err
continue
retry.error_callback(err, 1, 0)
return functools.partial(functools.partialmethod, wrapper)
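# The partial(partialmethod, wrapper) return value makes this a method
# decorator: `@wrap_file_access(...)` turns the decorated function into
# partialmethod(wrapper, func), so every call is retried under a RetryManager
# using the 'file_access' retry-sleep function from the params.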
@wrap_file_access('open', fatal=True)
def sanitize_open(self, filename, open_mode):
f, filename = sanitize_open(filename, open_mode)
if not getattr(f, 'locked', None):
self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
return f, filename
@wrap_file_access('remove')
def try_remove(self, filename):
os.remove(filename)
@wrap_file_access('rename')
def try_rename(self, old_filename, new_filename):
if old_filename == new_filename:
return
try:
os.replace(old_filename, new_filename)
except (IOError, OSError) as err:
self.report_error(f'unable to rename file: {err}')
os.replace(old_filename, new_filename)
def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file."""
@ -248,10 +268,8 @@ class FileDownloader(object):
# Ignore obviously invalid dates
if filetime == 0:
return
try:
with contextlib.suppress(Exception):
os.utime(filename, (time.time(), filetime))
except Exception:
pass
return filetime
def report_destination(self, filename):
@ -264,26 +282,26 @@ class FileDownloader(object):
elif self.ydl.params.get('logger'):
self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
elif self.params.get('progress_with_newline'):
self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines)
self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
else:
self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet'))
self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')
def _finish_multiline_status(self):
self._multiline.end()
_progress_styles = {
'downloaded_bytes': 'light blue',
'percent': 'light blue',
'eta': 'yellow',
'speed': 'green',
'elapsed': 'bold white',
'total_bytes': '',
'total_bytes_estimate': '',
}
ProgressStyles = Namespace(
downloaded_bytes='light blue',
percent='light blue',
eta='yellow',
speed='green',
elapsed='bold white',
total_bytes='',
total_bytes_estimate='',
)
def _report_progress_status(self, s, default_template):
for name, style in self._progress_styles.items():
for name, style in self.ProgressStyles.items_:
name = f'_{name}_str'
if name not in s:
continue
@ -307,78 +325,73 @@ class FileDownloader(object):
self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)
def report_progress(self, s):
def with_fields(*tups, default=''):
for *fields, tmpl in tups:
if all(s.get(f) is not None for f in fields):
return tmpl
return default
_format_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}'
if s['status'] == 'finished':
if self.params.get('noprogress'):
self.to_screen('[download] Download completed')
msg_template = '100%%'
if s.get('total_bytes') is not None:
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
msg_template += ' of %(_total_bytes_str)s'
if s.get('elapsed') is not None:
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
msg_template += ' in %(_elapsed_str)s'
s['_percent_str'] = self.format_percent(100)
self._report_progress_status(s, msg_template)
return
speed = try_call(lambda: s['total_bytes'] / s['elapsed'])
s.update({
'speed': speed,
'_speed_str': self.format_speed(speed).strip(),
'_total_bytes_str': _format_bytes('total_bytes'),
'_elapsed_str': self.format_seconds(s.get('elapsed')),
'_percent_str': self.format_percent(100),
})
self._report_progress_status(s, join_nonempty(
'100%%',
with_fields(('total_bytes', 'of %(_total_bytes_str)s')),
with_fields(('elapsed', 'in %(_elapsed_str)s')),
with_fields(('speed', 'at %(_speed_str)s')),
delim=' '))
if s['status'] != 'downloading':
return
if s.get('eta') is not None:
s['_eta_str'] = self.format_eta(s['eta'])
else:
s['_eta_str'] = 'Unknown'
s.update({
'_eta_str': self.format_eta(s.get('eta')).strip(),
'_speed_str': self.format_speed(s.get('speed')),
'_percent_str': self.format_percent(try_call(
lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
lambda: s['downloaded_bytes'] == 0 and 0)),
'_total_bytes_str': _format_bytes('total_bytes'),
'_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
'_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
'_elapsed_str': self.format_seconds(s.get('elapsed')),
})
if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
else:
if s.get('downloaded_bytes') == 0:
s['_percent_str'] = self.format_percent(0)
else:
s['_percent_str'] = 'Unknown %'
msg_template = with_fields(
('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')
if s.get('speed') is not None:
s['_speed_str'] = self.format_speed(s['speed'])
else:
s['_speed_str'] = 'Unknown speed'
if s.get('total_bytes') is not None:
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
elif s.get('total_bytes_estimate') is not None:
s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
else:
if s.get('downloaded_bytes') is not None:
s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
if s.get('elapsed'):
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
else:
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
else:
msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'
if s.get('fragment_index') and s.get('fragment_count'):
msg_template += ' (frag %(fragment_index)s/%(fragment_count)s)'
elif s.get('fragment_index'):
msg_template += ' (frag %(fragment_index)s)'
msg_template += with_fields(
('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
('fragment_index', ' (frag %(fragment_index)s)'))
self._report_progress_status(s, msg_template)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
self.to_screen('[download] Resuming download at byte %s' % resume_len)
def report_retry(self, err, count, retries):
"""Report retry in case of HTTP error 5xx"""
self.to_screen(
'[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...'
% (error_to_compat_str(err), count, self.format_retries(retries)))
def report_file_already_downloaded(self, *args, **kwargs):
"""Report file has already been fully downloaded."""
return self.ydl.report_file_already_downloaded(*args, **kwargs)
def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
"""Report retry"""
is_frag = False if frag_index is NO_DEFAULT else 'fragment'
RetryManager.report_retry(
err, count, retries, info=self.__to_screen,
warn=lambda msg: self.__to_screen(f'[download] Got error: {msg}'),
error=IDENTITY if not fatal else lambda e: self.report_error(f'\r[download] Got error: {e}'),
sleep_func=self.params.get('retry_sleep_functions', {}).get(is_frag or 'http'),
suffix=f'fragment{"s" if frag_index is None else f" {frag_index}"}' if is_frag else None)
def report_unable_to_resume(self):
"""Report it was impossible to resume download."""
@ -418,25 +431,16 @@ class FileDownloader(object):
self._finish_multiline_status()
return True, False
if subtitle is False:
min_sleep_interval = self.params.get('sleep_interval')
if min_sleep_interval:
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
self.to_screen(
'[download] Sleeping %s seconds ...' % (
int(sleep_interval) if sleep_interval.is_integer()
else '%.2f' % sleep_interval))
time.sleep(sleep_interval)
if subtitle:
sleep_interval = self.params.get('sleep_interval_subtitles') or 0
else:
sleep_interval_sub = 0
if type(self.params.get('sleep_interval_subtitles')) is int:
sleep_interval_sub = self.params.get('sleep_interval_subtitles')
if sleep_interval_sub > 0:
self.to_screen(
'[download] Sleeping %s seconds ...' % (
sleep_interval_sub))
time.sleep(sleep_interval_sub)
min_sleep_interval = self.params.get('sleep_interval') or 0
sleep_interval = random.uniform(
min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval)
if sleep_interval > 0:
self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
time.sleep(sleep_interval)
ret = self.real_download(filename, info_dict)
self._finish_multiline_status()
return ret, True
@ -446,8 +450,7 @@ class FileDownloader(object):
raise NotImplementedError('This method must be implemented by subclasses')
def _hook_progress(self, status, info_dict):
if not self._progress_hooks:
return
# Ideally we want to make a copy of the dict, but that is too slow
status['info_dict'] = info_dict
# youtube-dl passes the same status object to all the hooks.
# Some third-party scripts seem to rely on this.
@ -469,4 +472,4 @@ class FileDownloader(object):
if exe is None:
exe = os.path.basename(str_args[0])
self.write_debug('%s command line: %s' % (exe, shell_quote(str_args)))
self.write_debug(f'{exe} command line: {shell_quote(str_args)}')


@ -1,10 +1,9 @@
from __future__ import unicode_literals
import time
import urllib.parse
from ..downloader import get_suitable_downloader
from . import get_suitable_downloader
from .fragment import FragmentFD
from ..utils import urljoin
from ..utils import update_url_query, urljoin
class DashSegmentsFD(FragmentFD):
@ -42,24 +41,29 @@ class DashSegmentsFD(FragmentFD):
self._prepare_and_start_frag_download(ctx, fmt)
ctx['start'] = real_start
fragments_to_download = self._get_fragments(fmt, ctx)
extra_query = None
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
if extra_param_to_segment_url:
extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
fragments_to_download = self._get_fragments(fmt, ctx, extra_query)
if real_downloader:
self.to_screen(
'[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
info_dict['fragments'] = list(fragments_to_download)
fd = real_downloader(self.ydl, self.params)
return fd.real_download(filename, info_dict)
args.append([ctx, fragments_to_download, fmt])
return self.download_and_append_fragments_multiple(*args)
return self.download_and_append_fragments_multiple(*args, is_fatal=lambda idx: idx == 0)
def _resolve_fragments(self, fragments, ctx):
fragments = fragments(ctx) if callable(fragments) else fragments
return [next(iter(fragments))] if self.params.get('test') else fragments
def _get_fragments(self, fmt, ctx):
def _get_fragments(self, fmt, ctx, extra_query):
fragment_base_url = fmt.get('fragment_base_url')
fragments = self._resolve_fragments(fmt['fragments'], ctx)
@ -72,9 +76,12 @@ class DashSegmentsFD(FragmentFD):
if not fragment_url:
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
if extra_query:
fragment_url = update_url_query(fragment_url, extra_query)
yield {
'frag_index': frag_index,
'fragment_count': fragment.get('fragment_count'),
'index': i,
'url': fragment_url,
}
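# e.g. with extra_param_to_segment_url == 'token=abc', a fragment URL like
# 'https://example.com/seg-1.m4s' becomes 'https://example.com/seg-1.m4s?token=abc'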


@ -1,35 +1,44 @@
from __future__ import unicode_literals
import enum
import json
import os.path
import re
import subprocess
import sys
import time
import uuid
from .fragment import FragmentFD
from ..compat import (
compat_setenv,
compat_str,
)
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
from ..compat import functools
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
from ..utils import (
Popen,
RetryManager,
_configuration_args,
check_executable,
classproperty,
cli_bool_option,
cli_option,
cli_valueless_option,
cli_bool_option,
_configuration_args,
determine_ext,
encodeFilename,
encodeArgument,
encodeFilename,
find_available_port,
handle_youtubedl_headers,
check_executable,
Popen,
remove_end,
sanitized_Request,
traverse_obj,
)
class Features(enum.Enum):
TO_STDOUT = enum.auto()
MULTIPLE_FORMATS = enum.auto()
class ExternalFD(FragmentFD):
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
can_download_to_stdout = False
SUPPORTED_FEATURES = ()
_CAPTURE_STDERR = True
def real_download(self, filename, info_dict):
self.report_destination(filename)
@ -55,7 +64,6 @@ class ExternalFD(FragmentFD):
}
if filename != '-':
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
self.try_rename(tmpfilename, filename)
status.update({
'downloaded_bytes': fsize,
@ -73,23 +81,32 @@ class ExternalFD(FragmentFD):
def get_basename(cls):
return cls.__name__[:-2].lower()
@property
@classproperty
def EXE_NAME(cls):
return cls.get_basename()
@functools.cached_property
def exe(self):
return self.get_basename()
return self.EXE_NAME
@classmethod
def available(cls, path=None):
path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])
if path:
cls.exe = path
return path
return False
path = check_executable(
cls.EXE_NAME if path in (None, cls.get_basename()) else path,
[cls.AVAILABLE_OPT])
if not path:
return False
cls.exe = path
return path
@classmethod
def supports(cls, info_dict):
return (
(cls.can_download_to_stdout or not info_dict.get('to_stdout'))
and info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS)
return all((
not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES,
'+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES,
not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'),
all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')),
))
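# e.g. a 'https+https' protocol (separate video+audio streams) is only supported
# when the downloader declares Features.MULTIPLE_FORMATS and supports both parts;
# HLS-AES data or extra segment-URL params also rule out external downloaders.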
@classmethod
def can_download(cls, info_dict, path=None):
@ -106,7 +123,7 @@ class ExternalFD(FragmentFD):
def _configuration_args(self, keys=None, *args, **kwargs):
return _configuration_args(
self.get_basename(), self.params.get('external_downloader_args'), self.get_basename(),
self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
keys, *args, **kwargs)
def _call_downloader(self, tmpfilename, info_dict):
@ -116,33 +133,27 @@ class ExternalFD(FragmentFD):
self._debug_cmd(cmd)
if 'fragments' not in info_dict:
p = Popen(cmd, stderr=subprocess.PIPE)
_, stderr = p.communicate_or_kill()
if p.returncode != 0:
self.to_stderr(stderr.decode('utf-8', 'replace'))
return p.returncode
_, stderr, returncode = self._call_process(cmd, info_dict)
if returncode and stderr:
self.to_stderr(stderr)
return returncode
fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
count = 0
while count <= fragment_retries:
p = Popen(cmd, stderr=subprocess.PIPE)
_, stderr = p.communicate_or_kill()
if p.returncode == 0:
retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry,
frag_index=None, fatal=not skip_unavailable_fragments)
for retry in retry_manager:
_, stderr, returncode = self._call_process(cmd, info_dict)
if not returncode:
break
# TODO: Decide whether to retry based on error code
# https://aria2.github.io/manual/en/html/aria2c.html#exit-status
self.to_stderr(stderr.decode('utf-8', 'replace'))
count += 1
if count <= fragment_retries:
self.to_screen(
'[%s] Got error. Retrying fragments (attempt %d of %s)...'
% (self.get_basename(), count, self.format_retries(fragment_retries)))
if count > fragment_retries:
if not skip_unavailable_fragments:
self.report_error('Giving up after %s fragment retries' % fragment_retries)
return -1
if stderr:
self.to_stderr(stderr)
retry.error = Exception()
continue
if not skip_unavailable_fragments and retry_manager.error:
return -1
decrypt_fragment = self.decrypter(info_dict)
dest, _ = self.sanitize_open(tmpfilename, 'wb')
@ -150,7 +161,7 @@ class ExternalFD(FragmentFD):
fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
try:
src, _ = self.sanitize_open(fragment_filename, 'rb')
except IOError as err:
except OSError as err:
if skip_unavailable_fragments and frag_index > 1:
self.report_skip_fragment(frag_index, err)
continue
@ -159,20 +170,24 @@ class ExternalFD(FragmentFD):
dest.write(decrypt_fragment(fragment, src.read()))
src.close()
if not self.params.get('keep_fragments', False):
os.remove(encodeFilename(fragment_filename))
self.try_remove(encodeFilename(fragment_filename))
dest.close()
os.remove(encodeFilename('%s.frag.urls' % tmpfilename))
self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename))
return 0
def _call_process(self, cmd, info_dict):
return Popen.run(cmd, text=True, stderr=subprocess.PIPE)
class CurlFD(ExternalFD):
AVAILABLE_OPT = '-V'
_CAPTURE_STDERR = False # curl writes the progress to stderr
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '--location', '-o', tmpfilename]
cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += ['--header', f'{key}: {val}']
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
cmd += self._valueless_option('--silent', 'noprogress')
@ -191,16 +206,6 @@ class CurlFD(ExternalFD):
cmd += ['--', info_dict['url']]
return cmd
def _call_downloader(self, tmpfilename, info_dict):
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
self._debug_cmd(cmd)
# curl writes the progress to stderr so don't capture it.
p = Popen(cmd)
p.communicate_or_kill()
return p.returncode
class AxelFD(ExternalFD):
AVAILABLE_OPT = '-V'
@ -209,7 +214,7 @@ class AxelFD(ExternalFD):
cmd = [self.exe, '-o', tmpfilename]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['-H', '%s: %s' % (key, val)]
cmd += ['-H', f'{key}: {val}']
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
@ -219,10 +224,10 @@ class WgetFD(ExternalFD):
AVAILABLE_OPT = '--version'
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto']
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += ['--header', f'{key}: {val}']
cmd += self._option('--limit-rate', 'ratelimit')
retry = self._option('--tries', 'retries')
if len(retry) == 2:
@ -230,7 +235,10 @@ class WgetFD(ExternalFD):
retry[1] = '0'
cmd += retry
cmd += self._option('--bind-address', 'source_address')
cmd += self._option('--proxy', 'proxy')
proxy = self.params.get('proxy')
if proxy:
for var in ('http_proxy', 'https_proxy'):
cmd += ['--execute', f'{var}={proxy}']
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
@ -250,10 +258,23 @@ class Aria2cFD(ExternalFD):
check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
return all(check_results)
@staticmethod
def _aria2c_filename(fn):
return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}'
def _call_downloader(self, tmpfilename, info_dict):
# FIXME: Disabled due to https://github.com/yt-dlp/yt-dlp/issues/5931
if False and 'no-external-downloader-progress' not in self.params.get('compat_opts', []):
info_dict['__rpc'] = {
'port': find_available_port() or 19190,
'secret': str(uuid.uuid4()),
}
return super()._call_downloader(tmpfilename, info_dict)
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c',
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
'--file-allocation=none', '-x16', '-j16', '-s16']
'--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
if 'fragments' in info_dict:
cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
else:
@ -261,7 +282,7 @@ class Aria2cFD(ExternalFD):
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += ['--header', f'{key}: {val}']
cmd += self._option('--max-overall-download-limit', 'ratelimit')
cmd += self._option('--interface', 'source_address')
cmd += self._option('--all-proxy', 'proxy')
@ -270,6 +291,12 @@ class Aria2cFD(ExternalFD):
cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
cmd += self._configuration_args()
if '__rpc' in info_dict:
cmd += [
'--enable-rpc',
f'--rpc-listen-port={info_dict["__rpc"]["port"]}',
f'--rpc-secret={info_dict["__rpc"]["secret"]}']
# aria2c strips out spaces from the beginning/end of filenames and paths.
# We work around this issue by adding a "./" to the beginning of the
# filename and relative path, and adding a "/" at the end of the path.
@ -278,11 +305,9 @@ class Aria2cFD(ExternalFD):
# https://github.com/aria2/aria2/issues/1373
dn = os.path.dirname(tmpfilename)
if dn:
if not os.path.isabs(dn):
dn = '.%s%s' % (os.path.sep, dn)
cmd += ['--dir', dn + os.path.sep]
cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep]
if 'fragments' not in info_dict:
cmd += ['--out', '.%s%s' % (os.path.sep, os.path.basename(tmpfilename))]
cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))]
cmd += ['--auto-file-renaming=false']
if 'fragments' in info_dict:
@ -291,35 +316,114 @@ class Aria2cFD(ExternalFD):
url_list = []
for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename))
url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
stream, _ = self.sanitize_open(url_list_file, 'wb')
stream.write('\n'.join(url_list).encode('utf-8'))
stream.write('\n'.join(url_list).encode())
stream.close()
cmd += ['-i', url_list_file]
cmd += ['-i', self._aria2c_filename(url_list_file)]
else:
cmd += ['--', info_dict['url']]
return cmd
def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()):
# Does not actually need to be a UUID, just unique
sanitycheck = str(uuid.uuid4())
d = json.dumps({
'jsonrpc': '2.0',
'id': sanitycheck,
'method': method,
'params': [f'token:{rpc_secret}', *params],
}).encode('utf-8')
request = sanitized_Request(
f'http://localhost:{rpc_port}/jsonrpc',
data=d, headers={
'Content-Type': 'application/json',
'Content-Length': f'{len(d)}',
'Ytdl-request-proxy': '__noproxy__',
})
with self.ydl.urlopen(request) as r:
resp = json.load(r)
assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server'
return resp['result']
def _call_process(self, cmd, info_dict):
if '__rpc' not in info_dict:
return super()._call_process(cmd, info_dict)
send_rpc = functools.partial(self.aria2c_rpc, info_dict['__rpc']['port'], info_dict['__rpc']['secret'])
started = time.time()
fragmented = 'fragments' in info_dict
frag_count = len(info_dict['fragments']) if fragmented else 1
status = {
'filename': info_dict.get('_filename'),
'status': 'downloading',
'elapsed': 0,
'downloaded_bytes': 0,
'fragment_count': frag_count if fragmented else None,
'fragment_index': 0 if fragmented else None,
}
self._hook_progress(status, info_dict)
def get_stat(key, *obj, average=False):
val = tuple(filter(None, map(float, traverse_obj(obj, (..., ..., key))))) or [0]
return sum(val) / (len(val) if average else 1)
with Popen(cmd, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) as p:
# Add a small sleep so that the RPC client can receive the response;
# otherwise the connection stalls indefinitely
time.sleep(0.2)
retval = p.poll()
while retval is None:
# We don't use tellStatus as we won't know the GID without reading stdout
# Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive
active = send_rpc('aria2.tellActive')
completed = send_rpc('aria2.tellStopped', [0, frag_count])
downloaded = get_stat('totalLength', completed) + get_stat('completedLength', active)
speed = get_stat('downloadSpeed', active)
total = frag_count * get_stat('totalLength', active, completed, average=True)
if total < downloaded:
total = None
status.update({
'downloaded_bytes': int(downloaded),
'speed': speed,
'total_bytes': None if fragmented else total,
'total_bytes_estimate': total,
'eta': (total - downloaded) / (speed or 1),
'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None,
'elapsed': time.time() - started
})
self._hook_progress(status, info_dict)
if not active and len(completed) >= frag_count:
send_rpc('aria2.shutdown')
retval = p.wait()
break
time.sleep(0.1)
retval = p.poll()
return '', p.stderr.read(), retval
class HttpieFD(ExternalFD):
AVAILABLE_OPT = '--version'
@classmethod
def available(cls, path=None):
return super().available(path or 'http')
EXE_NAME = 'http'
def _make_cmd(self, tmpfilename, info_dict):
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['%s:%s' % (key, val)]
cmd += [f'{key}:{val}']
return cmd
class FFmpegFD(ExternalFD):
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments')
can_download_to_stdout = True
SUPPORTED_FEATURES = (Features.TO_STDOUT, Features.MULTIPLE_FORMATS)
@classmethod
def available(cls, path=None):
@ -327,10 +431,6 @@ class FFmpegFD(ExternalFD):
# Fixme: This may be wrong when --ffmpeg-location is used
return FFmpegPostProcessor().available
@classmethod
def supports(cls, info_dict):
return all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+'))
def on_process_started(self, proc, stdin):
""" Override this in subclasses """
pass
@ -345,7 +445,6 @@ class FFmpegFD(ExternalFD):
and cls.can_download(info_dict))
def _call_downloader(self, tmpfilename, info_dict):
urls = [f['url'] for f in info_dict.get('requested_formats', [])] or [info_dict['url']]
ffpp = FFmpegPostProcessor(downloader=self)
if not ffpp.available:
self.report_error('m3u8 download detected but ffmpeg could not be found. Please install')
@ -361,9 +460,11 @@ class FFmpegFD(ExternalFD):
if not self.params.get('verbose'):
args += ['-hide_banner']
args += info_dict.get('_ffmpeg_args', [])
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[])
# This option exists only for compatibility. Extractors should use `_ffmpeg_args` instead
# These exist only for compatibility. Extractors should use
# info_dict['downloader_options']['ffmpeg_args'] instead
args += info_dict.get('_ffmpeg_args') or []
seekable = info_dict.get('_seekable')
if seekable is not None:
# setting -seekable prevents ffmpeg from guessing if the server
@ -373,21 +474,6 @@ class FFmpegFD(ExternalFD):
# http://trac.ffmpeg.org/ticket/6125#comment:10
args += ['-seekable', '1' if seekable else '0']
# start_time = info_dict.get('start_time') or 0
# if start_time:
# args += ['-ss', compat_str(start_time)]
# end_time = info_dict.get('end_time')
# if end_time:
# args += ['-t', compat_str(end_time - start_time)]
if info_dict.get('http_headers') is not None and re.match(r'^https?://', urls[0]):
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
headers = handle_youtubedl_headers(info_dict['http_headers'])
args += [
'-headers',
''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
env = None
proxy = self.params.get('proxy')
if proxy:
@ -404,8 +490,8 @@ class FFmpegFD(ExternalFD):
# We could switch to the following code if we are able to detect version properly
# args += ['-http_proxy', proxy]
env = os.environ.copy()
compat_setenv('HTTP_PROXY', proxy, env=env)
compat_setenv('http_proxy', proxy, env=env)
env['HTTP_PROXY'] = proxy
env['http_proxy'] = proxy
protocol = info_dict.get('protocol')
@ -435,20 +521,36 @@ class FFmpegFD(ExternalFD):
if isinstance(conn, list):
for entry in conn:
args += ['-rtmp_conn', entry]
elif isinstance(conn, compat_str):
elif isinstance(conn, str):
args += ['-rtmp_conn', conn]
for i, url in enumerate(urls):
args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url]
start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end')
selected_formats = info_dict.get('requested_formats') or [info_dict]
for i, fmt in enumerate(selected_formats):
if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']):
headers_dict = handle_youtubedl_headers(fmt['http_headers'])
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in headers_dict.items())])
if start_time:
args += ['-ss', str(start_time)]
if end_time:
args += ['-t', str(end_time - start_time)]
args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']]
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
args += ['-c', 'copy']
args += ['-c', 'copy']
if info_dict.get('requested_formats') or protocol == 'http_dash_segments':
for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]):
for i, fmt in enumerate(selected_formats):
stream_number = fmt.get('manifest_stream_number', 0)
args.extend(['-map', f'{i}:{stream_number}'])
if self.params.get('test', False):
args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
args += ['-fs', str(self._TEST_FILE_SIZE)]
ext = info_dict['ext']
if protocol in ('m3u8', 'm3u8_native'):
@ -483,35 +585,35 @@ class FFmpegFD(ExternalFD):
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
self._debug_cmd(args)
proc = Popen(args, stdin=subprocess.PIPE, env=env)
if url in ('-', 'pipe:'):
self.on_process_started(proc, proc.stdin)
try:
retval = proc.wait()
except BaseException as e:
# subprocess.run would send the SIGKILL signal to ffmpeg and the
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
# produces a file that is playable (this is mostly useful for live
# streams). Note that Windows is not affected and produces playable
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
proc.communicate_or_kill(b'q')
else:
proc.kill()
proc.wait()
raise
return retval
piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats)
with Popen(args, stdin=subprocess.PIPE, env=env) as proc:
if piped:
self.on_process_started(proc, proc.stdin)
try:
retval = proc.wait()
except BaseException as e:
# subprocess.run would send the SIGKILL signal to ffmpeg and the
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
# produces a file that is playable (this is mostly useful for live
# streams). Note that Windows is not affected and produces playable
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and not piped:
proc.communicate_or_kill(b'q')
else:
proc.kill(timeout=None)
raise
return retval
class AVconvFD(FFmpegFD):
pass
_BY_NAME = dict(
(klass.get_basename(), klass)
_BY_NAME = {
klass.get_basename(): klass
for name, klass in globals().items()
if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
)
}
def list_external_downloaders():
@ -519,8 +621,8 @@ def list_external_downloaders():
def get_external_downloader(external_downloader):
""" Given the name of the executable, see whether we support the given
downloader . """
# Drop .exe extension on Windows
""" Given the name of the executable, see whether we support the given downloader """
bn = os.path.splitext(os.path.basename(external_downloader))[0]
return _BY_NAME.get(bn)
return _BY_NAME.get(bn) or next((
klass for klass in _BY_NAME.values() if klass.EXE_NAME in bn
), None)
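
For orientation: the progress reporting added to Aria2cFD above drives aria2c through its JSON-RPC interface (aria2.tellActive / aria2.tellStopped) while the process runs. A minimal standalone sketch of such a call, assuming an aria2c instance started with --enable-rpc and a known port and secret (both placeholders here):

import json
import urllib.request
import uuid

def aria2c_rpc(port, secret, method, params=()):
    # aria2c expects the RPC secret as a 'token:...' first parameter
    payload = json.dumps({
        'jsonrpc': '2.0',
        'id': str(uuid.uuid4()),  # only needs to be unique per request
        'method': method,
        'params': [f'token:{secret}', *params],
    }).encode()
    request = urllib.request.Request(
        f'http://localhost:{port}/jsonrpc', data=payload,
        headers={'Content-Type': 'application/json'})
    with urllib.request.urlopen(request) as response:
        return json.load(response)['result']

# e.g. aria2c_rpc(19190, 'hunter2', 'aria2.tellActive')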

View File

@ -1,23 +1,14 @@
from __future__ import division, unicode_literals
import base64
import io
import itertools
import struct
import time
import urllib.error
import urllib.parse
from .fragment import FragmentFD
from ..compat import (
compat_b64decode,
compat_etree_fromstring,
compat_urlparse,
compat_urllib_error,
compat_urllib_parse_urlparse,
compat_struct_pack,
compat_struct_unpack,
)
from ..utils import (
fix_xml_ampersands,
xpath_text,
)
from ..compat import compat_etree_fromstring
from ..utils import fix_xml_ampersands, xpath_text
class DataTruncatedError(Exception):
@ -40,13 +31,13 @@ class FlvReader(io.BytesIO):
# Utility functions for reading numbers and strings
def read_unsigned_long_long(self):
return compat_struct_unpack('!Q', self.read_bytes(8))[0]
return struct.unpack('!Q', self.read_bytes(8))[0]
def read_unsigned_int(self):
return compat_struct_unpack('!I', self.read_bytes(4))[0]
return struct.unpack('!I', self.read_bytes(4))[0]
def read_unsigned_char(self):
return compat_struct_unpack('!B', self.read_bytes(1))[0]
return struct.unpack('!B', self.read_bytes(1))[0]
def read_string(self):
res = b''
@ -193,7 +184,7 @@ def build_fragments_list(boot_info):
first_frag_number = fragment_run_entry_table[0]['first']
fragments_counter = itertools.count(first_frag_number)
for segment, fragments_count in segment_run_table['segment_run']:
# In some live HDS streams (for example Rai), `fragments_count` is
# In some live HDS streams (e.g. Rai), `fragments_count` is
# abnormal and causing out-of-memory errors. It's OK to change the
# number of fragments for live streams as they are updated periodically
if fragments_count == 4294967295 and boot_info['live']:
@ -208,11 +199,11 @@ def build_fragments_list(boot_info):
def write_unsigned_int(stream, val):
stream.write(compat_struct_pack('!I', val))
stream.write(struct.pack('!I', val))
def write_unsigned_int_24(stream, val):
stream.write(compat_struct_pack('!I', val)[1:])
stream.write(struct.pack('!I', val)[1:])
def write_flv_header(stream):
@ -261,8 +252,6 @@ class F4mFD(FragmentFD):
A downloader for f4m manifests or AdobeHDS.
"""
FD_NAME = 'f4m'
def _get_unencrypted_media(self, doc):
media = doc.findall(_add_ns('media'))
if not media:
@ -308,12 +297,12 @@ class F4mFD(FragmentFD):
# 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
bootstrap_url = node.get('url')
if bootstrap_url:
bootstrap_url = compat_urlparse.urljoin(
bootstrap_url = urllib.parse.urljoin(
base_url, bootstrap_url)
boot_info = self._get_bootstrap_from_url(bootstrap_url)
else:
bootstrap_url = None
bootstrap = compat_b64decode(node.text)
bootstrap = base64.b64decode(node.text)
boot_info = read_bootstrap_info(bootstrap)
return boot_info, bootstrap_url
@ -343,14 +332,14 @@ class F4mFD(FragmentFD):
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
man_base_url = get_base_url(doc) or man_url
base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
base_url = urllib.parse.urljoin(man_base_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
boot_info, bootstrap_url = self._parse_bootstrap_node(
bootstrap_node, man_base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
metadata = compat_b64decode(metadata_node.text)
metadata = base64.b64decode(metadata_node.text)
else:
metadata = None
@ -378,7 +367,7 @@ class F4mFD(FragmentFD):
if not live:
write_metadata_tag(dest_stream, metadata)
base_url_parsed = compat_urllib_parse_urlparse(base_url)
base_url_parsed = urllib.parse.urlparse(base_url)
self._start_frag_download(ctx, info_dict)
@ -398,9 +387,10 @@ class F4mFD(FragmentFD):
query.append(info_dict['extra_param_to_segment_url'])
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
try:
success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
success = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
if not success:
return False
down_data = self._read_fragment(ctx)
reader = FlvReader(down_data)
while True:
try:
@ -417,7 +407,7 @@ class F4mFD(FragmentFD):
if box_type == b'mdat':
self._append_fragment(ctx, box_data)
break
except (compat_urllib_error.HTTPError, ) as err:
except urllib.error.HTTPError as err:
if live and (err.code == 404 or err.code == 410):
# We didn't keep up with the live window. Continue
# with the next available fragment.
@ -434,6 +424,4 @@ class F4mFD(FragmentFD):
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
self.report_warning(msg)
self._finish_frag_download(ctx, info_dict)
return True
return self._finish_frag_download(ctx, info_dict)
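
The FlvReader changes above drop the compat_struct_* aliases in favor of the stdlib struct module. A small sketch of the same big-endian reads, plus the 24-bit write trick used by write_unsigned_int_24 (helper names are illustrative):

import struct

def read_u32(data, offset=0):
    # '!I' = network byte order (big-endian), unsigned 32-bit
    return struct.unpack('!I', data[offset:offset + 4])[0]

def write_u24(value):
    # pack as 32-bit and drop the high byte, as write_unsigned_int_24 does
    return struct.pack('!I', value)[1:]

assert read_u32(b'\x00\x00\x01\x00') == 256
assert write_u24(0xABCDEF) == b'\xab\xcd\xef'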

View File

@ -0,0 +1,46 @@
import threading
from .common import FileDownloader
from .external import FFmpegFD
class FC2LiveFD(FileDownloader):
"""
Downloads FC2 live without being stopped. <br>
Note, this is not a part of public API, and will be removed without notice.
DO NOT USE
"""
def real_download(self, filename, info_dict):
ws = info_dict['ws']
heartbeat_lock = threading.Lock()
heartbeat_state = [None, 1]
def heartbeat():
if heartbeat_state[1] < 0:
return
try:
heartbeat_state[1] += 1
ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1])
except Exception:
self.to_screen('[fc2:live] Heartbeat failed')
with heartbeat_lock:
heartbeat_state[0] = threading.Timer(30, heartbeat)
heartbeat_state[0]._daemonic = True
heartbeat_state[0].start()
heartbeat()
new_info_dict = info_dict.copy()
new_info_dict.update({
'ws': None,
'protocol': 'live_ffmpeg',
})
try:
return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict)
finally:
# stop heartbeating
heartbeat_state[1] = -1
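
The heartbeat above keeps the FC2 websocket alive by re-arming a threading.Timer every 30 seconds until the downloader flips the counter to -1. A hedged sketch of the same self-rescheduling pattern, using the public daemon attribute rather than the _daemonic hack (send stands in for ws.send):

import threading

def start_heartbeat(send, interval=30.0):
    # each beat schedules the next one; returns a stop() callable
    state = {'stopped': False, 'beat_id': 0}

    def beat():
        if state['stopped']:
            return
        state['beat_id'] += 1
        send('{"name":"heartbeat","arguments":{},"id":%d}' % state['beat_id'])
        timer = threading.Timer(interval, beat)
        timer.daemon = True  # dies with the interpreter
        timer.start()

    beat()

    def stop():
        state['stopped'] = True
    return stop

# stop = start_heartbeat(print, interval=1.0)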

View File

@ -1,30 +1,23 @@
from __future__ import division, unicode_literals
import concurrent.futures
import contextlib
import http.client
import json
import math
import os
import struct
import time
try:
import concurrent.futures
can_threaded_download = True
except ImportError:
can_threaded_download = False
import urllib.error
from .common import FileDownloader
from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import (
compat_os_name,
compat_urllib_error,
compat_struct_pack,
)
from ..compat import compat_os_name
from ..utils import (
DownloadError,
error_to_compat_str,
RetryManager,
encodeFilename,
sanitized_Request,
traverse_obj,
)
@ -32,9 +25,7 @@ class HttpQuietDownloader(HttpFD):
def to_screen(self, *args, **kwargs):
pass
def report_retry(self, err, count, retries):
super().to_screen(
f'[download] Got server HTTP error: {err}. Retrying (attempt {count} of {self.format_retries(retries)}) ...')
to_console_title = to_screen
class FragmentFD(FileDownloader):
@ -74,9 +65,9 @@ class FragmentFD(FileDownloader):
"""
def report_retry_fragment(self, err, frag_index, count, retries):
self.to_screen(
'\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...'
% (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
self.deprecation_warning('yt_dlp.downloader.FragmentFD.report_retry_fragment is deprecated. '
'Use yt_dlp.downloader.FileDownloader.report_retry instead')
return self.report_retry(err, count, retries, frag_index)
def report_skip_fragment(self, frag_index, err=None):
err = f' {err};' if err else ''
@ -130,16 +121,23 @@ class FragmentFD(FileDownloader):
'request_data': request_data,
'ctx_id': ctx.get('ctx_id'),
}
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
return False, None
return False
if fragment_info_dict.get('filetime'):
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
ctx['fragment_filename_sanitized'] = fragment_filename
return True, self._read_fragment(ctx)
return True
def _read_fragment(self, ctx):
down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
if not ctx.get('fragment_filename_sanitized'):
return None
try:
down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
except FileNotFoundError:
if ctx.get('live'):
return None
raise
ctx['fragment_filename_sanitized'] = frag_sanitized
frag_content = down.read()
down.close()
@ -153,7 +151,7 @@ class FragmentFD(FileDownloader):
if self.__do_ytdl_file(ctx):
self._write_ytdl_file(ctx)
if not self.params.get('keep_fragments', False):
os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
self.try_remove(encodeFilename(ctx['fragment_filename_sanitized']))
del ctx['fragment_filename_sanitized']
def _prepare_frag_download(self, ctx):
@ -166,21 +164,13 @@ class FragmentFD(FileDownloader):
total_frags_str += ' (not including %d ad)' % ad_frags
else:
total_frags_str = 'unknown (live)'
self.to_screen(
'[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}')
self.report_destination(ctx['filename'])
dl = HttpQuietDownloader(
self.ydl,
{
'continuedl': True,
'quiet': self.params.get('quiet'),
'noprogress': True,
'ratelimit': self.params.get('ratelimit'),
'retries': self.params.get('retries', 0),
'nopart': self.params.get('nopart', False),
'test': self.params.get('test', False),
}
)
dl = HttpQuietDownloader(self.ydl, {
**self.params,
'noprogress': True,
'test': False,
})
tmpfilename = self.temp_name(ctx['filename'])
open_mode = 'wb'
resume_len = 0
@ -253,6 +243,9 @@ class FragmentFD(FileDownloader):
if s['status'] not in ('downloading', 'finished'):
return
if not total_frags and ctx.get('fragment_count'):
state['fragment_count'] = ctx['fragment_count']
if ctx_id is not None and s.get('ctx_id') != ctx_id:
return
@ -299,21 +292,26 @@ class FragmentFD(FileDownloader):
if self.__do_ytdl_file(ctx):
ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
if os.path.isfile(ytdl_filename):
os.remove(ytdl_filename)
self.try_remove(ytdl_filename)
elapsed = time.time() - ctx['started']
if ctx['tmpfilename'] == '-':
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
to_file = ctx['tmpfilename'] != '-'
if to_file:
downloaded_bytes = os.path.getsize(encodeFilename(ctx['tmpfilename']))
else:
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
if not downloaded_bytes:
if to_file:
self.try_remove(ctx['tmpfilename'])
self.report_error('The downloaded file is empty')
return False
elif to_file:
self.try_rename(ctx['tmpfilename'], ctx['filename'])
if self.params.get('updatetime', True):
filetime = ctx.get('fragment_filetime')
if filetime:
try:
os.utime(ctx['filename'], (time.time(), filetime))
except Exception:
pass
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
filetime = ctx.get('fragment_filetime')
if self.params.get('updatetime', True) and filetime:
with contextlib.suppress(Exception):
os.utime(ctx['filename'], (time.time(), filetime))
self._hook_progress({
'downloaded_bytes': downloaded_bytes,
@ -325,6 +323,7 @@ class FragmentFD(FileDownloader):
'max_progress': ctx.get('max_progress'),
'progress_idx': ctx.get('progress_idx'),
}, info_dict)
return True
def _prepare_external_frag_download(self, ctx):
if 'live' not in ctx:
@ -336,8 +335,7 @@ class FragmentFD(FileDownloader):
total_frags_str += ' (not including %d ad)' % ad_frags
else:
total_frags_str = 'unknown (live)'
self.to_screen(
'[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}')
tmpfilename = self.temp_name(ctx['filename'])
@ -356,11 +354,14 @@ class FragmentFD(FileDownloader):
return _key_cache[url]
def decrypt_fragment(fragment, frag_content):
if frag_content is None:
return
decrypt_info = fragment.get('decrypt_info')
if not decrypt_info or decrypt_info['METHOD'] != 'AES-128':
return frag_content
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence'])
decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI'])
iv = decrypt_info.get('IV') or struct.pack('>8xq', fragment['media_sequence'])
decrypt_info['KEY'] = (decrypt_info.get('KEY')
or _get_key(traverse_obj(info_dict, ('hls_aes', 'uri')) or decrypt_info['URI']))
# Don't decrypt the content in tests, since the data is explicitly truncated and is not a valid block
# size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data was downloaded,
# not what it decrypts to.
@ -370,7 +371,7 @@ class FragmentFD(FileDownloader):
return decrypt_fragment
def download_and_append_fragments_multiple(self, *args, pack_func=None, finish_func=None):
def download_and_append_fragments_multiple(self, *args, **kwargs):
'''
@params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ...
all args must be either tuple or list
@ -378,63 +379,76 @@ class FragmentFD(FileDownloader):
interrupt_trigger = [True]
max_progress = len(args)
if max_progress == 1:
return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func)
return self.download_and_append_fragments(*args[0], **kwargs)
max_workers = self.params.get('concurrent_fragment_downloads', 1)
if max_progress > 1:
self._prepare_multiline_status(max_progress)
is_live = any(traverse_obj(args, (..., 2, 'is_live')))
def thread_func(idx, ctx, fragments, info_dict, tpe):
ctx['max_progress'] = max_progress
ctx['progress_idx'] = idx
return self.download_and_append_fragments(
ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func,
tpe=tpe, interrupt_trigger=interrupt_trigger)
ctx, fragments, info_dict, **kwargs, tpe=tpe, interrupt_trigger=interrupt_trigger)
class FTPE(concurrent.futures.ThreadPoolExecutor):
# has to stop this or it's going to wait on the worker thread itself
def __exit__(self, exc_type, exc_val, exc_tb):
pass
spins = []
if compat_os_name == 'nt':
self.report_warning('Ctrl+C does not work on Windows when used with parallel threads. '
'This is a known issue and patches are welcome')
def future_result(future):
while True:
try:
return future.result(0.1)
except KeyboardInterrupt:
raise
except concurrent.futures.TimeoutError:
continue
else:
def future_result(future):
return future.result()
def interrupt_trigger_iter(fg):
for f in fg:
if not interrupt_trigger[0]:
break
yield f
spins = []
for idx, (ctx, fragments, info_dict) in enumerate(args):
tpe = FTPE(math.ceil(max_workers / max_progress))
job = tpe.submit(thread_func, idx, ctx, fragments, info_dict, tpe)
job = tpe.submit(thread_func, idx, ctx, interrupt_trigger_iter(fragments), info_dict, tpe)
spins.append((tpe, job))
result = True
for tpe, job in spins:
try:
result = result and job.result()
result = result and future_result(job)
except KeyboardInterrupt:
interrupt_trigger[0] = False
finally:
tpe.shutdown(wait=True)
if not interrupt_trigger[0]:
if not interrupt_trigger[0] and not is_live:
raise KeyboardInterrupt()
# we expect the user wants to stop and DO WANT the preceding postprocessors to run;
# so returning an intermediate result here instead of KeyboardInterrupt on live
return result
def download_and_append_fragments(
self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None,
tpe=None, interrupt_trigger=None):
if not interrupt_trigger:
interrupt_trigger = (True, )
self, ctx, fragments, info_dict, *, is_fatal=(lambda idx: False),
pack_func=(lambda content, idx: content), finish_func=None,
tpe=None, interrupt_trigger=(True, )):
fragment_retries = self.params.get('fragment_retries', 0)
is_fatal = (
((lambda _: False) if info_dict.get('is_live') else (lambda idx: idx == 0))
if self.params.get('skip_unavailable_fragments', True) else (lambda _: True))
if not pack_func:
pack_func = lambda frag_content, _: frag_content
if not self.params.get('skip_unavailable_fragments', True):
is_fatal = lambda _: True
def download_fragment(fragment, ctx):
if not interrupt_trigger[0]:
return
frag_index = ctx['fragment_index'] = fragment['frag_index']
ctx['last_error'] = None
if not interrupt_trigger[0]:
return False, frag_index
headers = info_dict.get('http_headers', {}).copy()
byte_range = fragment.get('byte_range')
if byte_range:
@ -442,82 +456,78 @@ class FragmentFD(FileDownloader):
# Never skip the first fragment
fatal = is_fatal(fragment.get('index') or (frag_index - 1))
count, frag_content = 0, None
while count <= fragment_retries:
try:
success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers)
if not success:
return False, frag_index
break
except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err:
# Unavailable (possibly temporary) fragments may be served.
# First we try to retry then either skip or abort.
# See https://github.com/ytdl-org/youtube-dl/issues/10165,
# https://github.com/ytdl-org/youtube-dl/issues/10448).
count += 1
ctx['last_error'] = err
if count <= fragment_retries:
self.report_retry_fragment(err, frag_index, count, fragment_retries)
except DownloadError:
# Don't retry fragment if error occurred during HTTP downloading
# itself since it has own retry settings
if not fatal:
break
raise
if count > fragment_retries:
if not fatal:
return False, frag_index
ctx['dest_stream'].close()
self.report_error('Giving up after %s fragment retries' % fragment_retries)
return False, frag_index
return frag_content, frag_index
def error_callback(err, count, retries):
if fatal and count > retries:
ctx['dest_stream'].close()
self.report_retry(err, count, retries, frag_index, fatal)
ctx['last_error'] = err
for retry in RetryManager(self.params.get('fragment_retries'), error_callback):
try:
ctx['fragment_count'] = fragment.get('fragment_count')
if not self._download_fragment(ctx, fragment['url'], info_dict, headers):
return
except (urllib.error.HTTPError, http.client.IncompleteRead) as err:
retry.error = err
continue
except DownloadError: # has own retry settings
if fatal:
raise
def append_fragment(frag_content, frag_index, ctx):
if not frag_content:
if not is_fatal(frag_index - 1):
self.report_skip_fragment(frag_index, 'fragment not found')
return True
else:
ctx['dest_stream'].close()
self.report_error(
'fragment %s not found, unable to continue' % frag_index)
return False
self._append_fragment(ctx, pack_func(frag_content, frag_index))
if frag_content:
self._append_fragment(ctx, pack_func(frag_content, frag_index))
elif not is_fatal(frag_index - 1):
self.report_skip_fragment(frag_index, 'fragment not found')
else:
ctx['dest_stream'].close()
self.report_error(f'fragment {frag_index} not found, unable to continue')
return False
return True
decrypt_fragment = self.decrypter(info_dict)
max_workers = math.ceil(
self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1))
if can_threaded_download and max_workers > 1:
if max_workers > 1:
def _download_fragment(fragment):
ctx_copy = ctx.copy()
frag_content, frag_index = download_fragment(fragment, ctx_copy)
return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized')
download_fragment(fragment, ctx_copy)
return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')
self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments):
if not interrupt_trigger[0]:
break
ctx['fragment_filename_sanitized'] = frag_filename
ctx['fragment_index'] = frag_index
result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
if not result:
return False
try:
for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
ctx.update({
'fragment_filename_sanitized': frag_filename,
'fragment_index': frag_index,
})
if not append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx):
return False
except KeyboardInterrupt:
self._finish_multiline_status()
self.report_error(
'Interrupted by user. Waiting for all threads to shutdown...', is_error=False, tb=False)
pool.shutdown(wait=False)
raise
else:
for fragment in fragments:
if not interrupt_trigger[0]:
break
frag_content, frag_index = download_fragment(fragment, ctx)
result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
try:
download_fragment(fragment, ctx)
result = append_fragment(
decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx)
except KeyboardInterrupt:
if info_dict.get('is_live'):
break
raise
if not result:
return False
if finish_func is not None:
ctx['dest_stream'].write(finish_func())
ctx['dest_stream'].flush()
self._finish_frag_download(ctx, info_dict)
return True
return self._finish_frag_download(ctx, info_dict)
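
Most of the rewritten retry handling in this file (and in external.py and ism.py above) follows the same RetryManager contract: iterate attempts, record the failure on retry.error, and let the manager report and decide whether to continue. A simplified stand-in, not yt-dlp's actual class, just to show the shape:

class SimpleRetryManager:
    # stand-in for yt_dlp.utils.RetryManager: yields once per attempt,
    # stops as soon as an attempt leaves .error unset
    def __init__(self, retries, error_callback):
        self.retries = retries or 0
        self.error_callback = error_callback
        self.error = None

    def __iter__(self):
        for attempt in range(self.retries + 1):
            self.error = None
            yield self
            if self.error is None:
                return
            if attempt < self.retries:
                self.error_callback(self.error, attempt + 1, self.retries)

def flaky():
    raise OSError('transient failure')

manager = SimpleRetryManager(3, lambda err, count, retries: print(
    f'Got error: {err}. Retrying (attempt {count} of {retries})'))
for retry in manager:
    try:
        flaky()
    except OSError as err:
        retry.error = err
        continue
if manager.error:
    print('giving up')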

View File

@ -1,23 +1,21 @@
from __future__ import unicode_literals
import re
import io
import binascii
import io
import re
import urllib.parse
from ..downloader import get_suitable_downloader
from .fragment import FragmentFD
from . import get_suitable_downloader
from .external import FFmpegFD
from ..compat import (
compat_pycrypto_AES,
compat_urlparse,
)
from ..utils import (
parse_m3u8_attributes,
update_url_query,
bug_reports_message,
)
from .fragment import FragmentFD
from .. import webvtt
from ..dependencies import Cryptodome
from ..utils import (
bug_reports_message,
parse_m3u8_attributes,
remove_start,
traverse_obj,
update_url_query,
urljoin,
)
class HlsFD(FragmentFD):
@ -70,12 +68,18 @@ class HlsFD(FragmentFD):
s = urlh.read().decode('utf-8', 'ignore')
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
if can_download and not compat_pycrypto_AES and '#EXT-X-KEY:METHOD=AES-128' in s:
if FFmpegFD.available():
if can_download:
has_ffmpeg = FFmpegFD.available()
no_crypto = not Cryptodome and '#EXT-X-KEY:METHOD=AES-128' in s
if no_crypto and has_ffmpeg:
can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available'
else:
elif no_crypto:
message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; '
'Decryption will be performed natively, but will be extremely slow')
elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and '
message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
if not can_download:
has_drm = re.search('|'.join([
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
@ -102,8 +106,7 @@ class HlsFD(FragmentFD):
if real_downloader and not real_downloader.supports_manifest(s):
real_downloader = None
if real_downloader:
self.to_screen(
'[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
def is_ad_fragment_start(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
@ -150,10 +153,17 @@ class HlsFD(FragmentFD):
extra_query = None
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
if extra_param_to_segment_url:
extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
i = 0
media_sequence = 0
decrypt_info = {'METHOD': 'NONE'}
external_aes_key = traverse_obj(info_dict, ('hls_aes', 'key'))
if external_aes_key:
external_aes_key = binascii.unhexlify(remove_start(external_aes_key, '0x'))
assert len(external_aes_key) in (16, 24, 32), 'Invalid length for HLS AES-128 key'
external_aes_iv = traverse_obj(info_dict, ('hls_aes', 'iv'))
if external_aes_iv:
external_aes_iv = binascii.unhexlify(remove_start(external_aes_iv, '0x').zfill(32))
byte_range = {}
discontinuity_count = 0
frag_index = 0
@ -169,10 +179,7 @@ class HlsFD(FragmentFD):
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
frag_url = (
line
if re.match(r'^https?://', line)
else compat_urlparse.urljoin(man_url, line))
frag_url = urljoin(man_url, line)
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
@ -194,13 +201,18 @@ class HlsFD(FragmentFD):
return False
frag_index += 1
map_info = parse_m3u8_attributes(line[11:])
frag_url = (
map_info.get('URI')
if re.match(r'^https?://', map_info.get('URI'))
else compat_urlparse.urljoin(man_url, map_info.get('URI')))
frag_url = urljoin(man_url, map_info.get('URI'))
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
if map_info.get('BYTERANGE'):
splitted_byte_range = map_info.get('BYTERANGE').split('@')
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
byte_range = {
'start': sub_range_start,
'end': sub_range_start + int(splitted_byte_range[0]),
}
fragments.append({
'frag_index': frag_index,
'url': frag_url,
@ -210,27 +222,22 @@ class HlsFD(FragmentFD):
})
media_sequence += 1
if map_info.get('BYTERANGE'):
splitted_byte_range = map_info.get('BYTERANGE').split('@')
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
byte_range = {
'start': sub_range_start,
'end': sub_range_start + int(splitted_byte_range[0]),
}
elif line.startswith('#EXT-X-KEY'):
decrypt_url = decrypt_info.get('URI')
decrypt_info = parse_m3u8_attributes(line[11:])
if decrypt_info['METHOD'] == 'AES-128':
if 'IV' in decrypt_info:
if external_aes_iv:
decrypt_info['IV'] = external_aes_iv
elif 'IV' in decrypt_info:
decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
if not re.match(r'^https?://', decrypt_info['URI']):
decrypt_info['URI'] = compat_urlparse.urljoin(
man_url, decrypt_info['URI'])
if extra_query:
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
if decrypt_url != decrypt_info['URI']:
decrypt_info['KEY'] = None
if external_aes_key:
decrypt_info['KEY'] = external_aes_key
else:
decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI'])
if extra_query:
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
if decrypt_url != decrypt_info['URI']:
decrypt_info['KEY'] = None
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
media_sequence = int(line[22:])
@ -339,7 +346,7 @@ class HlsFD(FragmentFD):
continue
block.write_into(output)
return output.getvalue().encode('utf-8')
return output.getvalue().encode()
def fin_fragments():
dedup_window = extra_state.get('webvtt_dedup_window')
@ -350,7 +357,7 @@ class HlsFD(FragmentFD):
for cue in dedup_window:
webvtt.CueBlock.from_json(cue).write_into(output)
return output.getvalue().encode('utf-8')
return output.getvalue().encode()
self.download_and_append_fragments(
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
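
The hls_aes support above accepts externally supplied AES-128 keys and IVs as hex strings, with or without a 0x prefix, zero-padding IVs to 16 bytes. A small sketch of that decode step (function and parameter names are illustrative):

import binascii

def decode_hls_hex(value, pad_to_hex_chars=None):
    # strip an optional 0x prefix; IVs are zero-padded to 32 hex chars (16 bytes)
    if value.lower().startswith('0x'):
        value = value[2:]
    if pad_to_hex_chars:
        value = value.zfill(pad_to_hex_chars)
    return binascii.unhexlify(value)

key = decode_hls_hex('0x00112233445566778899aabbccddeeff')
assert len(key) in (16, 24, 32)
assert len(decode_hls_hex('0x1', pad_to_hex_chars=32)) == 16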

View File

@ -1,26 +1,32 @@
from __future__ import unicode_literals
import errno
import http.client
import os
import socket
import time
import random
import re
import socket
import ssl
import time
import urllib.error
from .common import FileDownloader
from ..compat import (
compat_str,
compat_urllib_error,
)
from ..utils import (
ContentTooShortError,
encodeFilename,
int_or_none,
sanitized_Request,
RetryManager,
ThrottledDownload,
write_xattr,
XAttrMetadataError,
XAttrUnavailableError,
encodeFilename,
int_or_none,
parse_http_range,
sanitized_Request,
try_call,
write_xattr,
)
RESPONSE_READ_EXCEPTIONS = (
TimeoutError,
socket.timeout, # compat: py < 3.10
ConnectionError,
ssl.SSLError,
http.client.HTTPException
)
@ -53,11 +59,11 @@ class HttpFD(FileDownloader):
ctx.open_mode = 'wb'
ctx.resume_len = 0
ctx.data_len = None
ctx.block_size = self.params.get('buffersize', 1024)
ctx.start_time = time.time()
ctx.chunk_size = None
throttle_start = None
# parse given Range
req_start, req_end, _ = parse_http_range(headers.get('Range'))
if self.params.get('continuedl', True):
# Establish possible resume length
@ -67,9 +73,6 @@ class HttpFD(FileDownloader):
ctx.is_resume = ctx.resume_len > 0
count = 0
retries = self.params.get('retries', 0)
class SucceedDownload(Exception):
pass
@ -80,43 +83,50 @@ class HttpFD(FileDownloader):
class NextFragment(Exception):
pass
def set_range(req, start, end):
range_header = 'bytes=%d-' % start
if end:
range_header += compat_str(end)
req.add_header('Range', range_header)
def establish_connection():
ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
if not is_test and chunk_size else chunk_size)
if ctx.resume_len > 0:
range_start = ctx.resume_len
if req_start is not None:
# offset the beginning of Range to be within request
range_start += req_start
if ctx.is_resume:
self.report_resuming_byte(ctx.resume_len)
ctx.open_mode = 'ab'
elif req_start is not None:
range_start = req_start
elif ctx.chunk_size > 0:
range_start = 0
else:
range_start = None
ctx.is_resume = False
range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
range_end = ctx.data_len - 1
has_range = range_start is not None
ctx.has_range = has_range
if ctx.chunk_size:
chunk_aware_end = range_start + ctx.chunk_size - 1
# we're not allowed to download outside Range
range_end = chunk_aware_end if req_end is None else min(chunk_aware_end, req_end)
elif req_end is not None:
# there's no need for chunked downloads, so download until the end of Range
range_end = req_end
else:
range_end = None
if try_call(lambda: range_start > range_end):
ctx.resume_len = 0
ctx.open_mode = 'wb'
raise RetryDownload(Exception(f'Conflicting range. (start={range_start} > end={range_end})'))
if try_call(lambda: range_end >= ctx.content_len):
range_end = ctx.content_len - 1
request = sanitized_Request(url, request_data, headers)
has_range = range_start is not None
if has_range:
set_range(request, range_start, range_end)
request.add_header('Range', f'bytes={int(range_start)}-{int_or_none(range_end) or ""}')
# Establish connection
try:
try:
ctx.data = self.ydl.urlopen(request)
except (compat_urllib_error.URLError, ) as err:
# reason may not be available, e.g. for urllib2.HTTPError on python 2.6
reason = getattr(err, 'reason', None)
if isinstance(reason, socket.timeout):
raise RetryDownload(err)
raise err
ctx.data = self.ydl.urlopen(request)
# When trying to resume, Content-Range HTTP header of response has to be checked
# to match the value of requested Range HTTP header. This is due to a webservers
# that don't support resuming and serve a whole file with no Content-Range
@ -124,32 +134,27 @@ class HttpFD(FileDownloader):
# https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
if has_range:
content_range = ctx.data.headers.get('Content-Range')
if content_range:
content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
# Content-Range is present and matches requested Range, resume is possible
if content_range_m:
if range_start == int(content_range_m.group(1)):
content_range_end = int_or_none(content_range_m.group(2))
content_len = int_or_none(content_range_m.group(3))
accept_content_len = (
# Non-chunked download
not ctx.chunk_size
# Chunked download and requested piece or
# its part is promised to be served
or content_range_end == range_end
or content_len < range_end)
if accept_content_len:
ctx.data_len = content_len
return
content_range_start, content_range_end, content_len = parse_http_range(content_range)
# Content-Range is present and matches requested Range, resume is possible
if range_start == content_range_start and (
# Non-chunked download
not ctx.chunk_size
# Chunked download and requested piece or
# its part is promised to be served
or content_range_end == range_end
or content_len < range_end):
ctx.content_len = content_len
if content_len or req_end:
ctx.data_len = min(content_len or req_end, req_end or content_len) - (req_start or 0)
return
# Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file
# and performing an entire redownload
self.report_unable_to_resume()
ctx.resume_len = 0
ctx.open_mode = 'wb'
ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
return
except (compat_urllib_error.HTTPError, ) as err:
ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
except urllib.error.HTTPError as err:
if err.code == 416:
# Unable to resume (requested range not satisfiable)
try:
@ -157,7 +162,7 @@ class HttpFD(FileDownloader):
ctx.data = self.ydl.urlopen(
sanitized_Request(url, request_data, headers))
content_length = ctx.data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
except urllib.error.HTTPError as err:
if err.code < 500 or err.code >= 600:
raise
else:
@ -190,16 +195,22 @@ class HttpFD(FileDownloader):
# Unexpected HTTP error
raise
raise RetryDownload(err)
except socket.timeout as err:
except urllib.error.URLError as err:
if isinstance(err.reason, ssl.CertificateError):
raise
raise RetryDownload(err)
except socket.error as err:
if err.errno in (errno.ECONNRESET, errno.ETIMEDOUT):
# Connection reset is no problem, just retry
raise RetryDownload(err)
raise
# In urllib.request.AbstractHTTPHandler, the response is partially read on request.
# Any errors that occur during this will not be wrapped by URLError
except RESPONSE_READ_EXCEPTIONS as err:
raise RetryDownload(err)
def close_stream():
if ctx.stream is not None:
if not ctx.tmpfilename == '-':
ctx.stream.close()
ctx.stream = None
def download():
nonlocal throttle_start
data_len = ctx.data.info().get('Content-length', None)
# Range HTTP header may be ignored/unsupported by a webserver
@ -215,10 +226,12 @@ class HttpFD(FileDownloader):
min_data_len = self.params.get('min_filesize')
max_data_len = self.params.get('max_filesize')
if min_data_len is not None and data_len < min_data_len:
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
self.to_screen(
f'\r[download] File is smaller than min-filesize ({data_len} bytes < {min_data_len} bytes). Aborting.')
return False
if max_data_len is not None and data_len > max_data_len:
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
self.to_screen(
f'\r[download] File is larger than max-filesize ({data_len} bytes > {max_data_len} bytes). Aborting.')
return False
byte_counter = 0 + ctx.resume_len
@ -230,28 +243,17 @@ class HttpFD(FileDownloader):
before = start # start measuring
def retry(e):
to_stdout = ctx.tmpfilename == '-'
if ctx.stream is not None:
if not to_stdout:
ctx.stream.close()
ctx.stream = None
ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
close_stream()
ctx.resume_len = (byte_counter if ctx.tmpfilename == '-'
else os.path.getsize(encodeFilename(ctx.tmpfilename)))
raise RetryDownload(e)
while True:
try:
# Download and write
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
# socket.timeout is a subclass of socket.error but may not have
# errno set
except socket.timeout as e:
retry(e)
except socket.error as e:
# SSLError on python 2 (inherits socket.error) may have
# no errno set but this error message
if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out':
retry(e)
raise
except RESPONSE_READ_EXCEPTIONS as err:
retry(err)
byte_counter += len(data_block)
@ -267,19 +269,19 @@ class HttpFD(FileDownloader):
assert ctx.stream is not None
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
self.report_destination(ctx.filename)
except (OSError, IOError) as err:
except OSError as err:
self.report_error('unable to open for writing: %s' % str(err))
return False
if self.params.get('xattr_set_filesize', False) and data_len is not None:
try:
write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode())
except (XAttrUnavailableError, XAttrMetadataError) as err:
self.report_error('unable to set filesize xattr: %s' % str(err))
try:
ctx.stream.write(data_block)
except (IOError, OSError) as err:
except OSError as err:
self.to_stderr('\n')
self.report_error('unable to write data: %s' % str(err))
return False
@ -322,16 +324,16 @@ class HttpFD(FileDownloader):
if speed and speed < (self.params.get('throttledratelimit') or 0):
# The speed must stay below the limit for 3 seconds
# This prevents raising error when the speed temporarily goes down
if throttle_start is None:
throttle_start = now
elif now - throttle_start > 3:
if ctx.throttle_start is None:
ctx.throttle_start = now
elif now - ctx.throttle_start > 3:
if ctx.stream is not None and ctx.tmpfilename != '-':
ctx.stream.close()
raise ThrottledDownload()
elif speed:
throttle_start = None
ctx.throttle_start = None
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
ctx.resume_len = byte_counter
# ctx.block_size = block_size
raise NextFragment()
@ -345,9 +347,7 @@ class HttpFD(FileDownloader):
if data_len is not None and byte_counter != data_len:
err = ContentTooShortError(byte_counter, int(data_len))
if count <= retries:
retry(err)
raise err
retry(err)
self.try_rename(ctx.tmpfilename, ctx.filename)
@ -366,21 +366,20 @@ class HttpFD(FileDownloader):
return True
while count <= retries:
for retry in RetryManager(self.params.get('retries'), self.report_retry):
try:
establish_connection()
return download()
except RetryDownload as e:
count += 1
if count <= retries:
self.report_retry(e.source_error, count, retries)
else:
self.to_screen(f'[download] Got server HTTP error: {e.source_error}')
except RetryDownload as err:
retry.error = err.source_error
continue
except NextFragment:
retry.error = None
retry.attempt -= 1
continue
except SucceedDownload:
return True
self.report_error('giving up after %s retries' % retries)
except: # noqa: E722
close_stream()
raise
return False
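
The rewritten establish_connection honors a caller-supplied Range header and clips chunked downloads to it. A simplified stand-in for the parse_http_range helper used above, together with the re-emitted request header:

import re

def parse_http_range(range_header):
    # returns (start, end, total); handles both 'bytes=0-499' and 'bytes 0-499/1234'
    crg = re.fullmatch(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range_header or '')
    if not crg:
        return None, None, None
    return tuple(int(g) if g is not None else None for g in crg.groups())

start, end, total = parse_http_range('bytes=500-999')
assert (start, end, total) == (500, 999, None)

# re-emit a request Range, leaving the end open when unknown
range_header = f'bytes={start}-{end if end is not None else ""}'
assert range_header == 'bytes=500-999'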

View File

@ -1,27 +1,23 @@
from __future__ import unicode_literals
import time
import binascii
import io
import struct
import time
import urllib.error
from .fragment import FragmentFD
from ..compat import (
compat_Struct,
compat_urllib_error,
)
from ..utils import RetryManager
u8 = struct.Struct('>B')
u88 = struct.Struct('>Bx')
u16 = struct.Struct('>H')
u1616 = struct.Struct('>Hxx')
u32 = struct.Struct('>I')
u64 = struct.Struct('>Q')
u8 = compat_Struct('>B')
u88 = compat_Struct('>Bx')
u16 = compat_Struct('>H')
u1616 = compat_Struct('>Hxx')
u32 = compat_Struct('>I')
u64 = compat_Struct('>Q')
s88 = compat_Struct('>bx')
s16 = compat_Struct('>h')
s1616 = compat_Struct('>hxx')
s32 = compat_Struct('>i')
s88 = struct.Struct('>bx')
s16 = struct.Struct('>h')
s1616 = struct.Struct('>hxx')
s32 = struct.Struct('>i')
unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
@ -142,6 +138,8 @@ def write_piff_header(stream, params):
if fourcc == 'AACL':
sample_entry_box = box(b'mp4a', sample_entry_payload)
if fourcc == 'EC-3':
sample_entry_box = box(b'ec-3', sample_entry_payload)
elif stream_type == 'video':
sample_entry_payload += u16.pack(0) # pre defined
sample_entry_payload += u16.pack(0) # reserved
@ -156,7 +154,7 @@ def write_piff_header(stream, params):
sample_entry_payload += u16.pack(0x18) # depth
sample_entry_payload += s16.pack(-1) # pre defined
codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
codec_private_data = binascii.unhexlify(params['codec_private_data'].encode())
if fourcc in ('H264', 'AVC1'):
sps, pps = codec_private_data.split(u32.pack(1))[1:]
avcc_payload = u8.pack(1) # configuration version
@ -235,8 +233,6 @@ class IsmFD(FragmentFD):
Download segments in a ISM manifest
"""
FD_NAME = 'ism'
def real_download(self, filename, info_dict):
segments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments']
@ -252,7 +248,6 @@ class IsmFD(FragmentFD):
'ism_track_written': False,
})
fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
frag_index = 0
@ -260,30 +255,29 @@ class IsmFD(FragmentFD):
             frag_index += 1
             if frag_index <= ctx['fragment_index']:
                 continue
-            count = 0
-            while count <= fragment_retries:
+            retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry,
+                                         frag_index=frag_index, fatal=not skip_unavailable_fragments)
+            for retry in retry_manager:
                 try:
-                    success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
+                    success = self._download_fragment(ctx, segment['url'], info_dict)
                     if not success:
                         return False
+                    frag_content = self._read_fragment(ctx)
                     if not extra_state['ism_track_written']:
                         tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
                         info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
                         write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
                         extra_state['ism_track_written'] = True
                     self._append_fragment(ctx, frag_content)
-                    break
-                except compat_urllib_error.HTTPError as err:
-                    count += 1
-                    if count <= fragment_retries:
-                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
-            if count > fragment_retries:
-                if skip_unavailable_fragments:
-                    self.report_skip_fragment(frag_index)
+                except urllib.error.HTTPError as err:
+                    retry.error = err
                     continue
-                self.report_error('giving up after %s fragment retries' % fragment_retries)
-                return False
-        self._finish_frag_download(ctx, info_dict)
+            if retry_manager.error:
+                if not skip_unavailable_fragments:
+                    return False
+                self.report_skip_fragment(frag_index)
-        return True
+        return self._finish_frag_download(ctx, info_dict)
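
For orientation, every ISO-BMFF box this downloader reads or writes is a 4-byte big-endian size (which includes the 8-byte header), a 4-byte type, and the payload. A hedged sketch of that layout; box mirrors the helper used by write_piff_header, while iter_boxes is a hypothetical reader added only for illustration:

import struct

def box(box_type, payload):
    # size (incl. the 8-byte header) + fourcc + payload
    return struct.pack('>I', 8 + len(payload)) + box_type + payload

def iter_boxes(data):
    # yield (fourcc, payload) for each top-level box in a buffer
    pos = 0
    while pos + 8 <= len(data):
        size, = struct.unpack('>I', data[pos:pos + 4])
        yield data[pos + 4:pos + 8], data[pos + 8:pos + size]
        pos += size

blob = box(b'ftyp', b'isml') + box(b'mdat', b'\x00' * 4)
assert [fourcc for fourcc, _ in iter_boxes(blob)] == [b'ftyp', b'mdat']
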

View File

@ -1,24 +1,15 @@
-# coding: utf-8
-from __future__ import unicode_literals
 import io
 import quopri
 import re
 import uuid
 from .fragment import FragmentFD
-from ..utils import (
-    escapeHTML,
-    formatSeconds,
-    srt_subtitles_timecode,
-    urljoin,
-)
+from ..compat import imghdr
+from ..utils import escapeHTML, formatSeconds, srt_subtitles_timecode, urljoin
from ..version import __version__ as YT_DLP_VERSION
class MhtmlFD(FragmentFD):
FD_NAME = 'mhtml'
_STYLESHEET = """\
html, body {
margin: 0;
@ -62,7 +53,7 @@ body > figure > img {
def _escape_mime(s):
return '=?utf-8?Q?' + (b''.join(
bytes((b,)) if b >= 0x20 else b'=%02X' % b
-            for b in quopri.encodestring(s.encode('utf-8'), header=True)
+            for b in quopri.encodestring(s.encode(), header=True)
)).decode('us-ascii') + '?='
def _gen_cid(self, i, fragment, frag_boundary):
@ -159,25 +150,22 @@ body > figure > img {
length=len(stub),
title=self._escape_mime(title),
stub=stub
-        ).encode('utf-8'))
+        ).encode())
extra_state['header_written'] = True
         for i, fragment in enumerate(fragments):
             if (i + 1) <= ctx['fragment_index']:
                 continue
-            fragment_url = urljoin(fragment_base_url, fragment['path'])
-            success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
+            fragment_url = fragment.get('url')
+            if not fragment_url:
+                assert fragment_base_url
+                fragment_url = urljoin(fragment_base_url, fragment['path'])
+            success = self._download_fragment(ctx, fragment_url, info_dict)
             if not success:
                 continue
-            mime_type = b'image/jpeg'
-            if frag_content.startswith(b'\x89PNG\r\n\x1a\n'):
-                mime_type = b'image/png'
-            if frag_content.startswith((b'GIF87a', b'GIF89a')):
-                mime_type = b'image/gif'
-            if frag_content.startswith(b'RIFF') and frag_content[8:12] == 'WEBP':
-                mime_type = b'image/webp'
+            frag_content = self._read_fragment(ctx)
frag_header = io.BytesIO()
frag_header.write(
@ -185,7 +173,7 @@ body > figure > img {
frag_header.write(
b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii'))
frag_header.write(
-                b'Content-type: %b\r\n' % mime_type)
+                b'Content-type: %b\r\n' % f'image/{imghdr.what(h=frag_content) or "jpeg"}'.encode())
frag_header.write(
b'Content-length: %u\r\n' % len(frag_content))
frag_header.write(
@ -198,5 +186,4 @@ body > figure > img {
ctx['dest_stream'].write(
b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii'))
-        self._finish_frag_download(ctx, info_dict)
-        return True
+        return self._finish_frag_download(ctx, info_dict)
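
The writer now derives Content-type by sniffing the fragment bytes instead of the hand-rolled magic-number checks it replaces (note the old RIFF branch compared a bytes slice against the str 'WEBP', so WebP was never actually detected). Roughly how the stdlib sniffer behaves; imghdr is deprecated since Python 3.11 and removed in 3.13:

import imghdr

png_bytes = b'\x89PNG\r\n\x1a\n' + b'\x00' * 64
kind = imghdr.what(None, h=png_bytes)   # sniff from bytes; no file needed
mime = f'image/{kind or "jpeg"}'        # same JPEG fallback as above
assert mime == 'image/png'
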

View File

@ -1,22 +1,17 @@
-# coding: utf-8
-from __future__ import unicode_literals
 import threading
+from . import get_suitable_downloader
 from .common import FileDownloader
-from ..downloader import get_suitable_downloader
-from ..extractor.niconico import NiconicoIE
 from ..utils import sanitized_Request
 class NiconicoDmcFD(FileDownloader):
     """ Downloading niconico douga from DMC with heartbeat """
     FD_NAME = 'niconico_dmc'
     def real_download(self, filename, info_dict):
-        self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
+        from ..extractor.niconico import NiconicoIE
+        self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
ie = NiconicoIE(self.ydl)
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
@ -54,4 +49,4 @@ class NiconicoDmcFD(FileDownloader):
with heartbeat_lock:
timer[0].cancel()
download_complete = True
-        return success
\ No newline at end of file
+        return success
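
Moving the NiconicoIE import from module level into real_download defers it to call time, the usual way to break an import cycle between the downloader and extractor packages. A runnable toy version of the idea (the in-memory module is purely illustrative):

import sys
import types

ex = types.ModuleType('ex')     # stands in for the extractor module
sys.modules['ex'] = ex

def real_download():
    from ex import extract      # resolved when called, not when defined
    return extract()

ex.extract = lambda: 'ok'       # attribute may appear after the def, since
assert real_download() == 'ok'  # the import only runs inside the call
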

View File

@ -1,18 +1,15 @@
-from __future__ import unicode_literals
 import os
 import re
 import subprocess
 import time
 from .common import FileDownloader
-from ..compat import compat_str
 from ..utils import (
-    check_executable,
-    encodeFilename,
-    encodeArgument,
-    get_exe_version,
     Popen,
+    check_executable,
+    encodeArgument,
+    encodeFilename,
+    get_exe_version,
 )
@ -94,8 +91,7 @@ class RtmpFD(FileDownloader):
self.to_screen('')
return proc.wait()
except BaseException: # Including KeyboardInterrupt
-            proc.kill()
-            proc.wait()
+            proc.kill(timeout=None)
raise
url = info_dict['url']
@ -146,7 +142,7 @@ class RtmpFD(FileDownloader):
if isinstance(conn, list):
for entry in conn:
basic_args += ['--conn', entry]
-        elif isinstance(conn, compat_str):
+        elif isinstance(conn, str):
basic_args += ['--conn', conn]
if protocol is not None:
basic_args += ['--protocol', protocol]

View File

@ -1,13 +1,8 @@
-from __future__ import unicode_literals
 import os
 import subprocess
 from .common import FileDownloader
-from ..utils import (
-    check_executable,
-    encodeFilename,
-)
+from ..utils import check_executable, encodeFilename
class RtspFD(FileDownloader):
@ -32,7 +27,7 @@ class RtspFD(FileDownloader):
retval = subprocess.call(args)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
+            self.to_screen(f'\r[{args[0]}] {fsize} bytes')
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,

View File

@ -1,19 +1,12 @@
+import asyncio
+import contextlib
 import os
 import signal
-import asyncio
 import threading
-try:
-    import websockets
-except (ImportError, SyntaxError):
-    # websockets 3.10 on python 3.6 causes SyntaxError
-    # See https://github.com/yt-dlp/yt-dlp/issues/2633
-    has_websockets = False
-else:
-    has_websockets = True
 from .common import FileDownloader
 from .external import FFmpegFD
+from ..dependencies import websockets
class FFmpegSinkFD(FileDownloader):
@ -26,14 +19,12 @@ class FFmpegSinkFD(FileDownloader):
         async def call_conn(proc, stdin):
             try:
                 await self.real_connection(stdin, info_dict)
-            except (BrokenPipeError, OSError):
+            except OSError:
                 pass
             finally:
-                try:
+                with contextlib.suppress(OSError):
                     stdin.flush()
                     stdin.close()
-                except OSError:
-                    pass
                 os.kill(os.getpid(), signal.SIGINT)
class FFmpegStdinFD(FFmpegFD):
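
The cleanup above is behavior-preserving: contextlib.suppress(OSError) is exactly the try/except OSError: pass it replaces, for exceptions raised anywhere inside the with block:

import contextlib

ran = []
with contextlib.suppress(OSError):
    ran.append('flush')
    raise OSError('broken pipe')
    ran.append('never reached')
assert ran == ['flush']
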

View File

@ -1,29 +1,27 @@
-from __future__ import division, unicode_literals
 import json
 import time
+import urllib.error
 from .fragment import FragmentFD
-from ..compat import compat_urllib_error
 from ..utils import (
-    try_get,
-    RegexNotFoundError,
+    RetryManager,
     dict_get,
     int_or_none,
+    RegexNotFoundError,
+    try_get,
 )
-from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
class YoutubeLiveChatFD(FragmentFD):
""" Downloads YouTube live chats fragment by fragment """
FD_NAME = 'youtube_live_chat'
def real_download(self, filename, info_dict):
video_id = info_dict['video_id']
self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
self.report_warning('Live chat download runs until the livestream ends. '
'If you wish to download the video simultaneously, run a separate yt-dlp instance')
fragment_retries = self.params.get('fragment_retries', 0)
test = self.params.get('test', False)
ctx = {
@ -32,7 +30,9 @@ class YoutubeLiveChatFD(FragmentFD):
'total_frags': None,
}
-        ie = YT_BaseIE(self.ydl)
+        from ..extractor.youtube import YoutubeBaseInfoExtractor
+        ie = YoutubeBaseInfoExtractor(self.ydl)
start_time = int(time.time() * 1000)
@ -51,7 +51,7 @@ class YoutubeLiveChatFD(FragmentFD):
replay_chat_item_action = action['replayChatItemAction']
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
processed_fragment.extend(
-                    json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+                    json.dumps(action, ensure_ascii=False).encode() + b'\n')
if offset is not None:
continuation = try_get(
live_chat_continuation,
@ -93,7 +93,7 @@ class YoutubeLiveChatFD(FragmentFD):
'isLive': True,
}
processed_fragment.extend(
-                    json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
+                    json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
continuation_data_getters = [
lambda x: x['continuations'][0]['invalidationContinuationData'],
lambda x: x['continuations'][0]['timedContinuationData'],
@ -109,12 +109,12 @@ class YoutubeLiveChatFD(FragmentFD):
return continuation_id, live_offset, click_tracking_params
         def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
-            count = 0
-            while count <= fragment_retries:
+            for retry in RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index):
                 try:
-                    success, raw_fragment = dl_fragment(url, request_data, headers)
+                    success = dl_fragment(url, request_data, headers)
                     if not success:
                         return False, None, None, None
+                    raw_fragment = self._read_fragment(ctx)
                     try:
                         data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                     except RegexNotFoundError:
@ -124,27 +124,22 @@ class YoutubeLiveChatFD(FragmentFD):
live_chat_continuation = try_get(
data,
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
-                    if info_dict['protocol'] == 'youtube_live_chat_replay':
-                        if frag_index == 1:
-                            continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
-                        else:
-                            continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
-                    elif info_dict['protocol'] == 'youtube_live_chat':
-                        continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
-                    return True, continuation_id, offset, click_tracking_params
-                except compat_urllib_error.HTTPError as err:
-                    count += 1
-                    if count <= fragment_retries:
-                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
-            if count > fragment_retries:
-                self.report_error('giving up after %s fragment retries' % fragment_retries)
-                return False, None, None, None
+                    func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live
+                            or frag_index == 1 and try_refresh_replay_beginning
+                            or parse_actions_replay)
+                    return (True, *func(live_chat_continuation))
+                except urllib.error.HTTPError as err:
+                    retry.error = err
+                    continue
+            return False, None, None, None
         self._prepare_and_start_frag_download(ctx, info_dict)
-        success, raw_fragment = dl_fragment(info_dict['url'])
+        success = dl_fragment(info_dict['url'])
         if not success:
             return False
+        raw_fragment = self._read_fragment(ctx)
try:
data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
except RegexNotFoundError:
@ -185,7 +180,7 @@ class YoutubeLiveChatFD(FragmentFD):
request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
headers.update({'content-type': 'application/json'})
-                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
+                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
url, frag_index, fragment_request_data, headers)
else:
@ -196,8 +191,7 @@ class YoutubeLiveChatFD(FragmentFD):
             if test:
                 break
-        self._finish_frag_download(ctx, info_dict)
-        return True
+        return self._finish_frag_download(ctx, info_dict)
@staticmethod
def parse_live_timestamp(action):
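
The repaired download_and_parse_fragment above also collapses the protocol if/elif ladder into a `cond and value or ...` chain, which evaluates to the first value whose guard is truthy. A small demonstration; the caveat is that the chain would mis-select if a chosen value were itself falsy, which is safe here because the arms are function objects:

def parse_live(): return 'live'
def refresh_replay(): return 'refresh'
def parse_replay(): return 'replay'

protocol, frag_index = 'youtube_live_chat_replay', 1
func = (protocol == 'youtube_live_chat' and parse_live
        or frag_index == 1 and refresh_replay
        or parse_replay)
assert func() == 'refresh'
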

View File

@ -1,33 +1,15 @@
-import os
+from ..compat.compat_utils import passthrough_module
-from ..utils import load_plugins
-_LAZY_LOADER = False
-if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
-    try:
-        from .lazy_extractors import *
-        from .lazy_extractors import _ALL_CLASSES
-        _LAZY_LOADER = True
-    except ImportError:
-        pass
-if not _LAZY_LOADER:
-    from .extractors import *
-    _ALL_CLASSES = [
-        klass
-        for name, klass in globals().items()
-        if name.endswith('IE') and name != 'GenericIE'
-    ]
-    _ALL_CLASSES.append(GenericIE)
-_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
-_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
+passthrough_module(__name__, '.extractors')
+del passthrough_module
 def gen_extractor_classes():
     """ Return a list of supported extractors.
     The order does matter; the first extractor matched is the one handling the URL.
     """
+    from .extractors import _ALL_CLASSES
     return _ALL_CLASSES
@ -38,17 +20,23 @@ def gen_extractors():
return [klass() for klass in gen_extractor_classes()]
-def list_extractors(age_limit):
-    """
-    Return a list of extractors that are suitable for the given age,
-    sorted by extractor ID.
-    """
+def list_extractor_classes(age_limit=None):
+    """Return a list of extractors that are suitable for the given age, sorted by extractor name"""
+    from .generic import GenericIE
-    return sorted(
-        filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
-        key=lambda ie: ie.IE_NAME.lower())
+    yield from sorted(filter(
+        lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,
+        gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
+    yield GenericIE
+def list_extractors(age_limit=None):
+    """Return a list of extractor instances that are suitable for the given age, sorted by extractor name"""
+    return [ie() for ie in list_extractor_classes(age_limit)]
 def get_info_extractor(ie_name):
     """Returns the info extractor class with the given ie_name"""
-    return globals()[ie_name + 'IE']
+    from . import extractors
+    return getattr(extractors, f'{ie_name}IE')
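
passthrough_module turns this package into a thin lazy proxy over .extractors, so the eager _ALL_CLASSES / lazy_extractors bookkeeping disappears. The underlying mechanism is module-level __getattr__ (PEP 562); a hedged single-file sketch, with the stdlib json standing in for the real target module:

# lazy_proxy.py -- illustrative only; the real passthrough_module does more
import importlib

_TARGET = 'json'                 # stands in for '.extractors'

def __getattr__(name):           # invoked only for names not defined here
    return getattr(importlib.import_module(_TARGET), name)

# usage from another module:
#   import lazy_proxy
#   lazy_proxy.dumps({})         # forwarded to json.dumps on first access
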

File diff suppressed because it is too large

View File

@ -1,5 +1,3 @@
from __future__ import unicode_literals
import hashlib
import hmac
import re
@ -157,8 +155,6 @@ class ABCIE(InfoExtractor):
'format_id': format_id
})
self._sort_formats(formats)
return {
'id': video_id,
'title': self._og_search_title(webpage),
@ -213,7 +209,7 @@ class ABCIViewIE(InfoExtractor):
'hdnea': token,
})
-        for sd in ('720', 'sd', 'sd-low'):
+        for sd in ('1080', '720', 'sd', 'sd-low'):
sd_url = try_get(
stream, lambda x: x['streams']['hls'][sd], compat_str)
if not sd_url:
@ -223,7 +219,6 @@ class ABCIViewIE(InfoExtractor):
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
if formats:
break
self._sort_formats(formats)
subtitles = {}
src_vtt = stream.get('captions', {}).get('src-vtt')

View File

@ -1,7 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
from .amp import AMPIE
from .common import InfoExtractor
from ..utils import (

View File

@ -1,7 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@ -82,7 +78,6 @@ class ABCOTVSIE(InfoExtractor):
'url': mp4_url,
'width': 640,
})
self._sort_formats(formats)
image = video.get('image') or {}
@ -123,7 +118,6 @@ class ABCOTVSClipsIE(InfoExtractor):
title = video_data['title']
formats = self._extract_m3u8_formats(
video_data['videoURL'].split('?')[0], video_id, 'mp4')
self._sort_formats(formats)
return {
'id': video_id,

View File

@ -0,0 +1,522 @@
import base64
import binascii
import functools
import hashlib
import hmac
import io
import json
import re
import struct
import time
import urllib.parse
import urllib.request
import urllib.response
import uuid
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..utils import (
ExtractorError,
bytes_to_intlist,
decode_base_n,
int_or_none,
intlist_to_bytes,
OnDemandPagedList,
request_to_url,
time_seconds,
traverse_obj,
update_url_query,
)
# NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862)
def add_opener(ydl, handler):
''' Add a handler for opening URLs, like _download_webpage '''
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
assert isinstance(ydl._opener, urllib.request.OpenerDirector)
ydl._opener.add_handler(handler)
def remove_opener(ydl, handler):
'''
Remove handler(s) for opening URLs
@param handler Either handler object itself or handler type.
Specifying handler type will remove all handler which isinstance returns True.
'''
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
opener = ydl._opener
assert isinstance(ydl._opener, urllib.request.OpenerDirector)
if isinstance(handler, (type, tuple)):
find_cp = lambda x: isinstance(x, handler)
else:
find_cp = lambda x: x is handler
removed = []
for meth in dir(handler):
if meth in ["redirect_request", "do_open", "proxy_open"]:
# oops, coincidental match
continue
i = meth.find("_")
protocol = meth[:i]
condition = meth[i + 1:]
if condition.startswith("error"):
j = condition.find("_") + i + 1
kind = meth[j + 1:]
try:
kind = int(kind)
except ValueError:
pass
lookup = opener.handle_error.get(protocol, {})
opener.handle_error[protocol] = lookup
elif condition == "open":
kind = protocol
lookup = opener.handle_open
elif condition == "response":
kind = protocol
lookup = opener.process_response
elif condition == "request":
kind = protocol
lookup = opener.process_request
else:
continue
handlers = lookup.setdefault(kind, [])
if handlers:
handlers[:] = [x for x in handlers if not find_cp(x)]
removed.append(x for x in handlers if find_cp(x))
if removed:
for x in opener.handlers:
if find_cp(x):
x.add_parent(None)
opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
class AbemaLicenseHandler(urllib.request.BaseHandler):
handler_order = 499
STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
def __init__(self, ie: 'AbemaTVIE'):
# the protocol that this should really handle is 'abematv-license://'
# abematv_license_open is just a placeholder for development purposes
# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open'))
self.ie = ie
def _get_videokey_from_ticket(self, ticket):
to_show = self.ie.get_param('verbose', False)
media_token = self.ie._get_media_token(to_show=to_show)
license_response = self.ie._download_json(
'https://license.abema.io/abematv-hls', None, note='Requesting playback license' if to_show else False,
query={'t': media_token},
data=json.dumps({
'kv': 'a',
'lt': ticket
}).encode('utf-8'),
headers={
'Content-Type': 'application/json',
})
res = decode_base_n(license_response['k'], table=self.STRTABLE)
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
h = hmac.new(
binascii.unhexlify(self.HKEY),
(license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
digestmod=hashlib.sha256)
enckey = bytes_to_intlist(h.digest())
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
def abematv_license_open(self, url):
url = request_to_url(url)
ticket = urllib.parse.urlparse(url).netloc
response_data = self._get_videokey_from_ticket(ticket)
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
'Content-Length': len(response_data),
}, url=url, code=200)
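
The handler above works because OpenerDirector dispatches by method name: a handler exposing <scheme>_open is consulted for <scheme>:// URLs, which is why a method name containing a hyphen has to be attached via setattr. A self-contained toy handler for a made-up scheme:

import io
import urllib.request
import urllib.response

class HelloHandler(urllib.request.BaseHandler):
    def hello_open(self, req):   # the name makes this handle hello:// URLs
        body = b'hi ' + req.full_url[len('hello://'):].encode()
        return urllib.response.addinfourl(
            io.BytesIO(body), headers={}, url=req.full_url, code=200)

opener = urllib.request.build_opener(HelloHandler())
assert opener.open('hello://world').read() == b'hi world'
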
class AbemaTVBaseIE(InfoExtractor):
_USERTOKEN = None
_DEVICE_ID = None
_MEDIATOKEN = None
_SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'
@classmethod
def _generate_aks(cls, deviceid):
deviceid = deviceid.encode('utf-8')
# add 1 hour and then drop minute and secs
ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
time_struct = time.gmtime(ts_1hour)
ts_1hour_str = str(ts_1hour).encode('utf-8')
tmp = None
def mix_once(nonce):
nonlocal tmp
h = hmac.new(cls._SECRETKEY, digestmod=hashlib.sha256)
h.update(nonce)
tmp = h.digest()
def mix_tmp(count):
nonlocal tmp
for i in range(count):
mix_once(tmp)
def mix_twist(nonce):
nonlocal tmp
mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce)
mix_once(cls._SECRETKEY)
mix_tmp(time_struct.tm_mon)
mix_twist(deviceid)
mix_tmp(time_struct.tm_mday % 5)
mix_twist(ts_1hour_str)
mix_tmp(time_struct.tm_hour % 5)
return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')
def _get_device_token(self):
if self._USERTOKEN:
return self._USERTOKEN
username, _ = self._get_login_info()
AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
if AbemaTVBaseIE._USERTOKEN:
# try authentication with locally stored token
try:
self._get_media_token(True)
return
except ExtractorError as e:
self.report_warning(f'Failed to login with cached user token; obtaining a fresh one ({e})')
AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
aks = self._generate_aks(self._DEVICE_ID)
user_data = self._download_json(
'https://api.abema.io/v1/users', None, note='Authorizing',
data=json.dumps({
'deviceId': self._DEVICE_ID,
'applicationKeySecret': aks,
}).encode('utf-8'),
headers={
'Content-Type': 'application/json',
})
AbemaTVBaseIE._USERTOKEN = user_data['token']
# don't allow adding it 2 times or more, though it's guarded
remove_opener(self._downloader, AbemaLicenseHandler)
add_opener(self._downloader, AbemaLicenseHandler(self))
return self._USERTOKEN
def _get_media_token(self, invalidate=False, to_show=True):
if not invalidate and self._MEDIATOKEN:
return self._MEDIATOKEN
AbemaTVBaseIE._MEDIATOKEN = self._download_json(
'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
query={
'osName': 'android',
'osVersion': '6.0.1',
'osLang': 'ja_JP',
'osTimezone': 'Asia/Tokyo',
'appId': 'tv.abema',
'appVersion': '3.27.1'
}, headers={
'Authorization': f'bearer {self._get_device_token()}',
})['token']
return self._MEDIATOKEN
def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
return self._download_json(
f'https://api.abema.io/{endpoint}', video_id, query=query or {},
note=note,
headers={
'Authorization': f'bearer {self._get_device_token()}',
})
def _extract_breadcrumb_list(self, webpage, video_id):
for jld in re.finditer(
r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
webpage):
jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
if traverse_obj(jsonld, '@type') != 'BreadcrumbList':
continue
items = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
if items:
return items
return []
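
_extract_breadcrumb_list scans the page's application/ld+json blocks for a BreadcrumbList and collects the name of each itemListElement; the extractor later reads the last two entries as series and episode. The same logic distilled to its core (regex and sample markup simplified for illustration):

import json
import re

webpage = '''<script type="application/ld+json">{"@type": "BreadcrumbList",
 "itemListElement": [{"name": "Home"}, {"name": "Anime"},
  {"name": "Isekai Shokudo 2"}, {"name": "Episode 1"}]}</script>'''

jsonld = json.loads(re.search(r'(?s)<script[^>]*>(.+?)</script>', webpage).group(1))
items = [el['name'] for el in jsonld['itemListElement']] if jsonld.get('@type') == 'BreadcrumbList' else []
series, episode = items[-2], items[-1]
assert (series, episode) == ('Isekai Shokudo 2', 'Episode 1')
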
class AbemaTVIE(AbemaTVBaseIE):
_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
_NETRC_MACHINE = 'abematv'
_TESTS = [{
'url': 'https://abema.tv/video/episode/194-25_s2_p1',
'info_dict': {
'id': '194-25_s2_p1',
'title': '第1話 「チーズケーキ」 「モーニング再び」',
'series': '異世界食堂2',
'series_number': 2,
'episode': '第1話 「チーズケーキ」 「モーニング再び」',
'episode_number': 1,
},
'skip': 'expired',
}, {
'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
'info_dict': {
'id': 'E8tvAnMJ7a9a5d',
'title': 'ゆるキャン△ SEASON 全話一挙【無料ビデオ72時間】',
'series': 'ゆるキャン△ SEASON',
'episode': 'ゆるキャン△ SEASON 全話一挙【無料ビデオ72時間】',
'series_number': 2,
'episode_number': 1,
'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
},
'skip': 'expired',
}, {
'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
'info_dict': {
'id': 'E8tvAnMJ7a9a5d',
'title': '第5話『光射す』',
'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
'thumbnail': r're:https://hayabusa\.io/.+',
'series': '相棒',
'episode': '第5話『光射す』',
},
'skip': 'expired',
}, {
'url': 'https://abema.tv/now-on-air/abema-anime',
'info_dict': {
'id': 'abema-anime',
# this varies
# 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
'is_live': True,
},
'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
}]
_TIMETABLE = None
def _perform_login(self, username, password):
self._get_device_token()
if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
self.write_debug('Skipping logging in')
return
if '@' in username: # don't strictly check if it's email address or not
ep, method = 'user/email', 'email'
else:
ep, method = 'oneTimePassword', 'userId'
login_response = self._download_json(
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
data=json.dumps({
method: username,
'password': password
}).encode('utf-8'), headers={
'Authorization': f'bearer {self._get_device_token()}',
'Origin': 'https://abema.tv',
'Referer': 'https://abema.tv/',
'Content-Type': 'application/json',
})
AbemaTVBaseIE._USERTOKEN = login_response['token']
self._get_media_token(True)
self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN)
def _real_extract(self, url):
# starting download using infojson from this extractor is undefined behavior,
# and never be fixed in the future; you must trigger downloads by directly specifying URL.
# (unless there's a way to hook before downloading by extractor)
video_id, video_type = self._match_valid_url(url).group('id', 'type')
headers = {
'Authorization': 'Bearer ' + self._get_device_token(),
}
video_type = video_type.split('/')[-1]
webpage = self._download_webpage(url, video_id)
canonical_url = self._search_regex(
r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
default=url)
info = self._search_json_ld(webpage, video_id, default={})
title = self._search_regex(
r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
if not title:
jsonld = None
for jld in re.finditer(
r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
webpage):
jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
if jsonld:
break
if jsonld:
title = jsonld.get('caption')
if not title and video_type == 'now-on-air':
if not self._TIMETABLE:
# cache the timetable because it goes to 5MiB in size (!!)
self._TIMETABLE = self._download_json(
'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
headers=headers)
now = time_seconds(hours=9)
for slot in self._TIMETABLE.get('slots', []):
if slot.get('channelId') != video_id:
continue
if slot['startAt'] <= now and now < slot['endAt']:
title = slot['title']
break
# read breadcrumb on top of page
breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
if breadcrumb:
# breadcrumb list translates to: (e.g. 1st test for this IE)
# Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
# hence this works
info['series'] = breadcrumb[-2]
info['episode'] = breadcrumb[-1]
if not title:
title = info['episode']
description = self._html_search_regex(
(r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
webpage, 'description', default=None, group=1)
if not description:
og_desc = self._html_search_meta(
('description', 'og:description', 'twitter:description'), webpage)
if og_desc:
description = re.sub(r'''(?sx)
^(.+?)(?:
アニメの動画を無料で見るならABEMA| # anime
.+ # applies for most of categories
)?
''', r'\1', og_desc)
# canonical URL may contain series and episode number
mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
if mobj:
seri = int_or_none(mobj.group(1), default=float('inf'))
epis = int_or_none(mobj.group(2), default=float('inf'))
info['series_number'] = seri if seri < 100 else None
# some anime like Detective Conan (though not available in AbemaTV)
# has more than 1000 episodes (1026 as of 2021/11/15)
info['episode_number'] = epis if epis < 2000 else None
is_live, m3u8_url = False, None
if video_type == 'now-on-air':
is_live = True
channel_url = 'https://api.abema.io/v1/channels'
if video_id == 'news-global':
channel_url = update_url_query(channel_url, {'division': '1'})
onair_channels = self._download_json(channel_url, video_id)
for ch in onair_channels['channels']:
if video_id == ch['id']:
m3u8_url = ch['playback']['hls']
break
else:
raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
elif video_type == 'episode':
api_response = self._download_json(
f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
note='Checking playability',
headers=headers)
ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
if 3 not in ondemand_types:
# cannot acquire decryption key for these streams
self.report_warning('This is a premium-only stream')
m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
elif video_type == 'slots':
api_response = self._download_json(
f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
note='Checking playability',
headers=headers)
if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
self.report_warning('This is a premium-only stream')
m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
else:
raise ExtractorError('Unreachable')
if is_live:
self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
formats = self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', live=is_live)
info.update({
'id': video_id,
'title': title,
'description': description,
'formats': formats,
'is_live': is_live,
})
return info
class AbemaTVTitleIE(AbemaTVBaseIE):
_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
_PAGE_SIZE = 25
_TESTS = [{
'url': 'https://abema.tv/video/title/90-1597',
'info_dict': {
'id': '90-1597',
'title': 'シャッフルアイランド',
},
'playlist_mincount': 2,
}, {
'url': 'https://abema.tv/video/title/193-132',
'info_dict': {
'id': '193-132',
'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
},
'playlist_mincount': 16,
}, {
'url': 'https://abema.tv/video/title/25-102',
'info_dict': {
'id': '25-102',
'title': 'ソードアート・オンライン アリシゼーション',
},
'playlist_mincount': 24,
}]
def _fetch_page(self, playlist_id, series_version, page):
programs = self._call_api(
f'v1/video/series/{playlist_id}/programs', playlist_id,
note=f'Downloading page {page + 1}',
query={
'seriesVersion': series_version,
'offset': str(page * self._PAGE_SIZE),
'order': 'seq',
'limit': str(self._PAGE_SIZE),
})
yield from (
self.url_result(f'https://abema.tv/video/episode/{x}')
for x in traverse_obj(programs, ('programs', ..., 'id')))
def _entries(self, playlist_id, series_version):
return OnDemandPagedList(
functools.partial(self._fetch_page, playlist_id, series_version),
self._PAGE_SIZE)
def _real_extract(self, url):
playlist_id = self._match_id(url)
series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
return self.playlist_result(
self._entries(playlist_id, series_info['version']), playlist_id=playlist_id,
playlist_title=series_info.get('title'),
playlist_description=series_info.get('content'))
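
AbemaTVTitleIE never fetches a whole series up front: _entries wraps the page-fetching callback in OnDemandPagedList, so batches of _PAGE_SIZE episodes are requested only as the playlist is consumed. The shape of that pattern, with a list standing in for the API and a simplified driver in place of yt-dlp's OnDemandPagedList:

import functools
import itertools

PAGE_SIZE = 3
FAKE_API = [f'episode-{n}' for n in range(8)]

def fetch_page(source, page):          # like _fetch_page above
    yield from source[page * PAGE_SIZE:(page + 1) * PAGE_SIZE]

def iter_paged(get_page):              # lazy driver, illustration only
    for page in itertools.count():
        items = list(get_page(page))
        if not items:
            return
        yield from items

entries = iter_paged(functools.partial(fetch_page, FAKE_API))
assert list(entries) == FAKE_API       # pages are fetched on demand
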

View File

@ -1,5 +1,3 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor

View File

@ -1,7 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
clean_html,

View File

@ -0,0 +1,199 @@
from .common import InfoExtractor
from ..utils import (
float_or_none,
format_field,
int_or_none,
traverse_obj,
parse_codecs,
parse_qs,
)
class AcFunVideoBaseIE(InfoExtractor):
def _extract_metadata(self, video_id, video_info):
playjson = self._parse_json(video_info['ksPlayJson'], video_id)
formats, subtitles = [], {}
for video in traverse_obj(playjson, ('adaptationSet', 0, 'representation')):
fmts, subs = self._extract_m3u8_formats_and_subtitles(video['url'], video_id, 'mp4', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
for f in fmts:
f.update({
'fps': float_or_none(video.get('frameRate')),
'width': int_or_none(video.get('width')),
'height': int_or_none(video.get('height')),
'tbr': float_or_none(video.get('avgBitrate')),
**parse_codecs(video.get('codecs', ''))
})
return {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
'duration': float_or_none(video_info.get('durationMillis'), 1000),
'timestamp': int_or_none(video_info.get('uploadTime'), 1000),
'http_headers': {'Referer': 'https://www.acfun.cn/'},
}
class AcFunVideoIE(AcFunVideoBaseIE):
_VALID_URL = r'https?://www\.acfun\.cn/v/ac(?P<id>[_\d]+)'
_TESTS = [{
'url': 'https://www.acfun.cn/v/ac35457073',
'info_dict': {
'id': '35457073',
'ext': 'mp4',
'duration': 174.208,
'timestamp': 1656403967,
'title': '1 8 岁 现 状',
'description': '“赶紧回去!班主任查班了!”',
'uploader': '锤子game',
'uploader_id': '51246077',
'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
'upload_date': '20220628',
'like_count': int,
'view_count': int,
'comment_count': int,
'tags': list,
},
}, {
# example for len(video_list) > 1
'url': 'https://www.acfun.cn/v/ac35468952_2',
'info_dict': {
'id': '35468952_2',
'ext': 'mp4',
'title': '【动画剧集】Rocket & Groot Season 12022/火箭浣熊与格鲁特第1季 P02 S01E02 十拿九穩',
'duration': 90.459,
'uploader': '比令',
'uploader_id': '37259967',
'upload_date': '20220629',
'timestamp': 1656479962,
'tags': list,
'like_count': int,
'view_count': int,
'comment_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17',
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
json_all = self._search_json(r'window.videoInfo\s*=', webpage, 'videoInfo', video_id)
title = json_all.get('title')
video_list = json_all.get('videoList') or []
video_internal_id = traverse_obj(json_all, ('currentVideoInfo', 'id'))
if video_internal_id and len(video_list) > 1:
part_idx, part_video_info = next(
(idx + 1, v) for (idx, v) in enumerate(video_list)
if v['id'] == video_internal_id)
title = f'{title} P{part_idx:02d} {part_video_info["title"]}'
return {
**self._extract_metadata(video_id, json_all['currentVideoInfo']),
'title': title,
'thumbnail': json_all.get('coverUrl'),
'description': json_all.get('description'),
'uploader': traverse_obj(json_all, ('user', 'name')),
'uploader_id': traverse_obj(json_all, ('user', 'href')),
'tags': traverse_obj(json_all, ('tagList', ..., 'name')),
'view_count': int_or_none(json_all.get('viewCount')),
'like_count': int_or_none(json_all.get('likeCountShow')),
'comment_count': int_or_none(json_all.get('commentCountShow')),
}
class AcFunBangumiIE(AcFunVideoBaseIE):
_VALID_URL = r'https?://www\.acfun\.cn/bangumi/(?P<id>aa[_\d]+)'
_TESTS = [{
'url': 'https://www.acfun.cn/bangumi/aa6002917_36188_1745457?ac=2',
'info_dict': {
'id': 'aa6002917_36188_1745457__2',
'ext': 'mp4',
'title': '【7月】租借女友 水原千鹤角色曲『DATE』特别PV',
'upload_date': '20200916',
'timestamp': 1600243813,
'duration': 92.091,
},
}, {
'url': 'https://www.acfun.cn/bangumi/aa5023171_36188_1750645',
'info_dict': {
'id': 'aa5023171_36188_1750645',
'ext': 'mp4',
'title': '红孩儿之趴趴蛙寻石记 第5话 ',
'duration': 760.0,
'season': '红孩儿之趴趴蛙寻石记',
'season_id': 5023171,
'season_number': 1, # series has only 1 season
'episode': 'Episode 5',
'episode_number': 5,
'upload_date': '20181223',
'timestamp': 1545552185,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
'comment_count': int,
},
}, {
'url': 'https://www.acfun.cn/bangumi/aa6065485_36188_1885061',
'info_dict': {
'id': 'aa6065485_36188_1885061',
'ext': 'mp4',
'title': '叽歪老表(第二季) 第5话 坚不可摧',
'season': '叽歪老表(第二季)',
'season_number': 2,
'season_id': 6065485,
'episode': '坚不可摧',
'episode_number': 5,
'upload_date': '20220324',
'timestamp': 1648082786,
'duration': 105.002,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
'comment_count': int,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
ac_idx = parse_qs(url).get('ac', [None])[-1]
video_id = f'{video_id}{format_field(ac_idx, None, "__%s")}'
webpage = self._download_webpage(url, video_id)
json_bangumi_data = self._search_json(r'window.bangumiData\s*=', webpage, 'bangumiData', video_id)
if ac_idx:
video_info = json_bangumi_data['hlVideoInfo']
return {
**self._extract_metadata(video_id, video_info),
'title': video_info.get('title'),
}
video_info = json_bangumi_data['currentVideoInfo']
season_id = json_bangumi_data.get('bangumiId')
season_number = season_id and next((
idx for idx, v in enumerate(json_bangumi_data.get('relatedBangumis') or [], 1)
if v.get('id') == season_id), 1)
json_bangumi_list = self._search_json(
r'window\.bangumiList\s*=', webpage, 'bangumiList', video_id, fatal=False)
video_internal_id = int_or_none(traverse_obj(json_bangumi_data, ('currentVideoInfo', 'id')))
episode_number = video_internal_id and next((
idx for idx, v in enumerate(json_bangumi_list.get('items') or [], 1)
if v.get('videoId') == video_internal_id), None)
return {
**self._extract_metadata(video_id, video_info),
'title': json_bangumi_data.get('showTitle'),
'thumbnail': json_bangumi_data.get('image'),
'season': json_bangumi_data.get('bangumiTitle'),
'season_id': season_id,
'season_number': season_number,
'episode': json_bangumi_data.get('title'),
'episode_number': episode_number,
'comment_count': int_or_none(json_bangumi_data.get('commentCount')),
}
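
Both season_number and episode_number above rely on the same idiom: next() over a generator of enumerate(..., 1) pairs yields the 1-based position of the first match, with an explicit default instead of a StopIteration when nothing matches:

episodes = [{'videoId': 11}, {'videoId': 42}, {'videoId': 99}]

episode_number = next(
    (idx for idx, v in enumerate(episodes, 1) if v.get('videoId') == 42),
    None)
assert episode_number == 2
assert next((i for i, v in enumerate(episodes, 1) if v.get('videoId') == 7), None) is None
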

View File

@ -1,6 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import binascii
import json
@ -31,30 +28,34 @@ from ..utils import (
class ADNIE(InfoExtractor):
-    IE_DESC = 'Anime Digital Network'
-    _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
-    _TEST = {
-        'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
-        'md5': '0319c99885ff5547565cacb4f3f9348d',
+    IE_DESC = 'Animation Digital Network'
+    _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
+        'md5': '1c9ef066ceb302c86f80c2b371615261',
         'info_dict': {
-            'id': '7778',
+            'id': '9841',
             'ext': 'mp4',
-            'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
-            'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
-            'series': 'Blue Exorcist - Kyôto Saga',
-            'duration': 1467,
-            'release_date': '20170106',
+            'title': 'Fruits Basket - Episode 1',
+            'description': 'md5:14be2f72c3c96809b0ca424b0097d336',
+            'series': 'Fruits Basket',
+            'duration': 1437,
+            'release_date': '20190405',
             'comment_count': int,
             'average_rating': float,
-            'season_number': 2,
-            'episode': 'Début des hostilités',
+            'season_number': 1,
+            'episode': 'À ce soir !',
             'episode_number': 1,
-        }
-    }
+        },
+        'skip': 'Only available in region (FR, ...)',
+    }, {
+        'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
+        'only_matching': True,
+    }]
-    _NETRC_MACHINE = 'animedigitalnetwork'
-    _BASE_URL = 'http://animedigitalnetwork.fr'
-    _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
+    _NETRC_MACHINE = 'animationdigitalnetwork'
+    _BASE = 'animationdigitalnetwork.fr'
+    _API_BASE_URL = 'https://gw.api.' + _BASE + '/'
     _PLAYER_BASE_URL = _API_BASE_URL + 'player/'
     _HEADERS = {}
     _LOGIN_ERR_MESSAGE = 'Unable to log in'
@ -78,14 +79,14 @@ class ADNIE(InfoExtractor):
         if subtitle_location:
             enc_subtitles = self._download_webpage(
                 subtitle_location, video_id, 'Downloading subtitles data',
-                fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
+                fatal=False, headers={'Origin': 'https://' + self._BASE})
         if not enc_subtitles:
             return None
-        # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
+        # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
         dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
             compat_b64decode(enc_subtitles[24:]),
-            binascii.unhexlify(self._K + 'ab9f52f5baae7c72'),
+            binascii.unhexlify(self._K + '7fac1178830cfe0c'),
             compat_b64decode(enc_subtitles[:24])))
subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False)
if not subtitles_json:
@ -126,10 +127,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
}])
return subtitles
-    def _real_initialize(self):
-        username, password = self._get_login_info()
-        if not username:
-            return
+    def _perform_login(self, username, password):
try:
access_token = (self._download_json(
self._API_BASE_URL + 'authentication/login', None,
@ -170,7 +168,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
}, data=b'')['token']
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
-        self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
+        self._K = ''.join(random.choices('0123456789abcdef', k=16))
message = bytes_to_intlist(json.dumps({
'k': self._K,
't': token,
@ -237,7 +235,6 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
for f in m3u8_formats:
f['language'] = 'fr'
formats.extend(m3u8_formats)
self._sort_formats(formats)
video = (self._download_json(
self._API_BASE_URL + 'video/%s' % video_id, video_id,

View File

@ -1,6 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
@ -14,7 +11,7 @@ class AdobeConnectIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
+        title = self._html_extract_title(webpage)
qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
is_live = qs.get('isLive', ['false'])[0] == 'true'
formats = []

View File

@ -1,26 +1,20 @@
-# coding: utf-8
-from __future__ import unicode_literals
+import getpass
 import json
 import re
 import time
+import urllib.error
 import xml.etree.ElementTree as etree
 from .common import InfoExtractor
-from ..compat import (
-    compat_kwargs,
-    compat_urlparse,
-    compat_getpass
-)
+from ..compat import compat_urlparse
 from ..utils import (
-    unescapeHTML,
-    urlencode_postdata,
-    unified_timestamp,
-    ExtractorError,
     NO_DEFAULT,
+    ExtractorError,
+    unescapeHTML,
+    unified_timestamp,
+    urlencode_postdata,
 )
MSO_INFO = {
'DTV': {
'name': 'DIRECTV',
@ -1345,10 +1339,20 @@ MSO_INFO = {
'username_field': 'username',
'password_field': 'password',
},
'Suddenlink': {
'name': 'Suddenlink',
'username_field': 'username',
'password_field': 'password',
},
'AlticeOne': {
'name': 'Optimum TV',
'username_field': 'j_username',
'password_field': 'j_password',
},
}
-class AdobePassIE(InfoExtractor):
+class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
_MVPD_CACHE = 'ap-mvpd'
@ -1360,7 +1364,7 @@ class AdobePassIE(InfoExtractor):
headers.update(kwargs.get('headers', {}))
kwargs['headers'] = headers
return super(AdobePassIE, self)._download_webpage_handle(
-            *args, **compat_kwargs(kwargs))
+            *args, **kwargs)
@staticmethod
def _get_mvpd_resource(provider_id, title, guid, rating):
@ -1429,32 +1433,34 @@ class AdobePassIE(InfoExtractor):
guid = xml_text(resource, 'guid') if '<' in resource else resource
count = 0
while count < 2:
-            requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {}
+            requestor_info = self.cache.load(self._MVPD_CACHE, requestor_id) or {}
authn_token = requestor_info.get('authn_token')
if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
authn_token = None
             if not authn_token:
                 # TODO add support for other TV Providers
                 mso_id = self.get_param('ap_mso')
+                if mso_id:
+                    username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
+                    if not username or not password:
+                        raise_mvpd_required()
+                    mso_info = MSO_INFO[mso_id]
+                    provider_redirect_page_res = self._download_webpage_handle(
+                        self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
+                        'Downloading Provider Redirect Page', query={
+                            'noflash': 'true',
+                            'mso_id': mso_id,
+                            'requestor_id': requestor_id,
+                            'no_iframe': 'false',
+                            'domain_name': 'adobe.com',
+                            'redirect_url': url,
+                        })
+                elif not self._cookies_passed:
+                    raise_mvpd_required()
                 if not mso_id:
-                    raise_mvpd_required()
-                username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
-                if not username or not password:
-                    raise_mvpd_required()
-                mso_info = MSO_INFO[mso_id]
-                provider_redirect_page_res = self._download_webpage_handle(
-                    self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
-                    'Downloading Provider Redirect Page', query={
-                        'noflash': 'true',
-                        'mso_id': mso_id,
-                        'requestor_id': requestor_id,
-                        'no_iframe': 'false',
-                        'domain_name': 'adobe.com',
-                        'redirect_url': url,
-                    })
-                if mso_id == 'Comcast_SSO':
+                    pass
+                elif mso_id == 'Comcast_SSO':
# Comcast page flow varies by video site and whether you
# are on Comcast's network.
provider_redirect_page, urlh = provider_redirect_page_res
@ -1502,7 +1508,7 @@ class AdobePassIE(InfoExtractor):
'send_confirm_link': False,
'send_token': True
}))
-                        philo_code = compat_getpass('Type auth code you have received [Return]: ')
+                        philo_code = getpass.getpass('Type auth code you have received [Return]: ')
self._download_webpage(
'https://idp.philo.com/auth/update/login_code', video_id, 'Submitting token', data=urlencode_postdata({
'token': philo_code
@ -1635,6 +1641,58 @@ class AdobePassIE(InfoExtractor):
urlh.geturl(), video_id, 'Sending final bookend',
query=hidden_data)
post_form(mvpd_confirm_page_res, 'Confirming Login')
elif mso_id == 'Suddenlink':
# Suddenlink is similar to SlingTV in using a tab history count and a meta refresh,
# but they also do a dynamic redirect using JavaScript that has to be followed as well
first_bookend_page, urlh = post_form(
provider_redirect_page_res, 'Pressing Continue...')
hidden_data = self._hidden_inputs(first_bookend_page)
hidden_data['history_val'] = 1
provider_login_redirect_page_res = self._download_webpage_handle(
urlh.geturl(), video_id, 'Sending First Bookend',
query=hidden_data)
provider_login_redirect_page, urlh = provider_login_redirect_page_res
# Some website partners seem to not have the extra ajaxurl redirect step, so we check if we already
# have the login prompt or not
if 'id="password" type="password" name="password"' in provider_login_redirect_page:
provider_login_page_res = provider_login_redirect_page_res
else:
provider_tryauth_url = self._html_search_regex(
r'url:\s*[\'"]([^\'"]+)', provider_login_redirect_page, 'ajaxurl')
provider_tryauth_page = self._download_webpage(
provider_tryauth_url, video_id, 'Submitting TryAuth',
query=hidden_data)
provider_login_page_res = self._download_webpage_handle(
f'https://authorize.suddenlink.net/saml/module.php/authSynacor/login.php?AuthState={provider_tryauth_page}',
video_id, 'Getting Login Page',
query=hidden_data)
provider_association_redirect, urlh = post_form(
provider_login_page_res, 'Logging in', {
mso_info['username_field']: username,
mso_info['password_field']: password
})
provider_refresh_redirect_url = extract_redirect_url(
provider_association_redirect, url=urlh.geturl())
last_bookend_page, urlh = self._download_webpage_handle(
provider_refresh_redirect_url, video_id,
'Downloading Auth Association Redirect Page')
hidden_data = self._hidden_inputs(last_bookend_page)
hidden_data['history_val'] = 3
mvpd_confirm_page_res = self._download_webpage_handle(
urlh.geturl(), video_id, 'Sending Final Bookend',
query=hidden_data)
post_form(mvpd_confirm_page_res, 'Confirming Login')
else:
# Some providers (e.g. DIRECTV NOW) have another meta refresh
@ -1652,25 +1710,30 @@ class AdobePassIE(InfoExtractor):
mso_info.get('username_field', 'username'): username,
mso_info.get('password_field', 'password'): password
}
-                    if mso_id == 'Cablevision':
+                    if mso_id in ('Cablevision', 'AlticeOne'):
form_data['_eventId_proceed'] = ''
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', form_data)
if mso_id != 'Rogers':
post_form(mvpd_confirm_page_res, 'Confirming Login')
-            session = self._download_webpage(
-                self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
-                'Retrieving Session', data=urlencode_postdata({
-                    '_method': 'GET',
-                    'requestor_id': requestor_id,
-                }), headers=mvpd_headers)
+            try:
+                session = self._download_webpage(
+                    self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
+                    'Retrieving Session', data=urlencode_postdata({
+                        '_method': 'GET',
+                        'requestor_id': requestor_id,
+                    }), headers=mvpd_headers)
+            except ExtractorError as e:
+                if not mso_id and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
+                    raise_mvpd_required()
+                raise
if '<pendingLogout' in session:
-                self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+                self.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1
continue
authn_token = unescapeHTML(xml_text(session, 'authnToken'))
requestor_info['authn_token'] = authn_token
-            self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+            self.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
authz_token = requestor_info.get(guid)
if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
@ -1686,14 +1749,14 @@ class AdobePassIE(InfoExtractor):
'userMeta': '1',
}), headers=mvpd_headers)
if '<pendingLogout' in authorize:
-                self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+                self.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1
continue
if '<error' in authorize:
raise ExtractorError(xml_text(authorize, 'details'), expected=True)
authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
requestor_info[guid] = authz_token
-            self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+            self.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
mvpd_headers.update({
'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
@ -1709,7 +1772,7 @@ class AdobePassIE(InfoExtractor):
'hashed_guid': 'false',
}), headers=mvpd_headers)
if '<pendingLogout' in short_authorize:
-                self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+                self.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1
continue
return short_authorize

View File

@ -1,5 +1,3 @@
from __future__ import unicode_literals
import functools
import re
@ -72,7 +70,6 @@ class AdobeTVBaseIE(InfoExtractor):
})
s3_extracted = True
formats.append(f)
self._sort_formats(formats)
return {
'id': video_id,
@ -234,6 +231,7 @@ class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
class AdobeTVVideoIE(AdobeTVBaseIE):
IE_NAME = 'adobetv:video'
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
_EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]']
_TEST = {
# From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
@ -270,7 +268,6 @@ class AdobeTVVideoIE(AdobeTVBaseIE):
'width': int_or_none(source.get('width') or None),
'url': source_src,
})
self._sort_formats(formats)
# For both metadata and downloaded files the duration varies among
# formats. I just pick the max one

View File

@ -1,6 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
import json
from .turner import TurnerBaseIE
@ -183,7 +180,6 @@ class AdultSwimIE(TurnerBaseIE):
info['subtitles'].setdefault('en', []).append({
'url': asset_url,
})
self._sort_formats(info['formats'])
return info
else:

View File

@ -1,7 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
from .theplatform import ThePlatformIE
from ..utils import (
ExtractorError,
@ -12,7 +8,7 @@ from ..utils import (
)
-class AENetworksBaseIE(ThePlatformIE):
+class AENetworksBaseIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
_BASE_URL_REGEX = r'''(?x)https?://
(?:(?:www|play|watch)\.)?
(?P<domain>
@ -32,14 +28,17 @@ class AENetworksBaseIE(ThePlatformIE):
}
def _extract_aen_smil(self, smil_url, video_id, auth=None):
-        query = {'mbr': 'true'}
+        query = {
+            'mbr': 'true',
+            'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
+        }
if auth:
query['auth'] = auth
TP_SMIL_QUERY = [{
'assetTypes': 'high_video_ak',
-            'assetTypes': 'high_video_ak',
-            'switch': 'hls_high_ak'
+            'assetTypes': 'high_video_ak',
+            'switch': 'hls_high_ak',
         }, {
-            'assetTypes': 'high_video_s3'
+            'assetTypes': 'high_video_s3',
}, {
'assetTypes': 'high_video_s3',
'switch': 'hls_high_fastly',
@ -63,7 +62,6 @@ class AENetworksBaseIE(ThePlatformIE):
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
if last_e and not formats:
raise last_e
self._sort_formats(formats)
return {
'id': video_id,
'formats': formats,
@ -305,7 +303,6 @@ class HistoryTopicIE(AENetworksBaseIE):
class HistoryPlayerIE(AENetworksBaseIE):
IE_NAME = 'history:player'
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
_TESTS = []
def _real_extract(self, url):
domain, video_id = self._match_valid_url(url).groups()

View File

@ -0,0 +1,40 @@
from .common import InfoExtractor
from .vimeo import VimeoIE
class AeonCoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?aeon\.co/videos/(?P<id>[^/?]+)'
_TESTS = [{
'url': 'https://aeon.co/videos/raw-solar-storm-footage-is-the-punk-rock-antidote-to-sleek-james-webb-imagery',
'md5': 'e5884d80552c9b6ea8d268a258753362',
'info_dict': {
'id': '1284717',
'ext': 'mp4',
'title': 'Brilliant Noise',
'thumbnail': 'https://i.vimeocdn.com/video/21006315-1a1e49da8b07fd908384a982b4ba9ff0268c509a474576ebdf7b1392f4acae3b-d_960',
'uploader': 'Semiconductor',
'uploader_id': 'semiconductor',
'uploader_url': 'https://vimeo.com/semiconductor',
'duration': 348
}
}, {
'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
'md5': '4e5f3dad9dbda0dbfa2da41a851e631e',
'info_dict': {
'id': '728595228',
'ext': 'mp4',
'title': 'Wrought',
'thumbnail': 'https://i.vimeocdn.com/video/1484618528-c91452611f9a4e4497735a533da60d45b2fe472deb0c880f0afaab0cd2efb22a-d_1280',
'uploader': 'Biofilm Productions',
'uploader_id': 'user140352216',
'uploader_url': 'https://vimeo.com/user140352216',
'duration': 1344
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
vimeo_id = self._search_regex(r'hosterId":\s*"(?P<id>[0-9]+)', webpage, 'vimeo id')
vimeo_url = VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{vimeo_id}', 'https://aeon.co')
return self.url_result(vimeo_url, VimeoIE)

View File

@ -1,14 +1,12 @@
-# coding: utf-8
-from __future__ import unicode_literals
+import functools
 import re
 from .common import InfoExtractor
-from ..compat import compat_xpath
 from ..utils import (
+    ExtractorError,
+    OnDemandPagedList,
     date_from_str,
     determine_ext,
-    ExtractorError,
     int_or_none,
     qualities,
     traverse_obj,
@ -32,7 +30,7 @@ class AfreecaTVIE(InfoExtractor):
/app/(?:index|read_ucc_bbs)\.cgi|
/player/[Pp]layer\.(?:swf|html)
)\?.*?\bnTitleNo=|
-                            vod\.afreecatv\.com/PLAYER/STATION/
+                            vod\.afreecatv\.com/(PLAYER/STATION|player)/
)
(?P<id>\d+)
'''
@ -170,6 +168,9 @@ class AfreecaTVIE(InfoExtractor):
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
'only_matching': True,
}, {
'url': 'http://vod.afreecatv.com/player/15055030',
'only_matching': True,
}]
@staticmethod
@ -181,14 +182,7 @@ class AfreecaTVIE(InfoExtractor):
video_key['part'] = int(m.group('part'))
return video_key
-    def _real_initialize(self):
-        self._login()
-    def _login(self):
-        username, password = self._get_login_info()
-        if username is None:
-            return
+    def _perform_login(self, username, password):
login_form = {
'szWork': 'login',
'szType': 'json',
@ -284,7 +278,7 @@ class AfreecaTVIE(InfoExtractor):
else:
raise ExtractorError('Unable to download video info')
-        video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
+        video_element = video_xml.findall('./track/video')[-1]
if video_element is None or video_element.text is None:
raise ExtractorError(
'Video %s does not exist' % video_id, expected=True)
@ -314,7 +308,7 @@ class AfreecaTVIE(InfoExtractor):
if not video_url:
entries = []
-            file_elements = video_element.findall(compat_xpath('./file'))
+            file_elements = video_element.findall('./file')
one = len(file_elements) == 1
for file_num, file_element in enumerate(file_elements, start=1):
file_url = url_or_none(file_element.text)
@ -344,7 +338,6 @@ class AfreecaTVIE(InfoExtractor):
}]
if not formats and not self.get_param('ignore_no_formats'):
continue
self._sort_formats(formats)
file_info = common_entry.copy()
file_info.update({
'id': format_id,
@ -386,7 +379,7 @@ class AfreecaTVIE(InfoExtractor):
return info
-class AfreecaTVLiveIE(AfreecaTVIE):
+class AfreecaTVLiveIE(AfreecaTVIE):  # XXX: Do not subclass from concrete IE
IE_NAME = 'afreecatv:live'
_VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
@ -416,26 +409,35 @@ class AfreecaTVLiveIE(AfreecaTVIE):
def _real_extract(self, url):
broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno')
+        password = self.get_param('videopassword')
         info = self._download_json(self._LIVE_API_URL, broadcaster_id, fatal=False,
                                    data=urlencode_postdata({'bid': broadcaster_id})) or {}
         channel_info = info.get('CHANNEL') or {}
         broadcaster_id = channel_info.get('BJID') or broadcaster_id
         broadcast_no = channel_info.get('BNO') or broadcast_no
+        password_protected = channel_info.get('BPWD')
         if not broadcast_no:
             raise ExtractorError(f'Unable to extract broadcast number ({broadcaster_id} may not be live)', expected=True)
+        if password_protected == 'Y' and password is None:
+            raise ExtractorError(
+                'This livestream is protected by a password, use the --video-password option',
+                expected=True)
         formats = []
         quality_key = qualities(self._QUALITIES)
         for quality_str in self._QUALITIES:
+            params = {
+                'bno': broadcast_no,
+                'stream_type': 'common',
+                'type': 'aid',
+                'quality': quality_str,
+            }
+            if password is not None:
+                params['pwd'] = password
             aid_response = self._download_json(
                 self._LIVE_API_URL, broadcast_no, fatal=False,
-                data=urlencode_postdata({
-                    'bno': broadcast_no,
-                    'stream_type': 'common',
-                    'type': 'aid',
-                    'quality': quality_str,
-                }),
+                data=urlencode_postdata(params),
note=f'Downloading access token for {quality_str} stream',
errnote=f'Unable to download access token for {quality_str} stream')
aid = traverse_obj(aid_response, ('CHANNEL', 'AID'))
@ -461,8 +463,6 @@ class AfreecaTVLiveIE(AfreecaTVIE):
'quality': quality_key(quality_str),
})
self._sort_formats(formats)
station_info = self._download_json(
'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
query={'szBjId': broadcaster_id}, fatal=False,
@ -477,3 +477,57 @@ class AfreecaTVLiveIE(AfreecaTVIE):
'formats': formats,
'is_live': True,
}
class AfreecaTVUserIE(InfoExtractor):
IE_NAME = 'afreecatv:user'
_VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'
_TESTS = [{
'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
'info_dict': {
'_type': 'playlist',
'id': 'ryuryu24',
'title': 'ryuryu24 - review',
},
'playlist_count': 218,
}, {
'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
'info_dict': {
'_type': 'playlist',
'id': 'parang1995',
'title': 'parang1995 - highlight',
},
'playlist_count': 997,
}, {
'url': 'https://bj.afreecatv.com/ryuryu24/vods',
'info_dict': {
'_type': 'playlist',
'id': 'ryuryu24',
'title': 'ryuryu24 - all',
},
'playlist_count': 221,
}, {
'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
'info_dict': {
'_type': 'playlist',
'id': 'ryuryu24',
'title': 'ryuryu24 - balloonclip',
},
'playlist_count': 0,
}]
_PER_PAGE = 60
def _fetch_page(self, user_id, user_type, page):
page += 1
info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id,
query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
note=f'Downloading {user_type} video page {page}')
for item in info['data']:
yield self.url_result(
f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
def _real_extract(self, url):
user_id, user_type = self._match_valid_url(url).group('id', 'slug_type')
user_type = user_type or 'all'
entries = OnDemandPagedList(functools.partial(self._fetch_page, user_id, user_type), self._PER_PAGE)
return self.playlist_result(entries, user_id, f'{user_id} - {user_type}')
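Note: the lazy pagination above is yt-dlp's OnDemandPagedList pattern; below is a minimal runnable sketch with the API call stubbed out (the helper names are invented, not part of the extractor).

import functools

from yt_dlp.utils import OnDemandPagedList

PER_PAGE = 60

def _fake_api(user_id, page):
    # Invented stand-in for the bjapi.afreecatv.com request above.
    return [f'{user_id}-vod-{page}-{n}' for n in range(PER_PAGE)]

def fetch_page(user_id, page):
    # OnDemandPagedList passes a 0-based page index; the extractor above
    # shifts it to 1-based before querying the API.
    yield from _fake_api(user_id, page + 1)

entries = OnDemandPagedList(functools.partial(fetch_page, 'ryuryu24'), PER_PAGE)
print(entries.getslice(0, 3))  # only the first page is ever fetched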

View File

@ -0,0 +1,251 @@
import functools
import uuid
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
int_or_none,
month_by_name,
parse_duration,
try_call,
)
class WyborczaVideoIE(InfoExtractor):
# this id is not an article id; it has to be extracted from the article
_VALID_URL = r'(?:wyborcza:video:|https?://wyborcza\.pl/(?:api-)?video/)(?P<id>\d+)'
IE_NAME = 'wyborcza:video'
_TESTS = [{
'url': 'wyborcza:video:26207634',
'info_dict': {
'id': '26207634',
'ext': 'mp4',
'title': '- Polska w 2020 r. jest innym państwem niż w 2015 r. Nie zmieniła się konstytucja, ale jest to już inny ustrój - mówi Adam Bodnar',
'description': ' ',
'uploader': 'Dorota Roman',
'duration': 2474,
'thumbnail': r're:https://.+\.jpg',
},
}, {
'url': 'https://wyborcza.pl/video/26207634',
'only_matching': True,
}, {
'url': 'https://wyborcza.pl/api-video/26207634',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
meta = self._download_json(f'https://wyborcza.pl/api-video/{video_id}', video_id)
formats = []
base_url = meta['redirector'].replace('http://', 'https://') + meta['basePath']
for quality in ('standard', 'high'):
if not meta['files'].get(quality):
continue
formats.append({
'url': base_url + meta['files'][quality],
'height': int_or_none(
self._search_regex(
r'p(\d+)[a-z]+\.mp4$', meta['files'][quality],
'mp4 video height', default=None)),
'format_id': quality,
})
if meta['files'].get('dash'):
formats.extend(self._extract_mpd_formats(base_url + meta['files']['dash'], video_id))
return {
'id': video_id,
'formats': formats,
'title': meta.get('title'),
'description': meta.get('lead'),
'uploader': meta.get('signature'),
'thumbnail': meta.get('imageUrl'),
'duration': meta.get('duration'),
}
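Note: the height detection above relies on the quality being encoded in the file name; a small check of the regex (file names invented to match the pattern it expects).

import re

for name in ('wideo_p480std.mp4', 'wideo_p720high.mp4', 'wideo.mp4'):
    m = re.search(r'p(\d+)[a-z]+\.mp4$', name)
    print(name, '->', int(m.group(1)) if m else None)
# -> 480, 720 and None (no height is set) respectively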
class WyborczaPodcastIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://(?:www\.)?(?:
wyborcza\.pl/podcast(?:/0,172673\.html)?|
wysokieobcasy\.pl/wysokie-obcasy/0,176631\.html
)(?:\?(?:[^&#]+?&)*podcast=(?P<id>\d+))?
'''
_TESTS = [{
'url': 'https://wyborcza.pl/podcast/0,172673.html?podcast=100720#S.main_topic-K.C-B.6-L.1.podcast',
'info_dict': {
'id': '100720',
'ext': 'mp3',
'title': 'Cyfrodziewczyny. Kim były pionierki polskiej informatyki ',
'uploader': 'Michał Nogaś ',
'upload_date': '20210117',
'description': 'md5:49f0a06ffc4c1931210d3ab1416a651d',
'duration': 3684.0,
'thumbnail': r're:https://.+\.jpg',
},
}, {
'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html?podcast=100673',
'info_dict': {
'id': '100673',
'ext': 'mp3',
'title': 'Czym jest ubóstwo menstruacyjne i dlaczego dotyczy każdej i każdego z nas?',
'uploader': 'Agnieszka Urazińska ',
'upload_date': '20210115',
'description': 'md5:c161dc035f8dbb60077011fc41274899',
'duration': 1803.0,
'thumbnail': r're:https://.+\.jpg',
},
}, {
'url': 'https://wyborcza.pl/podcast',
'info_dict': {
'id': '334',
'title': 'Gościnnie: Wyborcza, 8:10',
'series': 'Gościnnie: Wyborcza, 8:10',
},
'playlist_mincount': 370,
}, {
'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html',
'info_dict': {
'id': '395',
'title': 'Gościnnie: Wysokie Obcasy',
'series': 'Gościnnie: Wysokie Obcasy',
},
'playlist_mincount': 12,
}]
def _real_extract(self, url):
podcast_id = self._match_id(url)
if not podcast_id: # playlist
podcast_id = '395' if 'wysokieobcasy.pl/' in url else '334'
return self.url_result(TokFMAuditionIE._create_url(podcast_id), TokFMAuditionIE, podcast_id)
meta = self._download_json('https://wyborcza.pl/api/podcast', podcast_id,
query={'guid': podcast_id, 'type': 'wo' if 'wysokieobcasy.pl/' in url else None})
day, month, year = self._search_regex(r'^(\d\d?) (\w+) (\d{4})$', meta.get('publishedDate'),
'upload date', group=(1, 2, 3), default=(None, None, None))
return {
'id': podcast_id,
'url': meta['url'],
'title': meta.get('title'),
'description': meta.get('description'),
'thumbnail': meta.get('imageUrl'),
'duration': parse_duration(meta.get('duration')),
'uploader': meta.get('author'),
'upload_date': try_call(lambda: f'{year}{month_by_name(month, lang="pl"):0>2}{day:0>2}'),
}
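Note: the upload_date expression above zero-pads day and month via f-string alignment; a worked example with an invented date, assuming month_by_name recognises the Polish genitive month names used in publishedDate.

from yt_dlp.utils import month_by_name, try_call

day, month, year = '5', 'marca', '2021'  # invented: 5 March 2021
print(try_call(lambda: f'{year}{month_by_name(month, lang="pl"):0>2}{day:0>2}'))
# -> '20210305'; if the month name is not recognised, the format spec
# raises TypeError and try_call returns None instead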
class TokFMPodcastIE(InfoExtractor):
_VALID_URL = r'(?:https?://audycje\.tokfm\.pl/podcast/|tokfm:podcast:)(?P<id>\d+),?'
IE_NAME = 'tokfm:podcast'
_TESTS = [{
'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
'info_dict': {
'id': '91275',
'ext': 'aac',
'title': 'md5:a9b15488009065556900169fb8061cce',
'episode': 'md5:a9b15488009065556900169fb8061cce',
'series': 'Analizy',
},
}]
def _real_extract(self, url):
media_id = self._match_id(url)
# in case this endpoint breaks, see the API below, but it returns a lot of useless data
# https://api.podcast.radioagora.pl/api4/getPodcasts?podcast_id=100091&with_guests=true&with_leaders_for_mobile=true
metadata = self._download_json(
f'https://audycje.tokfm.pl/getp/3{media_id}', media_id, 'Downloading podcast metadata')
if not metadata:
raise ExtractorError('No such podcast', expected=True)
metadata = metadata[0]
formats = []
for ext in ('aac', 'mp3'):
url_data = self._download_json(
f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}',
media_id, 'Downloading podcast %s URL' % ext)
# prevents inserting the mp3 (default) multiple times
if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']:
formats.append({
'url': url_data['link_ssl'],
'ext': ext,
'vcodec': 'none',
'acodec': ext,
})
return {
'id': media_id,
'formats': formats,
'title': metadata.get('podcast_name'),
'series': metadata.get('series_name'),
'episode': metadata.get('podcast_name'),
}
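Note: the extension check above guards against the aac request falling back to the default mp3 link, which would otherwise be added twice; a tiny illustration with invented responses.

responses = {'aac': {'link_ssl': 'https://example.com/ep.mp3'},  # fallback
             'mp3': {'link_ssl': 'https://example.com/ep.mp3'}}
for ext in ('aac', 'mp3'):
    url_data = responses[ext]
    if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']:
        print('keeping', ext)  # only 'mp3' survives, so no duplicate entry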
class TokFMAuditionIE(InfoExtractor):
_VALID_URL = r'(?:https?://audycje\.tokfm\.pl/audycja/|tokfm:audition:)(?P<id>\d+),?'
IE_NAME = 'tokfm:audition'
_TESTS = [{
'url': 'https://audycje.tokfm.pl/audycja/218,Analizy',
'info_dict': {
'id': '218',
'title': 'Analizy',
'series': 'Analizy',
},
'playlist_count': 1635,
}]
_PAGE_SIZE = 30
_HEADERS = {
'User-Agent': 'Mozilla/5.0 (Linux; Android 9; Redmi 3S Build/PQ3A.190801.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.101 Mobile Safari/537.36',
}
@staticmethod
def _create_url(id):
return f'https://audycje.tokfm.pl/audycja/{id}'
def _real_extract(self, url):
audition_id = self._match_id(url)
data = self._download_json(
f'https://api.podcast.radioagora.pl/api4/getSeries?series_id={audition_id}',
audition_id, 'Downloading audition metadata', headers=self._HEADERS)
if not data:
raise ExtractorError('No such audition', expected=True)
data = data[0]
entries = OnDemandPagedList(functools.partial(
self._fetch_page, audition_id, data), self._PAGE_SIZE)
return {
'_type': 'playlist',
'id': audition_id,
'title': data.get('series_name'),
'series': data.get('series_name'),
'entries': entries,
}
def _fetch_page(self, audition_id, data, page):
for retry in self.RetryManager():
podcast_page = self._download_json(
f'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id={audition_id}&limit=30&offset={page}&with_guests=true&with_leaders_for_mobile=true',
audition_id, f'Downloading podcast list page {page + 1}', headers=self._HEADERS)
if not podcast_page:
retry.error = ExtractorError('Agora returned empty page', expected=True)
for podcast in podcast_page:
yield {
'_type': 'url_transparent',
'url': podcast['podcast_sharing_url'],
'ie_key': TokFMPodcastIE.ie_key(),
'title': podcast.get('podcast_name'),
'episode': podcast.get('podcast_name'),
'description': podcast.get('podcast_description'),
'timestamp': int_or_none(podcast.get('podcast_timestamp')),
'series': data.get('series_name'),
}
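Note: the empty-page handling above uses yt-dlp's RetryManager protocol (assign retry.error to request another pass); a standalone sketch, assuming the callback is invoked as (error, attempt count, max retries).

from yt_dlp.utils import ExtractorError, RetryManager

def report(err, count, retries):
    print(f'attempt {count}/{retries}: {err}')

pages = iter([[], ['podcast-1', 'podcast-2']])  # first response is empty
for retry in RetryManager(3, report):
    podcast_page = next(pages)
    if not podcast_page:
        retry.error = ExtractorError('Agora returned empty page', expected=True)
print(podcast_page)  # -> ['podcast-1', 'podcast-2'] on the second pass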

View File

@ -1,6 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor

View File

@ -0,0 +1,96 @@
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
determine_ext,
int_or_none,
mimetype2ext,
parse_iso8601,
traverse_obj
)
class AirTVIE(InfoExtractor):
_VALID_URL = r'https?://www\.air\.tv/watch\?v=(?P<id>\w+)'
_TESTS = [{
# without youtube_id
'url': 'https://www.air.tv/watch?v=W87jcWleSn2hXZN47zJZsQ',
'info_dict': {
'id': 'W87jcWleSn2hXZN47zJZsQ',
'ext': 'mp4',
'release_date': '20221003',
'release_timestamp': 1664792603,
'channel_id': 'vgfManQlRQKgoFQ8i8peFQ',
'title': 'md5:c12d49ed367c3dadaa67659aff43494c',
'upload_date': '20221003',
'duration': 151,
'view_count': int,
'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
'timestamp': 1664792603,
}
}, {
# with youtube_id
'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
'info_dict': {
'id': '2ZTqmpee-bQ',
'ext': 'mp4',
'comment_count': int,
'tags': 'count:11',
'channel_follower_count': int,
'like_count': int,
'uploader': 'Newsflare',
'thumbnail': 'https://i.ytimg.com/vi_webp/2ZTqmpee-bQ/maxresdefault.webp',
'availability': 'public',
'title': 'Geese Chase Alligator Across Golf Course',
'uploader_id': 'NewsflareBreaking',
'channel_url': 'https://www.youtube.com/channel/UCzSSoloGEz10HALUAbYhngQ',
'description': 'md5:99b21d9cea59330149efbd9706e208f5',
'age_limit': 0,
'channel_id': 'UCzSSoloGEz10HALUAbYhngQ',
'uploader_url': 'http://www.youtube.com/user/NewsflareBreaking',
'view_count': int,
'categories': ['News & Politics'],
'live_status': 'not_live',
'playable_in_embed': True,
'channel': 'Newsflare',
'duration': 37,
'upload_date': '20180511',
}
}]
def _get_formats_and_subtitle(self, json_data, video_id):
formats, subtitles = [], {}
for source in traverse_obj(json_data, 'sources', 'sources_desktop', ...):
ext = determine_ext(source.get('src'), mimetype2ext(source.get('type')))
if ext == 'm3u8':
fmts, subs = self._extract_m3u8_formats_and_subtitles(source.get('src'), video_id)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
formats.append({'url': source.get('src'), 'ext': ext})
return formats, subtitles
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['initialState']['videos'][display_id]
if nextjs_json.get('youtube_id'):
return self.url_result(
f'https://www.youtube.com/watch?v={nextjs_json.get("youtube_id")}', YoutubeIE)
formats, subtitles = self._get_formats_and_subtitle(nextjs_json, display_id)
return {
'id': display_id,
'title': nextjs_json.get('title') or self._html_search_meta('og:title', webpage),
'formats': formats,
'subtitles': subtitles,
'description': nextjs_json.get('description') or None,
'duration': int_or_none(nextjs_json.get('duration')),
'thumbnails': [
{'url': thumbnail}
for thumbnail in traverse_obj(nextjs_json, ('default_thumbnails', ...))],
'channel_id': traverse_obj(nextjs_json, 'channel', 'channel_slug'),
'timestamp': parse_iso8601(nextjs_json.get('created')),
'release_timestamp': parse_iso8601(nextjs_json.get('published')),
'view_count': int_or_none(nextjs_json.get('views')),
}

View File

@ -0,0 +1,60 @@
from .common import InfoExtractor
from ..utils import int_or_none, merge_dicts
class AitubeKZVideoIE(InfoExtractor):
_VALID_URL = r'https?://aitube\.kz/(?:video|embed/)\?(?:[^\?]+)?id=(?P<id>[\w-]+)'
_TESTS = [{
# id parameter as the first query parameter
'url': 'https://aitube.kz/video?id=9291d29b-c038-49a1-ad42-3da2051d353c&playlistId=d55b1f5f-ef2a-4f23-b646-2a86275b86b7&season=1',
'info_dict': {
'id': '9291d29b-c038-49a1-ad42-3da2051d353c',
'ext': 'mp4',
'duration': 2174.0,
'channel_id': '94962f73-013b-432c-8853-1bd78ca860fe',
'like_count': int,
'channel': 'ASTANA TV',
'comment_count': int,
'view_count': int,
'description': 'Смотреть любимые сериалы и видео, поделиться видео и сериалами с друзьями и близкими',
'thumbnail': 'https://cdn.static02.aitube.kz/kz.aitudala.aitube.staticaccess/files/ddf2a2ff-bee3-409b-b5f2-2a8202bba75b',
'upload_date': '20221102',
'timestamp': 1667370519,
'title': 'Ангел хранитель 1 серия',
'channel_follower_count': int,
}
}, {
# embed url
'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c',
'only_matching': True,
}, {
# id parameter is not the first query parameter
'url': 'https://aitube.kz/video?season=1&id=9291d29b-c038-49a1-ad42-3da2051d353c&playlistId=d55b1f5f-ef2a-4f23-b646-2a86275b86b7',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
nextjs_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['videoInfo']
json_ld_data = self._search_json_ld(webpage, video_id)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
f'https://api-http.aitube.kz/kz.aitudala.aitube.staticaccess/video/{video_id}/video', video_id)
return merge_dicts({
'id': video_id,
'title': nextjs_data.get('title') or self._html_search_meta(['name', 'og:title'], webpage),
'description': nextjs_data.get('description'),
'formats': formats,
'subtitles': subtitles,
'view_count': (nextjs_data.get('viewCount')
or int_or_none(self._html_search_meta('ya:ovs:views_total', webpage))),
'like_count': nextjs_data.get('likeCount'),
'channel': nextjs_data.get('channelTitle'),
'channel_id': nextjs_data.get('channelId'),
'thumbnail': nextjs_data.get('coverUrl'),
'comment_count': nextjs_data.get('commentCount'),
'channel_follower_count': int_or_none(nextjs_data.get('channelSubscriberCount')),
}, json_ld_data)

View File

@ -1,6 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@ -18,7 +15,7 @@ class AliExpressLiveIE(InfoExtractor):
'id': '2800002704436634',
'ext': 'mp4',
'title': 'CASIMA7.22',
'thumbnail': r're:http://.*\.jpg',
'thumbnail': r're:https?://.*\.jpg',
'uploader': 'CASIMA Official Store',
'timestamp': 1500717600,
'upload_date': '20170722',

View File

@ -1,6 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
import json
from .common import InfoExtractor

View File

@ -1,12 +1,10 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
qualities,
remove_end,
strip_or_none,
try_get,
unified_timestamp,
url_basename,
@ -102,10 +100,7 @@ class AllocineIE(InfoExtractor):
video_id = display_id
media_data = self._download_json(
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
title = remove_end(
self._html_search_regex(
r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
' - AlloCiné')
title = remove_end(strip_or_none(self._html_extract_title(webpage)), ' - AlloCiné')
for key, value in media_data['video'].items():
if not key.endswith('Path'):
continue
@ -117,8 +112,6 @@ class AllocineIE(InfoExtractor):
})
duration, view_count, timestamp = [None] * 3
self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,

View File

@ -1,5 +1,3 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_iso8601,

View File

@ -0,0 +1,83 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
dict_get,
get_element_by_class,
int_or_none,
unified_strdate,
url_or_none,
)
class Alsace20TVBaseIE(InfoExtractor):
def _extract_video(self, video_id, url=None):
info = self._download_json(
'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
video_id) or {}
title = info.get('titre')
formats = []
for res, fmt_url in (info.get('files') or {}).items():
formats.extend(
self._extract_smil_formats(fmt_url, video_id, fatal=False)
if '/smil:_' in fmt_url
else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None
return {
'id': video_id,
'title': title,
'formats': formats,
'description': clean_html(get_element_by_class('wysiwyg', webpage)),
'upload_date': upload_date,
'thumbnail': thumbnail,
'duration': int_or_none(self._og_search_property('video:duration', webpage) if webpage else None),
'view_count': int_or_none(info.get('nb_vues')),
}
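Note: a worked example of the upload-date recovery above; the thumbnail URL is invented but follows the '/YYMMDD_' naming the regex expects.

import re

from yt_dlp.utils import unified_strdate

thumbnail = 'https://www.alsace20.tv/images/220203_jt.jpg'  # invented
ymd = re.search(r'/(\d{6})_', thumbnail).group(1)
print(unified_strdate('20%s-%s-%s' % (ymd[:2], ymd[2:4], ymd[4:])))  # -> 20220203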
class Alsace20TVIE(Alsace20TVBaseIE):
_VALID_URL = r'https?://(?:www\.)?alsace20\.tv/(?:[\w-]+/)+[\w-]+-(?P<id>[\w]+)'
_TESTS = [{
'url': 'https://www.alsace20.tv/VOD/Actu/JT/Votre-JT-jeudi-3-fevrier-lyNHCXpYJh.html',
'info_dict': {
'id': 'lyNHCXpYJh',
'ext': 'mp4',
'description': 'md5:fc0bc4a0692d3d2dba4524053de4c7b7',
'title': 'Votre JT du jeudi 3 février',
'upload_date': '20220203',
'thumbnail': r're:https?://.+\.jpg',
'duration': 1073,
'view_count': int,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
return self._extract_video(video_id, url)
class Alsace20TVEmbedIE(Alsace20TVBaseIE):
_VALID_URL = r'https?://(?:www\.)?alsace20\.tv/emb/(?P<id>[\w]+)'
_TESTS = [{
'url': 'https://www.alsace20.tv/emb/lyNHCXpYJh',
# 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
'info_dict': {
'id': 'lyNHCXpYJh',
'ext': 'mp4',
'title': 'Votre JT du jeudi 3 février',
'upload_date': '20220203',
'thumbnail': r're:https?://.+\.jpg',
'view_count': int,
},
'params': {
'format': 'bestvideo',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
return self._extract_video(video_id)

View File

@ -1,6 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@ -66,22 +63,13 @@ class AluraIE(InfoExtractor):
f['height'] = int('720' if m.group('res') == 'hd' else '480')
formats.extend(video_format)
self._sort_formats(formats)
return {
'id': video_id,
'title': video_title,
"formats": formats
}
def _real_initialize(self):
self._login()
def _login(self):
username, password = self._get_login_info()
if username is None:
return
pass
def _perform_login(self, username, password):
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login popup')
@ -123,7 +111,7 @@ class AluraIE(InfoExtractor):
raise ExtractorError('Unable to log in')
class AluraCourseIE(AluraIE):
class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:cursos\.)?alura\.com\.br/course/(?P<id>[^/]+)'
_LOGIN_URL = 'https://cursos.alura.com.br/loginForm?urlAfterLogin=/loginForm'

View File

@ -1,6 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .youtube import YoutubeIE
from .vimeo import VimeoIE

View File

@ -1,6 +1,17 @@
# coding: utf-8
import re
from .common import InfoExtractor
from ..utils import int_or_none
from ..utils import (
ExtractorError,
clean_html,
float_or_none,
get_element_by_attribute,
get_element_by_class,
int_or_none,
js_to_json,
traverse_obj,
url_or_none,
)
class AmazonStoreIE(InfoExtractor):
@ -10,7 +21,7 @@ class AmazonStoreIE(InfoExtractor):
'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/',
'info_dict': {
'id': 'B098XNCHLD',
'title': 'md5:5f3194dbf75a8dcfc83079bd63a2abed',
'title': str,
},
'playlist_mincount': 1,
'playlist': [{
@ -19,28 +30,48 @@ class AmazonStoreIE(InfoExtractor):
'ext': 'mp4',
'title': 'mcdodo usb c cable 100W 5a',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 34,
},
}]
}],
'expected_warnings': ['Unable to extract data'],
}, {
'url': 'https://www.amazon.in/Sony-WH-1000XM4-Cancelling-Headphones-Bluetooth/dp/B0863TXGM3',
'info_dict': {
'id': 'B0863TXGM3',
'title': 'md5:b0bde4881d3cfd40d63af19f7898b8ff',
'title': str,
},
'playlist_mincount': 4,
'expected_warnings': ['Unable to extract data'],
}, {
'url': 'https://www.amazon.com/dp/B0845NXCXF/',
'info_dict': {
'id': 'B0845NXCXF',
'title': 'md5:2145cd4e3c7782f1ee73649a3cff1171',
'title': str,
},
'playlist_mincount': 1,
'expected_warnings': ['Unable to extract data'],
}, {
'url': 'https://www.amazon.es/Samsung-Smartphone-s-AMOLED-Quad-c%C3%A1mara-espa%C3%B1ola/dp/B08WX337PQ',
'info_dict': {
'id': 'B08WX337PQ',
'title': str,
},
'playlist_mincount': 1,
'expected_warnings': ['Unable to extract data'],
}]
def _real_extract(self, url):
id = self._match_id(url)
webpage = self._download_webpage(url, id)
data_json = self._parse_json(self._html_search_regex(r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'(.*)\'\)', webpage, 'data'), id)
for retry in self.RetryManager():
webpage = self._download_webpage(url, id)
try:
data_json = self._search_json(
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id,
transform_source=js_to_json)
except ExtractorError as e:
retry.error = e
entries = [{
'id': video['marketPlaceID'],
'url': video['url'],
@ -50,4 +81,90 @@ class AmazonStoreIE(InfoExtractor):
'height': int_or_none(video.get('videoHeight')),
'width': int_or_none(video.get('videoWidth')),
} for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')]
return self.playlist_result(entries, playlist_id=id, playlist_title=data_json['title'])
return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title'))
class AmazonReviewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/gp/customer-reviews/(?P<id>[^/&#$?]+)'
_TESTS = [{
'url': 'https://www.amazon.com/gp/customer-reviews/R10VE9VUSY19L3/ref=cm_cr_arp_d_rvw_ttl',
'info_dict': {
'id': 'R10VE9VUSY19L3',
'ext': 'mp4',
'title': 'Get squad #Suspicious',
'description': 'md5:7012695052f440a1e064e402d87e0afb',
'uploader': 'Kimberly Cronkright',
'average_rating': 1.0,
'thumbnail': r're:^https?://.*\.jpg$',
},
'expected_warnings': ['Review body was not found in webpage'],
}, {
'url': 'https://www.amazon.com/gp/customer-reviews/R10VE9VUSY19L3/ref=cm_cr_arp_d_rvw_ttl?language=es_US',
'info_dict': {
'id': 'R10VE9VUSY19L3',
'ext': 'mp4',
'title': 'Get squad #Suspicious',
'description': 'md5:7012695052f440a1e064e402d87e0afb',
'uploader': 'Kimberly Cronkright',
'average_rating': 1.0,
'thumbnail': r're:^https?://.*\.jpg$',
},
'expected_warnings': ['Review body was not found in webpage'],
}, {
'url': 'https://www.amazon.in/gp/customer-reviews/RV1CO8JN5VGXV/',
'info_dict': {
'id': 'RV1CO8JN5VGXV',
'ext': 'mp4',
'title': 'Not sure about its durability',
'description': 'md5:1a252c106357f0a3109ebf37d2e87494',
'uploader': 'Shoaib Gulzar',
'average_rating': 2.0,
'thumbnail': r're:^https?://.*\.jpg$',
},
'expected_warnings': ['Review body was not found in webpage'],
}]
def _real_extract(self, url):
video_id = self._match_id(url)
for retry in self.RetryManager():
webpage = self._download_webpage(url, video_id)
review_body = get_element_by_attribute('data-hook', 'review-body', webpage)
if not review_body:
retry.error = ExtractorError('Review body was not found in webpage', expected=True)
formats, subtitles = [], {}
manifest_url = self._search_regex(
r'data-video-url="([^"]+)"', review_body, 'm3u8 url', default=None)
if url_or_none(manifest_url):
fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
manifest_url, video_id, 'mp4', fatal=False)
formats.extend(fmts)
video_url = self._search_regex(
r'<input[^>]+\bvalue="([^"]+)"[^>]+\bclass="video-url"', review_body, 'mp4 url', default=None)
if url_or_none(video_url):
formats.append({
'url': video_url,
'ext': 'mp4',
'format_id': 'http-mp4',
})
if not formats:
self.raise_no_formats('No video found for this customer review', expected=True)
return {
'id': video_id,
'title': (clean_html(get_element_by_attribute('data-hook', 'review-title', webpage))
or self._html_extract_title(webpage)),
'description': clean_html(traverse_obj(re.findall(
r'<span(?:\s+class="cr-original-review-content")?>(.+?)</span>', review_body), -1)),
'uploader': clean_html(get_element_by_class('a-profile-name', webpage)),
'average_rating': float_or_none(clean_html(get_element_by_attribute(
'data-hook', 'review-star-rating', webpage) or '').partition(' ')[0]),
'thumbnail': self._search_regex(
r'data-thumbnail-url="([^"]+)"', review_body, 'thumbnail', default=None),
'formats': formats,
'subtitles': subtitles,
}
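Note: the average_rating parse above takes the leading token of the star-rating label; a short example (label text assumed from Amazon's markup, not taken from this diff).

from yt_dlp.utils import float_or_none

rating_label = '2.0 out of 5 stars'  # assumed clean_html() output
print(float_or_none(rating_label.partition(' ')[0]))  # -> 2.0
print(float_or_none(''.partition(' ')[0]))  # element missing -> None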

View File

@ -0,0 +1,290 @@
import json
from .common import InfoExtractor
from ..utils import ExtractorError, int_or_none, traverse_obj, try_get
class AmazonMiniTVBaseIE(InfoExtractor):
def _real_initialize(self):
self._download_webpage(
'https://www.amazon.in/minitv', None,
note='Fetching guest session cookies')
AmazonMiniTVBaseIE.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value
def _call_api(self, asin, data=None, note=None):
device = {'clientId': 'ATVIN', 'deviceLocale': 'en_GB'}
if data:
data['variables'].update({
'contentType': 'VOD',
'sessionIdToken': self.session_id,
**device,
})
resp = self._download_json(
f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
asin, note=note, headers={'Content-Type': 'application/json'},
data=json.dumps(data).encode() if data else None,
query=None if data else {
'deviceType': 'A1WMMUXPCUJL4N',
'contentId': asin,
**device,
})
if resp.get('errors'):
raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}')
elif not data:
return resp
return resp['data'][data['operationName']]
class AmazonMiniTVIE(AmazonMiniTVBaseIE):
_VALID_URL = r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P<id>[a-f0-9-]+)'
_TESTS = [{
'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
'info_dict': {
'id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
'ext': 'mp4',
'title': 'May I Kiss You?',
'language': 'Hindi',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:a549bfc747973e04feb707833474e59d',
'release_timestamp': 1644710400,
'release_date': '20220213',
'duration': 846,
'chapters': 'count:2',
'series': 'Couple Goals',
'series_id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
'season': 'Season 3',
'season_number': 3,
'season_id': 'amzn1.dv.gti.20331016-d9b9-4968-b991-c89fa4927a36',
'episode': 'May I Kiss You?',
'episode_number': 2,
'episode_id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
},
}, {
'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
'info_dict': {
'id': 'amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
'ext': 'mp4',
'title': 'Jahaan',
'language': 'Hindi',
'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:05eb765a77bf703f322f120ec6867339',
'release_timestamp': 1647475200,
'release_date': '20220317',
'duration': 783,
'chapters': [],
},
}, {
'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab',
'only_matching': True,
}, {
'url': 'amazonminitv:amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
'only_matching': True,
}, {
'url': 'amazonminitv:280d2564-584f-452f-9c98-7baf906e01ab',
'only_matching': True,
}]
_GRAPHQL_QUERY_CONTENT = '''
query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) {
content(
applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
contentId: $contentId
contentType: $contentType
) {
contentId
name
... on Episode {
contentId
vodType
name
images
description {
synopsis
contentLengthInSeconds
}
publicReleaseDateUTC
audioTracks
seasonId
seriesId
seriesName
seasonNumber
episodeNumber
timecode {
endCreditsTime
}
}
... on MovieContent {
contentId
vodType
name
description {
synopsis
contentLengthInSeconds
}
images
publicReleaseDateUTC
audioTracks
}
}
}'''
def _real_extract(self, url):
asin = f'amzn1.dv.gti.{self._match_id(url)}'
prs = self._call_api(asin, note='Downloading playback info')
formats, subtitles = [], {}
for type_, asset in prs['playbackAssets'].items():
if not traverse_obj(asset, 'manifestUrl'):
continue
if type_ == 'hls':
m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
asset['manifestUrl'], asin, ext='mp4', entry_protocol='m3u8_native',
m3u8_id=type_, fatal=False)
formats.extend(m3u8_fmts)
subtitles = self._merge_subtitles(subtitles, m3u8_subs)
elif type_ == 'dash':
mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles(
asset['manifestUrl'], asin, mpd_id=type_, fatal=False)
formats.extend(mpd_fmts)
subtitles = self._merge_subtitles(subtitles, mpd_subs)
else:
self.report_warning(f'Unknown asset type: {type_}')
title_info = self._call_api(
asin, note='Downloading title info', data={
'operationName': 'content',
'variables': {'contentId': asin},
'query': self._GRAPHQL_QUERY_CONTENT,
})
credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000)
is_episode = title_info.get('vodType') == 'EPISODE'
return {
'id': asin,
'title': title_info.get('name'),
'formats': formats,
'subtitles': subtitles,
'language': traverse_obj(title_info, ('audioTracks', 0)),
'thumbnails': [{
'id': type_,
'url': url,
} for type_, url in (title_info.get('images') or {}).items()],
'description': traverse_obj(title_info, ('description', 'synopsis')),
'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)),
'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')),
'chapters': [{
'start_time': credits_time,
'title': 'End Credits',
}] if credits_time else [],
'series': title_info.get('seriesName'),
'series_id': title_info.get('seriesId'),
'season_number': title_info.get('seasonNumber'),
'season_id': title_info.get('seasonId'),
'episode': title_info.get('name') if is_episode else None,
'episode_number': title_info.get('episodeNumber'),
'episode_id': asin if is_episode else None,
}
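Note: the chapter synthesis above converts the credits timecode from milliseconds; a runnable example with an invented response fragment.

from yt_dlp.utils import try_get

title_info = {'timecode': {'endCreditsTime': 826000}}  # invented, in ms
credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000)
print([{'start_time': credits_time, 'title': 'End Credits'}] if credits_time else [])
# -> [{'start_time': 826.0, 'title': 'End Credits'}]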
class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE):
IE_NAME = 'amazonminitv:season'
_VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
IE_DESC = 'Amazon MiniTV Series, "minitv:season:" prefix'
_TESTS = [{
'url': 'amazonminitv:season:amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
'playlist_mincount': 6,
'info_dict': {
'id': 'amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
},
}, {
'url': 'amazonminitv:season:0aa996eb-6a1b-4886-a342-387fbd2f1db0',
'only_matching': True,
}]
_GRAPHQL_QUERY = '''
query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) {
getEpisodes(
applicationContextInput: {sessionIdToken: $sessionIdToken, deviceLocale: $deviceLocale, clientId: $clientId}
episodeOrSeasonId: $episodeOrSeasonId
) {
episodes {
... on Episode {
contentId
name
images
seriesName
seasonId
seriesId
seasonNumber
episodeNumber
description {
synopsis
contentLengthInSeconds
}
publicReleaseDateUTC
}
}
}
}
'''
def _entries(self, asin):
season_info = self._call_api(
asin, note='Downloading season info', data={
'operationName': 'getEpisodes',
'variables': {'episodeOrSeasonId': asin},
'query': self._GRAPHQL_QUERY,
})
for episode in season_info['episodes']:
yield self.url_result(
f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId'])
def _real_extract(self, url):
asin = f'amzn1.dv.gti.{self._match_id(url)}'
return self.playlist_result(self._entries(asin), asin)
class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE):
IE_NAME = 'amazonminitv:series'
_VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
_TESTS = [{
'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
'playlist_mincount': 3,
'info_dict': {
'id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
},
}, {
'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0',
'only_matching': True,
}]
_GRAPHQL_QUERY = '''
query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) {
getSeasons(
applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
episodeOrSeasonOrSeriesId: $episodeOrSeasonOrSeriesId
) {
seasons {
seasonId
}
}
}
'''
def _entries(self, asin):
season_info = self._call_api(
asin, note='Downloading series info', data={
'operationName': 'getSeasons',
'variables': {'episodeOrSeasonOrSeriesId': asin},
'query': self._GRAPHQL_QUERY,
})
for season in season_info['seasons']:
yield self.url_result(f'amazonminitv:season:{season["seasonId"]}', AmazonMiniTVSeasonIE, season['seasonId'])
def _real_extract(self, url):
asin = f'amzn1.dv.gti.{self._match_id(url)}'
return self.playlist_result(self._entries(asin), asin)

View File

@ -1,6 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .theplatform import ThePlatformIE
@ -12,7 +9,7 @@ from ..utils import (
)
class AMCNetworksIE(ThePlatformIE):
class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
_TESTS = [{
'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
@ -109,7 +106,6 @@ class AMCNetworksIE(ThePlatformIE):
media_url = update_url_query(media_url, query)
formats, subtitles = self._extract_theplatform_smil(
media_url, video_id)
self._sort_formats(formats)
thumbnails = []
thumbnail_urls = [properties.get('imageDesktop')]

View File

@ -1,6 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
import json
from .common import InfoExtractor
@ -14,7 +11,7 @@ from ..utils import (
class AmericasTestKitchenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
'md5': 'b861c3e365ac38ad319cfd509c30577f',
@ -22,15 +19,20 @@ class AmericasTestKitchenIE(InfoExtractor):
'id': '5b400b9ee338f922cb06450c',
'title': 'Japanese Suppers',
'ext': 'mp4',
'display_id': 'weeknight-japanese-suppers',
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
'thumbnail': r're:^https?://',
'timestamp': 1523318400,
'upload_date': '20180410',
'release_date': '20180410',
'series': "America's Test Kitchen",
'season_number': 18,
'timestamp': 1523304000,
'upload_date': '20180409',
'release_date': '20180409',
'series': 'America\'s Test Kitchen',
'season': 'Season 18',
'episode': 'Japanese Suppers',
'season_number': 18,
'episode_number': 15,
'duration': 1376,
'thumbnail': r're:^https?://',
'average_rating': 0,
'view_count': int,
},
'params': {
'skip_download': True,
@ -43,15 +45,20 @@ class AmericasTestKitchenIE(InfoExtractor):
'id': '5fbe8c61bda2010001c6763b',
'title': 'Simple Chicken Dinner',
'ext': 'mp4',
'display_id': 'atktv_2103_simple-chicken-dinner_full-episode_web-mp4',
'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
'thumbnail': r're:^https?://',
'timestamp': 1610755200,
'upload_date': '20210116',
'release_date': '20210116',
'series': "America's Test Kitchen",
'season_number': 21,
'timestamp': 1610737200,
'upload_date': '20210115',
'release_date': '20210115',
'series': 'America\'s Test Kitchen',
'season': 'Season 21',
'episode': 'Simple Chicken Dinner',
'season_number': 21,
'episode_number': 3,
'duration': 1397,
'thumbnail': r're:^https?://',
'view_count': int,
'average_rating': 0,
},
'params': {
'skip_download': True,
@ -60,10 +67,10 @@ class AmericasTestKitchenIE(InfoExtractor):
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
'only_matching': True,
}, {
'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
'url': 'https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do',
'only_matching': True,
}, {
'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
'only_matching': True,
}]
@ -93,7 +100,7 @@ class AmericasTestKitchenIE(InfoExtractor):
class AmericasTestKitchenSeasonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com(?P<show>/cookscountry)?/episodes/browse/season_(?P<id>\d+)'
_TESTS = [{
# ATK Season
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
@ -104,7 +111,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
'playlist_count': 13,
}, {
# Cooks Country Season
'url': 'https://www.cookscountry.com/episodes/browse/season_12',
'url': 'https://www.americastestkitchen.com/cookscountry/episodes/browse/season_12',
'info_dict': {
'id': 'season_12',
'title': 'Season 12',
@ -113,17 +120,17 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
}]
def _real_extract(self, url):
show_name, season_number = self._match_valid_url(url).groups()
show_path, season_number = self._match_valid_url(url).group('show', 'id')
season_number = int(season_number)
slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
slug = 'cco' if show_path == '/cookscountry' else 'atk'
season = 'Season %d' % season_number
season_search = self._download_json(
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
season, headers={
'Origin': 'https://www.%s.com' % show_name,
'Origin': 'https://www.americastestkitchen.com',
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
'X-Algolia-Application-Id': 'Y1FNZXUI30',
}, query={
@ -139,12 +146,12 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
def entries():
for episode in (season_search.get('hits') or []):
search_url = episode.get('search_url')
search_url = episode.get('search_url') # always formatted like '/episode/123-title-of-episode'
if not search_url:
continue
yield {
'_type': 'url',
'url': 'https://www.%s.com%s' % (show_name, search_url),
'url': f'https://www.americastestkitchen.com{show_path or ""}{search_url}',
'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
'title': episode.get('title'),
'description': episode.get('description'),
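Note: the URL rebuild above stitches the show path back in front of the Algolia search_url; both sample values below appear in the tests in this diff.

show_path = '/cookscountry'  # None for plain ATK seasons
search_url = '/episode/564-when-only-chocolate-will-do'
print(f'https://www.americastestkitchen.com{show_path or ""}{search_url}')
# -> https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do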

View File

@ -1,6 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
@ -13,7 +10,7 @@ from ..utils import (
)
class AMPIE(InfoExtractor):
class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
# parse Akamai Adaptive Media Player feed
def _extract_feed_info(self, url):
feed = self._download_json(
@ -87,8 +84,6 @@ class AMPIE(InfoExtractor):
'ext': ext,
})
self._sort_formats(formats)
timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
return {

View File

@ -0,0 +1,56 @@
import re
from .common import InfoExtractor
from ..utils import url_or_none, merge_dicts
class AngelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?angel\.com/watch/(?P<series>[^/?#]+)/episode/(?P<id>[\w-]+)/season-(?P<season_number>\d+)/episode-(?P<episode_number>\d+)/(?P<title>[^/?#]+)'
_TESTS = [{
'url': 'https://www.angel.com/watch/tuttle-twins/episode/2f3d0382-ea82-4cdc-958e-84fbadadc710/season-1/episode-1/when-laws-give-you-lemons',
'md5': '4734e5cfdd64a568e837246aa3eaa524',
'info_dict': {
'id': '2f3d0382-ea82-4cdc-958e-84fbadadc710',
'ext': 'mp4',
'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons',
'description': 'md5:73b704897c20ab59c433a9c0a8202d5e',
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
'duration': 1359.0
}
}, {
'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name',
'md5': 'e4774bad0a5f0ad2e90d175cafdb797d',
'info_dict': {
'id': '8dfb714d-bca5-4812-8125-24fb9514cd10',
'ext': 'mp4',
'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name',
'description': 'md5:aadfb4827a94415de5ff6426e6dee3be',
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
'duration': 3276.0
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
json_ld = self._search_json_ld(webpage, video_id)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
json_ld.pop('url'), video_id, note='Downloading HD m3u8 information')
info_dict = {
'id': video_id,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'formats': formats,
'subtitles': subtitles
}
# Angel uses cloudinary in the background and supports image transformations.
# We remove these transformations and return the source file
base_thumbnail_url = url_or_none(self._og_search_thumbnail(webpage)) or json_ld.pop('thumbnails')
if base_thumbnail_url:
info_dict['thumbnail'] = re.sub(r'(/upload)/.+(/angel-app/.+)$', r'\1\2', base_thumbnail_url)
return merge_dicts(info_dict, json_ld)
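Note: a demonstration of the Cloudinary cleanup above; the URL is invented but keeps a transformation segment between /upload and /angel-app for the regex to strip.

import re

thumb = ('https://images.angelstudios.com/image/upload/'
         'w_600,h_338,c_fill/angel-app/thumb.jpg')  # invented
print(re.sub(r'(/upload)/.+(/angel-app/.+)$', r'\1\2', thumb))
# -> https://images.angelstudios.com/image/upload/angel-app/thumb.jpg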

View File

@ -1,285 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
urlencode_postdata,
int_or_none,
str_or_none,
determine_ext,
)
from ..compat import compat_HTTPError
class AnimeLabBaseIE(InfoExtractor):
_LOGIN_REQUIRED = True
_LOGIN_URL = 'https://www.animelab.com/login'
_NETRC_MACHINE = 'animelab'
def _login(self):
def is_logged_in(login_webpage):
return 'Sign In' not in login_webpage
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
# Check if already logged in
if is_logged_in(login_page):
return
(username, password) = self._get_login_info()
if username is None and self._LOGIN_REQUIRED:
self.raise_login_required('Login is required to access any AnimeLab content')
login_form = {
'email': username,
'password': password,
}
try:
response = self._download_webpage(
self._LOGIN_URL, None, 'Logging in', 'Wrong login info',
data=urlencode_postdata(login_form),
headers={'Content-Type': 'application/x-www-form-urlencoded'})
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
raise ExtractorError('Unable to log in (wrong credentials?)', expected=True)
else:
raise
# if login was successful
if is_logged_in(response):
return
raise ExtractorError('Unable to login (cannot verify if logged in)')
def _real_initialize(self):
self._login()
class AnimeLabIE(AnimeLabBaseIE):
_VALID_URL = r'https?://(?:www\.)?animelab\.com/player/(?P<id>[^/]+)'
# the following tests require authentication, but a free account will suffice
# just set 'usenetrc' to true in test/local_parameters.json if you use a .netrc file
# or you can set 'username' and 'password' there
# the tests also select a specific format so that the same video is downloaded
# regardless of whether the user is premium or not (needs testing on a premium account)
_TEST = {
'url': 'https://www.animelab.com/player/fullmetal-alchemist-brotherhood-episode-42',
'md5': '05bde4b91a5d1ff46ef5b94df05b0f7f',
'info_dict': {
'id': '383',
'ext': 'mp4',
'display_id': 'fullmetal-alchemist-brotherhood-episode-42',
'title': 'Fullmetal Alchemist: Brotherhood - Episode 42 - Signs of a Counteroffensive',
'description': 'md5:103eb61dd0a56d3dfc5dbf748e5e83f4',
'series': 'Fullmetal Alchemist: Brotherhood',
'episode': 'Signs of a Counteroffensive',
'episode_number': 42,
'duration': 1469,
'season': 'Season 1',
'season_number': 1,
'season_id': '38',
},
'params': {
'format': '[format_id=21711_yeshardsubbed_ja-JP][height=480]',
},
'skip': 'All AnimeLab content requires authentication',
}
def _real_extract(self, url):
display_id = self._match_id(url)
# unfortunately we can get different URLs for the same formats
# e.g. if we are using a "free" account so no dubs available
# (so _remove_duplicate_formats is not effective)
# so we use a dictionary as a workaround
formats = {}
for language_option_url in ('https://www.animelab.com/player/%s/subtitles',
'https://www.animelab.com/player/%s/dubbed'):
actual_url = language_option_url % display_id
webpage = self._download_webpage(actual_url, display_id, 'Downloading URL ' + actual_url)
video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id)
position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position'))
raw_data = video_collection[position]['videoEntry']
video_id = str_or_none(raw_data['id'])
# create a title from many sources (while grabbing other info)
# TODO use more fallback sources to get some of these
series = raw_data.get('showTitle')
video_type = raw_data.get('videoEntryType', {}).get('name')
episode_number = raw_data.get('episodeNumber')
episode_name = raw_data.get('name')
title_parts = (series, video_type, episode_number, episode_name)
if None not in title_parts:
title = '%s - %s %s - %s' % title_parts
else:
title = episode_name
description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None)
duration = int_or_none(raw_data.get('duration'))
thumbnail_data = raw_data.get('images', [])
thumbnails = []
for thumbnail in thumbnail_data:
for instance in thumbnail['imageInstances']:
image_data = instance.get('imageInfo', {})
thumbnails.append({
'id': str_or_none(image_data.get('id')),
'url': image_data.get('fullPath'),
'width': image_data.get('width'),
'height': image_data.get('height'),
})
season_data = raw_data.get('season', {}) or {}
season = str_or_none(season_data.get('name'))
season_number = int_or_none(season_data.get('seasonNumber'))
season_id = str_or_none(season_data.get('id'))
for video_data in raw_data['videoList']:
current_video_list = {}
current_video_list['language'] = video_data.get('language', {}).get('languageCode')
is_hardsubbed = video_data.get('hardSubbed')
for video_instance in video_data['videoInstances']:
httpurl = video_instance.get('httpUrl')
url = httpurl if httpurl else video_instance.get('rtmpUrl')
if url is None:
# this video format is unavailable to the user (not premium etc.)
continue
current_format = current_video_list.copy()
format_id_parts = []
format_id_parts.append(str_or_none(video_instance.get('id')))
if is_hardsubbed is not None:
if is_hardsubbed:
format_id_parts.append('yeshardsubbed')
else:
format_id_parts.append('nothardsubbed')
format_id_parts.append(current_format['language'])
format_id = '_'.join([x for x in format_id_parts if x is not None])
ext = determine_ext(url)
if ext == 'm3u8':
for format_ in self._extract_m3u8_formats(
url, video_id, m3u8_id=format_id, fatal=False):
formats[format_['format_id']] = format_
continue
elif ext == 'mpd':
for format_ in self._extract_mpd_formats(
url, video_id, mpd_id=format_id, fatal=False):
formats[format_['format_id']] = format_
continue
current_format['url'] = url
quality_data = video_instance.get('videoQuality')
if quality_data:
quality = quality_data.get('name') or quality_data.get('description')
else:
quality = None
height = None
if quality:
height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None))
if height is None:
self.report_warning('Could not get height of video')
else:
current_format['height'] = height
current_format['format_id'] = format_id
formats[current_format['format_id']] = current_format
formats = list(formats.values())
self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'series': series,
'episode': episode_name,
'episode_number': int_or_none(episode_number),
'thumbnails': thumbnails,
'duration': duration,
'formats': formats,
'season': season,
'season_number': season_number,
'season_id': season_id,
}
class AnimeLabShowsIE(AnimeLabBaseIE):
_VALID_URL = r'https?://(?:www\.)?animelab\.com/shows/(?P<id>[^/]+)'
_TEST = {
'url': 'https://www.animelab.com/shows/attack-on-titan',
'info_dict': {
'id': '45',
'title': 'Attack on Titan',
'description': 'md5:989d95a2677e9309368d5cf39ba91469',
},
'playlist_count': 59,
'skip': 'All AnimeLab content requires authentication',
}
def _real_extract(self, url):
_BASE_URL = 'http://www.animelab.com'
_SHOWS_API_URL = '/api/videoentries/show/videos/'
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id, 'Downloading requested URL')
show_data_str = self._search_regex(r'({"id":.*}),\svideoEntry', webpage, 'AnimeLab show data')
show_data = self._parse_json(show_data_str, display_id)
show_id = str_or_none(show_data.get('id'))
title = show_data.get('name')
description = show_data.get('shortSynopsis') or show_data.get('longSynopsis')
entries = []
for season in show_data['seasons']:
season_id = season['id']
get_data = urlencode_postdata({
'seasonId': season_id,
'limit': 1000,
})
# despite using urlencode_postdata, we are sending a GET request
target_url = _BASE_URL + _SHOWS_API_URL + show_id + "?" + get_data.decode('utf-8')
response = self._download_webpage(
target_url,
None, 'Season id %s' % season_id)
season_data = self._parse_json(response, display_id)
for video_data in season_data['list']:
entries.append(self.url_result(
_BASE_URL + '/player/' + video_data['slug'], 'AnimeLab',
str_or_none(video_data.get('id')), video_data.get('name')
))
return {
'_type': 'playlist',
'id': show_id,
'title': title,
'description': description,
'entries': entries,
}
# TODO implement myqueue

View File

@ -1,291 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
extract_attributes,
ExtractorError,
join_nonempty,
url_or_none,
urlencode_postdata,
urljoin,
)
class AnimeOnDemandIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?anime-on-demand\.de/anime/(?P<id>\d+)'
_LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
_NETRC_MACHINE = 'animeondemand'
# German-speaking countries of Europe
_GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
_TESTS = [{
# jap, OmU
'url': 'https://www.anime-on-demand.de/anime/161',
'info_dict': {
'id': '161',
'title': 'Grimgar, Ashes and Illusions (OmU)',
'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
},
'playlist_mincount': 4,
}, {
# Film wording is used instead of Episode, ger/jap, Dub/OmU
'url': 'https://www.anime-on-demand.de/anime/39',
'only_matching': True,
}, {
# Episodes without titles, jap, OmU
'url': 'https://www.anime-on-demand.de/anime/162',
'only_matching': True,
}, {
# ger/jap, Dub/OmU, account required
'url': 'https://www.anime-on-demand.de/anime/169',
'only_matching': True,
}, {
# Full length film, non-series, ger/jap, Dub/OmU, account required
'url': 'https://www.anime-on-demand.de/anime/185',
'only_matching': True,
}, {
# Flash videos
'url': 'https://www.anime-on-demand.de/anime/12',
'only_matching': True,
}]
def _login(self):
username, password = self._get_login_info()
if username is None:
return
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
if '>Our licensing terms allow the distribution of animes only to German-speaking countries of Europe' in login_page:
self.raise_geo_restricted(
'%s is only available in German-speaking countries of Europe' % self.IE_NAME)
login_form = self._form_hidden_inputs('new_user', login_page)
login_form.update({
'user[login]': username,
'user[password]': password,
})
post_url = self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
'post url', default=self._LOGIN_URL, group='url')
if not post_url.startswith('http'):
post_url = urljoin(self._LOGIN_URL, post_url)
response = self._download_webpage(
post_url, None, 'Logging in',
data=urlencode_postdata(login_form), headers={
'Referer': self._LOGIN_URL,
})
if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
error = self._search_regex(
r'<p[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</p>',
response, 'error', default=None, group='error')
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
def _real_initialize(self):
self._login()
def _real_extract(self, url):
anime_id = self._match_id(url)
webpage = self._download_webpage(url, anime_id)
if 'data-playlist=' not in webpage:
self._download_webpage(
self._APPLY_HTML5_URL, anime_id,
'Activating HTML5 beta', 'Unable to apply HTML5 beta')
webpage = self._download_webpage(url, anime_id)
csrf_token = self._html_search_meta(
'csrf-token', webpage, 'csrf token', fatal=True)
anime_title = self._html_search_regex(
r'(?s)<h1[^>]+itemprop="name"[^>]*>(.+?)</h1>',
webpage, 'anime name')
anime_description = self._html_search_regex(
r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
webpage, 'anime description', default=None)
def extract_info(html, video_id, num=None):
title, description = [None] * 2
formats = []
for input_ in re.findall(
r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
attributes = extract_attributes(input_)
title = attributes.get('data-dialog-header')
playlist_urls = []
for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
playlist_url = attributes.get(playlist_key)
if isinstance(playlist_url, compat_str) and re.match(
r'/?[\da-zA-Z]+', playlist_url):
playlist_urls.append(attributes[playlist_key])
if not playlist_urls:
continue
lang = attributes.get('data-lang')
lang_note = attributes.get('value')
for playlist_url in playlist_urls:
kind = self._search_regex(
r'videomaterialurl/\d+/([^/]+)/',
playlist_url, 'media kind', default=None)
format_id = join_nonempty(lang, kind) if lang or kind else str(num)
format_note = join_nonempty(kind, lang_note, delim=', ')
item_id_list = []
if format_id:
item_id_list.append(format_id)
item_id_list.append('videomaterial')
playlist = self._download_json(
urljoin(url, playlist_url), video_id,
'Downloading %s JSON' % ' '.join(item_id_list),
headers={
'X-Requested-With': 'XMLHttpRequest',
'X-CSRF-Token': csrf_token,
'Referer': url,
'Accept': 'application/json, text/javascript, */*; q=0.01',
}, fatal=False)
if not playlist:
continue
stream_url = url_or_none(playlist.get('streamurl'))
if stream_url:
rtmp = re.search(
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
stream_url)
if rtmp:
formats.append({
'url': rtmp.group('url'),
'app': rtmp.group('app'),
'play_path': rtmp.group('playpath'),
'page_url': url,
'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
'rtmp_real_time': True,
'format_id': 'rtmp',
'ext': 'flv',
})
continue
start_video = playlist.get('startvideo', 0)
playlist = playlist.get('playlist')
if not playlist or not isinstance(playlist, list):
continue
playlist = playlist[start_video]
title = playlist.get('title')
if not title:
continue
description = playlist.get('description')
for source in playlist.get('sources', []):
file_ = source.get('file')
if not file_:
continue
ext = determine_ext(file_)
format_id = join_nonempty(
lang, kind,
'hls' if ext == 'm3u8' else None,
'dash' if source.get('type') == 'video/dash' or ext == 'mpd' else None)
if ext == 'm3u8':
file_formats = self._extract_m3u8_formats(
file_, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False)
elif source.get('type') == 'video/dash' or ext == 'mpd':
continue
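# NOTE: the `continue` above short-circuits this branch, so the MPD
# extraction below is unreachable; DASH appears to be disabled on purpose.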
file_formats = self._extract_mpd_formats(
file_, video_id, mpd_id=format_id, fatal=False)
else:
continue
for f in file_formats:
f.update({
'language': lang,
'format_note': format_note,
})
formats.extend(file_formats)
return {
'title': title,
'description': description,
'formats': formats,
}
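# extract_entries() yields the full episode when formats were found and
# otherwise falls back to the teaser/trailer link in the same HTML block.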
def extract_entries(html, video_id, common_info, num=None):
info = extract_info(html, video_id, num)
if info['formats']:
self._sort_formats(info['formats'])
f = common_info.copy()
f.update(info)
yield f
# Extract teaser/trailer only when full episode is not available
if not info['formats']:
m = re.search(
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<',
html)
if m:
f = common_info.copy()
f.update({
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
'title': m.group('title'),
'url': urljoin(url, m.group('href')),
})
yield f
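# Every episode sits in an "episodebox" whose title attribute encodes the
# episode number and, optionally, a per-episode title.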
def extract_episodes(html):
for num, episode_html in enumerate(re.findall(
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1):
episodebox_title = self._search_regex(
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
episode_html, 'episodebox title', default=None, group='title')
if not episodebox_title:
continue
episode_number = int(self._search_regex(
r'(?:Episode|Film)\s*(\d+)',
episodebox_title, 'episode number', default=num))
episode_title = self._search_regex(
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
episodebox_title, 'episode title', default=None)
video_id = 'episode-%d' % episode_number
common_info = {
'id': video_id,
'series': anime_title,
'episode': episode_title,
'episode_number': episode_number,
}
for e in extract_entries(episode_html, video_id, common_info):
yield e
def extract_film(html, video_id):
common_info = {
'id': anime_id,
'title': anime_title,
'description': anime_description,
}
for e in extract_entries(html, video_id, common_info):
yield e
def entries():
has_episodes = False
for e in extract_episodes(webpage):
has_episodes = True
yield e
if not has_episodes:
for e in extract_film(webpage, anime_id):
yield e
return self.playlist_result(
entries(), anime_id, anime_title, anime_description)

View File

@ -0,0 +1,128 @@
import urllib.parse
from .common import InfoExtractor
from ..utils import (
HEADRequest,
ExtractorError,
determine_ext,
scale_thumbnails_to_max_format_width,
)
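# The three extractors below share one JSON player API: the watch and embed
# pages query it directly with the video id (or the player's `cid` parameter),
# while article pages first collect the embedded player iframes and hand them
# off to the embed extractor as a playlist.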
class Ant1NewsGrBaseIE(InfoExtractor):
def _download_and_extract_api_data(self, video_id, netloc, cid=None):
url = f'{self.http_scheme()}//{netloc}{self._API_PATH}'
info = self._download_json(url, video_id, query={'cid': cid or video_id})
try:
source = info['url']
except KeyError:
raise ExtractorError('no source found for %s' % video_id)
formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4')
if determine_ext(source) == 'm3u8' else ([{'url': source}], {}))
thumbnails = scale_thumbnails_to_max_format_width(
formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+')
return {
'id': video_id,
'title': info.get('title'),
'thumbnails': thumbnails,
'formats': formats,
'subtitles': subs,
}
class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE):
IE_NAME = 'ant1newsgr:watch'
IE_DESC = 'ant1news.gr videos'
_VALID_URL = r'https?://(?P<netloc>(?:www\.)?ant1news\.gr)/watch/(?P<id>\d+)/'
_API_PATH = '/templates/data/player'
_TESTS = [{
'url': 'https://www.ant1news.gr/watch/1506168/ant1-news-09112021-stis-18-45',
'md5': '95925e6b32106754235f2417e0d2dfab',
'info_dict': {
'id': '1506168',
'ext': 'mp4',
'title': 'md5:0ad00fa66ecf8aa233d26ab0dba7514a',
'description': 'md5:18665af715a6dcfeac1d6153a44f16b0',
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/26d46bf6-8158-4f02-b197-7096c714b2de.jpg',
},
}]
def _real_extract(self, url):
video_id, netloc = self._match_valid_url(url).group('id', 'netloc')
webpage = self._download_webpage(url, video_id)
info = self._download_and_extract_api_data(video_id, netloc)
info['description'] = self._og_search_description(webpage)
return info
class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
IE_NAME = 'ant1newsgr:article'
IE_DESC = 'ant1news.gr articles'
_VALID_URL = r'https?://(?:www\.)?ant1news\.gr/[^/]+/article/(?P<id>\d+)/'
_TESTS = [{
'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron',
'md5': '294f18331bb516539d72d85a82887dcc',
'info_dict': {
'id': '_xvg/m_cmbatw=',
'ext': 'mp4',
'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411',
'timestamp': 1603092840,
'upload_date': '20201019',
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
},
}, {
'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn',
'info_dict': {
'id': '620286',
'title': 'md5:91fe569e952e4d146485740ae927662b',
},
'playlist_mincount': 2,
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle')
embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage))
if not embed_urls:
raise ExtractorError('no videos found for %s' % video_id, expected=True)
return self.playlist_from_matches(
embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(),
video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})
class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
IE_NAME = 'ant1newsgr:embed'
IE_DESC = 'ant1news.gr embedded videos'
_BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
_VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)'
_EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
_API_PATH = '/news/templates/data/jsonPlayer'
_TESTS = [{
'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377',
'md5': 'dfc58c3a11a5a9aad2ba316ed447def3',
'info_dict': {
'id': '3f_li_c_az_jw_y_u=',
'ext': 'mp4',
'title': 'md5:a30c93332455f53e1e84ae0724f0adf7',
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/bbe31201-3f09-4a4e-87f5-8ad2159fffe2.jpg',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
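# Player URLs may redirect to a canonical host, so resolve them with a HEAD
# request first; the `cid` query argument of the final URL is the id the
# JSON player API expects.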
canonical_url = self._request_webpage(
HEADRequest(url), video_id,
note='Resolve canonical player URL',
errnote='Could not resolve canonical player URL').geturl()
_, netloc, _, _, query, _ = urllib.parse.urlparse(canonical_url)
cid = urllib.parse.parse_qs(query)['cid'][0]
return self._download_and_extract_api_data(video_id, netloc, cid=cid)

View File

@ -1,6 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import hashlib
import json
@ -10,38 +7,68 @@ import time
from .common import InfoExtractor
from ..aes import aes_encrypt
from ..compat import compat_str
from ..utils import (
bytes_to_intlist,
determine_ext,
int_or_none,
intlist_to_bytes,
join_nonempty,
smuggle_url,
strip_jsonp,
traverse_obj,
unescapeHTML,
unsmuggle_url,
)
# This import causes a ModuleNotFoundError on some systems for an unknown reason.
# See issues:
# https://github.com/yt-dlp/yt-dlp/issues/35
# https://github.com/ytdl-org/youtube-dl/issues/27449
# https://github.com/animelover1984/youtube-dl/issues/17
try:
from .anvato_token_generator import NFLTokenGenerator
except ImportError:
NFLTokenGenerator = None
def md5_text(s):
if not isinstance(s, compat_str):
s = compat_str(s)
return hashlib.md5(s.encode('utf-8')).hexdigest()
return hashlib.md5(str(s).encode()).hexdigest()
class AnvatoIE(InfoExtractor):
_VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)'
_API_BASE_URL = 'https://tkx.mp.lura.live/rest/v2'
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
_TESTS = [{
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
'md5': '921919dab3cd0b849ff3d624831ae3e2',
'info_dict': {
'id': '899441',
'ext': 'mp4',
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
'description': 'md5:85e05a3cc163f8c344340f220521136d',
'upload_date': '20201215',
'timestamp': 1608009755,
'thumbnail': r're:^https?://.*\.jpg',
'uploader': 'NFL',
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
'Player Highlights', 'Cleveland Browns', 'league'],
'duration': 157,
'categories': ['Entertainment', 'Game', 'Highlights'],
},
}, {
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
'md5': '837718bcfb3a7778d022f857f7a9b19e',
'info_dict': {
'id': '8032455',
'ext': 'mp4',
'title': '99-year-old woman learns to fly plane in Torrance, checks off bucket list dream',
'description': 'md5:0a12bab8159445e78f52a297a35c6609',
'upload_date': '20220928',
'timestamp': 1664408881,
'thumbnail': r're:^https?://.*\.jpg',
'uploader': 'LIN',
'tags': ['video', 'news', '5live'],
'duration': 155,
'categories': ['News'],
},
}]
# Copied from anvplayer.min.js
_ANVACK_TABLE = {
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
@ -214,86 +241,74 @@ class AnvatoIE(InfoExtractor):
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
}
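# _generate_nfl_token() replaces the separate NFL token-generator module
# (deleted further down in this diff): it exchanges client credentials for a
# bearer token at api.nfl.com/v1/reroute, then sends a GraphQL query to the
# Shield endpoint for a media token bound to this anvack/mcp id pair.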
def _generate_nfl_token(self, anvack, mcp_id):
reroute = self._download_json(
'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
headers={'X-Domain-Id': 100}, note='Fetching token info')
token_type = reroute.get('token_type') or 'Bearer'
auth_token = f'{token_type} {reroute["access_token"]}'
response = self._download_json(
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
'query': '''{
viewer {
mediaToken(anvack: "%s", id: %s) {
token
}
}
}''' % (anvack, mcp_id),
}).encode(), headers={
'Authorization': auth_token,
'Content-Type': 'application/json',
}, note='Fetching NFL API token')
return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
_TOKEN_GENERATORS = {
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': NFLTokenGenerator,
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
}
_API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
_TESTS = [{
# from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
'info_dict': {
'id': '4465496',
'ext': 'mp4',
'title': 'VIDEO: Humpback whale breaches right next to NH boat',
'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
'duration': 22,
'timestamp': 1534855680,
'upload_date': '20180821',
'uploader': 'ANV',
},
'params': {
'skip_download': True,
},
}, {
# from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
'only_matching': True,
}]
def __init__(self, *args, **kwargs):
super(AnvatoIE, self).__init__(*args, **kwargs)
self.__server_time = None
def _server_time(self, access_key, video_id):
if self.__server_time is not None:
return self.__server_time
return int_or_none(traverse_obj(self._download_json(
f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
note='Fetching server time', fatal=False), 'server_time')) or int(time.time())
self.__server_time = int(self._download_json(
self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id,
note='Fetching server time')['server_time'])
return self.__server_time
def _api_prefix(self, access_key):
return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage')
def _get_video_json(self, access_key, video_id):
def _get_video_json(self, access_key, video_id, extracted_token):
# See et() in anvplayer.min.js, which is an alias of getVideoJSON()
video_data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key)
video_data_url = f'{self._API_BASE_URL}/mcp/video/{video_id}?anvack={access_key}'
server_time = self._server_time(access_key, video_id)
input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time))
input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}'
auth_secret = intlist_to_bytes(aes_encrypt(
bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))
video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii')
query = {
'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'),
'rtyp': 'fp',
}
anvrid = md5_text(time.time() * 1000 * random.random())[:30]
api = {
'anvrid': anvrid,
'anvts': server_time,
}
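# Token precedence in the reworked branches below: a token smuggled in by the
# embedding page wins, then a per-access-key generator (currently only NFL),
# then an 'anvstk' digest derived from the static key table, and finally the
# literal fallback 'default'.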
if self._TOKEN_GENERATORS.get(access_key) is not None:
api['anvstk2'] = self._TOKEN_GENERATORS[access_key].generate(self, access_key, video_id)
if extracted_token is not None:
api['anvstk2'] = extracted_token
elif self._TOKEN_GENERATORS.get(access_key) is not None:
api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
elif self._ANVACK_TABLE.get(access_key) is not None:
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
else:
api['anvstk'] = md5_text('%s|%s|%d|%s' % (
access_key, anvrid, server_time,
self._ANVACK_TABLE.get(access_key, self._API_KEY)))
api['anvstk2'] = 'default'
return self._download_json(
video_data_url, video_id, transform_source=strip_jsonp,
data=json.dumps({'api': api}).encode('utf-8'))
video_data_url, video_id, transform_source=strip_jsonp, query=query,
data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8'))
def _get_anvato_videos(self, access_key, video_id):
video_data = self._get_video_json(access_key, video_id)
def _get_anvato_videos(self, access_key, video_id, token):
video_data = self._get_video_json(access_key, video_id, token)
formats = []
for published_url in video_data['published_urls']:
video_url = published_url['embed_url']
video_url = published_url.get('embed_url')
if not video_url:
continue
media_format = published_url.get('format')
ext = determine_ext(video_url)
@ -308,15 +323,27 @@ class AnvatoIE(InfoExtractor):
'tbr': tbr or None,
}
if media_format == 'm3u8' and tbr is not None:
vtt_subs, hls_subs = {}, {}
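# 'vtt' entries carry subtitles only: harvest them and skip format creation.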
if media_format == 'vtt':
_, vtt_subs = self._extract_m3u8_formats_and_subtitles(
video_url, video_id, m3u8_id='vtt', fatal=False)
continue
elif media_format == 'm3u8' and tbr is not None:
a_format.update({
'format_id': join_nonempty('hls', tbr),
'ext': 'mp4',
})
elif media_format == 'm3u8-variant' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
# For some videos the initial m3u8 URL returns JSON instead
manifest_json = self._download_json(
video_url, video_id, note='Downloading manifest JSON', errnote=False)
if manifest_json:
video_url = manifest_json.get('master_m3u8')
if not video_url:
continue
hls_fmts, hls_subs = self._extract_m3u8_formats_and_subtitles(
video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False)
formats.extend(hls_fmts)
continue
elif ext == 'mp3' or media_format == 'mp3':
a_format['vcodec'] = 'none'
@ -327,8 +354,6 @@ class AnvatoIE(InfoExtractor):
})
formats.append(a_format)
self._sort_formats(formats)
subtitles = {}
for caption in video_data.get('captions', []):
a_caption = {
@ -336,6 +361,7 @@ class AnvatoIE(InfoExtractor):
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
}
subtitles.setdefault(caption['language'], []).append(a_caption)
subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs)
return {
'id': video_id,
@ -352,30 +378,19 @@ class AnvatoIE(InfoExtractor):
'subtitles': subtitles,
}
@staticmethod
def _extract_urls(ie, webpage, video_id):
entries = []
for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage):
anvplayer_data = ie._parse_json(
mobj.group('anvp'), video_id, transform_source=unescapeHTML,
fatal=False)
if not anvplayer_data:
continue
video = anvplayer_data.get('video')
if not isinstance(video, compat_str) or not video.isdigit():
continue
access_key = anvplayer_data.get('accessKey')
if not access_key:
mcp = anvplayer_data.get('mcp')
if mcp:
access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get(
mcp.lower())
@classmethod
def _extract_from_webpage(cls, url, webpage):
for mobj in re.finditer(cls._ANVP_RE, webpage):
anvplayer_data = unescapeHTML(json.loads(mobj.group('anvp'))) or {}
video_id, access_key = anvplayer_data.get('video'), anvplayer_data.get('accessKey')
if not access_key:
access_key = cls._MCP_TO_ACCESS_KEY_TABLE.get((anvplayer_data.get('mcp') or '').lower())
if not (video_id or '').isdigit() or not access_key:
continue
entries.append(ie.url_result(
'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(),
video_id=video))
return entries
url = f'anvato:{access_key}:{video_id}'
if anvplayer_data.get('token'):
url = smuggle_url(url, {'token': anvplayer_data['token']})
yield cls.url_result(url, AnvatoIE, video_id)
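# e.g. a page matching _ANVP_RE might embed the player as
#   <script data-anvp='{"accessKey": "<key>", "video": "899441", "token": "..."}'></script>
# which this loop turns into "anvato:<key>:899441", smuggling the token
# alongside for _get_video_json() to use ("<key>" here is a placeholder).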
def _extract_anvato_videos(self, webpage, video_id):
anvplayer_data = self._parse_json(
@ -383,7 +398,7 @@ class AnvatoIE(InfoExtractor):
self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
video_id)
return self._get_anvato_videos(
anvplayer_data['accessKey'], anvplayer_data['video'])
anvplayer_data['accessKey'], anvplayer_data['video'], 'default') # cbslocal token = 'default'
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
@ -391,9 +406,7 @@ class AnvatoIE(InfoExtractor):
'countries': smuggled_data.get('geo_countries'),
})
mobj = self._match_valid_url(url)
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
access_key, video_id = self._match_valid_url(url).group('access_key_or_mcp', 'id')
if access_key not in self._ANVACK_TABLE:
access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
access_key) or access_key
return self._get_anvato_videos(access_key, video_id)
access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(access_key) or access_key
return self._get_anvato_videos(access_key, video_id, smuggled_data.get('token'))

View File

@ -1,7 +0,0 @@
from __future__ import unicode_literals
from .nfl import NFLTokenGenerator
__all__ = [
'NFLTokenGenerator',
]

View File

@ -1,6 +0,0 @@
from __future__ import unicode_literals
class TokenGenerator:
def generate(self, anvack, mcp_id):
raise NotImplementedError('This method must be implemented by subclasses')

Some files were not shown because too many files have changed in this diff.