Plugin cleanup and tweaks

This commit is contained in:
2023-02-20 19:18:45 -06:00
parent 372e4ff3dc
commit 3ad9e1c7bb
1138 changed files with 48878 additions and 40445 deletions

View File

@@ -1,27 +1,25 @@
# flake8: noqa: F401
from ..utils import load_plugins
from .common import PostProcessor
from .embedthumbnail import EmbedThumbnailPP
from .exec import ExecPP, ExecAfterDownloadPP
from .exec import ExecAfterDownloadPP, ExecPP
from .ffmpeg import (
FFmpegPostProcessor,
FFmpegCopyStreamPP,
FFmpegConcatPP,
FFmpegCopyStreamPP,
FFmpegEmbedSubtitlePP,
FFmpegExtractAudioPP,
FFmpegFixupDuplicateMoovPP,
FFmpegFixupDurationPP,
FFmpegFixupStretchedPP,
FFmpegFixupTimestampPP,
FFmpegFixupM3u8PP,
FFmpegFixupM4aPP,
FFmpegFixupStretchedPP,
FFmpegFixupTimestampPP,
FFmpegMergerPP,
FFmpegMetadataPP,
FFmpegPostProcessor,
FFmpegSplitChaptersPP,
FFmpegSubtitlesConvertorPP,
FFmpegThumbnailsConvertorPP,
FFmpegSplitChaptersPP,
FFmpegVideoConvertorPP,
FFmpegVideoRemuxerPP,
)
@@ -35,13 +33,15 @@ from .movefilesafterdownload import MoveFilesAfterDownloadPP
from .sponskrub import SponSkrubPP
from .sponsorblock import SponsorBlockPP
from .xattrpp import XAttrMetadataPP
from ..plugins import load_plugins
_PLUGIN_CLASSES = load_plugins('postprocessor', 'PP', globals())
_PLUGIN_CLASSES = load_plugins('postprocessor', 'PP')
def get_postprocessor(key):
return globals()[key + 'PP']
globals().update(_PLUGIN_CLASSES)
__all__ = [name for name in globals().keys() if name.endswith('PP')]
__all__.extend(('PostProcessor', 'FFmpegPostProcessor'))

View File

@@ -1,14 +1,16 @@
from __future__ import unicode_literals
import functools
import json
import os
import urllib.error
from ..compat import compat_str
from ..utils import (
_configuration_args,
encodeFilename,
PostProcessingError,
write_string,
RetryManager,
_configuration_args,
deprecation_warning,
encodeFilename,
network_exceptions,
sanitized_Request,
)
@@ -42,9 +44,6 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
an initial argument and then with the returned value of the previous
PostProcessor.
The chain will be stopped if one of them ever returns None or the end
of the chain is reached.
PostProcessor objects follow a "mutual registration" process similar
to InfoExtractor objects.
@@ -63,24 +62,29 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
@classmethod
def pp_key(cls):
name = cls.__name__[:-2]
return compat_str(name[6:]) if name[:6].lower() == 'ffmpeg' else name
return name[6:] if name[:6].lower() == 'ffmpeg' else name
def to_screen(self, text, prefix=True, *args, **kwargs):
tag = '[%s] ' % self.PP_NAME if prefix else ''
if self._downloader:
return self._downloader.to_screen('%s%s' % (tag, text), *args, **kwargs)
tag = '[%s] ' % self.PP_NAME if prefix else ''
return self._downloader.to_screen(f'{tag}{text}', *args, **kwargs)
def report_warning(self, text, *args, **kwargs):
if self._downloader:
return self._downloader.report_warning(text, *args, **kwargs)
def deprecation_warning(self, text):
def deprecation_warning(self, msg):
warn = getattr(self._downloader, 'deprecation_warning', deprecation_warning)
return warn(msg, stacklevel=1)
def deprecated_feature(self, msg):
if self._downloader:
return self._downloader.deprecation_warning(text)
write_string(f'DeprecationWarning: {text}')
return self._downloader.deprecated_feature(msg)
return deprecation_warning(msg, stacklevel=1)
def report_error(self, text, *args, **kwargs):
# Exists only for compatibility. Do not use
self.deprecation_warning('"yt_dlp.postprocessor.PostProcessor.report_error" is deprecated. '
'raise "yt_dlp.utils.PostProcessingError" instead')
if self._downloader:
return self._downloader.report_error(text, *args, **kwargs)
@@ -88,6 +92,12 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
if self._downloader:
return self._downloader.write_debug(text, *args, **kwargs)
def _delete_downloaded_files(self, *files_to_delete, **kwargs):
if self._downloader:
return self._downloader._delete_downloaded_files(*files_to_delete, **kwargs)
for filename in set(filter(None, files_to_delete)):
os.remove(filename)
def get_param(self, name, default=None, *args, **kwargs):
if self._downloader:
return self._downloader.params.get(name, default, *args, **kwargs)
@@ -103,12 +113,14 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
return getattr(self._downloader, '_copy_infodict', dict)(info_dict)
@staticmethod
def _restrict_to(*, video=True, audio=True, images=True):
def _restrict_to(*, video=True, audio=True, images=True, simulated=True):
allowed = {'video': video, 'audio': audio, 'images': images}
def decorator(func):
@functools.wraps(func)
def wrapper(self, info):
if not simulated and (self.get_param('simulate') or self.get_param('skip_download')):
return [], info
format_type = (
'video' if info.get('vcodec') != 'none'
else 'audio' if info.get('acodec') != 'none'
@@ -164,6 +176,8 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
def report_progress(self, s):
s['_default_template'] = '%(postprocessor)s %(status)s' % s
if not self._downloader:
return
progress_dict = s.copy()
progress_dict.pop('info_dict')
@@ -172,12 +186,31 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
progress_template = self.get_param('progress_template', {})
tmpl = progress_template.get('postprocess')
if tmpl:
self._downloader.to_stdout(self._downloader.evaluate_outtmpl(tmpl, progress_dict))
self._downloader.to_screen(
self._downloader.evaluate_outtmpl(tmpl, progress_dict), skip_eol=True, quiet=False)
self._downloader.to_console_title(self._downloader.evaluate_outtmpl(
progress_template.get('postprocess-title') or 'yt-dlp %(progress._default_template)s',
progress_dict))
def _retry_download(self, err, count, retries):
# While this is not an extractor, it behaves similar to one and
# so obey extractor_retries and "--retry-sleep extractor"
RetryManager.report_retry(err, count, retries, info=self.to_screen, warn=self.report_warning,
sleep_func=self.get_param('retry_sleep_functions', {}).get('extractor'))
class AudioConversionError(PostProcessingError):
def _download_json(self, url, *, expected_http_errors=(404,)):
self.write_debug(f'{self.PP_NAME} query: {url}')
for retry in RetryManager(self.get_param('extractor_retries', 3), self._retry_download):
try:
rsp = self._downloader.urlopen(sanitized_Request(url))
except network_exceptions as e:
if isinstance(e, urllib.error.HTTPError) and e.code in expected_http_errors:
return None
retry.error = PostProcessingError(f'Unable to communicate with {self.PP_NAME} API: {e}')
continue
return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))
class AudioConversionError(PostProcessingError): # Deprecated
pass

View File

@@ -1,37 +1,29 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import imghdr
import os
import subprocess
import re
try:
from mutagen.flac import Picture, FLAC
from mutagen.mp4 import MP4, MP4Cover
from mutagen.oggopus import OggOpus
from mutagen.oggvorbis import OggVorbis
has_mutagen = True
except ImportError:
has_mutagen = False
import subprocess
from .common import PostProcessor
from .ffmpeg import (
FFmpegPostProcessor,
FFmpegThumbnailsConvertorPP,
)
from .ffmpeg import FFmpegPostProcessor, FFmpegThumbnailsConvertorPP
from ..compat import imghdr
from ..dependencies import mutagen
from ..utils import (
Popen,
PostProcessingError,
check_executable,
encodeArgument,
encodeFilename,
error_to_compat_str,
Popen,
PostProcessingError,
prepend_extension,
shell_quote,
)
if mutagen:
from mutagen.flac import FLAC, Picture
from mutagen.mp4 import MP4, MP4Cover
from mutagen.oggopus import OggOpus
from mutagen.oggvorbis import OggVorbis
class EmbedThumbnailPPError(PostProcessingError):
pass
@@ -61,7 +53,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
return int(mobj.group('w')), int(mobj.group('h'))
def _report_run(self, exe, filename):
self.to_screen('%s: Adding thumbnail to "%s"' % (exe, filename))
self.to_screen(f'{exe}: Adding thumbnail to "{filename}"')
@PostProcessor._restrict_to(images=False)
def run(self, info):
@@ -87,12 +79,10 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
original_thumbnail = thumbnail_filename = info['thumbnails'][idx]['filepath']
# Convert unsupported thumbnail formats to PNG (see #25687, #25717)
# Original behavior was to convert to JPG, but since JPG is a lossy
# format, there will be some additional data loss.
# PNG, on the other hand, is lossless.
# Convert unsupported thumbnail formats (see #25687, #25717)
# PNG is preferred since JPEG is lossy
thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:]
if thumbnail_ext not in ('jpg', 'jpeg', 'png'):
if info['ext'] not in ('mkv', 'mka') and thumbnail_ext not in ('jpg', 'jpeg', 'png'):
thumbnail_filename = convertor.convert_thumbnail(thumbnail_filename, 'png')
thumbnail_ext = 'png'
@@ -101,8 +91,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
success = True
if info['ext'] == 'mp3':
options = [
'-c', 'copy', '-map', '0:0', '-map', '1:0', '-id3v2_version', '3',
'-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (front)"']
'-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3',
'-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)']
self._report_run('ffmpeg', filename)
self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
@@ -110,7 +100,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
elif info['ext'] in ['mkv', 'mka']:
options = list(self.stream_copy_opts())
mimetype = 'image/%s' % ('png' if thumbnail_ext == 'png' else 'jpeg')
mimetype = f'image/{thumbnail_ext.replace("jpg", "jpeg")}'
old_stream, new_stream = self.get_stream_number(
filename, ('tags', 'mimetype'), mimetype)
if old_stream is not None:
@@ -127,7 +117,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
elif info['ext'] in ['m4a', 'mp4', 'mov']:
prefer_atomicparsley = 'embed-thumbnail-atomicparsley' in self.get_param('compat_opts', [])
# Method 1: Use mutagen
if not has_mutagen or prefer_atomicparsley:
if not mutagen or prefer_atomicparsley:
success = False
else:
try:
@@ -149,7 +139,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
if not success:
success = True
atomicparsley = next((
x for x in ['AtomicParsley', 'atomicparsley']
# libatomicparsley.so : See https://github.com/xibr/ytdlp-lazy/issues/1
x for x in ['AtomicParsley', 'atomicparsley', 'libatomicparsley.so']
if check_executable(x, ['-v'])), None)
if atomicparsley is None:
self.to_screen('Neither mutagen nor AtomicParsley was found. Falling back to ffmpeg')
@@ -167,14 +158,12 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
self._report_run('atomicparsley', filename)
self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd))
p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate_or_kill()
if p.returncode != 0:
msg = stderr.decode('utf-8', 'replace').strip()
self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {msg}')
stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if returncode:
self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {stderr.strip()}')
# for formats that don't support thumbnails (like 3gp) AtomicParsley
# won't create to the temporary file
if b'No changes' in stdout:
if 'No changes' in stdout:
self.report_warning('The file format doesn\'t support embedding a thumbnail')
success = False
@@ -200,7 +189,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
raise EmbedThumbnailPPError(f'Unable to embed using ffprobe & ffmpeg; {err}')
elif info['ext'] in ['ogg', 'opus', 'flac']:
if not has_mutagen:
if not mutagen:
raise EmbedThumbnailPPError('module mutagen was not found. Please install using `python -m pip install mutagen`')
self._report_run('mutagen', filename)
@@ -230,11 +219,9 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
os.replace(temp_filename, filename)
self.try_utime(filename, mtime, mtime)
files_to_delete = [thumbnail_filename]
if self._already_have_thumbnail:
if original_thumbnail == thumbnail_filename:
files_to_delete = []
elif original_thumbnail != thumbnail_filename:
files_to_delete.append(original_thumbnail)
return files_to_delete, info
converted = original_thumbnail != thumbnail_filename
self._delete_downloaded_files(
thumbnail_filename if converted or not self._already_have_thumbnail else None,
original_thumbnail if converted and not self._already_have_thumbnail else None,
info=info)
return [], info

View File

@@ -1,14 +1,8 @@
from __future__ import unicode_literals
import subprocess
from .common import PostProcessor
from ..compat import compat_shlex_quote
from ..utils import (
encodeArgument,
PostProcessingError,
variadic,
)
from ..utils import PostProcessingError, encodeArgument, variadic
class ExecPP(PostProcessor):

View File

@@ -1,30 +1,30 @@
from __future__ import unicode_literals
import collections
import io
import contextvars
import itertools
import json
import os
import re
import subprocess
import time
import re
import json
from .common import AudioConversionError, PostProcessor
from ..compat import compat_str
from .common import PostProcessor
from ..compat import functools, imghdr
from ..utils import (
MEDIA_EXTENSIONS,
ISO639Utils,
Popen,
PostProcessingError,
_get_exe_version_output,
deprecation_warning,
detect_exe_version,
determine_ext,
dfxp2srt,
encodeArgument,
encodeFilename,
filter_dict,
float_or_none,
_get_exe_version_output,
detect_exe_version,
is_outdated_version,
ISO639Utils,
orderedSet,
Popen,
PostProcessingError,
prepend_extension,
replace_extension,
shell_quote,
@@ -33,7 +33,6 @@ from ..utils import (
write_json_file,
)
EXT_TO_OUT_FORMATS = {
'aac': 'adts',
'flac': 'flac',
@@ -45,39 +44,52 @@ EXT_TO_OUT_FORMATS = {
'ts': 'mpegts',
'wma': 'asf',
'wmv': 'asf',
'weba': 'webm',
'vtt': 'webvtt',
}
ACODECS = {
'mp3': 'libmp3lame',
'aac': 'aac',
'flac': 'flac',
'm4a': 'aac',
'opus': 'libopus',
'vorbis': 'libvorbis',
'wav': None,
'alac': None,
# name: (ext, encoder, opts)
'mp3': ('mp3', 'libmp3lame', ()),
'aac': ('m4a', 'aac', ('-f', 'adts')),
'm4a': ('m4a', 'aac', ('-bsf:a', 'aac_adtstoasc')),
'opus': ('opus', 'libopus', ()),
'vorbis': ('ogg', 'libvorbis', ()),
'flac': ('flac', 'flac', ()),
'alac': ('m4a', None, ('-acodec', 'alac')),
'wav': ('wav', None, ('-f', 'wav')),
}
def create_mapping_re(supported):
return re.compile(r'{0}(?:/{0})*$'.format(r'(?:\s*\w+\s*>)?\s*(?:%s)\s*' % '|'.join(supported)))
def resolve_mapping(source, mapping):
"""
Get corresponding item from a mapping string like 'A>B/C>D/E'
@returns (target, error_message)
"""
for pair in mapping.lower().split('/'):
kv = pair.split('>', 1)
if len(kv) == 1 or kv[0].strip() == source:
target = kv[-1].strip()
if target == source:
return target, f'already is in target format {source}'
return target, None
return None, f'could not find a mapping for {source}'
class FFmpegPostProcessorError(PostProcessingError):
pass
class FFmpegPostProcessor(PostProcessor):
_ffmpeg_location = contextvars.ContextVar('ffmpeg_location', default=None)
def __init__(self, downloader=None):
PostProcessor.__init__(self, downloader)
self._determine_executables()
def check_version(self):
if not self.available:
raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location')
required_version = '10-0' if self.basename == 'avconv' else '1.0'
if is_outdated_version(
self._versions[self.basename], required_version):
warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
self.basename, self.basename, required_version)
self.report_warning(warning)
self._prefer_ffmpeg = self.get_param('prefer_ffmpeg', True)
self._paths = self._determine_executables()
@staticmethod
def get_versions_and_features(downloader=None):
@@ -86,95 +98,107 @@ class FFmpegPostProcessor(PostProcessor):
@staticmethod
def get_versions(downloader=None):
return FFmpegPostProcessor.get_version_and_features(downloader)[0]
return FFmpegPostProcessor.get_versions_and_features(downloader)[0]
_ffmpeg_to_avconv = {'ffmpeg': 'avconv', 'ffprobe': 'avprobe'}
def _determine_executables(self):
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
programs = [*self._ffmpeg_to_avconv.keys(), *self._ffmpeg_to_avconv.values()]
def get_ffmpeg_version(path, prog):
out = _get_exe_version_output(path, ['-bsfs'])
ver = detect_exe_version(out) if out else False
if ver:
regexs = [
r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1]
r'n([0-9.]+)$', # Arch Linux
# 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
]
for regex in regexs:
mobj = re.match(regex, ver)
if mobj:
ver = mobj.group(1)
self._versions[prog] = ver
if prog != 'ffmpeg' or not out:
return
mobj = re.search(r'(?m)^\s+libavformat\s+(?:[0-9. ]+)\s+/\s+(?P<runtime>[0-9. ]+)', out)
lavf_runtime_version = mobj.group('runtime').replace(' ', '') if mobj else None
self._features = {
'fdk': '--enable-libfdk-aac' in out,
'setts': 'setts' in out.splitlines(),
'needs_adtstoasc': is_outdated_version(lavf_runtime_version, '57.56.100', False),
}
self.basename = None
self.probe_basename = None
self._paths = None
self._versions = None
self._features = {}
prefer_ffmpeg = self.get_param('prefer_ffmpeg', True)
location = self.get_param('ffmpeg_location')
location = self.get_param('ffmpeg_location', self._ffmpeg_location.get())
if location is None:
self._paths = {p: p for p in programs}
return {p: p for p in programs}
if not os.path.exists(location):
self.report_warning(
f'ffmpeg-location {location} does not exist! Continuing without ffmpeg', only_once=True)
return {}
elif os.path.isdir(location):
dirname, basename, filename = location, None, None
else:
if not os.path.exists(location):
self.report_warning(
'ffmpeg-location %s does not exist! '
'Continuing without ffmpeg.' % (location))
self._versions = {}
return
elif os.path.isdir(location):
dirname, basename = location, None
else:
basename = os.path.splitext(os.path.basename(location))[0]
basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg')
dirname = os.path.dirname(os.path.abspath(location))
if basename in ('ffmpeg', 'ffprobe'):
prefer_ffmpeg = True
filename = os.path.basename(location)
basename = next((p for p in programs if p in filename), 'ffmpeg')
dirname = os.path.dirname(os.path.abspath(location))
if basename in self._ffmpeg_to_avconv.keys():
self._prefer_ffmpeg = True
self._paths = dict(
(p, os.path.join(dirname, p)) for p in programs)
if basename:
self._paths[basename] = location
paths = {p: os.path.join(dirname, p) for p in programs}
if basename and basename in filename:
for p in programs:
path = os.path.join(dirname, filename.replace(basename, p))
if os.path.exists(path):
paths[p] = path
if basename:
paths[basename] = location
return paths
self._versions = {}
for p in programs:
get_ffmpeg_version(self._paths[p], p)
_version_cache, _features_cache = {None: None}, {}
if prefer_ffmpeg is False:
prefs = ('avconv', 'ffmpeg')
def _get_ffmpeg_version(self, prog):
path = self._paths.get(prog)
if path in self._version_cache:
return self._version_cache[path], self._features_cache.get(path, {})
out = _get_exe_version_output(path, ['-bsfs'])
ver = detect_exe_version(out) if out else False
if ver:
regexs = [
r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1]
r'n([0-9.]+)$', # Arch Linux
# 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
]
for regex in regexs:
mobj = re.match(regex, ver)
if mobj:
ver = mobj.group(1)
self._version_cache[path] = ver
if prog != 'ffmpeg' or not out:
return ver, {}
mobj = re.search(r'(?m)^\s+libavformat\s+(?:[0-9. ]+)\s+/\s+(?P<runtime>[0-9. ]+)', out)
lavf_runtime_version = mobj.group('runtime').replace(' ', '') if mobj else None
self._features_cache[path] = features = {
'fdk': '--enable-libfdk-aac' in out,
'setts': 'setts' in out.splitlines(),
'needs_adtstoasc': is_outdated_version(lavf_runtime_version, '57.56.100', False),
}
return ver, features
@property
def _versions(self):
return filter_dict({self.basename: self._version, self.probe_basename: self._probe_version})
@functools.cached_property
def basename(self):
self._version # run property
return self.basename
@functools.cached_property
def probe_basename(self):
self._probe_version # run property
return self.probe_basename
def _get_version(self, kind):
executables = (kind, )
if not self._prefer_ffmpeg:
executables = (kind, self._ffmpeg_to_avconv[kind])
basename, version, features = next(filter(
lambda x: x[1], ((p, *self._get_ffmpeg_version(p)) for p in executables)), (None, None, {}))
if kind == 'ffmpeg':
self.basename, self._features = basename, features
else:
prefs = ('ffmpeg', 'avconv')
for p in prefs:
if self._versions[p]:
self.basename = p
break
self.probe_basename = basename
if basename == self._ffmpeg_to_avconv[kind]:
self.deprecated_feature(f'Support for {self._ffmpeg_to_avconv[kind]} is deprecated and '
f'may be removed in a future version. Use {kind} instead')
return version
if prefer_ffmpeg is False:
prefs = ('avprobe', 'ffprobe')
else:
prefs = ('ffprobe', 'avprobe')
for p in prefs:
if self._versions[p]:
self.probe_basename = p
break
@functools.cached_property
def _version(self):
return self._get_version('ffmpeg')
if self.basename == 'avconv':
self.deprecation_warning(
'Support for avconv is deprecated and may be removed in a future version. Use ffmpeg instead')
if self.probe_basename == 'avprobe':
self.deprecation_warning(
'Support for avprobe is deprecated and may be removed in a future version. Use ffprobe instead')
@functools.cached_property
def _probe_version(self):
return self._get_version('ffprobe')
@property
def available(self):
@@ -182,7 +206,7 @@ class FFmpegPostProcessor(PostProcessor):
@property
def executable(self):
return self._paths[self.basename]
return self._paths.get(self.basename)
@property
def probe_available(self):
@@ -190,7 +214,7 @@ class FFmpegPostProcessor(PostProcessor):
@property
def probe_executable(self):
return self._paths[self.probe_basename]
return self._paths.get(self.probe_basename)
@staticmethod
def stream_copy_opts(copy=True, *, ext=None):
@@ -200,10 +224,18 @@ class FFmpegPostProcessor(PostProcessor):
yield from ('-dn', '-ignore_unknown')
if copy:
yield from ('-c', 'copy')
# For some reason, '-c copy -map 0' is not enough to copy subtitles
if ext in ('mp4', 'mov'):
if ext in ('mp4', 'mov', 'm4a'):
yield from ('-c:s', 'mov_text')
def check_version(self):
if not self.available:
raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location')
required_version = '10-0' if self.basename == 'avconv' else '1.0'
if is_outdated_version(self._version, required_version):
self.report_warning(f'Your copy of {self.basename} is outdated, update {self.basename} '
f'to version {required_version} or newer if you encounter any errors')
def get_audio_codec(self, path):
if not self.probe_available and not self.available:
raise PostProcessingError('ffprobe and ffmpeg not found. Please install or provide the path using --ffmpeg-location')
@@ -217,15 +249,14 @@ class FFmpegPostProcessor(PostProcessor):
encodeFilename(self.executable, True),
encodeArgument('-i')]
cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
self.write_debug('%s command line: %s' % (self.basename, shell_quote(cmd)))
handle = Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout_data, stderr_data = handle.communicate_or_kill()
expected_ret = 0 if self.probe_available else 1
if handle.wait() != expected_ret:
self.write_debug(f'{self.basename} command line: {shell_quote(cmd)}')
stdout, stderr, returncode = Popen.run(
cmd, text=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if returncode != (0 if self.probe_available else 1):
return None
except (IOError, OSError):
except OSError:
return None
output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore')
output = stdout if self.probe_available else stderr
if self.probe_available:
audio_codec = None
for line in output.split('\n'):
@@ -259,11 +290,10 @@ class FFmpegPostProcessor(PostProcessor):
]
cmd += opts
cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
self.write_debug('ffprobe command line: %s' % shell_quote(cmd))
p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
stdout, stderr = p.communicate()
return json.loads(stdout.decode('utf-8', 'replace'))
cmd.append(self._ffmpeg_filename_argument(path))
self.write_debug(f'ffprobe command line: {shell_quote(cmd)}')
stdout, _, _ = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
return json.loads(stdout)
def get_stream_number(self, path, keys, value):
streams = self.get_metadata_object(path)['streams']
@@ -283,12 +313,12 @@ class FFmpegPostProcessor(PostProcessor):
if fatal:
raise PostProcessingError(f'Unable to determine video duration: {e.msg}')
def _duration_mismatch(self, d1, d2):
def _duration_mismatch(self, d1, d2, tolerance=2):
if not d1 or not d2:
return None
# The duration is often only known to nearest second. So there can be <1sec disparity natually.
# Further excuse an additional <1sec difference.
return abs(d1 - d2) > 2
return abs(d1 - d2) > tolerance
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts, **kwargs):
return self.real_run_ffmpeg(
@@ -325,16 +355,15 @@ class FFmpegPostProcessor(PostProcessor):
for i, (path, opts) in enumerate(path_opts) if path)
self.write_debug('ffmpeg command line: %s' % shell_quote(cmd))
p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
stdout, stderr = p.communicate_or_kill()
if p.returncode not in variadic(expected_retcodes):
stderr = stderr.decode('utf-8', 'replace').strip()
_, stderr, returncode = Popen.run(
cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
if returncode not in variadic(expected_retcodes):
self.write_debug(stderr)
raise FFmpegPostProcessorError(stderr.split('\n')[-1])
raise FFmpegPostProcessorError(stderr.strip().splitlines()[-1])
for out_path, _ in output_path_opts:
if out_path:
self.try_utime(out_path, oldest_mtime, oldest_mtime)
return stderr.decode('utf-8', 'replace')
return stderr
def run_ffmpeg(self, path, out_path, opts, **kwargs):
return self.run_ffmpeg_multiple_files([path], out_path, opts, **kwargs)
@@ -379,17 +408,15 @@ class FFmpegPostProcessor(PostProcessor):
"""
concat_file = f'{out_file}.concat'
self.write_debug(f'Writing concat spec to {concat_file}')
with open(concat_file, 'wt', encoding='utf-8') as f:
with open(concat_file, 'w', encoding='utf-8') as f:
f.writelines(self._concat_spec(in_files, concat_opts))
out_flags = list(self.stream_copy_opts(ext=determine_ext(out_file)))
try:
self.real_run_ffmpeg(
[(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])],
[(out_file, out_flags)])
finally:
os.remove(concat_file)
self.real_run_ffmpeg(
[(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])],
[(out_file, out_flags)])
self._delete_downloaded_files(concat_file)
@classmethod
def _concat_spec(cls, in_files, concat_opts=None):
@@ -405,12 +432,13 @@ class FFmpegPostProcessor(PostProcessor):
class FFmpegExtractAudioPP(FFmpegPostProcessor):
COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma')
SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav', 'alac')
COMMON_AUDIO_EXTS = MEDIA_EXTENSIONS.common_audio + ('wma', )
SUPPORTED_EXTS = tuple(ACODECS.keys())
FORMAT_RE = create_mapping_re(('best', *SUPPORTED_EXTS))
def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
FFmpegPostProcessor.__init__(self, downloader)
self._preferredcodec = preferredcodec or 'best'
self.mapping = preferredcodec or 'best'
self._preferredquality = float_or_none(preferredquality)
self._nopostoverwrites = nopostoverwrites
@@ -445,69 +473,48 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
try:
FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
except FFmpegPostProcessorError as err:
raise AudioConversionError(err.msg)
raise PostProcessingError(f'audio conversion failed: {err.msg}')
@PostProcessor._restrict_to(images=False)
def run(self, information):
orig_path = path = information['filepath']
orig_ext = information['ext']
if self._preferredcodec == 'best' and orig_ext in self.COMMON_AUDIO_EXTS:
self.to_screen('Skipping audio extraction since the file is already in a common audio format')
target_format, _skip_msg = resolve_mapping(information['ext'], self.mapping)
if target_format == 'best' and information['ext'] in self.COMMON_AUDIO_EXTS:
target_format, _skip_msg = None, 'the file is already in a common audio format'
if not target_format:
self.to_screen(f'Not converting audio {orig_path}; {_skip_msg}')
return [], information
filecodec = self.get_audio_codec(path)
if filecodec is None:
raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')
more_opts = []
if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
# Lossless, but in another container
acodec = 'copy'
extension = 'm4a'
more_opts = ['-bsf:a', 'aac_adtstoasc']
elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']:
# Lossless if possible
acodec = 'copy'
extension = filecodec
if filecodec == 'aac':
more_opts = ['-f', 'adts']
if filecodec == 'vorbis':
extension = 'ogg'
elif filecodec == 'alac':
acodec = None
extension = 'm4a'
more_opts += ['-acodec', 'alac']
else:
# MP3 otherwise.
acodec = 'libmp3lame'
extension = 'mp3'
more_opts = self._quality_args(acodec)
if filecodec == 'aac' and target_format in ('m4a', 'best'):
# Lossless, but in another container
extension, _, more_opts, acodec = *ACODECS['m4a'], 'copy'
elif target_format == 'best' or target_format == filecodec:
# Lossless if possible
try:
extension, _, more_opts, acodec = *ACODECS[filecodec], 'copy'
except KeyError:
extension, acodec, more_opts = ACODECS['mp3']
else:
# We convert the audio (lossy if codec is lossy)
acodec = ACODECS[self._preferredcodec]
extension, acodec, more_opts = ACODECS[target_format]
if acodec == 'aac' and self._features.get('fdk'):
acodec = 'libfdk_aac'
extension = self._preferredcodec
more_opts = self._quality_args(acodec)
if self._preferredcodec == 'aac':
more_opts += ['-f', 'adts']
elif self._preferredcodec == 'm4a':
more_opts += ['-bsf:a', 'aac_adtstoasc']
elif self._preferredcodec == 'vorbis':
extension = 'ogg'
elif self._preferredcodec == 'wav':
extension = 'wav'
more_opts += ['-f', 'wav']
elif self._preferredcodec == 'alac':
extension = 'm4a'
more_opts += ['-acodec', 'alac']
acodec, more_opts = 'libfdk_aac', []
prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups
temp_path = new_path = prefix + sep + extension
more_opts = list(more_opts)
if acodec != 'copy':
more_opts = self._quality_args(acodec)
# not os.path.splitext, since the latter does not work on unicode in all setups
temp_path = new_path = f'{path.rpartition(".")[0]}.{extension}'
if new_path == path:
if acodec == 'copy':
self.to_screen(f'Not converting audio {orig_path}; file is already in target format {target_format}')
return [], information
orig_path = prepend_extension(path, 'orig')
temp_path = prepend_extension(path, 'temp')
if (self._nopostoverwrites and os.path.exists(encodeFilename(new_path))
@@ -515,14 +522,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
self.to_screen('Post-process file %s exists, skipping' % new_path)
return [], information
try:
self.to_screen(f'Destination: {new_path}')
self.run_ffmpeg(path, temp_path, acodec, more_opts)
except AudioConversionError as e:
raise PostProcessingError(
'audio conversion failed: ' + e.msg)
except Exception:
raise PostProcessingError('error running ' + self.basename)
self.to_screen(f'Destination: {new_path}')
self.run_ffmpeg(path, temp_path, acodec, more_opts)
os.replace(path, orig_path)
os.replace(temp_path, new_path)
@@ -532,41 +533,33 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
# Try to update the date time for extracted audio file.
if information.get('filetime') is not None:
self.try_utime(
new_path, time.time(), information['filetime'],
errnote='Cannot update utime of audio file')
new_path, time.time(), information['filetime'], errnote='Cannot update utime of audio file')
return [orig_path], information
class FFmpegVideoConvertorPP(FFmpegPostProcessor):
SUPPORTED_EXTS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')
FORMAT_RE = re.compile(r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(SUPPORTED_EXTS)))
SUPPORTED_EXTS = (
*sorted((*MEDIA_EXTENSIONS.common_video, 'gif')),
*sorted((*MEDIA_EXTENSIONS.common_audio, 'aac', 'vorbis')),
)
FORMAT_RE = create_mapping_re(SUPPORTED_EXTS)
_ACTION = 'converting'
def __init__(self, downloader=None, preferedformat=None):
super(FFmpegVideoConvertorPP, self).__init__(downloader)
self._preferedformats = preferedformat.lower().split('/')
def _target_ext(self, source_ext):
for pair in self._preferedformats:
kv = pair.split('>')
if len(kv) == 1 or kv[0].strip() == source_ext:
return kv[-1].strip()
super().__init__(downloader)
self.mapping = preferedformat
@staticmethod
def _options(target_ext):
yield from FFmpegPostProcessor.stream_copy_opts(False)
if target_ext == 'avi':
return ['-c:v', 'libxvid', '-vtag', 'XVID']
return []
yield from ('-c:v', 'libxvid', '-vtag', 'XVID')
@PostProcessor._restrict_to(images=False)
def run(self, info):
filename, source_ext = info['filepath'], info['ext'].lower()
target_ext = self._target_ext(source_ext)
_skip_msg = (
f'could not find a mapping for {source_ext}' if not target_ext
else f'already is in target format {source_ext}' if source_ext == target_ext
else None)
target_ext, _skip_msg = resolve_mapping(source_ext, self.mapping)
if _skip_msg:
self.to_screen(f'Not {self._ACTION} media file "{filename}"; {_skip_msg}')
return [], info
@@ -589,14 +582,16 @@ class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP):
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka')
def __init__(self, downloader=None, already_have_subtitle=False):
super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
super().__init__(downloader)
self._already_have_subtitle = already_have_subtitle
@PostProcessor._restrict_to(images=False)
def run(self, info):
if info['ext'] not in ('mp4', 'webm', 'mkv'):
self.to_screen('Subtitles can only be embedded in mp4, webm or mkv files')
if info['ext'] not in self.SUPPORTED_EXTS:
self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS)} files')
return [], info
subtitles = info.get('requested_subtitles')
if not subtitles:
@@ -605,7 +600,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
filename = info['filepath']
# Disabled temporarily. There needs to be a way to overide this
# Disabled temporarily. There needs to be a way to override this
# in case of duration actually mismatching in extractor
# See: https://github.com/yt-dlp/yt-dlp/issues/1870, https://github.com/yt-dlp/yt-dlp/issues/1385
'''
@@ -711,14 +706,13 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
self.run_ffmpeg_multiple_files(
(filename, metadata_filename), temp_filename,
itertools.chain(self._options(info['ext']), *options))
for file in filter(None, files_to_delete):
os.remove(file) # Don't obey --keep-files
self._delete_downloaded_files(*files_to_delete)
os.replace(temp_filename, filename)
return [], info
@staticmethod
def _get_chapter_opts(chapters, metadata_filename):
with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
with open(metadata_filename, 'w', encoding='utf-8') as f:
def ffmpeg_escape(text):
return re.sub(r'([\\=;#\n])', r'\\\1', text)
@@ -742,13 +736,13 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
str(info[key]) for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list))
if info.get(key) is not None), None)
if value not in ('', None):
value = value.replace('\0', '') # nul character cannot be passed in command line
metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)})
# See [1-4] for some info on media metadata/metadata supported
# by ffmpeg.
# 1. https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/
# 2. https://wiki.multimedia.cx/index.php/FFmpeg_Metadata
# 3. https://kodi.wiki/view/Video_file_tagging
# Info on media metadata/metadata supported by ffmpeg:
# https://wiki.multimedia.cx/index.php/FFmpeg_Metadata
# https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/
# https://kodi.wiki/view/Video_file_tagging
add('title', ('track', 'title'))
add('date', 'upload_date')
@@ -772,7 +766,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
for key, value in info.items():
mobj = re.fullmatch(meta_regex, key)
if value is not None and mobj:
metadata[mobj.group('i') or 'common'][mobj.group('key')] = value
metadata[mobj.group('i') or 'common'][mobj.group('key')] = value.replace('\0', '')
# Write id3v1 metadata also since Windows Explorer can't handle id3v2 tags
yield ('-write_id3v1', '1')
for name, value in metadata['common'].items():
yield ('-metadata', f'{name}={value}')
@@ -806,11 +803,16 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
yield ('-map', '-0:%d' % old_stream)
new_stream -= 1
yield ('-attach', infofn,
'-metadata:s:%d' % new_stream, 'mimetype=application/json')
yield (
'-attach', infofn,
f'-metadata:s:{new_stream}', 'mimetype=application/json',
f'-metadata:s:{new_stream}', 'filename=info.json',
)
class FFmpegMergerPP(FFmpegPostProcessor):
SUPPORTED_EXTS = MEDIA_EXTENSIONS.common_video
@PostProcessor._restrict_to(images=False)
def run(self, info):
filename = info['filepath']
@@ -900,7 +902,7 @@ class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor):
def __init__(self, downloader=None, trim=0.001):
# "trim" should be used when the video contains unintended packets
super(FFmpegFixupTimestampPP, self).__init__(downloader)
super().__init__(downloader)
assert isinstance(trim, (int, float))
self.trim = str(trim)
@@ -935,10 +937,10 @@ class FFmpegFixupDuplicateMoovPP(FFmpegCopyStreamPP):
class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
SUPPORTED_EXTS = ('srt', 'vtt', 'ass', 'lrc')
SUPPORTED_EXTS = MEDIA_EXTENSIONS.subtitles
def __init__(self, downloader=None, format=None):
super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
super().__init__(downloader)
self.format = format
def run(self, info):
@@ -980,7 +982,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
with open(dfxp_file, 'rb') as f:
srt_data = dfxp2srt(f.read())
with io.open(srt_file, 'wt', encoding='utf-8') as f:
with open(srt_file, 'w', encoding='utf-8') as f:
f.write(srt_data)
old_file = srt_file
@@ -997,7 +999,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
self.run_ffmpeg(old_file, new_file, ['-f', new_format])
with io.open(new_file, 'rt', encoding='utf-8') as f:
with open(new_file, encoding='utf-8') as f:
subs[lang] = {
'ext': new_ext,
'data': f.read(),
@@ -1034,8 +1036,8 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor):
self.to_screen('Chapter %03d; Destination: %s' % (number, destination))
return (
destination,
['-ss', compat_str(chapter['start_time']),
'-t', compat_str(chapter['end_time'] - chapter['start_time'])])
['-ss', str(chapter['start_time']),
'-t', str(chapter['end_time'] - chapter['start_time'])])
@PostProcessor._restrict_to(images=False)
def run(self, info):
@@ -1052,29 +1054,28 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor):
destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info)
self.real_run_ffmpeg([(in_file, opts)], [(destination, self.stream_copy_opts())])
if in_file != info['filepath']:
os.remove(in_file)
self._delete_downloaded_files(in_file, msg=None)
return [], info
class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
SUPPORTED_EXTS = ('jpg', 'png')
SUPPORTED_EXTS = MEDIA_EXTENSIONS.thumbnails
FORMAT_RE = create_mapping_re(SUPPORTED_EXTS)
def __init__(self, downloader=None, format=None):
super(FFmpegThumbnailsConvertorPP, self).__init__(downloader)
self.format = format
super().__init__(downloader)
self.mapping = format
@staticmethod
def is_webp(path):
with open(encodeFilename(path), 'rb') as f:
b = f.read(12)
return b[0:4] == b'RIFF' and b[8:] == b'WEBP'
@classmethod
def is_webp(cls, path):
deprecation_warning(f'{cls.__module__}.{cls.__name__}.is_webp is deprecated')
return imghdr.what(path) == 'webp'
def fixup_webp(self, info, idx=-1):
thumbnail_filename = info['thumbnails'][idx]['filepath']
_, thumbnail_ext = os.path.splitext(thumbnail_filename)
if thumbnail_ext:
thumbnail_ext = thumbnail_ext[1:].lower()
if thumbnail_ext != 'webp' and self.is_webp(thumbnail_filename):
if thumbnail_ext.lower() != '.webp' and imghdr.what(thumbnail_filename) == 'webp':
self.to_screen('Correcting thumbnail "%s" extension to webp' % thumbnail_filename)
webp_filename = replace_extension(thumbnail_filename, 'webp')
os.replace(thumbnail_filename, webp_filename)
@@ -1084,17 +1085,18 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
@staticmethod
def _options(target_ext):
yield from ('-update', '1')
if target_ext == 'jpg':
return ['-bsf:v', 'mjpeg2jpeg']
return []
yield from ('-bsf:v', 'mjpeg2jpeg')
def convert_thumbnail(self, thumbnail_filename, target_ext):
thumbnail_conv_filename = replace_extension(thumbnail_filename, target_ext)
self.to_screen('Converting thumbnail "%s" to %s' % (thumbnail_filename, target_ext))
self.to_screen(f'Converting thumbnail "{thumbnail_filename}" to {target_ext}')
_, source_ext = os.path.splitext(thumbnail_filename)
self.real_run_ffmpeg(
[(thumbnail_filename, ['-f', 'image2', '-pattern_type', 'none'])],
[(thumbnail_conv_filename.replace('%', '%%'), self._options(target_ext))])
[(thumbnail_filename, [] if source_ext == '.gif' else ['-f', 'image2', '-pattern_type', 'none'])],
[(thumbnail_conv_filename, self._options(target_ext))])
return thumbnail_conv_filename
def run(self, info):
@@ -1107,18 +1109,18 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
continue
has_thumbnail = True
self.fixup_webp(info, idx)
_, thumbnail_ext = os.path.splitext(original_thumbnail)
if thumbnail_ext:
thumbnail_ext = thumbnail_ext[1:].lower()
original_thumbnail = thumbnail_dict['filepath'] # Path can change during fixup
thumbnail_ext = os.path.splitext(original_thumbnail)[1][1:].lower()
if thumbnail_ext == 'jpeg':
thumbnail_ext = 'jpg'
if thumbnail_ext == self.format:
self.to_screen('Thumbnail "%s" is already in the requested format' % original_thumbnail)
target_ext, _skip_msg = resolve_mapping(thumbnail_ext, self.mapping)
if _skip_msg:
self.to_screen(f'Not converting thumbnail "{original_thumbnail}"; {_skip_msg}')
continue
thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, self.format)
thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, target_ext)
files_to_delete.append(original_thumbnail)
info['__files_to_move'][thumbnail_dict['filepath']] = replace_extension(
info['__files_to_move'][original_thumbnail], self.format)
info['__files_to_move'][original_thumbnail], target_ext)
if not has_thumbnail:
self.to_screen('There aren\'t any thumbnails to convert')
@@ -1130,15 +1132,21 @@ class FFmpegConcatPP(FFmpegPostProcessor):
self._only_multi_video = only_multi_video
super().__init__(downloader)
def _get_codecs(self, file):
codecs = traverse_obj(self.get_metadata_object(file), ('streams', ..., 'codec_name'))
self.write_debug(f'Codecs = {", ".join(codecs)}')
return tuple(codecs)
def concat_files(self, in_files, out_file):
if not self._downloader._ensure_dir_exists(out_file):
return
if len(in_files) == 1:
if os.path.realpath(in_files[0]) != os.path.realpath(out_file):
self.to_screen(f'Moving "{in_files[0]}" to "{out_file}"')
os.replace(in_files[0], out_file)
return []
codecs = [traverse_obj(self.get_metadata_object(file), ('streams', ..., 'codec_name')) for file in in_files]
if len(set(map(tuple, codecs))) > 1:
if len(set(map(self._get_codecs, in_files))) > 1:
raise PostProcessingError(
'The files have different streams/codecs and cannot be concatenated. '
'Either select different formats or --recode-video them to a common format')
@@ -1147,22 +1155,21 @@ class FFmpegConcatPP(FFmpegPostProcessor):
super().concat_files(in_files, out_file)
return in_files
@PostProcessor._restrict_to(images=False)
@PostProcessor._restrict_to(images=False, simulated=False)
def run(self, info):
entries = info.get('entries') or []
if (self.get_param('skip_download') or not any(entries)
or self._only_multi_video and info['_type'] != 'multi_video'):
if not any(entries) or (self._only_multi_video and info['_type'] != 'multi_video'):
return [], info
elif any(len(entry) > 1 for entry in traverse_obj(entries, (..., 'requested_downloads')) or []):
elif traverse_obj(entries, (..., lambda k, v: k == 'requested_downloads' and len(v) > 1)):
raise PostProcessingError('Concatenation is not supported when downloading multiple separate formats')
in_files = traverse_obj(entries, (..., 'requested_downloads', 0, 'filepath'))
in_files = traverse_obj(entries, (..., 'requested_downloads', 0, 'filepath')) or []
if len(in_files) < len(entries):
raise PostProcessingError('Aborting concatenation because some downloads failed')
ie_copy = self._downloader._playlist_infodict(info)
exts = traverse_obj(entries, (..., 'requested_downloads', 0, 'ext'), (..., 'ext'))
ie_copy['ext'] = exts[0] if len(set(exts)) == 1 else 'mkv'
ie_copy = collections.ChainMap({'ext': exts[0] if len(set(exts)) == 1 else 'mkv'},
info, self._downloader._playlist_infodict(info))
out_file = self._downloader.prepare_filename(ie_copy, 'pl_video')
files_to_delete = self.concat_files(in_files, out_file)

View File

@@ -1,37 +1,37 @@
import re
from enum import Enum
from .common import PostProcessor
from ..utils import Namespace, filter_dict
class MetadataParserPP(PostProcessor):
class Actions(Enum):
INTERPRET = 'interpretter'
REPLACE = 'replacer'
def __init__(self, downloader, actions):
PostProcessor.__init__(self, downloader)
super().__init__(downloader)
self._actions = []
for f in actions:
action = f[0]
assert isinstance(action, self.Actions)
self._actions.append(getattr(self, action.value)(*f[1:]))
action, *args = f
assert action in self.Actions
self._actions.append(action(self, *args))
@classmethod
def validate_action(cls, action, *data):
''' Each action can be:
"""Each action can be:
(Actions.INTERPRET, from, to) OR
(Actions.REPLACE, field, search, replace)
'''
if not isinstance(action, cls.Actions):
"""
if action not in cls.Actions:
raise ValueError(f'{action!r} is not a valid action')
getattr(cls, action.value)(cls, *data)
action(cls, *data) # So this can raise error to validate
@staticmethod
def field_to_template(tmpl):
if re.match(r'[a-zA-Z_]+$', tmpl):
return f'%({tmpl})s'
from ..YoutubeDL import YoutubeDL
err = YoutubeDL.validate_outtmpl(tmpl)
if err:
raise err
return tmpl
@staticmethod
@@ -68,9 +68,9 @@ class MetadataParserPP(PostProcessor):
if match is None:
self.to_screen(f'Could not interpret {inp!r} as {out!r}')
return
for attribute, value in match.groupdict().items():
for attribute, value in filter_dict(match.groupdict()).items():
info[attribute] = value
self.to_screen('Parsed %s from %r: %r' % (attribute, template, value if value is not None else 'NA'))
self.to_screen(f'Parsed {attribute} from {template!r}: {value!r}')
template = self.field_to_template(inp)
out_re = re.compile(self.format_to_regex(out))
@@ -95,6 +95,8 @@ class MetadataParserPP(PostProcessor):
search_re = re.compile(search)
return f
Actions = Namespace(INTERPRET=interpretter, REPLACE=replacer)
class MetadataFromFieldPP(MetadataParserPP):
@classmethod

View File

@@ -3,17 +3,9 @@ import heapq
import os
from .common import PostProcessor
from .ffmpeg import (
FFmpegPostProcessor,
FFmpegSubtitlesConvertorPP
)
from .ffmpeg import FFmpegPostProcessor, FFmpegSubtitlesConvertorPP
from .sponsorblock import SponsorBlockPP
from ..utils import (
orderedSet,
PostProcessingError,
prepend_extension,
)
from ..utils import PostProcessingError, orderedSet, prepend_extension
_TINY_CHAPTER_DURATION = 1
DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l'
@@ -24,7 +16,7 @@ class ModifyChaptersPP(FFmpegPostProcessor):
*, sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False):
FFmpegPostProcessor.__init__(self, downloader)
self._remove_chapters_patterns = set(remove_chapters_patterns or [])
self._remove_sponsor_segments = set(remove_sponsor_segments or []) - set(SponsorBlockPP.POI_CATEGORIES.keys())
self._remove_sponsor_segments = set(remove_sponsor_segments or []) - set(SponsorBlockPP.NON_SKIPPABLE_CATEGORIES.keys())
self._ranges_to_remove = set(remove_ranges or [])
self._sponsorblock_chapter_title = sponsorblock_chapter_title
self._force_keyframes = force_keyframes
@@ -40,14 +32,18 @@ class ModifyChaptersPP(FFmpegPostProcessor):
real_duration = self._get_real_video_duration(info['filepath'])
if not chapters:
chapters = [{'start_time': 0, 'end_time': real_duration, 'title': info['title']}]
chapters = [{'start_time': 0, 'end_time': info.get('duration') or real_duration, 'title': info['title']}]
info['chapters'], cuts = self._remove_marked_arrange_sponsors(chapters + sponsor_chapters)
if not cuts:
return [], info
elif not info['chapters']:
self.report_warning('You have requested to remove the entire video, which is not possible')
return [], info
if self._duration_mismatch(real_duration, info.get('duration')):
if not self._duration_mismatch(real_duration, info['chapters'][-1]['end_time']):
original_duration, info['duration'] = info.get('duration'), info['chapters'][-1]['end_time']
if self._duration_mismatch(real_duration, original_duration, 1):
if not self._duration_mismatch(real_duration, info['duration']):
self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut')
return [], info
if not info.get('__real_download'):
@@ -68,9 +64,11 @@ class ModifyChaptersPP(FFmpegPostProcessor):
# Renaming should only happen after all files are processed
files_to_remove = []
for in_file, out_file in in_out_files:
mtime = os.stat(in_file).st_mtime
uncut_file = prepend_extension(in_file, 'uncut')
os.replace(in_file, uncut_file)
os.replace(out_file, in_file)
self.try_utime(in_file, mtime, mtime)
files_to_remove.append(uncut_file)
return files_to_remove, info
@@ -104,7 +102,7 @@ class ModifyChaptersPP(FFmpegPostProcessor):
'start_time': start,
'end_time': end,
'category': 'manually_removed',
'_categories': [('manually_removed', start, end)],
'_categories': [('manually_removed', start, end, 'Manually removed')],
'remove': True,
} for start, end in self._ranges_to_remove)
@@ -295,13 +293,12 @@ class ModifyChaptersPP(FFmpegPostProcessor):
c.pop('_was_cut', None)
cats = c.pop('_categories', None)
if cats:
category = min(cats, key=lambda c: c[2] - c[1])[0]
cats = orderedSet(x[0] for x in cats)
category, _, _, category_name = min(cats, key=lambda c: c[2] - c[1])
c.update({
'category': category,
'categories': cats,
'name': SponsorBlockPP.CATEGORIES[category],
'category_names': [SponsorBlockPP.CATEGORIES[c] for c in cats]
'categories': orderedSet(x[0] for x in cats),
'name': category_name,
'category_names': orderedSet(x[3] for x in cats),
})
c['title'] = self._downloader.evaluate_outtmpl(self._sponsorblock_chapter_title, c.copy())
# Merge identically named sponsors.
@@ -320,7 +317,7 @@ class ModifyChaptersPP(FFmpegPostProcessor):
self.to_screen(f'Removing chapters from {filename}')
self.concat_files([in_file] * len(concat_opts), out_file, concat_opts)
if in_file != filename:
os.remove(in_file)
self._delete_downloaded_files(in_file, msg=None)
return out_file
@staticmethod

View File

@@ -1,13 +1,12 @@
from __future__ import unicode_literals
import os
import shutil
from .common import PostProcessor
from ..compat import shutil
from ..utils import (
PostProcessingError,
decodeFilename,
encodeFilename,
make_dir,
PostProcessingError,
)
@@ -47,7 +46,7 @@ class MoveFilesAfterDownloadPP(PostProcessor):
% (oldfile, newfile))
continue
make_dir(newfile, PostProcessingError)
self.to_screen('Moving file "%s" to "%s"' % (oldfile, newfile))
self.to_screen(f'Moving file "{oldfile}" to "{newfile}"')
shutil.move(oldfile, newfile) # os.rename cannot move between volumes
info['filepath'] = finalpath

View File

@@ -1,19 +1,18 @@
from __future__ import unicode_literals
import os
import shlex
import subprocess
from .common import PostProcessor
from ..compat import compat_shlex_split
from ..utils import (
Popen,
PostProcessingError,
check_executable,
cli_option,
encodeArgument,
encodeFilename,
prepend_extension,
shell_quote,
str_or_none,
Popen,
PostProcessingError,
prepend_extension,
)
@@ -79,23 +78,21 @@ class SponSkrubPP(PostProcessor):
if not self.cutout:
cmd += ['-chapter']
cmd += cli_option(self._downloader.params, '-proxy', 'proxy')
cmd += compat_shlex_split(self.args) # For backward compatibility
cmd += shlex.split(self.args) # For backward compatibility
cmd += self._configuration_args(self._exe_name, use_compat=False)
cmd += ['--', information['id'], filename, temp_filename]
cmd = [encodeArgument(i) for i in cmd]
self.write_debug('sponskrub command line: %s' % shell_quote(cmd))
pipe = None if self.get_param('verbose') else subprocess.PIPE
p = Popen(cmd, stdout=pipe)
stdout = p.communicate_or_kill()[0]
stdout, _, returncode = Popen.run(cmd, text=True, stdout=None if self.get_param('verbose') else subprocess.PIPE)
if p.returncode == 0:
if not returncode:
os.replace(temp_filename, filename)
self.to_screen('Sponsor sections have been %s' % ('removed' if self.cutout else 'marked'))
elif p.returncode == 3:
elif returncode == 3:
self.to_screen('No segments in the SponsorBlock database')
else:
msg = stdout.decode('utf-8', 'replace').strip() if stdout else ''
msg = msg.split('\n')[0 if msg.lower().startswith('unrecognised') else -1]
raise PostProcessingError(msg if msg else 'sponskrub failed with error code %s' % p.returncode)
raise PostProcessingError(
stdout.strip().splitlines()[0 if stdout.strip().lower().startswith('unrecognised') else -1]
or f'sponskrub failed with error code {returncode}')
return [], information

View File

@@ -1,12 +1,9 @@
from hashlib import sha256
import itertools
import hashlib
import json
import re
import time
import urllib.parse
from .ffmpeg import FFmpegPostProcessor
from ..compat import compat_urllib_parse_urlencode, compat_HTTPError
from ..utils import PostProcessingError, network_exceptions, sanitized_Request
class SponsorBlockPP(FFmpegPostProcessor):
@@ -17,6 +14,10 @@ class SponsorBlockPP(FFmpegPostProcessor):
POI_CATEGORIES = {
'poi_highlight': 'Highlight',
}
NON_SKIPPABLE_CATEGORIES = {
**POI_CATEGORIES,
'chapter': 'Chapter',
}
CATEGORIES = {
'sponsor': 'Sponsor',
'intro': 'Intermission/Intro Animation',
@@ -26,7 +27,7 @@ class SponsorBlockPP(FFmpegPostProcessor):
'filler': 'Filler Tangent',
'interaction': 'Interaction Reminder',
'music_offtopic': 'Non-Music Section',
**POI_CATEGORIES,
**NON_SKIPPABLE_CATEGORIES
}
def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
@@ -41,7 +42,7 @@ class SponsorBlockPP(FFmpegPostProcessor):
return [], info
self.to_screen('Fetching SponsorBlock segments')
info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info['duration'])
info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info.get('duration'))
return [], info
def _get_sponsor_chapters(self, info, duration):
@@ -49,6 +50,9 @@ class SponsorBlockPP(FFmpegPostProcessor):
def duration_filter(s):
start_end = s['segment']
# Ignore entire video segments (https://wiki.sponsor.ajay.app/w/Types).
if start_end == (0, 0):
return False
# Ignore milliseconds difference at the start.
if start_end[0] <= 1:
start_end[0] = 0
@@ -60,7 +64,8 @@ class SponsorBlockPP(FFmpegPostProcessor):
if duration and duration - start_end[1] <= 1:
start_end[1] = duration
# SponsorBlock duration may be absent or it may deviate from the real one.
return s['videoDuration'] == 0 or not duration or abs(duration - s['videoDuration']) <= 1
diff = abs(duration - s['videoDuration']) if s['videoDuration'] else 0
return diff < 1 or (diff < 5 and diff / (start_end[1] - start_end[0]) < 0.05)
duration_match = [s for s in segments if duration_filter(s)]
if len(duration_match) != len(segments):
@@ -68,50 +73,32 @@ class SponsorBlockPP(FFmpegPostProcessor):
def to_chapter(s):
(start, end), cat = s['segment'], s['category']
title = s['description'] if cat == 'chapter' else self.CATEGORIES[cat]
return {
'start_time': start,
'end_time': end,
'category': cat,
'title': self.CATEGORIES[cat],
'_categories': [(cat, start, end)]
'title': title,
'type': s['actionType'],
'_categories': [(cat, start, end, title)],
}
sponsor_chapters = [to_chapter(s) for s in duration_match]
if not sponsor_chapters:
self.to_screen('No segments were found in the SponsorBlock database')
self.to_screen('No matching segments were found in the SponsorBlock database')
else:
self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database')
return sponsor_chapters
def _get_sponsor_segments(self, video_id, service):
hash = sha256(video_id.encode('ascii')).hexdigest()
hash = hashlib.sha256(video_id.encode('ascii')).hexdigest()
# SponsorBlock API recommends using first 4 hash characters.
url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' + compat_urllib_parse_urlencode({
url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' + urllib.parse.urlencode({
'service': service,
'categories': json.dumps(self._categories),
'actionTypes': json.dumps(['skip', 'poi', 'chapter'])
})
self.write_debug(f'SponsorBlock query: {url}')
for d in self._get_json(url):
for d in self._download_json(url) or []:
if d['videoID'] == video_id:
return d['segments']
return []
def _get_json(self, url):
# While this is not an extractor, it behaves similar to one and
# so obey extractor_retries and sleep_interval_requests
max_retries = self.get_param('extractor_retries', 3)
sleep_interval = self.get_param('sleep_interval_requests') or 0
for retries in itertools.count():
try:
rsp = self._downloader.urlopen(sanitized_Request(url))
return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))
except network_exceptions as e:
if isinstance(e, compat_HTTPError) and e.code == 404:
return []
if retries < max_retries:
self.report_warning(f'{e}. Retrying...')
if sleep_interval > 0:
self.to_screen(f'Sleeping {sleep_interval} seconds ...')
time.sleep(sleep_interval)
continue
raise PostProcessingError(f'Unable to communicate with SponsorBlock API: {e}')

View File

@@ -1,78 +1,63 @@
from __future__ import unicode_literals
import os
from .common import PostProcessor
from ..compat import compat_os_name
from ..utils import (
hyphenate_date,
write_xattr,
PostProcessingError,
XAttrMetadataError,
XAttrUnavailableError,
hyphenate_date,
write_xattr,
)
class XAttrMetadataPP(PostProcessor):
#
# More info about extended attributes for media:
# http://freedesktop.org/wiki/CommonExtendedAttributes/
# http://www.freedesktop.org/wiki/PhreedomDraft/
# http://dublincore.org/documents/usageguide/elements.shtml
#
# TODO:
# * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated)
# * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
#
"""Set extended attributes on downloaded file (if xattr support is found)
More info about extended attributes for media:
http://freedesktop.org/wiki/CommonExtendedAttributes/
http://www.freedesktop.org/wiki/PhreedomDraft/
http://dublincore.org/documents/usageguide/elements.shtml
TODO:
* capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated)
* figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
"""
XATTR_MAPPING = {
'user.xdg.referrer.url': 'webpage_url',
# 'user.xdg.comment': 'description',
'user.dublincore.title': 'title',
'user.dublincore.date': 'upload_date',
'user.dublincore.description': 'description',
'user.dublincore.contributor': 'uploader',
'user.dublincore.format': 'format',
}
def run(self, info):
""" Set extended attributes on downloaded file (if xattr support is found). """
# Write the metadata to the file's xattrs
mtime = os.stat(info['filepath']).st_mtime
self.to_screen('Writing metadata to file\'s xattrs')
filename = info['filepath']
try:
xattr_mapping = {
'user.xdg.referrer.url': 'webpage_url',
# 'user.xdg.comment': 'description',
'user.dublincore.title': 'title',
'user.dublincore.date': 'upload_date',
'user.dublincore.description': 'description',
'user.dublincore.contributor': 'uploader',
'user.dublincore.format': 'format',
}
num_written = 0
for xattrname, infoname in xattr_mapping.items():
for xattrname, infoname in self.XATTR_MAPPING.items():
value = info.get(infoname)
if value:
if infoname == 'upload_date':
value = hyphenate_date(value)
byte_value = value.encode('utf-8')
write_xattr(filename, xattrname, byte_value)
num_written += 1
return [], info
write_xattr(info['filepath'], xattrname, value.encode())
except XAttrUnavailableError as e:
raise PostProcessingError(str(e))
except XAttrMetadataError as e:
if e.reason == 'NO_SPACE':
self.report_warning(
'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. '
+ (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize())
'Some extended attributes are not written')
elif e.reason == 'VALUE_TOO_LONG':
self.report_warning(
'Unable to write extended attributes due to too long values.')
self.report_warning('Unable to write extended attributes due to too long values.')
else:
msg = 'This filesystem doesn\'t support extended attributes. '
if compat_os_name == 'nt':
msg += 'You need to use NTFS.'
else:
msg += '(You may have to enable them in your /etc/fstab)'
raise PostProcessingError(str(e))
return [], info
tip = ('You need to use NTFS' if compat_os_name == 'nt'
else 'You may have to enable them in your "/etc/fstab"')
raise PostProcessingError(f'This filesystem doesn\'t support extended attributes. {tip}')
self.try_utime(info['filepath'], mtime, mtime)
return [], info