Plugin cleanup and tweaks
This commit is contained in:
@@ -1,5 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
@@ -71,7 +69,7 @@ class MotherlessIE(InfoExtractor):
|
||||
'title': 'a/ Hot Teens',
|
||||
'categories': list,
|
||||
'upload_date': '20210104',
|
||||
'uploader_id': 'yonbiw',
|
||||
'uploader_id': 'anonymous',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
},
|
||||
@@ -125,11 +123,12 @@ class MotherlessIE(InfoExtractor):
|
||||
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
|
||||
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
|
||||
|
||||
comment_count = webpage.count('class="media-comment-contents"')
|
||||
comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
|
||||
uploader_id = self._html_search_regex(
|
||||
(r'"media-meta-member">\s+<a href="/m/([^"]+)"',
|
||||
r'<span\b[^>]+\bclass="username">([^<]+)</span>'),
|
||||
(r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
|
||||
r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
|
||||
webpage, 'uploader_id', fatal=False)
|
||||
|
||||
categories = self._html_search_meta('keywords', webpage, default=None)
|
||||
if categories:
|
||||
categories = [cat.strip() for cat in categories.split(',')]
|
||||
@@ -219,23 +218,23 @@ class MotherlessGroupIE(InfoExtractor):
|
||||
r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, fatal=False)
|
||||
page_count = self._int(self._search_regex(
|
||||
r'(\d+)</(?:a|span)><(?:a|span)[^>]+rel="next">',
|
||||
webpage, 'page_count', default=0), 'page_count')
|
||||
page_count = str_to_int(self._search_regex(
|
||||
r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
|
||||
webpage, 'page_count', default=0))
|
||||
if not page_count:
|
||||
message = self._search_regex(
|
||||
r'class="error-page"[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*',
|
||||
r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''',
|
||||
webpage, 'error_msg', default=None) or 'This group has no videos.'
|
||||
self.report_warning(message, group_id)
|
||||
page_count = 1
|
||||
PAGE_SIZE = 80
|
||||
|
||||
def _get_page(idx):
|
||||
if not page_count:
|
||||
return
|
||||
webpage = self._download_webpage(
|
||||
page_url, group_id, query={'page': idx + 1},
|
||||
note='Downloading page %d/%d' % (idx + 1, page_count)
|
||||
)
|
||||
if idx > 0:
|
||||
webpage = self._download_webpage(
|
||||
page_url, group_id, query={'page': idx + 1},
|
||||
note='Downloading page %d/%d' % (idx + 1, page_count)
|
||||
)
|
||||
for entry in self._extract_entries(webpage, url):
|
||||
yield entry
|
||||
|
||||
|
Reference in New Issue
Block a user