Files
debs
images
plugins
archiver
disk_usage
favorites
file_properties
git_clone
movie_tv_info
py_run
searcher
template
thumbnailer
translate
trasher
vod_thumbnailer
youtube_download
yt_dlp
__pyinstaller
compat
dependencies
downloader
extractor
youtube
__init__.py
_extractors.py
abc.py
abcnews.py
abcotvs.py
abematv.py
academicearth.py
acast.py
acfun.py
adn.py
adobeconnect.py
adobepass.py
adobetv.py
adultswim.py
aenetworks.py
aeonco.py
afreecatv.py
agora.py
airtv.py
aitube.py
aliexpress.py
aljazeera.py
allocine.py
allstar.py
alphaporno.py
alsace20tv.py
altcensored.py
alura.py
amadeustv.py
amara.py
amazon.py
amazonminitv.py
amcnetworks.py
americastestkitchen.py
amp.py
anchorfm.py
angel.py
antenna.py
anvato.py
aol.py
apa.py
aparat.py
appleconnect.py
applepodcasts.py
appletrailers.py
archiveorg.py
arcpublishing.py
ard.py
arkena.py
arnes.py
art19.py
arte.py
asobichannel.py
asobistage.py
atresplayer.py
atscaleconf.py
atvat.py
audimedia.py
audioboom.py
audiodraft.py
audiomack.py
audius.py
awaan.py
aws.py
axs.py
azmedien.py
baidu.py
banbye.py
bandaichannel.py
bandcamp.py
bandlab.py
bannedvideo.py
bbc.py
beacon.py
beatbump.py
beatport.py
beeg.py
behindkink.py
bellmedia.py
berufetv.py
bet.py
bfi.py
bfmtv.py
bibeltv.py
bigflix.py
bigo.py
bild.py
bilibili.py
biobiochiletv.py
bitchute.py
blackboardcollaborate.py
bleacherreport.py
blerp.py
blogger.py
bloomberg.py
bluesky.py
bokecc.py
bongacams.py
boosty.py
bostonglobe.py
box.py
boxcast.py
bpb.py
br.py
brainpop.py
bravotv.py
breitbart.py
brightcove.py
brilliantpala.py
bundesliga.py
bundestag.py
bunnycdn.py
businessinsider.py
buzzfeed.py
byutv.py
c56.py
caffeinetv.py
callin.py
caltrans.py
cam4.py
camdemy.py
camfm.py
cammodels.py
camsoda.py
camtasia.py
canal1.py
canalalpha.py
canalc2.py
canalplus.py
canalsurmas.py
caracoltv.py
cartoonnetwork.py
cbc.py
cbs.py
cbsnews.py
cbssports.py
ccc.py
ccma.py
cctv.py
cda.py
cellebrite.py
ceskatelevize.py
cgtn.py
charlierose.py
chaturbate.py
chilloutzone.py
chzzk.py
cinemax.py
cinetecamilano.py
cineverse.py
ciscolive.py
ciscowebex.py
cjsw.py
clipchamp.py
clippit.py
cliprs.py
closertotruth.py
cloudflarestream.py
cloudycdn.py
clubic.py
clyp.py
cmt.py
cnbc.py
cnn.py
comedycentral.py
common.py
commonmistakes.py
commonprotocols.py
condenast.py
contv.py
corus.py
coub.py
cozytv.py
cpac.py
cracked.py
crackle.py
craftsy.py
crooksandliars.py
crowdbunker.py
crtvg.py
cspan.py
ctsnews.py
ctv.py
ctvnews.py
cultureunplugged.py
curiositystream.py
cwtv.py
cybrary.py
dacast.py
dailymail.py
dailymotion.py
dailywire.py
damtomo.py
dangalplay.py
daum.py
daystar.py
dbtv.py
dctp.py
democracynow.py
detik.py
deuxm.py
dfb.py
dhm.py
digitalconcerthall.py
digiteka.py
digiview.py
discogs.py
disney.py
dispeak.py
dlf.py
dlive.py
douyutv.py
dplay.py
drbonanza.py
dreisat.py
drooble.py
dropbox.py
dropout.py
drtalks.py
drtuber.py
drtv.py
dtube.py
duboku.py
dumpert.py
duoplay.py
dvtv.py
dw.py
eagleplatform.py
ebaumsworld.py
ebay.py
egghead.py
eggs.py
eighttracks.py
eitb.py
elementorembed.py
elonet.py
elpais.py
eltrecetv.py
embedly.py
epicon.py
epidemicsound.py
eplus.py
epoch.py
eporner.py
erocast.py
eroprofile.py
err.py
ertgr.py
espn.py
ettutv.py
europa.py
europeantour.py
eurosport.py
euscreen.py
expressen.py
extractors.py
eyedotv.py
facebook.py
fancode.py
fathom.py
faz.py
fc2.py
fczenit.py
fifa.py
filmon.py
filmweb.py
firsttv.py
fivetv.py
flextv.py
flickr.py
floatplane.py
folketinget.py
footyroom.py
formula1.py
fourtube.py
fox.py
fox9.py
foxnews.py
foxsports.py
fptplay.py
francaisfacile.py
franceinter.py
francetv.py
freesound.py
freespeech.py
freetv.py
frontendmasters.py
fujitv.py
funk.py
funker530.py
fuyintv.py
gab.py
gaia.py
gamedevtv.py
gamejolt.py
gamespot.py
gamestar.py
gaskrank.py
gazeta.py
gbnews.py
gdcvault.py
gedidigital.py
generic.py
genericembeds.py
genius.py
germanupa.py
getcourseru.py
gettr.py
giantbomb.py
glide.py
globalplayer.py
globo.py
glomex.py
gmanetwork.py
go.py
godresource.py
godtube.py
gofile.py
golem.py
goodgame.py
googledrive.py
googlepodcasts.py
googlesearch.py
goplay.py
gopro.py
goshgay.py
gotostage.py
gputechconf.py
graspop.py
gronkh.py
groupon.py
harpodeon.py
hbo.py
hearthisat.py
heise.py
hellporno.py
hgtv.py
hidive.py
historicfilms.py
hitrecord.py
hketv.py
hollywoodreporter.py
holodex.py
hotnewhiphop.py
hotstar.py
hrefli.py
hrfensehen.py
hrti.py
hse.py
huajiao.py
huffpost.py
hungama.py
huya.py
hypem.py
hypergryph.py
hytale.py
icareus.py
ichinanalive.py
idolplus.py
ign.py
iheart.py
ilpost.py
iltalehti.py
imdb.py
imggaming.py
imgur.py
ina.py
inc.py
indavideo.py
infoq.py
instagram.py
internazionale.py
internetvideoarchive.py
iprima.py
iqiyi.py
islamchannel.py
israelnationalnews.py
itprotv.py
itv.py
ivi.py
ivideon.py
ivoox.py
iwara.py
ixigua.py
izlesene.py
jamendo.py
japandiet.py
jeuxvideo.py
jiocinema.py
jiosaavn.py
jixie.py
joj.py
joqrag.py
jove.py
jstream.py
jtbc.py
jwplatform.py
kakao.py
kaltura.py
kankanews.py
karaoketv.py
kelbyone.py
kenh14.py
khanacademy.py
kick.py
kicker.py
kickstarter.py
kika.py
kinja.py
kinopoisk.py
kommunetv.py
kompas.py
koo.py
krasview.py
kth.py
ku6.py
kukululive.py
kuwo.py
la7.py
laracasts.py
lastfm.py
laxarxames.py
lbry.py
lci.py
lcp.py
learningonscreen.py
lecture2go.py
lecturio.py
leeco.py
lefigaro.py
lego.py
lemonde.py
lenta.py
libraryofcongress.py
libsyn.py
lifenews.py
likee.py
limelight.py
linkedin.py
liputan6.py
listennotes.py
litv.py
livejournal.py
livestream.py
livestreamfails.py
lnk.py
loco.py
loom.py
lovehomeporn.py
lrt.py
lsm.py
lumni.py
lynda.py
maariv.py
magellantv.py
magentamusik.py
mailru.py
mainstreaming.py
mangomolo.py
manoto.py
manyvids.py
maoritv.py
markiza.py
massengeschmacktv.py
masters.py
matchtv.py
mbn.py
mdr.py
medaltv.py
mediaite.py
mediaklikk.py
medialaan.py
mediaset.py
mediasite.py
mediastream.py
mediaworksnz.py
medici.py
megaphone.py
megatvcom.py
meipai.py
melonvod.py
metacritic.py
mgtv.py
microsoftembed.py
microsoftstream.py
minds.py
minoto.py
mirrativ.py
mirrorcouk.py
mit.py
mitele.py
mixch.py
mixcloud.py
mlb.py
mlssoccer.py
mocha.py
mojevideo.py
mojvideo.py
monstercat.py
motherless.py
motorsport.py
moviepilot.py
moview.py
moviezine.py
movingimage.py
msn.py
mtv.py
muenchentv.py
murrtube.py
museai.py
musescore.py
musicdex.py
mx3.py
mxplayer.py
myspace.py
myspass.py
myvideoge.py
myvidster.py
mzaalo.py
n1.py
nate.py
nationalgeographic.py
naver.py
nba.py
nbc.py
ndr.py
ndtv.py
nebula.py
nekohacker.py
nerdcubed.py
nest.py
neteasemusic.py
netverse.py
netzkino.py
newgrounds.py
newspicks.py
newsy.py
nextmedia.py
nexx.py
nfb.py
nfhsnetwork.py
nfl.py
nhk.py
nhl.py
nick.py
niconico.py
niconicochannelplus.py
ninaprotocol.py
ninecninemedia.py
ninegag.py
ninenews.py
ninenow.py
nintendo.py
nitter.py
nobelprize.py
noice.py
nonktube.py
noodlemagazine.py
noovo.py
nosnl.py
nova.py
novaplay.py
nowness.py
noz.py
npo.py
npr.py
nrk.py
nrl.py
nts.py
ntvcojp.py
ntvde.py
ntvru.py
nubilesporn.py
nuevo.py
nuum.py
nuvid.py
nytimes.py
nzherald.py
nzonscreen.py
nzz.py
odkmedia.py
odnoklassniki.py
oftv.py
oktoberfesttv.py
olympics.py
on24.py
once.py
ondemandkorea.py
onefootball.py
onenewsnz.py
oneplace.py
onet.py
onionstudios.py
opencast.py
openload.py
openrec.py
ora.py
orf.py
outsidetv.py
owncloud.py
packtpub.py
palcomp3.py
panopto.py
paramountplus.py
parler.py
parlview.py
parti.py
patreon.py
pbs.py
pearvideo.py
peekvids.py
peertube.py
peertv.py
peloton.py
performgroup.py
periscope.py
pgatour.py
philharmoniedeparis.py
phoenix.py
photobucket.py
pialive.py
piapro.py
picarto.py
piksel.py
pinkbike.py
pinterest.py
piramidetv.py
pixivsketch.py
pladform.py
planetmarathi.py
platzi.py
playplustv.py
playsuisse.py
playtvak.py
playwire.py
pluralsight.py
plutotv.py
plvideo.py
podbayfm.py
podchaser.py
podomatic.py
pokergo.py
polsatgo.py
polskieradio.py
popcorntimes.py
popcorntv.py
pornbox.py
pornflip.py
pornhub.py
pornotube.py
pornovoisines.py
pornoxo.py
pr0gramm.py
prankcast.py
premiershiprugby.py
presstv.py
projectveritas.py
prosiebensat1.py
prx.py
puhutv.py
puls4.py
pyvideo.py
qdance.py
qingting.py
qqmusic.py
r7.py
radiko.py
radiocanada.py
radiocomercial.py
radiode.py
radiofrance.py
radiojavan.py
radiokapital.py
radioradicale.py
radiozet.py
radlive.py
rai.py
raywenderlich.py
rbgtum.py
rcs.py
rcti.py
rds.py
redbee.py
redbulltv.py
reddit.py
redge.py
redgifs.py
redtube.py
rentv.py
restudy.py
reuters.py
reverbnation.py
rheinmaintv.py
ridehome.py
rinsefm.py
rmcdecouverte.py
rockstargames.py
rokfin.py
roosterteeth.py
rottentomatoes.py
roya.py
rozhlas.py
rte.py
rtl2.py
rtlnl.py
rtnews.py
rtp.py
rtrfm.py
rts.py
rtvcplay.py
rtve.py
rtvs.py
rtvslo.py
rudovideo.py
rule34video.py
rumble.py
rutube.py
rutv.py
ruutu.py
ruv.py
s4c.py
safari.py
saitosan.py
samplefocus.py
sapo.py
sbs.py
sbscokr.py
screen9.py
screencast.py
screencastify.py
screencastomatic.py
screenrec.py
scrippsnetworks.py
scrolller.py
scte.py
sejmpl.py
sen.py
senalcolombia.py
senategov.py
sendtonews.py
servus.py
sevenplus.py
sexu.py
seznamzpravy.py
shahid.py
sharepoint.py
sharevideos.py
shemaroome.py
showroomlive.py
sibnet.py
simplecast.py
sina.py
sixplay.py
skeb.py
sky.py
skyit.py
skylinewebcams.py
skynewsarabia.py
skynewsau.py
slideshare.py
slideslive.py
slutload.py
smotrim.py
snapchat.py
snotr.py
softwhiteunderbelly.py
sohu.py
sonyliv.py
soundcloud.py
soundgasm.py
southpark.py
sovietscloset.py
spankbang.py
spiegel.py
spike.py
sport5.py
sportbox.py
sportdeutschland.py
spotify.py
spreaker.py
springboardplatform.py
sprout.py
sproutvideo.py
srgssr.py
srmediathek.py
stacommu.py
stageplus.py
stanfordoc.py
startrek.py
startv.py
steam.py
stitcher.py
storyfire.py
streaks.py
streamable.py
streamcz.py
streetvoice.py
stretchinternet.py
stripchat.py
stv.py
subsplash.py
substack.py
sunporno.py
sverigesradio.py
svt.py
swearnet.py
syfy.py
syvdk.py
sztvhu.py
tagesschau.py
taptap.py
tass.py
tbs.py
tbsjp.py
teachable.py
teachertube.py
teachingchannel.py
teamcoco.py
teamtreehouse.py
ted.py
tele13.py
tele5.py
telebruxelles.py
telecaribe.py
telecinco.py
telegraaf.py
telegram.py
telemb.py
telemundo.py
telequebec.py
teletask.py
telewebion.py
tempo.py
tencent.py
tennistv.py
tenplay.py
testurl.py
tf1.py
tfo.py
theguardian.py
theholetv.py
theintercept.py
theplatform.py
thestar.py
thesun.py
theweatherchannel.py
thisamericanlife.py
thisoldhouse.py
thisvid.py
threeqsdn.py
threespeak.py
tiktok.py
tmz.py
tnaflix.py
toggle.py
toggo.py
tonline.py
toongoggles.py
toutv.py
toypics.py
traileraddict.py
triller.py
trovo.py
trtcocuk.py
trtworld.py
trueid.py
trunews.py
truth.py
trutv.py
tube8.py
tubetugraz.py
tubitv.py
tumblr.py
tunein.py
turner.py
tv2.py
tv24ua.py
tv2dk.py
tv2hu.py
tv4.py
tv5mondeplus.py
tv5unis.py
tva.py
tvanouvelles.py
tvc.py
tver.py
tvigle.py
tviplayer.py
tvland.py
tvn24.py
tvnoe.py
tvopengr.py
tvp.py
tvplay.py
tvplayer.py
tvw.py
tweakers.py
twentymin.py
twentythreevideo.py
twitcasting.py
twitch.py
twitter.py
txxx.py
udemy.py
udn.py
ufctv.py
ukcolumn.py
uktvplay.py
uliza.py
umg.py
unistra.py
unity.py
unsupported.py
uol.py
uplynk.py
urort.py
urplay.py
usanetwork.py
usatoday.py
ustream.py
ustudio.py
utreon.py
varzesh3.py
vbox7.py
veo.py
vesti.py
vevo.py
vgtv.py
vh1.py
vice.py
viddler.py
videa.py
videocampus_sachsen.py
videodetective.py
videofyme.py
videoken.py
videomore.py
videopress.py
vidflex.py
vidio.py
vidlii.py
vidly.py
vidyard.py
viewlift.py
viidea.py
vimeo.py
vimm.py
viously.py
viqeo.py
viu.py
vk.py
vocaroo.py
vodpl.py
vodplatform.py
voicy.py
volejtv.py
voxmedia.py
vrsquare.py
vrt.py
vtm.py
vtv.py
vuclip.py
vvvvid.py
walla.py
washingtonpost.py
wat.py
wdr.py
webcamerapl.py
webcaster.py
webofstories.py
weibo.py
weiqitv.py
weverse.py
wevidi.py
weyyak.py
whowatch.py
whyp.py
wikimedia.py
wimbledon.py
wimtv.py
wistia.py
wordpress.py
worldstarhiphop.py
wppilot.py
wrestleuniverse.py
wsj.py
wwe.py
wykop.py
xanimu.py
xboxclips.py
xhamster.py
xiaohongshu.py
ximalaya.py
xinpianchang.py
xminus.py
xnxx.py
xstream.py
xvideos.py
xxxymovies.py
yahoo.py
yandexdisk.py
yandexmusic.py
yandexvideo.py
yapfiles.py
yappy.py
yle_areena.py
youjizz.py
youku.py
younow.py
youporn.py
zaiko.py
zapiks.py
zattoo.py
zdf.py
zee5.py
zeenews.py
zenporn.py
zetland.py
zhihu.py
zingmp3.py
zoom.py
zype.py
networking
postprocessor
utils
YoutubeDL.py
__init__.py
__main__.py
aes.py
cache.py
cookies.py
globals.py
jsinterp.py
minicurses.py
options.py
plugins.py
socks.py
update.py
version.py
webvtt.py
__init__.py
__main__.py
download.sh
manifest.json
plugin.py
README.md
src
user_config
.gitignore
LICENSE
README.md
cookies.txt
pyrightconfig.json
SolarFM/plugins/youtube_download/yt_dlp/extractor/xhamster.py

482 lines
20 KiB
Python

import itertools
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
determine_ext,
dict_get,
extract_attributes,
float_or_none,
int_or_none,
parse_duration,
str_or_none,
try_get,
unified_strdate,
url_or_none,
urljoin,
)
class XHamsterIE(InfoExtractor):
    """Extractor for single xHamster video pages (both URL schemas)."""

    # Domain alternatives shared with the embed and user extractors.
    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.(?:com|desi)|xhday\.com|xhvid\.com)'
    # Two URL schemas are accepted: the old one captures ``id``/``display_id``,
    # the new one captures ``id_2``/``display_id_2``.
    _VALID_URL = rf'''(?x)
                    https?://
                        (?:[^/?#]+\.)?{_DOMAINS}/
                        (?:
                            movies/(?P<id>[\dA-Za-z]+)/(?P<display_id>[^/]*)\.html|
                            videos/(?P<display_id_2>[^/]*)-(?P<id_2>[\dA-Za-z]+)
                        )
                    '''
    _TESTS = [{
        'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
        'md5': 'e009ea6b849b129e3bebaeb9cf0dee51',
        'info_dict': {
            'id': '1509445',
            'display_id': 'femaleagent-shy-beauty-takes-the-bait',
            'ext': 'mp4',
            'title': 'FemaleAgent Shy beauty takes the bait',
            'timestamp': 1350194821,
            'upload_date': '20121014',
            'uploader': 'Ruseful2011',
            'uploader_id': 'ruseful2011',
            'duration': 893,
            'age_limit': 18,
            'thumbnail': 'https://thumb-nss.xhcdn.com/a/u3Vr5F2vvcU3yK59_jJqVA/001/509/445/1280x720.8.jpg',
            'uploader_url': 'https://xhamster.com/users/ruseful2011',
            'description': '',
            'view_count': int,
            'comment_count': int,
        },
    }, {
        'url': 'https://xhamster.com/videos/britney-spears-sexy-booty-2221348?hd=',
        'info_dict': {
            'id': '2221348',
            'display_id': 'britney-spears-sexy-booty',
            'ext': 'mp4',
            'title': 'Britney Spears Sexy Booty',
            'timestamp': 1379123460,
            'upload_date': '20130914',
            'uploader': 'jojo747400',
            'duration': 200,
            'age_limit': 18,
            'description': '',
            'view_count': int,
            'thumbnail': 'https://thumb-nss.xhcdn.com/a/kk5nio_iR-h4Z3frfVtoDw/002/221/348/1280x720.4.jpg',
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # empty seo, unavailable via new URL schema
        'url': 'http://xhamster.com/movies/5667973/.html',
        'info_dict': {
            'id': '5667973',
            'ext': 'mp4',
            'title': '....',
            'timestamp': 1454948101,
            'upload_date': '20160208',
            'uploader': 'parejafree',
            'uploader_id': 'parejafree',
            'duration': 72,
            'age_limit': 18,
            'comment_count': int,
            'uploader_url': 'https://xhamster.com/users/parejafree',
            'description': '',
            'view_count': int,
            'thumbnail': 'https://thumb-nss.xhcdn.com/a/xc8MSwVKcsQeRRiTT-saMQ/005/667/973/1280x720.2.jpg',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # mobile site
        'url': 'https://m.xhamster.com/videos/cute-teen-jacqueline-solo-masturbation-8559111',
        'only_matching': True,
    }, {
        'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
        'only_matching': True,
    }, {
        # This video is visible for marcoalfa123456's friends only
        'url': 'https://it.xhamster.com/movies/7263980/la_mia_vicina.html',
        'only_matching': True,
    }, {
        # new URL schema
        'url': 'https://pt.xhamster.com/videos/euro-pedal-pumping-7937821',
        'only_matching': True,
    }, {
        'url': 'https://xhamster.one/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
        'only_matching': True,
    }, {
        'url': 'https://xhamster.desi/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
        'only_matching': True,
    }, {
        'url': 'https://xhamster2.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
        'only_matching': True,
    }, {
        'url': 'https://xhamster11.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
        'only_matching': True,
    }, {
        'url': 'https://xhamster26.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
        'only_matching': True,
    }, {
        'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
        'only_matching': True,
    }, {
        'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
        'only_matching': True,
    }, {
        'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx',
        'only_matching': True,
    }, {
        'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
        'only_matching': True,
    }, {
        'url': 'https://xhvid.com/videos/lk-mm-xhc6wn6',
        'only_matching': True,
    }, {
        'url': 'https://xhamster20.desi/videos/my-verification-video-scottishmistress23-11937369',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single video page.

        The page is first searched for the ``window.initials`` JSON blob; when
        it is present and non-empty, metadata and formats are read from it.
        Otherwise the method falls back to scraping the old HTML layout.

        Raises ExtractorError (expected) when the page contains a
        ``videoClosed`` notice.
        """
        mobj = self._match_valid_url(url)
        # Each URL schema fills its own pair of named groups.
        video_id = mobj.group('id') or mobj.group('id_2')
        display_id = mobj.group('display_id') or mobj.group('display_id_2')

        # Drop an ``m.`` host label so the desktop page is requested.
        desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url)
        webpage, urlh = self._download_webpage_handle(desktop_url, video_id)

        error = self._html_search_regex(
            r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',
            webpage, 'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        age_limit = self._rta_search(webpage)

        def get_height(s):
            # Quality labels look like "720p"; anything else yields None.
            return int_or_none(self._search_regex(
                r'^(\d+)[pP]', s, 'height', default=None))

        initials = self._parse_json(
            self._search_regex(
                (r'window\.initials\s*=\s*({.+?})\s*;\s*</script>',
                 r'window\.initials\s*=\s*({.+?})\s*;'), webpage, 'initials',
                default='{}'),
            video_id, fatal=False)
        if initials:
            video = initials['videoModel']
            title = video['title']
            formats = []
            format_urls = set()
            format_sizes = {}
            sources = try_get(video, lambda x: x['sources'], dict) or {}

            # File sizes are only listed under the "download" entry, keyed by
            # quality. Build the lookup table once, before walking the
            # playable sources.
            download_sources = try_get(sources, lambda x: x['download'], dict) or {}
            for quality, format_dict in download_sources.items():
                if isinstance(format_dict, dict):
                    format_sizes[quality] = float_or_none(format_dict.get('size'))

            for format_id, formats_dict in sources.items():
                # Download link takes some time to be generated,
                # skipping for now
                if format_id == 'download' or not isinstance(formats_dict, dict):
                    continue
                for quality, format_item in formats_dict.items():
                    format_url = url_or_none(format_item)
                    if not format_url or format_url in format_urls:
                        continue
                    format_urls.add(format_url)
                    formats.append({
                        'format_id': f'{format_id}-{quality}',
                        'url': format_url,
                        'ext': determine_ext(format_url, 'mp4'),
                        'height': get_height(quality),
                        'filesize': format_sizes.get(quality),
                        'http_headers': {
                            'Referer': urlh.url,
                        },
                    })

            xplayer_sources = try_get(
                initials, lambda x: x['xplayerSettings']['sources'], dict)
            if xplayer_sources:
                hls_sources = xplayer_sources.get('hls')
                if isinstance(hls_sources, dict):
                    for hls_format_key in ('url', 'fallback'):
                        hls_url = hls_sources.get(hls_format_key)
                        if not hls_url:
                            continue
                        hls_url = urljoin(url, hls_url)
                        if not hls_url or hls_url in format_urls:
                            continue
                        format_urls.add(hls_url)
                        formats.extend(self._extract_m3u8_formats(
                            hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
                            m3u8_id='hls', fatal=False))
                standard_sources = xplayer_sources.get('standard')
                if isinstance(standard_sources, dict):
                    for format_id, formats_list in standard_sources.items():
                        if not isinstance(formats_list, list):
                            continue
                        for standard_format in formats_list:
                            if not isinstance(standard_format, dict):
                                continue
                            for standard_format_key in ('url', 'fallback'):
                                standard_url = standard_format.get(standard_format_key)
                                if not standard_url:
                                    continue
                                standard_url = urljoin(url, standard_url)
                                if not standard_url or standard_url in format_urls:
                                    continue
                                format_urls.add(standard_url)
                                ext = determine_ext(standard_url, 'mp4')
                                if ext == 'm3u8':
                                    formats.extend(self._extract_m3u8_formats(
                                        standard_url, video_id, 'mp4', entry_protocol='m3u8_native',
                                        m3u8_id='hls', fatal=False))
                                    continue
                                quality = (str_or_none(standard_format.get('quality'))
                                           or str_or_none(standard_format.get('label'))
                                           or '')
                                formats.append({
                                    'format_id': f'{format_id}-{quality}',
                                    'url': standard_url,
                                    'ext': ext,
                                    'height': get_height(quality),
                                    'filesize': format_sizes.get(quality),
                                    'http_headers': {
                                        'Referer': standard_url,
                                    },
                                })

            categories_list = video.get('categories')
            if isinstance(categories_list, list):
                # Keep only well-formed entries that carry a string name.
                categories = [
                    c['name'] for c in categories_list
                    if isinstance(c, dict) and isinstance(c.get('name'), str)]
            else:
                categories = None

            uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL']))
            return {
                'id': video_id,
                'display_id': display_id,
                'title': title,
                'description': video.get('description'),
                'timestamp': int_or_none(video.get('created')),
                'uploader': try_get(
                    video, lambda x: x['author']['name'], str),
                'uploader_url': uploader_url,
                # The uploader id is the last path segment of the profile URL.
                'uploader_id': uploader_url.split('/')[-1] if uploader_url else None,
                'thumbnail': video.get('thumbURL'),
                'duration': int_or_none(video.get('duration')),
                'view_count': int_or_none(video.get('views')),
                'like_count': int_or_none(try_get(
                    video, lambda x: x['rating']['likes'], int)),
                'dislike_count': int_or_none(try_get(
                    video, lambda x: x['rating']['dislikes'], int)),
                'comment_count': int_or_none(video.get('comments')),
                'age_limit': age_limit if age_limit is not None else 18,
                'categories': categories,
                'formats': formats,
            }

        # Old layout fallback
        title = self._html_search_regex(
            [r'<h1[^>]*>([^<]+)</h1>',
             r'<meta[^>]+itemprop=".*?caption.*?"[^>]+content="(.+?)"',
             r'<title[^>]*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)</title>'],
            webpage, 'title')

        formats = []
        format_urls = set()

        # The parse is non-fatal, so guard against an empty result before
        # iterating with ``.items()``.
        sources = self._parse_json(
            self._search_regex(
                r'sources\s*:\s*({.+?})\s*,?\s*\n', webpage, 'sources',
                default='{}'),
            video_id, fatal=False) or {}
        for format_id, format_url in sources.items():
            format_url = url_or_none(format_url)
            if not format_url:
                continue
            if format_url in format_urls:
                continue
            format_urls.add(format_url)
            formats.append({
                'format_id': format_id,
                'url': format_url,
                'height': get_height(format_id),
            })

        video_url = self._search_regex(
            [r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''',
             r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''',
             r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''],
            webpage, 'video url', group='mp4', default=None)
        if video_url and video_url not in format_urls:
            formats.append({
                'url': video_url,
            })

        # Only a few videos have a description
        mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
        description = mobj.group(1) if mobj else None

        upload_date = unified_strdate(self._search_regex(
            r'hint=["\'](\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}',
            webpage, 'upload date', fatal=False))

        uploader = self._html_search_regex(
            r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+><span[^>]+>([^<]+)',
            webpage, 'uploader', default='anonymous')

        thumbnail = self._search_regex(
            [r'''["']thumbUrl["']\s*:\s*(?P<q>["'])(?P<thumbnail>.+?)(?P=q)''',
             r'''<video[^>]+"poster"=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''],
            webpage, 'thumbnail', fatal=False, group='thumbnail')

        duration = parse_duration(self._search_regex(
            [r'<[^<]+\bitemprop=["\']duration["\'][^<]+\bcontent=["\'](.+?)["\']',
             r'Runtime:\s*</span>\s*([\d:]+)'], webpage,
            'duration', fatal=False))

        view_count = int_or_none(self._search_regex(
            r'content=["\']User(?:View|Play)s:(\d+)',
            webpage, 'view count', fatal=False))

        mobj = re.search(r'hint=[\'"](?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes', webpage)
        (like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None)

        mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
        comment_count = mobj.group('commentcount') if mobj else 0

        categories_html = self._search_regex(
            r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,
            'categories', default=None)
        categories = [clean_html(category) for category in re.findall(
            r'<a[^>]+>(.+?)</a>', categories_html)] if categories_html else None

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'uploader': uploader,
            'uploader_id': uploader.lower() if uploader else None,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'like_count': int_or_none(like_count),
            'dislike_count': int_or_none(dislike_count),
            'comment_count': int_or_none(comment_count),
            'age_limit': age_limit,
            'categories': categories,
            'formats': formats,
        }
class XHamsterEmbedIE(InfoExtractor):
    """Resolve an ``xembed.php`` player URL to the video page it embeds."""

    _VALID_URL = rf'https?://(?:[^/?#]+\.)?{XHamsterIE._DOMAINS}/xembed\.php\?video=(?P<id>\d+)'
    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1']
    _TEST = {
        'url': 'http://xhamster.com/xembed.php?video=3328539',
        'info_dict': {
            'id': '3328539',
            'ext': 'mp4',
            'title': 'Pen Masturbation',
            'timestamp': 1406581861,
            'upload_date': '20140728',
            'uploader': 'ManyakisArt',
            'duration': 5,
            'age_limit': 18,
        },
    }

    def _real_extract(self, url):
        """Return a URL result that hands the embedded video to 'XHamster'.

        A direct link to the video page is looked for in the embed markup
        first; if none is found, the player ``vars`` JSON is parsed and the
        first available link-like field is used instead.
        """
        video_id = self._match_id(url)
        embed_page = self._download_webpage(url, video_id)

        page_link = self._search_regex(
            rf'href="(https?://xhamster\.com/(?:movies/{video_id}/[^"]*\.html|videos/[^/]*-{video_id})[^"]*)"',
            embed_page, 'xhamster url', default=None)
        if page_link:
            return self.url_result(page_link, 'XHamster')

        # No direct link in the markup: fall back to the player variables.
        raw_vars = self._search_regex(
            r'vars\s*:\s*({.+?})\s*,\s*\n', embed_page, 'vars')
        player_vars = self._parse_json(raw_vars, video_id)
        candidate_keys = ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl')
        return self.url_result(dict_get(player_vars, candidate_keys), 'XHamster')
class XHamsterUserIE(InfoExtractor):
    """Playlist extractor for xHamster user and creator profile pages."""

    # The optional ``user`` group is set only for ``/users/`` URLs; it is what
    # distinguishes a user profile from a ``/creators/`` profile.
    _VALID_URL = rf'https?://(?:[^/?#]+\.)?{XHamsterIE._DOMAINS}/(?:(?P<user>users)|creators)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        # Paginated user profile
        'url': 'https://xhamster.com/users/netvideogirls/videos',
        'info_dict': {
            'id': 'netvideogirls',
        },
        'playlist_mincount': 267,
    }, {
        # Non-paginated user profile
        'url': 'https://xhamster.com/users/firatkaan/videos',
        'info_dict': {
            'id': 'firatkaan',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://xhamster.com/creators/squirt-orgasm-69',
        'info_dict': {
            'id': 'squirt-orgasm-69',
        },
        'playlist_mincount': 150,
    }, {
        'url': 'https://xhday.com/users/mobhunter',
        'only_matching': True,
    }, {
        'url': 'https://xhvid.com/users/pelushe21',
        'only_matching': True,
    }]

    def _entries(self, user_id, is_user):
        """Yield one URL result per video, following "next" page links.

        ``is_user`` selects the ``users/<id>/videos`` listing; otherwise the
        ``creators/<id>/exclusive`` listing is walked. Iteration stops when a
        page has no usable next-page link.
        """
        prefix, suffix = ('users', 'videos') if is_user else ('creators', 'exclusive')
        next_page_url = f'https://xhamster.com/{prefix}/{user_id}/{suffix}/1'
        for pagenum in itertools.count(1):
            page = self._download_webpage(
                next_page_url, user_id, f'Downloading page {pagenum}')
            for video_tag in re.findall(
                    r'(<a[^>]+class=["\'].*?\bvideo-thumb__image-container[^>]+>)',
                    page):
                video = extract_attributes(video_tag)
                video_url = url_or_none(video.get('href'))
                if not video_url or not XHamsterIE.suitable(video_url):
                    continue
                # XHamsterIE captures the id as ``id`` for the old URL schema
                # and as ``id_2`` for the new one, so check both groups
                # instead of relying on the single-group ``_match_id``.
                video_match = XHamsterIE._match_valid_url(video_url)
                video_id = video_match.group('id') or video_match.group('id_2')
                yield self.url_result(
                    video_url, ie=XHamsterIE.ie_key(), video_id=video_id)
            mobj = re.search(r'<a[^>]+data-page=["\']next[^>]+>', page)
            if not mobj:
                break
            next_page = extract_attributes(mobj.group(0))
            next_page_url = url_or_none(next_page.get('href'))
            if not next_page_url:
                break

    def _real_extract(self, url):
        """Return a playlist of every video on the matched profile."""
        user, user_id = self._match_valid_url(url).group('user', 'id')
        return self.playlist_result(self._entries(user_id, bool(user)), user_id)