Files
debs
images
plugins
archiver
disk_usage
favorites
file_properties
git_clone
movie_tv_info
py_run
searcher
template
thumbnailer
translate
trasher
vod_thumbnailer
youtube_download
yt_dlp
__pyinstaller
compat
dependencies
downloader
extractor
youtube
__init__.py
_extractors.py
abc.py
abcnews.py
abcotvs.py
abematv.py
academicearth.py
acast.py
acfun.py
adn.py
adobeconnect.py
adobepass.py
adobetv.py
adultswim.py
aenetworks.py
aeonco.py
afreecatv.py
agora.py
airtv.py
aitube.py
aliexpress.py
aljazeera.py
allocine.py
allstar.py
alphaporno.py
alsace20tv.py
altcensored.py
alura.py
amadeustv.py
amara.py
amazon.py
amazonminitv.py
amcnetworks.py
americastestkitchen.py
amp.py
anchorfm.py
angel.py
antenna.py
anvato.py
aol.py
apa.py
aparat.py
appleconnect.py
applepodcasts.py
appletrailers.py
archiveorg.py
arcpublishing.py
ard.py
arkena.py
arnes.py
art19.py
arte.py
asobichannel.py
asobistage.py
atresplayer.py
atscaleconf.py
atvat.py
audimedia.py
audioboom.py
audiodraft.py
audiomack.py
audius.py
awaan.py
aws.py
axs.py
azmedien.py
baidu.py
banbye.py
bandaichannel.py
bandcamp.py
bandlab.py
bannedvideo.py
bbc.py
beacon.py
beatbump.py
beatport.py
beeg.py
behindkink.py
bellmedia.py
berufetv.py
bet.py
bfi.py
bfmtv.py
bibeltv.py
bigflix.py
bigo.py
bild.py
bilibili.py
biobiochiletv.py
bitchute.py
blackboardcollaborate.py
bleacherreport.py
blerp.py
blogger.py
bloomberg.py
bluesky.py
bokecc.py
bongacams.py
boosty.py
bostonglobe.py
box.py
boxcast.py
bpb.py
br.py
brainpop.py
bravotv.py
breitbart.py
brightcove.py
brilliantpala.py
bundesliga.py
bundestag.py
bunnycdn.py
businessinsider.py
buzzfeed.py
byutv.py
c56.py
caffeinetv.py
callin.py
caltrans.py
cam4.py
camdemy.py
camfm.py
cammodels.py
camsoda.py
camtasia.py
canal1.py
canalalpha.py
canalc2.py
canalplus.py
canalsurmas.py
caracoltv.py
cartoonnetwork.py
cbc.py
cbs.py
cbsnews.py
cbssports.py
ccc.py
ccma.py
cctv.py
cda.py
cellebrite.py
ceskatelevize.py
cgtn.py
charlierose.py
chaturbate.py
chilloutzone.py
chzzk.py
cinemax.py
cinetecamilano.py
cineverse.py
ciscolive.py
ciscowebex.py
cjsw.py
clipchamp.py
clippit.py
cliprs.py
closertotruth.py
cloudflarestream.py
cloudycdn.py
clubic.py
clyp.py
cmt.py
cnbc.py
cnn.py
comedycentral.py
common.py
commonmistakes.py
commonprotocols.py
condenast.py
contv.py
corus.py
coub.py
cozytv.py
cpac.py
cracked.py
crackle.py
craftsy.py
crooksandliars.py
crowdbunker.py
crtvg.py
cspan.py
ctsnews.py
ctv.py
ctvnews.py
cultureunplugged.py
curiositystream.py
cwtv.py
cybrary.py
dacast.py
dailymail.py
dailymotion.py
dailywire.py
damtomo.py
dangalplay.py
daum.py
daystar.py
dbtv.py
dctp.py
democracynow.py
detik.py
deuxm.py
dfb.py
dhm.py
digitalconcerthall.py
digiteka.py
digiview.py
discogs.py
disney.py
dispeak.py
dlf.py
dlive.py
douyutv.py
dplay.py
drbonanza.py
dreisat.py
drooble.py
dropbox.py
dropout.py
drtalks.py
drtuber.py
drtv.py
dtube.py
duboku.py
dumpert.py
duoplay.py
dvtv.py
dw.py
eagleplatform.py
ebaumsworld.py
ebay.py
egghead.py
eggs.py
eighttracks.py
eitb.py
elementorembed.py
elonet.py
elpais.py
eltrecetv.py
embedly.py
epicon.py
epidemicsound.py
eplus.py
epoch.py
eporner.py
erocast.py
eroprofile.py
err.py
ertgr.py
espn.py
ettutv.py
europa.py
europeantour.py
eurosport.py
euscreen.py
expressen.py
extractors.py
eyedotv.py
facebook.py
fancode.py
fathom.py
faz.py
fc2.py
fczenit.py
fifa.py
filmon.py
filmweb.py
firsttv.py
fivetv.py
flextv.py
flickr.py
floatplane.py
folketinget.py
footyroom.py
formula1.py
fourtube.py
fox.py
fox9.py
foxnews.py
foxsports.py
fptplay.py
francaisfacile.py
franceinter.py
francetv.py
freesound.py
freespeech.py
freetv.py
frontendmasters.py
fujitv.py
funk.py
funker530.py
fuyintv.py
gab.py
gaia.py
gamedevtv.py
gamejolt.py
gamespot.py
gamestar.py
gaskrank.py
gazeta.py
gbnews.py
gdcvault.py
gedidigital.py
generic.py
genericembeds.py
genius.py
germanupa.py
getcourseru.py
gettr.py
giantbomb.py
glide.py
globalplayer.py
globo.py
glomex.py
gmanetwork.py
go.py
godresource.py
godtube.py
gofile.py
golem.py
goodgame.py
googledrive.py
googlepodcasts.py
googlesearch.py
goplay.py
gopro.py
goshgay.py
gotostage.py
gputechconf.py
graspop.py
gronkh.py
groupon.py
harpodeon.py
hbo.py
hearthisat.py
heise.py
hellporno.py
hgtv.py
hidive.py
historicfilms.py
hitrecord.py
hketv.py
hollywoodreporter.py
holodex.py
hotnewhiphop.py
hotstar.py
hrefli.py
hrfensehen.py
hrti.py
hse.py
huajiao.py
huffpost.py
hungama.py
huya.py
hypem.py
hypergryph.py
hytale.py
icareus.py
ichinanalive.py
idolplus.py
ign.py
iheart.py
ilpost.py
iltalehti.py
imdb.py
imggaming.py
imgur.py
ina.py
inc.py
indavideo.py
infoq.py
instagram.py
internazionale.py
internetvideoarchive.py
iprima.py
iqiyi.py
islamchannel.py
israelnationalnews.py
itprotv.py
itv.py
ivi.py
ivideon.py
ivoox.py
iwara.py
ixigua.py
izlesene.py
jamendo.py
japandiet.py
jeuxvideo.py
jiocinema.py
jiosaavn.py
jixie.py
joj.py
joqrag.py
jove.py
jstream.py
jtbc.py
jwplatform.py
kakao.py
kaltura.py
kankanews.py
karaoketv.py
kelbyone.py
kenh14.py
khanacademy.py
kick.py
kicker.py
kickstarter.py
kika.py
kinja.py
kinopoisk.py
kommunetv.py
kompas.py
koo.py
krasview.py
kth.py
ku6.py
kukululive.py
kuwo.py
la7.py
laracasts.py
lastfm.py
laxarxames.py
lbry.py
lci.py
lcp.py
learningonscreen.py
lecture2go.py
lecturio.py
leeco.py
lefigaro.py
lego.py
lemonde.py
lenta.py
libraryofcongress.py
libsyn.py
lifenews.py
likee.py
limelight.py
linkedin.py
liputan6.py
listennotes.py
litv.py
livejournal.py
livestream.py
livestreamfails.py
lnk.py
loco.py
loom.py
lovehomeporn.py
lrt.py
lsm.py
lumni.py
lynda.py
maariv.py
magellantv.py
magentamusik.py
mailru.py
mainstreaming.py
mangomolo.py
manoto.py
manyvids.py
maoritv.py
markiza.py
massengeschmacktv.py
masters.py
matchtv.py
mbn.py
mdr.py
medaltv.py
mediaite.py
mediaklikk.py
medialaan.py
mediaset.py
mediasite.py
mediastream.py
mediaworksnz.py
medici.py
megaphone.py
megatvcom.py
meipai.py
melonvod.py
metacritic.py
mgtv.py
microsoftembed.py
microsoftstream.py
minds.py
minoto.py
mirrativ.py
mirrorcouk.py
mit.py
mitele.py
mixch.py
mixcloud.py
mlb.py
mlssoccer.py
mocha.py
mojevideo.py
mojvideo.py
monstercat.py
motherless.py
motorsport.py
moviepilot.py
moview.py
moviezine.py
movingimage.py
msn.py
mtv.py
muenchentv.py
murrtube.py
museai.py
musescore.py
musicdex.py
mx3.py
mxplayer.py
myspace.py
myspass.py
myvideoge.py
myvidster.py
mzaalo.py
n1.py
nate.py
nationalgeographic.py
naver.py
nba.py
nbc.py
ndr.py
ndtv.py
nebula.py
nekohacker.py
nerdcubed.py
nest.py
neteasemusic.py
netverse.py
netzkino.py
newgrounds.py
newspicks.py
newsy.py
nextmedia.py
nexx.py
nfb.py
nfhsnetwork.py
nfl.py
nhk.py
nhl.py
nick.py
niconico.py
niconicochannelplus.py
ninaprotocol.py
ninecninemedia.py
ninegag.py
ninenews.py
ninenow.py
nintendo.py
nitter.py
nobelprize.py
noice.py
nonktube.py
noodlemagazine.py
noovo.py
nosnl.py
nova.py
novaplay.py
nowness.py
noz.py
npo.py
npr.py
nrk.py
nrl.py
nts.py
ntvcojp.py
ntvde.py
ntvru.py
nubilesporn.py
nuevo.py
nuum.py
nuvid.py
nytimes.py
nzherald.py
nzonscreen.py
nzz.py
odkmedia.py
odnoklassniki.py
oftv.py
oktoberfesttv.py
olympics.py
on24.py
once.py
ondemandkorea.py
onefootball.py
onenewsnz.py
oneplace.py
onet.py
onionstudios.py
opencast.py
openload.py
openrec.py
ora.py
orf.py
outsidetv.py
owncloud.py
packtpub.py
palcomp3.py
panopto.py
paramountplus.py
parler.py
parlview.py
parti.py
patreon.py
pbs.py
pearvideo.py
peekvids.py
peertube.py
peertv.py
peloton.py
performgroup.py
periscope.py
pgatour.py
philharmoniedeparis.py
phoenix.py
photobucket.py
pialive.py
piapro.py
picarto.py
piksel.py
pinkbike.py
pinterest.py
piramidetv.py
pixivsketch.py
pladform.py
planetmarathi.py
platzi.py
playplustv.py
playsuisse.py
playtvak.py
playwire.py
pluralsight.py
plutotv.py
plvideo.py
podbayfm.py
podchaser.py
podomatic.py
pokergo.py
polsatgo.py
polskieradio.py
popcorntimes.py
popcorntv.py
pornbox.py
pornflip.py
pornhub.py
pornotube.py
pornovoisines.py
pornoxo.py
pr0gramm.py
prankcast.py
premiershiprugby.py
presstv.py
projectveritas.py
prosiebensat1.py
prx.py
puhutv.py
puls4.py
pyvideo.py
qdance.py
qingting.py
qqmusic.py
r7.py
radiko.py
radiocanada.py
radiocomercial.py
radiode.py
radiofrance.py
radiojavan.py
radiokapital.py
radioradicale.py
radiozet.py
radlive.py
rai.py
raywenderlich.py
rbgtum.py
rcs.py
rcti.py
rds.py
redbee.py
redbulltv.py
reddit.py
redge.py
redgifs.py
redtube.py
rentv.py
restudy.py
reuters.py
reverbnation.py
rheinmaintv.py
ridehome.py
rinsefm.py
rmcdecouverte.py
rockstargames.py
rokfin.py
roosterteeth.py
rottentomatoes.py
roya.py
rozhlas.py
rte.py
rtl2.py
rtlnl.py
rtnews.py
rtp.py
rtrfm.py
rts.py
rtvcplay.py
rtve.py
rtvs.py
rtvslo.py
rudovideo.py
rule34video.py
rumble.py
rutube.py
rutv.py
ruutu.py
ruv.py
s4c.py
safari.py
saitosan.py
samplefocus.py
sapo.py
sbs.py
sbscokr.py
screen9.py
screencast.py
screencastify.py
screencastomatic.py
screenrec.py
scrippsnetworks.py
scrolller.py
scte.py
sejmpl.py
sen.py
senalcolombia.py
senategov.py
sendtonews.py
servus.py
sevenplus.py
sexu.py
seznamzpravy.py
shahid.py
sharepoint.py
sharevideos.py
shemaroome.py
showroomlive.py
sibnet.py
simplecast.py
sina.py
sixplay.py
skeb.py
sky.py
skyit.py
skylinewebcams.py
skynewsarabia.py
skynewsau.py
slideshare.py
slideslive.py
slutload.py
smotrim.py
snapchat.py
snotr.py
softwhiteunderbelly.py
sohu.py
sonyliv.py
soundcloud.py
soundgasm.py
southpark.py
sovietscloset.py
spankbang.py
spiegel.py
spike.py
sport5.py
sportbox.py
sportdeutschland.py
spotify.py
spreaker.py
springboardplatform.py
sprout.py
sproutvideo.py
srgssr.py
srmediathek.py
stacommu.py
stageplus.py
stanfordoc.py
startrek.py
startv.py
steam.py
stitcher.py
storyfire.py
streaks.py
streamable.py
streamcz.py
streetvoice.py
stretchinternet.py
stripchat.py
stv.py
subsplash.py
substack.py
sunporno.py
sverigesradio.py
svt.py
swearnet.py
syfy.py
syvdk.py
sztvhu.py
tagesschau.py
taptap.py
tass.py
tbs.py
tbsjp.py
teachable.py
teachertube.py
teachingchannel.py
teamcoco.py
teamtreehouse.py
ted.py
tele13.py
tele5.py
telebruxelles.py
telecaribe.py
telecinco.py
telegraaf.py
telegram.py
telemb.py
telemundo.py
telequebec.py
teletask.py
telewebion.py
tempo.py
tencent.py
tennistv.py
tenplay.py
testurl.py
tf1.py
tfo.py
theguardian.py
theholetv.py
theintercept.py
theplatform.py
thestar.py
thesun.py
theweatherchannel.py
thisamericanlife.py
thisoldhouse.py
thisvid.py
threeqsdn.py
threespeak.py
tiktok.py
tmz.py
tnaflix.py
toggle.py
toggo.py
tonline.py
toongoggles.py
toutv.py
toypics.py
traileraddict.py
triller.py
trovo.py
trtcocuk.py
trtworld.py
trueid.py
trunews.py
truth.py
trutv.py
tube8.py
tubetugraz.py
tubitv.py
tumblr.py
tunein.py
turner.py
tv2.py
tv24ua.py
tv2dk.py
tv2hu.py
tv4.py
tv5mondeplus.py
tv5unis.py
tva.py
tvanouvelles.py
tvc.py
tver.py
tvigle.py
tviplayer.py
tvland.py
tvn24.py
tvnoe.py
tvopengr.py
tvp.py
tvplay.py
tvplayer.py
tvw.py
tweakers.py
twentymin.py
twentythreevideo.py
twitcasting.py
twitch.py
twitter.py
txxx.py
udemy.py
udn.py
ufctv.py
ukcolumn.py
uktvplay.py
uliza.py
umg.py
unistra.py
unity.py
unsupported.py
uol.py
uplynk.py
urort.py
urplay.py
usanetwork.py
usatoday.py
ustream.py
ustudio.py
utreon.py
varzesh3.py
vbox7.py
veo.py
vesti.py
vevo.py
vgtv.py
vh1.py
vice.py
viddler.py
videa.py
videocampus_sachsen.py
videodetective.py
videofyme.py
videoken.py
videomore.py
videopress.py
vidflex.py
vidio.py
vidlii.py
vidly.py
vidyard.py
viewlift.py
viidea.py
vimeo.py
vimm.py
viously.py
viqeo.py
viu.py
vk.py
vocaroo.py
vodpl.py
vodplatform.py
voicy.py
volejtv.py
voxmedia.py
vrsquare.py
vrt.py
vtm.py
vtv.py
vuclip.py
vvvvid.py
walla.py
washingtonpost.py
wat.py
wdr.py
webcamerapl.py
webcaster.py
webofstories.py
weibo.py
weiqitv.py
weverse.py
wevidi.py
weyyak.py
whowatch.py
whyp.py
wikimedia.py
wimbledon.py
wimtv.py
wistia.py
wordpress.py
worldstarhiphop.py
wppilot.py
wrestleuniverse.py
wsj.py
wwe.py
wykop.py
xanimu.py
xboxclips.py
xhamster.py
xiaohongshu.py
ximalaya.py
xinpianchang.py
xminus.py
xnxx.py
xstream.py
xvideos.py
xxxymovies.py
yahoo.py
yandexdisk.py
yandexmusic.py
yandexvideo.py
yapfiles.py
yappy.py
yle_areena.py
youjizz.py
youku.py
younow.py
youporn.py
zaiko.py
zapiks.py
zattoo.py
zdf.py
zee5.py
zeenews.py
zenporn.py
zetland.py
zhihu.py
zingmp3.py
zoom.py
zype.py
networking
postprocessor
utils
YoutubeDL.py
__init__.py
__main__.py
aes.py
cache.py
cookies.py
globals.py
jsinterp.py
minicurses.py
options.py
plugins.py
socks.py
update.py
version.py
webvtt.py
__init__.py
__main__.py
download.sh
manifest.json
plugin.py
README.md
src
user_config
.gitignore
LICENSE
README.md
pyrightconfig.json
SolarFM/plugins/youtube_download/yt_dlp/extractor/japandiet.py

278 lines
10 KiB
Python
Raw Normal View History

2023-02-20 19:18:45 -06:00
import re
2025-05-02 16:11:08 -05:00
from .common import InfoExtractor
2023-02-20 19:18:45 -06:00
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
join_nonempty,
parse_qs,
smuggle_url,
traverse_obj,
try_call,
2025-05-02 16:11:08 -05:00
unsmuggle_url,
2023-02-20 19:18:45 -06:00
)
def _parse_japanese_date(text):
if not text:
return None
ERA_TABLE = {
'明治': 1868,
'大正': 1912,
'昭和': 1926,
'平成': 1989,
'令和': 2019,
}
ERA_RE = '|'.join(map(re.escape, ERA_TABLE.keys()))
mobj = re.search(rf'({ERA_RE})?(\d+)年(\d+)月(\d+)日', re.sub(r'[\s\u3000]+', '', text))
if not mobj:
return None
era, year, month, day = mobj.groups()
year, month, day = map(int, (year, month, day))
if era:
# example input: 令和5年3月34日
# even though each era have their end, don't check here
year += ERA_TABLE[era]
return '%04d%02d%02d' % (year, month, day)
def _parse_japanese_duration(text):
mobj = re.search(r'(?:(\d+)日間?)?(?:(\d+)時間?)?(?:(\d+)分)?(?:(\d+)秒)?', re.sub(r'[\s\u3000]+', '', text or ''))
if not mobj:
return
2025-05-02 16:11:08 -05:00
days, hours, mins, secs = (int_or_none(x, default=0) for x in mobj.groups())
2023-02-20 19:18:45 -06:00
return secs + mins * 60 + hours * 60 * 60 + days * 24 * 60 * 60
class ShugiinItvBaseIE(InfoExtractor):
    """Room-discovery helpers shared by the shugiintv.go.jp extractors."""

    # Room entries parsed from the index page. Stored on the base class so
    # that all subclasses reuse the same download.
    _INDEX_ROOMS = None

    @classmethod
    def _find_rooms(cls, webpage):
        """Return one ``url`` result per live room linked from *webpage*.

        The regex groups (room id, raw title markup) are smuggled into each
        URL under the ``g`` key so the room extractor does not have to fetch
        the index page again.
        """
        return [{
            '_type': 'url',
            'id': x.group(1),
            'title': clean_html(x.group(2)).strip(),
            'url': smuggle_url(f'https://www.shugiintv.go.jp/jp/index.php?room_id={x.group(1)}', {'g': x.groups()}),
            # The generated URL matches ShugiinItvLiveRoomIE._VALID_URL and
            # that extractor is the one that unpacks the smuggled ``g``
            # payload, so the entry has to be routed to it.
            'ie_key': ShugiinItvLiveRoomIE.ie_key(),
        } for x in re.finditer(r'(?s)<a\s+href="[^"]+\?room_id=(room\d+)"\s*class="play_live".+?class="s12_14">(.+?)</td>', webpage)]

    def _fetch_rooms(self):
        """Return the room list, downloading the index page when none is cached."""
        if not self._INDEX_ROOMS:
            webpage = self._download_webpage(
                'https://www.shugiintv.go.jp/jp/index.php', None,
                encoding='euc-jp', note='Downloading proceedings info')
            ShugiinItvBaseIE._INDEX_ROOMS = self._find_rooms(webpage)
        return self._INDEX_ROOMS
class ShugiinItvLiveIE(ShugiinItvBaseIE):
    """Playlist extractor for the index page: every room currently listed."""

    _VALID_URL = r'https?://(?:www\.)?shugiintv\.go\.jp/(?:jp|en)(?:/index\.php)?$'
    IE_DESC = '衆議院インターネット審議中継'
    _TESTS = [{
        'url': 'https://www.shugiintv.go.jp/jp/index.php',
        'info_dict': {
            '_type': 'playlist',
            'title': 'All proceedings for today',
        },
        # expect at least one proceedings is running
        'playlist_mincount': 1,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept *url* only if neither of the more specific extractors claims it."""
        matched = super().suitable(url)
        if not matched:
            return matched
        for specific_ie in (ShugiinItvLiveRoomIE, ShugiinItvVodIE):
            if specific_ie.suitable(url):
                return False
        return True

    def _real_extract(self, url):
        """Return a playlist holding one entry per room found on the index page."""
        notice = 'Downloading all running proceedings. To specify one proceeding, use direct link from the website'
        self.to_screen(notice)
        rooms = self._fetch_rooms()
        return self.playlist_result(rooms, playlist_title='All proceedings for today')
class ShugiinItvLiveRoomIE(ShugiinItvBaseIE):
    """Extractor for the live stream of a single room (``?room_id=roomNN``)."""

    _VALID_URL = r'https?://(?:www\.)?shugiintv\.go\.jp/(?:jp|en)/index\.php\?room_id=(?P<id>room\d+)'
    IE_DESC = '衆議院インターネット審議中継 (中継)'
    _TESTS = [{
        'url': 'https://www.shugiintv.go.jp/jp/index.php?room_id=room01',
        'info_dict': {
            'id': 'room01',
            'title': '内閣委員会',
        },
        'skip': 'this runs for a time and not every day',
    }, {
        'url': 'https://www.shugiintv.go.jp/jp/index.php?room_id=room11',
        'info_dict': {
            'id': 'room11',
            'title': '外務委員会',
        },
        'skip': 'this runs for a time and not every day',
    }]

    def _real_extract(self, url):
        """Build the live-stream info dict for the room named in *url*."""
        url, smuggled = unsmuggle_url(url, default={})
        room_groups = smuggled.get('g')
        if room_groups:
            # Room id and title were smuggled in by the index-page listing
            room_id, title = room_groups
        else:
            # Direct link: look the title up in the index-page room list
            room_id = self._match_id(url)
            rooms = self._fetch_rooms()
            title = traverse_obj(
                rooms, (lambda _, room: room['id'] == room_id, 'title'), get_all=False)

        playlist_url = f'https://hlslive.shugiintv.go.jp/{room_id}/amlst:{room_id}/playlist.m3u8'
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            playlist_url, room_id, ext='mp4')

        return {
            'id': room_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
        }
class ShugiinItvVodIE(ShugiinItvBaseIE):
    """Extractor for archived recordings (``?ex=VL&...&deli_id=NNN``)."""

    _VALID_URL = r'https?://(?:www\.)?shugiintv\.go\.jp/(?:jp|en)/index\.php\?ex=VL(?:\&[^=]+=[^&]*)*\&deli_id=(?P<id>\d+)'
    IE_DESC = '衆議院インターネット審議中継 (ビデオライブラリ)'
    _TESTS = [{
        'url': 'https://www.shugiintv.go.jp/jp/index.php?ex=VL&media_type=&deli_id=53846',
        'info_dict': {
            'id': '53846',
            'title': 'ウクライナ大統領国会演説(オンライン)',
            'release_date': '20220323',
            'chapters': 'count:4',
        },
    }, {
        'url': 'https://www.shugiintv.go.jp/en/index.php?ex=VL&media_type=&deli_id=53846',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return formats, title, release date and chapters for one recording.

        The Japanese page is always the one downloaded, whichever language
        variant *url* points at.  Title, release date and chapter data are
        optional: a page lacking them still yields a result.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            f'https://www.shugiintv.go.jp/jp/index.php?ex=VL&media_type=&deli_id={video_id}', video_id,
            encoding='euc-jp')

        m3u8_url = self._search_regex(
            r'id="vtag_src_base_vod"\s*value="(http.+?\.m3u8)"', webpage, 'm3u8 url')
        # Rewrite a plain-http playlist URL to https
        m3u8_url = re.sub(r'^http://', 'https://', m3u8_url)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            m3u8_url, video_id, ext='mp4')

        title = self._html_search_regex(
            (r'<td\s+align="left">(.+)\s*\(\d+分\)',
             r'<TD.+?<IMG\s*src=".+?/spacer\.gif".+?height="15">(.+?)<IMG'), webpage, 'title', fatal=False)

        release_date = _parse_japanese_date(self._html_search_regex(
            r'開会日</td>\s*<td.+?/td>\s*<TD>(.+?)</TD>',
            webpage, 'release date', fatal=False))

        # start_time is None when the link has no usable ``time`` parameter
        chapters = [{
            'title': clean_html(chp.group(2)).strip(),
            'start_time': try_call(lambda: float(parse_qs(chp.group(1))['time'][0].strip())),
        } for chp in re.finditer(r'(?i)<A\s+HREF="([^"]+?)"\s*class="play_vod">(?!<img)(.+)</[Aa]>', webpage)]

        # NOTE: there are blanks at the first and the end of the videos,
        # so getting/providing the video duration is not possible
        # also, the exact end_time for the last chapter is unknown (we can get at most minutes of granularity)
        rows = re.findall(r'(?s)<TR\s*class="s14_24">(.+?)</TR>', webpage)
        cells = re.findall(r'<TD.+?</TD>', rows[-1]) if rows else []
        if chapters and cells:
            last_start = chapters[-1]['start_time']
            last_duration = _parse_japanese_duration(clean_html(cells[-1]))
            # Only set end_time when both the start and a non-zero duration are known
            if last_start is not None and last_duration:
                chapters[-1]['end_time'] = last_start + last_duration

        return {
            'id': video_id,
            'title': title,
            'release_date': release_date,
            'chapters': chapters,
            'formats': formats,
            'subtitles': subtitles,
        }
class SangiinInstructionIE(InfoExtractor):
    """Matches the Sangiin index page only to tell the user which link to use."""

    _VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
    IE_DESC = False  # this shouldn't be listed as a supported site

    def _real_extract(self, url):
        """Always raise an expected ExtractorError carrying the instructions."""
        instructions = (
            'Copy the link from the button below the video description/player '
            'and use that link to download. If there is no button in the frame, '
            'get the URL of the frame showing the video.')
        raise ExtractorError(instructions, expected=True)
2023-02-20 19:18:45 -06:00
class SangiinIE(InfoExtractor):
    """Extractor for webtv.sangiin.go.jp detail pages (``detail.php?sid=NNN``)."""

    _VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/detail\.php\?sid=(?P<id>\d+)'
    IE_DESC = '参議院インターネット審議中継 (archive)'
    _TESTS = [{
        'url': 'https://www.webtv.sangiin.go.jp/webtv/detail.php?sid=7052',
        'info_dict': {
            'id': '7052',
            'title': '2022年10月7日 本会議',
            'description': 'md5:0a5fed523f95c88105a0b0bf1dd71489',
            'upload_date': '20221007',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.webtv.sangiin.go.jp/webtv/detail.php?sid=7037',
        'info_dict': {
            'id': '7037',
            'title': '2022年10月3日 開会式',
            'upload_date': '20221003',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.webtv.sangiin.go.jp/webtv/detail.php?sid=7076',
        'info_dict': {
            'id': '7076',
            'title': '2022年10月27日 法務委員会',
            'upload_date': '20221027',
            'ext': 'mp4',
            'is_live': True,
        },
        'skip': 'this live is turned into archive after it ends',
    }]

    def _real_extract(self, url):
        """Return formats and metadata scraped from the detail page at *url*.

        The reported title is the page's date text followed by the meeting
        name, joined with a space; either part may be missing.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        date = self._html_search_regex(
            r'<dt[^>]*>\s*開会日\s*</dt>\s*<dd[^>]*>\s*(.+?)\s*</dd>', webpage,
            'date', fatal=False)
        upload_date = _parse_japanese_date(date)

        title = self._html_search_regex(
            r'<dt[^>]*>\s*会議名\s*</dt>\s*<dd[^>]*>\s*(.+?)\s*</dd>', webpage,
            'title', fatal=False)

        # some videos don't have the elements, so assume it's missing
        description = self._html_search_regex(
            r'会議の経過\s*</h3>\s*<span[^>]*>(.+?)</span>', webpage,
            'description', default=None)

        # this row appears only when it's livestream
        is_live = bool(self._html_search_regex(
            r'<dt[^>]*>\s*公報掲載時刻\s*</dt>\s*<dd[^>]*>\s*(.+?)\s*</dd>', webpage,
            'is_live', default=None))

        m3u8_url = self._search_regex(
            r'var\s+videopath\s*=\s*(["\'])([^"\']+)\1', webpage,
            'm3u8 url', group=2)

        formats, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')

        return {
            'id': video_id,
            'title': join_nonempty(date, title, delim=' '),
            'description': description,
            'upload_date': upload_date,
            'formats': formats,
            'subtitles': subs,
            'is_live': is_live,
        }