Unverified Commit 3d121b3c authored by nao20010128nao's avatar nao20010128nao
Browse files

Merge branch 'master' of github.com:ytdl-patched/ytdl-patched; branch 'master'...

Merge branch 'master' of github.com:ytdl-patched/ytdl-patched; branch 'master' of https://github.com/ytdl-org/youtube-dl

* 'master' of github.com:ytdl-patched/ytdl-patched:
* 'master' of https://github.com/ytdl-org/youtube-dl:
  [liveleak] Remove extractor (closes #17625, closes #24222) (#29331)
  [pornhub] Add support for pornhubthbh7ap3u.onion
  [pornhub] Detect geo restriction
  [pornhub] Dismiss tbr extracted from download URLs (closes #28927)
  [curiositystream:collection] Extend _VALID_URL (closes #26326, closes #29117)
  [youtube] Make get_video_info processing more robust (closes #29333)
  [youtube] Workaround for get_video_info request (refs #29333)
  [bilibili] Strip uploader name (#29202)
  [youtube] Update invidious instance list (#29281)
  [umg:de] Update GraphQL API URL (#29304)
  [nrk] Switch psapi URL to https (closes #29344)
  [postprocessor/ffmpeg] Show ffmpeg output on error (refs #22680) (#29336)
parents 4f1050e9 379f52a4
Pipeline #6110 passed with stage
in 1 minute and 4 seconds
......@@ -237,7 +237,7 @@ class BiliBiliIE(InfoExtractor):
webpage)
if uploader_mobj:
info.update({
'uploader': uploader_mobj.group('name'),
'uploader': uploader_mobj.group('name').strip(),
'uploader_id': uploader_mobj.group('id'),
})
if not info.get('uploader'):
......
......@@ -145,7 +145,7 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
IE_NAME = 'curiositystream:collection'
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://app.curiositystream.com/collection/2',
'info_dict': {
......@@ -157,6 +157,9 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
}, {
'url': 'https://curiositystream.com/series/2',
'only_matching': True,
}, {
'url': 'https://curiositystream.com/collections/36',
'only_matching': True,
}]
def _real_extract(self, url):
......
......@@ -631,10 +631,6 @@ from .linkedin import (
from .linuxacademy import LinuxAcademyIE
from .litv import LiTVIE
from .livejournal import LiveJournalIE
from .liveleak import (
LiveLeakIE,
LiveLeakEmbedIE,
)
from .livestream import (
LivestreamIE,
LivestreamOriginalIE,
......
......@@ -86,7 +86,6 @@ from .jwplatform import JWPlatformIE
from .digiteka import DigitekaIE
from .arkena import ArkenaIE
from .instagram import InstagramIE
from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE
from .theplatform import ThePlatformIE
from .kaltura import KalturaIE
......@@ -1636,31 +1635,6 @@ class GenericIE(InfoExtractor):
'upload_date': '20160409',
},
},
# LiveLeak embed
{
'url': 'http://www.wykop.pl/link/3088787/',
'md5': '7619da8c820e835bef21a1efa2a0fc71',
'info_dict': {
'id': '874_1459135191',
'ext': 'mp4',
'title': 'Man shows poor quality of new apartment building',
'description': 'The wall is like a sand pile.',
'uploader': 'Lake8737',
},
'add_ie': [LiveLeakIE.ie_key()],
},
# Another LiveLeak embed pattern (#13336)
{
'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
'info_dict': {
'id': '2eb_1496309988',
'ext': 'mp4',
'title': 'Thief robs place where everyone was armed',
'description': 'md5:694d73ee79e535953cf2488562288eee',
'uploader': 'brazilwtf',
},
'add_ie': [LiveLeakIE.ie_key()],
},
# Duplicated embedded video URLs
{
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
......@@ -3290,11 +3264,6 @@ class GenericIE(InfoExtractor):
waitlist.append(self.url_result(
self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key()))
# Look for LiveLeak embeds
liveleak_urls = LiveLeakIE._extract_urls(webpage)
if liveleak_urls:
waitlist.append(self.playlist_from_matches(liveleak_urls, video_id, video_title))
# Look for 3Q SDN embeds
threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
if threeqsdn_url:
......
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class LiveLeakIE(InfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
_TESTS = [{
'url': 'http://www.liveleak.com/view?i=757_1364311680',
'md5': '0813c2430bea7a46bf13acf3406992f4',
'info_dict': {
'id': '757_1364311680',
'ext': 'mp4',
'description': 'extremely bad day for this guy..!',
'uploader': 'ljfriel2',
'title': 'Most unlucky car accident',
'thumbnail': r're:^https?://.*\.jpg$'
}
}, {
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
'info_dict': {
'id': 'f93_1390833151',
'ext': 'mp4',
'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
'uploader': 'ARD_Stinkt',
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
'thumbnail': r're:^https?://.*\.jpg$'
}
}, {
# Prochan embed
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
'md5': '42c6d97d54f1db107958760788c5f48f',
'info_dict': {
'id': '4f7_1392687779',
'ext': 'mp4',
'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
'uploader': 'CapObveus',
'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
'age_limit': 18,
},
'skip': 'Video is dead',
}, {
# Covers https://github.com/ytdl-org/youtube-dl/pull/5983
# Multiple resolutions
'url': 'http://www.liveleak.com/view?i=801_1409392012',
'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
'info_dict': {
'id': '801_1409392012',
'ext': 'mp4',
'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
'uploader': 'bony333',
'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
'thumbnail': r're:^https?://.*\.jpg$'
}
}, {
# Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
'url': 'http://m.liveleak.com/view?i=763_1473349649',
'add_ie': ['Youtube'],
'info_dict': {
'id': '763_1473349649',
'ext': 'mp4',
'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
'uploader': 'Ziz',
'upload_date': '20160908',
'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.liveleak.com/view?i=677_1439397581',
'info_dict': {
'id': '677_1439397581',
'title': 'Fuel Depot in China Explosion caught on video',
},
'playlist_count': 3,
}, {
'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
'only_matching': True,
}, {
# No original video
'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
webpage)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
video_description = self._og_search_description(webpage)
video_uploader = self._html_search_regex(
r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
age_limit = int_or_none(self._search_regex(
r'you confirm that you are ([0-9]+) years and over.',
webpage, 'age limit', default=None))
video_thumbnail = self._og_search_thumbnail(webpage)
entries = self._parse_html5_media_entries(url, webpage, video_id)
if not entries:
# Maybe an embed?
embed_url = self._search_regex(
r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
webpage, 'embed URL')
return {
'_type': 'url_transparent',
'url': embed_url,
'id': video_id,
'title': video_title,
'description': video_description,
'uploader': video_uploader,
'age_limit': age_limit,
}
for idx, info_dict in enumerate(entries):
formats = []
for a_format in info_dict['formats']:
if not a_format.get('height'):
a_format['height'] = int_or_none(self._search_regex(
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
default=None))
formats.append(a_format)
# Removing '.*.mp4' gives the raw video, which is essentially
# the same video without the LiveLeak logo at the top (see
# https://github.com/ytdl-org/youtube-dl/pull/4768)
orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
if a_format['url'] != orig_url:
format_id = a_format.get('format_id')
format_id = 'original' + ('-' + format_id if format_id else '')
if self._is_valid_url(orig_url, video_id, format_id):
formats.append({
'format_id': format_id,
'url': orig_url,
'preference': 1,
})
self._sort_formats(formats)
info_dict['formats'] = formats
# Don't append entry ID for one-video pages to keep backward compatibility
if len(entries) > 1:
info_dict['id'] = '%s_%s' % (video_id, idx + 1)
else:
info_dict['id'] = video_id
info_dict.update({
'title': video_title,
'description': video_description,
'uploader': video_uploader,
'age_limit': age_limit,
'thumbnail': video_thumbnail,
})
return self.playlist_result(entries, video_id, video_title)
class LiveLeakEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'
# See generic.py for actual test cases
_TESTS = [{
'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
'only_matching': True,
}, {
'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
'only_matching': True,
}]
def _real_extract(self, url):
kind, video_id = re.match(self._VALID_URL, url).groups()
if kind == 'f':
webpage = self._download_webpage(url, video_id)
liveleak_url = self._search_regex(
r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
webpage, 'LiveLeak URL', group='url')
else:
liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
......@@ -58,7 +58,7 @@ class NRKBaseIE(InfoExtractor):
def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
return self._download_json(
urljoin('http://psapi.nrk.no/', path),
urljoin('https://psapi.nrk.no/', path),
video_id, note or 'Downloading %s JSON' % item,
fatal=fatal, query=query,
headers={'Accept-Encoding': 'gzip, deflate, br'})
......
......@@ -30,6 +30,7 @@ from ..utils import (
class PornHubBaseIE(InfoExtractor):
_NETRC_MACHINE = 'pornhub'
_PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)'
def _download_webpage_handle(self, *args, **kwargs):
def dl(*args, **kwargs):
......@@ -122,11 +123,13 @@ class PornHubIE(PornHubBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:[^/]+\.)?
%s
/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[\da-z]+)
'''
''' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'md5': 'a6391306d050e4547f62b3f485dd9ba9',
......@@ -236,6 +239,13 @@ class PornHubIE(PornHubBaseIE):
}, {
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
'only_matching': True,
}, {
# geo restricted
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
'only_matching': True,
}, {
'url': 'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156',
'only_matching': True,
}]
@staticmethod
......@@ -275,6 +285,11 @@ class PornHubIE(PornHubBaseIE):
'PornHub said: %s' % error_msg,
expected=True, video_id=video_id)
if any(re.search(p, webpage) for p in (
r'class=["\']geoBlocked["\']',
r'>\s*This content is unavailable in your country')):
self.raise_geo_restricted()
# video_title from flashvars contains whitespace instead of non-ASCII (see
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
# on that anymore.
......@@ -408,17 +423,14 @@ class PornHubIE(PornHubBaseIE):
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
return
tbr = None
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
if mobj:
if not height:
height = int(mobj.group('height'))
tbr = int(mobj.group('tbr'))
if not height:
height = int_or_none(self._search_regex(
r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height',
default=None))
formats.append({
'url': format_url,
'format_id': '%dp' % height if height else None,
'height': height,
'tbr': tbr,
})
for video_url, height in video_urls:
......@@ -440,7 +452,8 @@ class PornHubIE(PornHubBaseIE):
add_format(video_url, height)
continue
add_format(video_url)
self._sort_formats(formats)
self._sort_formats(
formats, field_preference=('height', 'width', 'fps', 'format_id'))
video_uploader = self._html_search_regex(
r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
......@@ -513,7 +526,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
class PornHubUserIE(PornHubPlaylistBaseIE):
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph',
'playlist_mincount': 118,
......@@ -542,6 +555,9 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
# Same as before, multi page
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
'only_matching': True,
}, {
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph',
'only_matching': True,
}]
def _real_extract(self, url):
......@@ -617,7 +633,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
_VALID_URL = r'https?://(?:[^/]+\.)?%s/(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
'only_matching': True,
......@@ -722,6 +738,9 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
}, {
'url': 'https://de.pornhub.com/playlist/4667351',
'only_matching': True,
}, {
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos',
'only_matching': True,
}]
@classmethod
......@@ -732,7 +751,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
'info_dict': {
......@@ -742,6 +761,9 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
}, {
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
'only_matching': True,
}, {
'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload',
'only_matching': True,
}]
......
......@@ -28,7 +28,7 @@ class UMGDeIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'https://api.universal-music.de/graphql',
'https://graphql.universal-music.de/',
video_id, query={
'query': '''{
universalMusic(channel:16) {
......@@ -56,11 +56,9 @@ class UMGDeIE(InfoExtractor):
formats = []
def add_m3u8_format(format_id):
m3u8_formats = self._extract_m3u8_formats(
formats.extend(self._extract_m3u8_formats(
hls_url_template % format_id, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal='False')
if m3u8_formats and m3u8_formats[0].get('height'):
formats.extend(m3u8_formats)
'm3u8_native', m3u8_id='hls', fatal=False))
for f in video_data.get('formats', []):
f_url = f.get('url')
......
......@@ -392,7 +392,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'(?:www\.)?invidious\.13ad\.de',
r'(?:www\.)?invidious\.mastodon\.host',
r'(?:www\.)?invidious\.zapashcanon\.fr',
r'(?:www\.)?invidious\.kavin\.rocks',
r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
r'(?:www\.)?invidious\.tinfoil-hat\.net',
r'(?:www\.)?invidious\.himiko\.cloud',
r'(?:www\.)?invidious\.reallyancient\.tech',
......@@ -419,6 +419,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'(?:www\.)?invidious\.toot\.koeln',
r'(?:www\.)?invidious\.fdn\.fr',
r'(?:www\.)?watch\.nettohikari\.com',
r'(?:www\.)?invidious\.namazso\.eu',
r'(?:www\.)?invidious\.silkky\.cloud',
r'(?:www\.)?invidious\.exonip\.de',
r'(?:www\.)?invidious\.riverside\.rocks',
r'(?:www\.)?invidious\.blamefran\.net',
r'(?:www\.)?invidious\.moomoo\.de',
r'(?:www\.)?ytb\.trom\.tf',
r'(?:www\.)?yt\.cyberhost\.uk',
r'(?:www\.)?kgg2m7yk5aybusll\.onion',
r'(?:www\.)?qklhadlycap4cnod\.onion',
r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
......@@ -427,6 +435,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
)
_VALID_URL = r"""(?x)^
(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment