Skip to content
Snippets Groups Projects
Commit 35264817 authored by Lesmiscore
Browse files

extractor/youtube: download manifestless post-live videos from the start

parent ab790759
No related branches found
Tags 1655130352
No related merge requests found
......@@ -13,7 +13,8 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
return FFmpegFD
elif (set(downloaders) == {DashSegmentsFD}
and not (to_stdout and len(protocols) > 1)
and set(protocols) == {'http_dash_segments_generator'}):
and set(protocols) == {'http_dash_segments_generator'}
and info_dict.get('is_live')):
return DashSegmentsFD
elif len(downloaders) == 1:
return downloaders[0]
......
......@@ -2503,10 +2503,11 @@ def __init__(self, *args, **kwargs):
self._code_cache = {}
self._player_cache = {}
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, plml):
# plml = Post-live manifestless
lock = threading.Lock()
is_live = True
is_live = not plml
start_time = time.time()
formats = [f for f in formats if f.get('is_from_start')]
......@@ -2542,12 +2543,12 @@ def mpd_feed(format_id, delay):
return f['manifest_url'], f['manifest_stream_number'], is_live
for f in formats:
f['is_live'] = True
f['is_live'] = is_live
f['protocol'] = 'http_dash_segments_generator'
f['fragments'] = functools.partial(
self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed, plml)
def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, plml, ctx):
FETCH_SPAN, MAX_DURATION = 5, 432000
mpd_url, stream_number, is_live = None, None, True
......@@ -2647,6 +2648,11 @@ def _extract_sequence_from_mpd(refresh_sequence, immediate):
except ExtractorError:
continue
if plml:
# Stop after the first iteration when running in post-live manifestless mode;
# the fragment count no longer increases in that mode
break
time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
def _extract_player_url(self, *ytcfgs, webpage=None):
......@@ -3425,7 +3431,7 @@ def append_client(*client_names):
self.report_warning(last_error)
return prs, player_url
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, plml, duration):
itags, stream_ids = {}, []
itag_qualities, res_qualities = {}, {0: None}
q = qualities([
......@@ -3572,7 +3578,7 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, i
dct['container'] = dct['ext'] + '_dash'
yield dct
live_from_start = is_live and self.get_param('live_from_start')
live_from_start = is_live and self.get_param('live_from_start') or plml
skip_manifests = self._configuration_arg('skip')
if not self.get_param('youtube_include_hls_manifest', True):
skip_manifests.append('hls')
......@@ -3676,14 +3682,14 @@ def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
return webpage, master_ytcfg, player_responses, player_url
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, plml=False, duration=None):
live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
is_live = get_first(video_details, 'isLive')
if is_live is None:
is_live = get_first(live_broadcast_details, 'isLiveNow')
streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
*formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, is_live, duration)
*formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, is_live, plml, duration)
return live_broadcast_details, is_live, streaming_data, formats, subtitles
......@@ -3781,8 +3787,11 @@ def feed_entry(name):
or get_first(microformats, 'lengthSeconds')
or parse_duration(search_meta('duration'))) or None
post_live = get_first(video_details, 'isPostLiveDvr')
plml = post_live and (duration or 0) > 4 * 3600
live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
self._list_formats(video_id, microformats, video_details, player_responses, player_url)
self._list_formats(video_id, microformats, video_details, player_responses, player_url, plml=plml)
if not formats:
if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
......@@ -3868,8 +3877,8 @@ def feed_entry(name):
if not duration and live_end_time and live_start_time:
duration = live_end_time - live_start_time
if is_live and self.get_param('live_from_start'):
self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
if is_live and self.get_param('live_from_start') or plml:
self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data, plml)
# formats.extend(self._extract_storyboard(player_responses, duration))
......@@ -3918,13 +3927,9 @@ def feed_entry(name):
'unrecoverable_http_error': (401, 503),
}
if get_first(video_details, 'isPostLiveDvr'):
if post_live:
self.write_debug('Video is in Post-Live Manifestless mode')
info['live_status'] = 'post_live'
if (duration or 0) > 4 * 3600:
self.report_warning(
'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
'This is a known issue and patches are welcome')
subtitles = {}
pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment