Skip to content

Commit

Permalink
[extractor/generic] Add extractor-args hls_key, variant_query (#6567)
Browse files Browse the repository at this point in the history

Authored by: bashonly
  • Loading branch information
bashonly committed Mar 21, 2023
1 parent 06966cb commit c2e0fc4
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 11 deletions.
2 changes: 2 additions & 0 deletions README.md
Expand Up @@ -1798,6 +1798,8 @@ The following extractors use this feature:

#### generic
* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments. Does not apply to ffmpeg
* `variant_query`: Passthrough the master m3u8 URL query to its variant playlist URLs
* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist

#### funimation
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
Expand Down
32 changes: 21 additions & 11 deletions yt_dlp/extractor/generic.py
Expand Up @@ -24,6 +24,7 @@
mimetype2ext,
orderedSet,
parse_duration,
parse_qs,
parse_resolution,
smuggle_url,
str_or_none,
Expand All @@ -32,6 +33,7 @@
unescapeHTML,
unified_timestamp,
unsmuggle_url,
update_url_query,
url_or_none,
urljoin,
variadic,
Expand Down Expand Up @@ -2184,12 +2186,21 @@ def report_detected(self, name, num=1, note=None):

self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}')

def _fragment_query(self, url):
def _extra_manifest_info(self, info, manifest_url):
if self._configuration_arg('fragment_query'):
query_string = urllib.parse.urlparse(url).query
query_string = urllib.parse.urlparse(manifest_url).query
if query_string:
return {'extra_param_to_segment_url': query_string}
return {}
info['extra_param_to_segment_url'] = query_string

hex_or_none = lambda x: x if re.fullmatch(r'(0x)?[\da-f]+', x, re.IGNORECASE) else None
info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key'), {
'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}),
}) or None

if self._configuration_arg('variant_query'):
query = parse_qs(manifest_url)
for fmt in self._downloader._get_formats(info):
fmt['url'] = update_url_query(fmt['url'], query)

def _extract_rss(self, url, video_id, doc):
NS_MAP = {
Expand Down Expand Up @@ -2397,10 +2408,8 @@ def _real_extract(self, url):
subtitles = {}
if format_id.endswith('mpegurl') or ext == 'm3u8':
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
info_dict.update(self._fragment_query(url))
elif format_id.endswith('mpd') or format_id.endswith('dash+xml') or ext == 'mpd':
formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
info_dict.update(self._fragment_query(url))
elif format_id == 'f4m' or ext == 'f4m':
formats = self._extract_f4m_formats(url, video_id, headers=headers)
else:
Expand All @@ -2415,6 +2424,7 @@ def _real_extract(self, url):
'subtitles': subtitles,
'http_headers': headers or None,
})
self._extra_manifest_info(info_dict, url)
return info_dict

if not self.get_param('test', False) and not is_intentional:
Expand All @@ -2427,7 +2437,7 @@ def _real_extract(self, url):
if first_bytes.startswith(b'#EXTM3U'):
self.report_detected('M3U playlist')
info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
info_dict.update(self._fragment_query(url))
self._extra_manifest_info(info_dict, url)
return info_dict

# Maybe it's a direct link to a video?
Expand Down Expand Up @@ -2478,7 +2488,7 @@ def _real_extract(self, url):
doc,
mpd_base_url=full_response.geturl().rpartition('/')[0],
mpd_url=url)
info_dict.update(self._fragment_query(url))
self._extra_manifest_info(info_dict, url)
self.report_detected('DASH manifest')
return info_dict
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
Expand Down Expand Up @@ -2592,7 +2602,7 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
for fmt in formats:
fmt.update(self._fragment_query(src))
self._extra_manifest_info(fmt, src)

if not formats:
formats.append({
Expand Down Expand Up @@ -2795,10 +2805,10 @@ def filter_video(urls):
return [self._extract_xspf_playlist(video_url, video_id)]
elif ext == 'm3u8':
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
entry_info_dict.update(self._fragment_query(video_url))
self._extra_manifest_info(entry_info_dict, video_url)
elif ext == 'mpd':
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
entry_info_dict.update(self._fragment_query(video_url))
self._extra_manifest_info(entry_info_dict, video_url)
elif ext == 'f4m':
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
Expand Down

2 comments on commit c2e0fc4

@magneto261290
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can't it be made to download the fragments with any external downloader, then fetch the key from the custom URI and decrypt the video with the native downloader?

@pukkandan
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Fragment downloading" will need to be implemented for each downloader and it is currently done for only aria2c (which will work with these options).

Please sign in to comment.