From 85df2c37e0f81262367b2a911ac43b924b93fe8b Mon Sep 17 00:00:00 2001 From: Nicolas Cisco Date: Fri, 24 Nov 2023 11:18:55 -0300 Subject: [PATCH 1/6] Fixed mediastream when hls uses extra arguments --- yt_dlp/extractor/mediastream.py | 37 +++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/mediastream.py b/yt_dlp/extractor/mediastream.py index b8cb5a691cc..cf351dbfa75 100644 --- a/yt_dlp/extractor/mediastream.py +++ b/yt_dlp/extractor/mediastream.py @@ -8,6 +8,8 @@ urljoin, ) +from urllib.parse import urlparse, urlencode, parse_qs + class MediaStreamBaseIE(InfoExtractor): _EMBED_BASE_URL = 'https://mdstrm.com/embed' @@ -108,7 +110,9 @@ def _real_extract(self, url): for message in [ 'Debido a tu ubicación no puedes ver el contenido', - 'You are not allowed to watch this video: Geo Fencing Restriction' + 'You are not allowed to watch this video: Geo Fencing Restriction', + 'Este contenido no está disponible en tu zona geográfica.', + 'El contenido sólo está disponible dentro de', ]: if message in webpage: self.raise_geo_restricted() @@ -118,7 +122,36 @@ def _real_extract(self, url): formats, subtitles = [], {} for video_format in player_config['src']: if video_format == 'hls': - fmts, subs = self._extract_m3u8_formats_and_subtitles(player_config['src'][video_format], video_id) + src = player_config['src'][video_format] + params = {} + + uid = self._search_regex(r'window\.MDSTRMUID\s*=\s*["\']([^"\']+)["\'];', webpage, 'uid', fatal = False, default = None) + if uid: + params['uid'] = uid + + sid = self._search_regex(r'window\.MDSTRMSID\s*=\s*["\']([^"\']+)["\'];', webpage, 'sid', fatal = False, default = None) + if sid: + params['sid'] = sid + + pid = self._search_regex(r'window\.MDSTRMPID\s*=\s*["\']([^"\']+)["\'];', webpage, 'pid', fatal = False, default = None) + if pid: + params['pid'] = pid + + version = self._search_regex(r'window\.VERSION\s*=\s*["\']([^"\']+)["\'];', webpage, 'version', fatal = False, default = None) + if version: + params['at'] = 'web-app' + params['av'] = version + + parsed = urlparse(url) + if len(parsed.query) > 0: + qs = parse_qs(parsed.query) + if 'access_token' in qs and len(qs['access_token']) > 0: + params['access_token'] = qs['access_token'][0] + + if len(params): + src = f"{src}?{urlencode(params)}" + + fmts, subs = self._extract_m3u8_formats_and_subtitles(src, video_id) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) elif video_format == 'mpd': From d570ceef6fa35b3d66462844f0d598a1a0538edc Mon Sep 17 00:00:00 2001 From: Nicolas Cisco Date: Fri, 24 Nov 2023 11:36:37 -0300 Subject: [PATCH 2/6] Fixed used of quotes --- yt_dlp/extractor/mediastream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/mediastream.py b/yt_dlp/extractor/mediastream.py index cf351dbfa75..e3cf1ac078a 100644 --- a/yt_dlp/extractor/mediastream.py +++ b/yt_dlp/extractor/mediastream.py @@ -149,7 +149,7 @@ def _real_extract(self, url): params['access_token'] = qs['access_token'][0] if len(params): - src = f"{src}?{urlencode(params)}" + src = f'{src}?{urlencode(params)}' fmts, subs = self._extract_m3u8_formats_and_subtitles(src, video_id) formats.extend(fmts) From 2fecc5c4f34cfa8b926746f2418c2939ad7ad9ee Mon Sep 17 00:00:00 2001 From: Nicolas Cisco Date: Fri, 24 Nov 2023 11:43:55 -0300 Subject: [PATCH 3/6] Fixed flake8 --- yt_dlp/extractor/mediastream.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/mediastream.py b/yt_dlp/extractor/mediastream.py index e3cf1ac078a..09c437829d2 100644 --- a/yt_dlp/extractor/mediastream.py +++ b/yt_dlp/extractor/mediastream.py @@ -125,19 +125,19 @@ def _real_extract(self, url): src = player_config['src'][video_format] params = {} - uid = self._search_regex(r'window\.MDSTRMUID\s*=\s*["\']([^"\']+)["\'];', webpage, 'uid', fatal = False, default = None) + uid = self._search_regex(r'window\.MDSTRMUID\s*=\s*["\']([^"\']+)["\'];', webpage, 'uid', fatal=False, default=None) if uid: params['uid'] = uid - sid = self._search_regex(r'window\.MDSTRMSID\s*=\s*["\']([^"\']+)["\'];', webpage, 'sid', fatal = False, default = None) + sid = self._search_regex(r'window\.MDSTRMSID\s*=\s*["\']([^"\']+)["\'];', webpage, 'sid', fatal=False, default=None) if sid: params['sid'] = sid - pid = self._search_regex(r'window\.MDSTRMPID\s*=\s*["\']([^"\']+)["\'];', webpage, 'pid', fatal = False, default = None) + pid = self._search_regex(r'window\.MDSTRMPID\s*=\s*["\']([^"\']+)["\'];', webpage, 'pid', fatal=False, default=None) if pid: params['pid'] = pid - version = self._search_regex(r'window\.VERSION\s*=\s*["\']([^"\']+)["\'];', webpage, 'version', fatal = False, default = None) + version = self._search_regex(r'window\.VERSION\s*=\s*["\']([^"\']+)["\'];', webpage, 'version', fatal=False, default=None) if version: params['at'] = 'web-app' params['av'] = version From 93a59a7ac22255d9ca45e7d8372fa7eeb372000d Mon Sep 17 00:00:00 2001 From: Nicolas Cisco Date: Tue, 28 Nov 2023 13:01:01 -0300 Subject: [PATCH 4/6] Applied suggestions --- yt_dlp/extractor/mediastream.py | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/mediastream.py b/yt_dlp/extractor/mediastream.py index 09c437829d2..358c586b584 100644 --- a/yt_dlp/extractor/mediastream.py +++ b/yt_dlp/extractor/mediastream.py @@ -6,10 +6,10 @@ remove_end, traverse_obj, urljoin, + parse_qs, + update_url_query ) -from urllib.parse import urlparse, urlencode, parse_qs - class MediaStreamBaseIE(InfoExtractor): _EMBED_BASE_URL = 'https://mdstrm.com/embed' @@ -122,36 +122,29 @@ def _real_extract(self, url): formats, subtitles = [], {} for video_format in player_config['src']: if video_format == 'hls': - src = player_config['src'][video_format] params = {} - uid = self._search_regex(r'window\.MDSTRMUID\s*=\s*["\']([^"\']+)["\'];', webpage, 'uid', fatal=False, default=None) + uid = self._search_regex(r'window\.MDSTRMUID\s*=\s*["\']([^"\']+)["\'];', webpage, 'uid', default=None) if uid: params['uid'] = uid - sid = self._search_regex(r'window\.MDSTRMSID\s*=\s*["\']([^"\']+)["\'];', webpage, 'sid', fatal=False, default=None) + sid = self._search_regex(r'window\.MDSTRMSID\s*=\s*["\']([^"\']+)["\'];', webpage, 'sid', default=None) if sid: params['sid'] = sid - pid = self._search_regex(r'window\.MDSTRMPID\s*=\s*["\']([^"\']+)["\'];', webpage, 'pid', fatal=False, default=None) + pid = self._search_regex(r'window\.MDSTRMPID\s*=\s*["\']([^"\']+)["\'];', webpage, 'pid', default=None) if pid: params['pid'] = pid - version = self._search_regex(r'window\.VERSION\s*=\s*["\']([^"\']+)["\'];', webpage, 'version', fatal=False, default=None) + version = self._search_regex(r'window\.VERSION\s*=\s*["\']([^"\']+)["\'];', webpage, 'version', default=None) if version: params['at'] = 'web-app' params['av'] = version - parsed = urlparse(url) - if len(parsed.query) > 0: - qs = parse_qs(parsed.query) - if 'access_token' in qs and len(qs['access_token']) > 0: - params['access_token'] = qs['access_token'][0] - - if len(params): - src = f'{src}?{urlencode(params)}' + if access_token := parse_qs(url).get('access_token', [None])[0]: + params['access_token'] = access_token - fmts, subs = self._extract_m3u8_formats_and_subtitles(src, video_id) + fmts, subs = self._extract_m3u8_formats_and_subtitles(update_url_query(player_config['src'][video_format], params), video_id) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) elif video_format == 'mpd': From afb1b5afa2d1a9e4236109fbdc6b35325184ef85 Mon Sep 17 00:00:00 2001 From: Nicolas Cisco Date: Tue, 28 Nov 2023 13:48:27 -0300 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> --- yt_dlp/extractor/mediastream.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/yt_dlp/extractor/mediastream.py b/yt_dlp/extractor/mediastream.py index 358c586b584..75f14a818ba 100644 --- a/yt_dlp/extractor/mediastream.py +++ b/yt_dlp/extractor/mediastream.py @@ -122,29 +122,18 @@ def _real_extract(self, url): formats, subtitles = [], {} for video_format in player_config['src']: if video_format == 'hls': - params = {} + params = {'at': 'web-app'} - uid = self._search_regex(r'window\.MDSTRMUID\s*=\s*["\']([^"\']+)["\'];', webpage, 'uid', default=None) - if uid: - params['uid'] = uid - - sid = self._search_regex(r'window\.MDSTRMSID\s*=\s*["\']([^"\']+)["\'];', webpage, 'sid', default=None) - if sid: - params['sid'] = sid - - pid = self._search_regex(r'window\.MDSTRMPID\s*=\s*["\']([^"\']+)["\'];', webpage, 'pid', default=None) - if pid: - params['pid'] = pid - - version = self._search_regex(r'window\.VERSION\s*=\s*["\']([^"\']+)["\'];', webpage, 'version', default=None) - if version: - params['at'] = 'web-app' - params['av'] = version + for name, key in [('MDSTRMUID', 'uid'), ('MDSTRMSID', 'sid'), ('MDSTRMPID', 'pid'), ('VERSION', 'av')]: + if val := self._search_regex(rf'window\.{name}\s*=\s*["\']([^"\']+)["\'];', + webpage, key, default=None): + params[key] = val if access_token := parse_qs(url).get('access_token', [None])[0]: params['access_token'] = access_token - fmts, subs = self._extract_m3u8_formats_and_subtitles(update_url_query(player_config['src'][video_format], params), video_id) + fmts, subs = self._extract_m3u8_formats_and_subtitles( + update_url_query(player_config['src'][video_format], params), video_id) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) elif video_format == 'mpd': From f0208dc9661223f8299b0fe0032084ba95038ba3 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 29 Nov 2023 02:16:34 +0530 Subject: [PATCH 6/6] Apply suggestions from code review --- yt_dlp/extractor/mediastream.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/mediastream.py b/yt_dlp/extractor/mediastream.py index 75f14a818ba..ae0fb2aed27 100644 --- a/yt_dlp/extractor/mediastream.py +++ b/yt_dlp/extractor/mediastream.py @@ -3,11 +3,12 @@ from .common import InfoExtractor from ..utils import ( clean_html, + filter_dict, + parse_qs, remove_end, traverse_obj, + update_url_query, urljoin, - parse_qs, - update_url_query ) @@ -122,18 +123,16 @@ def _real_extract(self, url): formats, subtitles = [], {} for video_format in player_config['src']: if video_format == 'hls': - params = {'at': 'web-app'} - - for name, key in [('MDSTRMUID', 'uid'), ('MDSTRMSID', 'sid'), ('MDSTRMPID', 'pid'), ('VERSION', 'av')]: - if val := self._search_regex(rf'window\.{name}\s*=\s*["\']([^"\']+)["\'];', - webpage, key, default=None): - params[key] = val - - if access_token := parse_qs(url).get('access_token', [None])[0]: - params['access_token'] = access_token + params = { + 'at': 'web-app', + 'access_token': traverse_obj(parse_qs(url), ('access_token', 0)), + } + for name, key in (('MDSTRMUID', 'uid'), ('MDSTRMSID', 'sid'), ('MDSTRMPID', 'pid'), ('VERSION', 'av')): + params[key] = self._search_regex( + rf'window\.{name}\s*=\s*["\']([^"\']+)["\'];', webpage, key, default=None) fmts, subs = self._extract_m3u8_formats_and_subtitles( - update_url_query(player_config['src'][video_format], params), video_id) + update_url_query(player_config['src'][video_format], filter_dict(params)), video_id) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) elif video_format == 'mpd':