From 2e0d6867a10de726c656671816108ea1c9be900a Mon Sep 17 00:00:00 2001 From: garret Date: Sun, 23 Apr 2023 22:21:05 +0100 Subject: [PATCH 1/7] [GlobalPlayer] add extractors --- yt_dlp/extractor/_extractors.py | 7 + yt_dlp/extractor/globalplayer.py | 276 +++++++++++++++++++++++++++++++ 2 files changed, 283 insertions(+) create mode 100755 yt_dlp/extractor/globalplayer.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 58137d7f6e4..fe936cf008e 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -685,6 +685,13 @@ from .giantbomb import GiantBombIE from .giga import GigaIE from .glide import GlideIE +from .globalplayer import ( + GlobalPlayerLiveIE, + GlobalPlayerLivePlaylistIE, + GlobalPlayerAudioIE, + GlobalPlayerAudioEpisodeIE, + GlobalPlayerVideoIE +) from .globo import ( GloboIE, GloboArticleIE, diff --git a/yt_dlp/extractor/globalplayer.py b/yt_dlp/extractor/globalplayer.py new file mode 100755 index 00000000000..89788c99593 --- /dev/null +++ b/yt_dlp/extractor/globalplayer.py @@ -0,0 +1,276 @@ +from .common import InfoExtractor +from ..utils import ( + clean_html, + parse_duration, + str_or_none, + traverse_obj, + unified_strdate, + unified_timestamp, + urlhandle_detect_ext, +) + + +class GlobalPlayerBaseIE(InfoExtractor): + + def _get_pageProps(self, url, video_id): + webpage = self._download_webpage(url, video_id) + data = self._search_nextjs_data(webpage, video_id) + return traverse_obj(data, ('props', 'pageProps')) + + def _extract_audio(self, episode, series): + return { + 'vcodec': 'none', + **traverse_obj(series, { + 'series': 'title', + 'series_id': 'id', + 'thumbnail': 'imageUrl', + }), + **traverse_obj(episode, { + 'id': 'id', + 'description': ('description', {clean_html}), + 'duration': ('duration', {parse_duration}), + 'thumbnail': 'imageUrl', + 'url': 'streamUrl', + 'timestamp': (['pubDate', 'startDate'], {unified_timestamp}), + # pubDate for podcasts, startDate for radio catchup 
- that's all we need to have both in one + 'title': 'title', + }, get_all=False) + } + + +class GlobalPlayerLiveIE(GlobalPlayerBaseIE): + _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+/$' + _TESTS = [{ + 'url': 'https://www.globalplayer.com/live/smoothchill/uk/', + 'info_dict': { + 'id': '2mx1E', + 'ext': 'aac', + 'title': str, + 'thumbnail': 'md5:407a54f3a18e54aa0326a399e68a7d50', + 'description': 'Music To Chill To', + 'live_status': 'is_live', + 'display_id': 'smoothchill-uk', + }}, { + # national station + 'url': 'https://www.globalplayer.com/live/heart/uk/', + 'info_dict': { + 'id': '2mwx4', + 'ext': 'aac', + 'title': str, + 'thumbnail': 'md5:6f13378a53ce55bcf57365a654e1b490', + 'live_status': 'is_live', + 'description': 'turn up the feel good!', + 'display_id': 'heart-uk', + + }}, { + # regional variation + 'url': 'https://www.globalplayer.com/live/heart/london/', + 'info_dict': { + 'id': 'AMqg', + 'ext': 'aac', + 'title': str, + 'thumbnail': 'md5:6f13378a53ce55bcf57365a654e1b490', + 'description': 'turn up the feel good!', + 'live_status': 'is_live', + 'display_id': 'heart-london', + + }}, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + props = self._get_pageProps(url, video_id) + station = props['station'] + + url = station['streamUrl'] + urlh = self._request_webpage(url, video_id, note='Determining source extension') + ext = urlhandle_detect_ext(urlh) + + display_id = [station.get('brandSlug'), station.get('slug')] + if None not in display_id: + display_id = '-'.join(display_id) + else: + display_id = station.get('brandSlug') or station.get('legacyStationPrefix') + + return { + 'id': station['id'], + 'display_id': display_id, + 'url': url, + 'ext': ext, + 'vcodec': 'none', + 'is_live': True, + **traverse_obj(station, { + 'title': (['name', 'brandName'], {str_or_none}), + 'description': 'tagline', + 'thumbnail': 'brandLogo', + }, get_all=False), + } + + +class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE): + 
_VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)/$' + _TESTS = [{ + # "live playlist" + 'url': 'https://www.globalplayer.com/playlists/8bLk/', + 'info_dict': { + 'id': '8bLk', + 'ext': 'aac', + 'title': str, + 'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d', + 'live_status': 'is_live', + 'thumbnail': 'md5:0e0d47914a380577afdb4482a9561210', + } + } + ] + + # very similar to live radio, but different enough that it's easier to separate them + + def _real_extract(self, url): + video_id = self._match_id(url) + props = self._get_pageProps(url, video_id) + station = props['playlistData'] + + url = station['streamUrl'] + urlh = self._request_webpage(url, video_id, note='Determining source extension') + ext = urlhandle_detect_ext(urlh) + + return { + 'url': url, + 'ext': ext, + 'vcodec': 'none', + 'is_live': True, + **traverse_obj(station, { + 'id': 'id', + 'title': 'title', + 'description': 'description', + 'thumbnail': 'image', + }), + } + + +class GlobalPlayerAudioIE(GlobalPlayerBaseIE): + _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?$' + _TESTS = [{ + # podcast + 'url': 'https://www.globalplayer.com/podcasts/42KuaM/', + 'info_dict': { + 'id': '42KuaM', + 'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e', + 'description': 'md5:da5b918eac9ae319454a10a563afacf9', + 'uploader': 'Global', + 'title': 'Filthy Ritual', + 'categories': ['Society & Culture', 'True Crime'], + }, + 'playlist_mincount': 5, + }, { + # radio catchup + 'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/', + 'info_dict': { + 'id': '46vyD7z', + 'title': 'Nick Ferrari', + 'description': 'md5:53b6fa5ef71a3cff6628551bcc416384', + 'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf', + }, + 'playlist_mincount': 3, + }] + + def _real_extract(self, url): + video_id, podcast = self._match_valid_url(url).group('id', 'podcast') + props = self._get_pageProps(url, video_id) + + if podcast: + series = props['podcastInfo'] + categories = 
[i.get('name') for i in series.get('categories')] + else: + series = props['catchupInfo'] + categories = None + + return { + '_type': 'playlist', + 'categories': categories, # podcasts only + 'entries': [self._extract_audio(ep, series) for ep in series['episodes']], + **traverse_obj(series, { + 'description': 'description', + 'id': 'id', + 'thumbnail': 'imageUrl', + 'title': 'title', + 'uploader': 'itunesAuthor', # podcasts only + }), + } + + +class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE): + _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/$' + _TESTS = [{ + # podcast + 'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/', + 'info_dict': { + 'id': '7DrfNnE', + 'ext': 'mp3', + 'title': 'Filthy Ritual - Trailer', + 'duration': 225, + 'description': 'md5:1f1562fd0f01b4773b590984f94223e0', + 'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e', + 'upload_date': '20230411', + 'timestamp': 1681254900, + 'series': 'Filthy Ritual', + 'series_id': '42KuaM', + + } + }, { + # radio catchup + 'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/', + 'info_dict': { + 'id': '2zGq26Vcv1fCWhddC4JAwETXWe', + 'ext': 'm4a', + 'title': 'Nick Ferrari', + 'duration': 10800, + 'description': 'md5:53b6fa5ef71a3cff6628551bcc416384', + 'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf', + 'series_id': '46vyD7z', + 'upload_date': '20230421', + 'timestamp': 1682056800, + 'series': 'Nick Ferrari', + }, + }] + + def _real_extract(self, url): + video_id, podcast = self._match_valid_url(url).group('id', 'podcast') + props = self._get_pageProps(url, video_id) + if podcast: + episode = props['podcastEpisode'] + series = episode['podcast'] + else: + episode = props['catchupEpisode'] + series = episode['show'] + + return self._extract_audio(episode, series) + + +class GlobalPlayerVideoIE(GlobalPlayerBaseIE): + _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)/$' + _TESTS = [{ + 'url': 
'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/', + 'info_dict': { + 'id': '2JsSZ7Gm2uP', + 'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550', + 'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live', + 'upload_date': '20230420', + 'ext': 'mp4', + 'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + meta = self._get_pageProps(url, video_id)['videoData'] + + return traverse_obj(meta, { + 'id': 'id', + 'thumbnail': ('image', 'url'), + 'title': 'title', + 'upload_date': ('publish_date', {unified_strdate}), + 'url': 'url', + 'description': 'description', + }) + # there's more metadata available but i can't be bothered to match up which is which From 9701e7b17ab8d3396ea8d52e87820e2285188d22 Mon Sep 17 00:00:00 2001 From: garret Date: Mon, 24 Apr 2023 13:25:55 +0100 Subject: [PATCH 2/7] remove single-use props variables --- yt_dlp/extractor/globalplayer.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/globalplayer.py b/yt_dlp/extractor/globalplayer.py index 89788c99593..eccf7a396c8 100755 --- a/yt_dlp/extractor/globalplayer.py +++ b/yt_dlp/extractor/globalplayer.py @@ -79,8 +79,7 @@ class GlobalPlayerLiveIE(GlobalPlayerBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - props = self._get_pageProps(url, video_id) - station = props['station'] + station = self._get_pageProps(url, video_id)['station'] url = station['streamUrl'] urlh = self._request_webpage(url, video_id, note='Determining source extension') @@ -127,8 +126,7 @@ class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - props = self._get_pageProps(url, video_id) - station = props['playlistData'] + station = self._get_pageProps(url, video_id)['playlistData'] url = station['streamUrl'] urlh = self._request_webpage(url, video_id, note='Determining source extension') From 
db5881a4336ad6d868e7f9428d34010d90c18e5c Mon Sep 17 00:00:00 2001 From: garret Date: Mon, 24 Apr 2023 14:40:31 +0100 Subject: [PATCH 3/7] add detect_ext convenience func to BaseIE --- yt_dlp/extractor/globalplayer.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/globalplayer.py b/yt_dlp/extractor/globalplayer.py index eccf7a396c8..5fad0269e11 100755 --- a/yt_dlp/extractor/globalplayer.py +++ b/yt_dlp/extractor/globalplayer.py @@ -17,6 +17,10 @@ def _get_pageProps(self, url, video_id): data = self._search_nextjs_data(webpage, video_id) return traverse_obj(data, ('props', 'pageProps')) + def _request_ext(self, url, video_id): + urlh = self._request_webpage(url, video_id, note='Determining source extension') + return urlhandle_detect_ext(urlh) + def _extract_audio(self, episode, series): return { 'vcodec': 'none', @@ -80,10 +84,7 @@ class GlobalPlayerLiveIE(GlobalPlayerBaseIE): def _real_extract(self, url): video_id = self._match_id(url) station = self._get_pageProps(url, video_id)['station'] - url = station['streamUrl'] - urlh = self._request_webpage(url, video_id, note='Determining source extension') - ext = urlhandle_detect_ext(urlh) display_id = [station.get('brandSlug'), station.get('slug')] if None not in display_id: @@ -95,7 +96,7 @@ def _real_extract(self, url): 'id': station['id'], 'display_id': display_id, 'url': url, - 'ext': ext, + 'ext': self._request_ext(url, video_id), 'vcodec': 'none', 'is_live': True, **traverse_obj(station, { @@ -127,14 +128,11 @@ class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE): def _real_extract(self, url): video_id = self._match_id(url) station = self._get_pageProps(url, video_id)['playlistData'] - url = station['streamUrl'] - urlh = self._request_webpage(url, video_id, note='Determining source extension') - ext = urlhandle_detect_ext(urlh) return { 'url': url, - 'ext': ext, + 'ext': self._request_ext(url, video_id), 'vcodec': 'none', 'is_live': True, **traverse_obj(station, 
{ From 4ba420d32d47d97db4635c1e1996b38d9038c3ef Mon Sep 17 00:00:00 2001 From: garret Date: Wed, 26 Apr 2023 15:45:45 +0100 Subject: [PATCH 4/7] apply code review suggestions Co-Authored-By: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/globalplayer.py | 115 +++++++++++++------------------ 1 file changed, 48 insertions(+), 67 deletions(-) diff --git a/yt_dlp/extractor/globalplayer.py b/yt_dlp/extractor/globalplayer.py index 5fad0269e11..1c7114c4d19 100755 --- a/yt_dlp/extractor/globalplayer.py +++ b/yt_dlp/extractor/globalplayer.py @@ -1,6 +1,7 @@ from .common import InfoExtractor from ..utils import ( clean_html, + join_nonempty, parse_duration, str_or_none, traverse_obj, @@ -12,14 +13,13 @@ class GlobalPlayerBaseIE(InfoExtractor): - def _get_pageProps(self, url, video_id): + def _get_page_props(self, url, video_id): webpage = self._download_webpage(url, video_id) - data = self._search_nextjs_data(webpage, video_id) - return traverse_obj(data, ('props', 'pageProps')) + return self._search_nextjs_data(webpage, video_id)['props']['pageProps'] def _request_ext(self, url, video_id): - urlh = self._request_webpage(url, video_id, note='Determining source extension') - return urlhandle_detect_ext(urlh) + return urlhandle_detect_ext(self._request_webpage( + url, video_id, note='Determining source extension')) def _extract_audio(self, episode, series): return { @@ -43,7 +43,7 @@ def _extract_audio(self, episode, series): class GlobalPlayerLiveIE(GlobalPlayerBaseIE): - _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+/$' + _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+' _TESTS = [{ 'url': 'https://www.globalplayer.com/live/smoothchill/uk/', 'info_dict': { @@ -54,7 +54,8 @@ class GlobalPlayerLiveIE(GlobalPlayerBaseIE): 'description': 'Music To Chill To', 'live_status': 'is_live', 'display_id': 'smoothchill-uk', - }}, { + }, + }, { # national station 'url': 'https://www.globalplayer.com/live/heart/uk/', 'info_dict': 
{ @@ -65,8 +66,8 @@ class GlobalPlayerLiveIE(GlobalPlayerBaseIE): 'live_status': 'is_live', 'description': 'turn up the feel good!', 'display_id': 'heart-uk', - - }}, { + }, + }, { # regional variation 'url': 'https://www.globalplayer.com/live/heart/london/', 'info_dict': { @@ -77,26 +78,19 @@ class GlobalPlayerLiveIE(GlobalPlayerBaseIE): 'description': 'turn up the feel good!', 'live_status': 'is_live', 'display_id': 'heart-london', - - }}, - ] + }, + }] def _real_extract(self, url): video_id = self._match_id(url) - station = self._get_pageProps(url, video_id)['station'] - url = station['streamUrl'] - - display_id = [station.get('brandSlug'), station.get('slug')] - if None not in display_id: - display_id = '-'.join(display_id) - else: - display_id = station.get('brandSlug') or station.get('legacyStationPrefix') + station = self._get_page_props(url, video_id)['station'] + stream_url = station['streamUrl'] return { 'id': station['id'], - 'display_id': display_id, - 'url': url, - 'ext': self._request_ext(url, video_id), + 'display_id': join_nonempty('brandSlug', 'slug', from_dict=station) or station.get('legacyStationPrefix'), + 'url': stream_url, + 'ext': self._request_ext(stream_url, video_id), 'vcodec': 'none', 'is_live': True, **traverse_obj(station, { @@ -108,7 +102,7 @@ def _real_extract(self, url): class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE): - _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)/$' + _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)' _TESTS = [{ # "live playlist" 'url': 'https://www.globalplayer.com/playlists/8bLk/', @@ -120,23 +114,20 @@ class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE): 'live_status': 'is_live', 'thumbnail': 'md5:0e0d47914a380577afdb4482a9561210', } - } - ] - - # very similar to live radio, but different enough that it's easier to separate them + }] def _real_extract(self, url): video_id = self._match_id(url) - station = self._get_pageProps(url, video_id)['playlistData'] - url = 
station['streamUrl'] + station = self._get_page_props(url, video_id)['playlistData'] + stream_url = station['streamUrl'] return { - 'url': url, - 'ext': self._request_ext(url, video_id), + 'id': video_id, + 'url': stream_url, + 'ext': self._request_ext(stream_url, video_id), 'vcodec': 'none', 'is_live': True, **traverse_obj(station, { - 'id': 'id', 'title': 'title', 'description': 'description', 'thumbnail': 'image', @@ -145,7 +136,7 @@ def _real_extract(self, url): class GlobalPlayerAudioIE(GlobalPlayerBaseIE): - _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?$' + _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])' _TESTS = [{ # podcast 'url': 'https://www.globalplayer.com/podcasts/42KuaM/', @@ -172,22 +163,17 @@ class GlobalPlayerAudioIE(GlobalPlayerBaseIE): def _real_extract(self, url): video_id, podcast = self._match_valid_url(url).group('id', 'podcast') - props = self._get_pageProps(url, video_id) - - if podcast: - series = props['podcastInfo'] - categories = [i.get('name') for i in series.get('categories')] - else: - series = props['catchupInfo'] - categories = None + props = self._get_page_props(url, video_id) + series = props['podcastInfo'] if podcast else props['catchupInfo'] return { '_type': 'playlist', - 'categories': categories, # podcasts only - 'entries': [self._extract_audio(ep, series) for ep in series['episodes']], + 'id': video_id, + 'entries': [self._extract_audio(ep, series) for ep in traverse_obj( + series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))], + 'categories': traverse_obj(series, ('categories', ..., 'name')) or None, **traverse_obj(series, { 'description': 'description', - 'id': 'id', 'thumbnail': 'imageUrl', 'title': 'title', 'uploader': 'itunesAuthor', # podcasts only @@ -196,7 +182,7 @@ def _real_extract(self, url): class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE): - _VALID_URL = 
r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/$' + _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])' _TESTS = [{ # podcast 'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/', @@ -211,7 +197,6 @@ class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE): 'timestamp': 1681254900, 'series': 'Filthy Ritual', 'series_id': '42KuaM', - } }, { # radio catchup @@ -232,19 +217,14 @@ class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE): def _real_extract(self, url): video_id, podcast = self._match_valid_url(url).group('id', 'podcast') - props = self._get_pageProps(url, video_id) - if podcast: - episode = props['podcastEpisode'] - series = episode['podcast'] - else: - episode = props['catchupEpisode'] - series = episode['show'] - - return self._extract_audio(episode, series) + props = self._get_page_props(url, video_id) + episode = props['podcastEpisode'] if podcast else props['catchupEpisode'] + return self._extract_audio( + episode, traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {}) class GlobalPlayerVideoIE(GlobalPlayerBaseIE): - _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)/$' + _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)' _TESTS = [{ 'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/', 'info_dict': { @@ -259,14 +239,15 @@ class GlobalPlayerVideoIE(GlobalPlayerBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - meta = self._get_pageProps(url, video_id)['videoData'] + meta = self._get_page_props(url, video_id)['videoData'] - return traverse_obj(meta, { - 'id': 'id', - 'thumbnail': ('image', 'url'), - 'title': 'title', - 'upload_date': ('publish_date', {unified_strdate}), - 'url': 'url', - 'description': 'description', - }) - # there's more metadata available but i can't be bothered to match up which is which + return { + 'id': video_id, + **traverse_obj(meta, { + 'url': 'url', + 'thumbnail': 
('image', 'url'), + 'title': 'title', + 'upload_date': ('publish_date', {unified_strdate}), + 'description': 'description', + }), + } From b17aeb69d45c14a96e49c97b6ec92ab05f216a79 Mon Sep 17 00:00:00 2001 From: garret Date: Wed, 26 Apr 2023 15:53:50 +0100 Subject: [PATCH 5/7] apply test style suggestions also one flake8 Co-Authored-By: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/globalplayer.py | 65 ++++++++++++++++---------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/yt_dlp/extractor/globalplayer.py b/yt_dlp/extractor/globalplayer.py index 1c7114c4d19..d8db59d12d9 100755 --- a/yt_dlp/extractor/globalplayer.py +++ b/yt_dlp/extractor/globalplayer.py @@ -49,11 +49,11 @@ class GlobalPlayerLiveIE(GlobalPlayerBaseIE): 'info_dict': { 'id': '2mx1E', 'ext': 'aac', - 'title': str, - 'thumbnail': 'md5:407a54f3a18e54aa0326a399e68a7d50', + 'display_id': 'smoothchill-uk', + 'title': 're:^Smooth Chill.+$', + 'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png', 'description': 'Music To Chill To', 'live_status': 'is_live', - 'display_id': 'smoothchill-uk', }, }, { # national station @@ -61,10 +61,10 @@ class GlobalPlayerLiveIE(GlobalPlayerBaseIE): 'info_dict': { 'id': '2mwx4', 'ext': 'aac', - 'title': str, - 'thumbnail': 'md5:6f13378a53ce55bcf57365a654e1b490', - 'live_status': 'is_live', 'description': 'turn up the feel good!', + 'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png', + 'live_status': 'is_live', + 'title': 're:^Heart UK.+$', 'display_id': 'heart-uk', }, }, { @@ -73,11 +73,11 @@ class GlobalPlayerLiveIE(GlobalPlayerBaseIE): 'info_dict': { 'id': 'AMqg', 'ext': 'aac', - 'title': str, - 'thumbnail': 'md5:6f13378a53ce55bcf57365a654e1b490', - 'description': 'turn up the feel good!', + 'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png', + 'title': 're:^Heart London.+$', 'live_status': 'is_live', 
'display_id': 'heart-london', + 'description': 'turn up the feel good!', }, }] @@ -109,11 +109,11 @@ class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE): 'info_dict': { 'id': '8bLk', 'ext': 'aac', - 'title': str, - 'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d', 'live_status': 'is_live', - 'thumbnail': 'md5:0e0d47914a380577afdb4482a9561210', - } + 'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d', + 'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=', + 'title': 're:^Classic FM Hall of Fame.+$' + }, }] def _real_extract(self, url): @@ -140,25 +140,25 @@ class GlobalPlayerAudioIE(GlobalPlayerBaseIE): _TESTS = [{ # podcast 'url': 'https://www.globalplayer.com/podcasts/42KuaM/', + 'playlist_mincount': 5, 'info_dict': { 'id': '42KuaM', - 'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e', - 'description': 'md5:da5b918eac9ae319454a10a563afacf9', - 'uploader': 'Global', 'title': 'Filthy Ritual', + 'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e', 'categories': ['Society & Culture', 'True Crime'], + 'uploader': 'Global', + 'description': 'md5:da5b918eac9ae319454a10a563afacf9', }, - 'playlist_mincount': 5, }, { # radio catchup 'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/', + 'playlist_mincount': 3, 'info_dict': { 'id': '46vyD7z', + 'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.', 'title': 'Nick Ferrari', - 'description': 'md5:53b6fa5ef71a3cff6628551bcc416384', 'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf', }, - 'playlist_mincount': 3, }] def _real_extract(self, url): @@ -190,28 +190,28 @@ class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE): 'id': '7DrfNnE', 'ext': 'mp3', 'title': 'Filthy Ritual - Trailer', - 'duration': 225, 'description': 'md5:1f1562fd0f01b4773b590984f94223e0', 'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e', - 'upload_date': '20230411', + 'duration': 225.0, 'timestamp': 1681254900, 'series': 'Filthy Ritual', 'series_id': 
'42KuaM', - } + 'upload_date': '20230411', + }, }, { # radio catchup 'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/', 'info_dict': { 'id': '2zGq26Vcv1fCWhddC4JAwETXWe', 'ext': 'm4a', - 'title': 'Nick Ferrari', - 'duration': 10800, - 'description': 'md5:53b6fa5ef71a3cff6628551bcc416384', - 'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf', - 'series_id': '46vyD7z', - 'upload_date': '20230421', 'timestamp': 1682056800, 'series': 'Nick Ferrari', + 'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf', + 'upload_date': '20230421', + 'series_id': '46vyD7z', + 'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.', + 'title': 'Nick Ferrari', + 'duration': 10800.0, }, }] @@ -223,18 +223,19 @@ def _real_extract(self, url): return self._extract_audio( episode, traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {}) + class GlobalPlayerVideoIE(GlobalPlayerBaseIE): _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)' _TESTS = [{ 'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/', 'info_dict': { 'id': '2JsSZ7Gm2uP', - 'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550', - 'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live', - 'upload_date': '20230420', 'ext': 'mp4', 'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd', - } + 'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550', + 'upload_date': '20230420', + 'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live', + }, }] def _real_extract(self, url): From 9627fc701b41168ca687fbb26b5f1dbbdfe134c2 Mon Sep 17 00:00:00 2001 From: garret Date: Wed, 26 Apr 2023 16:54:22 +0100 Subject: [PATCH 6/7] Extract uploader in _extract_audio as well Co-Authored-By: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/globalplayer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/globalplayer.py b/yt_dlp/extractor/globalplayer.py index 
d8db59d12d9..444b186ba28 100755 --- a/yt_dlp/extractor/globalplayer.py +++ b/yt_dlp/extractor/globalplayer.py @@ -28,6 +28,7 @@ def _extract_audio(self, episode, series): 'series': 'title', 'series_id': 'id', 'thumbnail': 'imageUrl', + 'uploader': 'itunesAuthor', # podcasts only }), **traverse_obj(episode, { 'id': 'id', @@ -197,6 +198,7 @@ class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE): 'series': 'Filthy Ritual', 'series_id': '42KuaM', 'upload_date': '20230411', + 'uploader': 'Global', }, }, { # radio catchup From 1deab4b57dfbbfb592da4845a41d5ac37c1578d6 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 26 Apr 2023 23:15:09 +0000 Subject: [PATCH 7/7] Cleanup Co-authored-by: pukkandan --- yt_dlp/extractor/globalplayer.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/globalplayer.py b/yt_dlp/extractor/globalplayer.py index 444b186ba28..e0c0d58fd43 100755 --- a/yt_dlp/extractor/globalplayer.py +++ b/yt_dlp/extractor/globalplayer.py @@ -12,13 +12,12 @@ class GlobalPlayerBaseIE(InfoExtractor): - def _get_page_props(self, url, video_id): webpage = self._download_webpage(url, video_id) return self._search_nextjs_data(webpage, video_id)['props']['pageProps'] def _request_ext(self, url, video_id): - return urlhandle_detect_ext(self._request_webpage( + return urlhandle_detect_ext(self._request_webpage( # Server rejects HEAD requests url, video_id, note='Determining source extension')) def _extract_audio(self, episode, series): @@ -36,8 +35,7 @@ def _extract_audio(self, episode, series): 'duration': ('duration', {parse_duration}), 'thumbnail': 'imageUrl', 'url': 'streamUrl', - 'timestamp': (['pubDate', 'startDate'], {unified_timestamp}), - # pubDate for podcasts, startDate for radio catchup - that's all we need to have both in one + 'timestamp': (('pubDate', 'startDate'), {unified_timestamp}), 'title': 'title', }, get_all=False) } @@ -95,7 +93,7 @@ def _real_extract(self, url): 
'vcodec': 'none', 'is_live': True, **traverse_obj(station, { - 'title': (['name', 'brandName'], {str_or_none}), + 'title': (('name', 'brandName'), {str_or_none}), 'description': 'tagline', 'thumbnail': 'brandLogo', }, get_all=False),