From 4627841e3d2d6be0f00ca1621a3da946e8f70166 Mon Sep 17 00:00:00 2001 From: garret Date: Mon, 7 Aug 2023 11:10:07 +0100 Subject: [PATCH 1/4] [BBC] extract tracklist as chapters --- yt_dlp/extractor/bbc.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index a55cdef2b83..2b79d9bd0bd 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -15,11 +15,13 @@ float_or_none, get_element_by_class, int_or_none, + join_nonempty, js_to_json, parse_duration, parse_iso8601, parse_qs, strip_or_none, + traverse_obj, try_get, unescapeHTML, unified_timestamp, @@ -41,7 +43,6 @@ class BBCCoUkIE(InfoExtractor): iplayer(?:/[^/]+)?/(?:episode/|playlist/)| music/(?:clips|audiovideo/popular)[/#]| radio/player/| - sounds/play/| events/[^/]+/play/[^/]+/ ) (?P%s)(?!/(?:episodes|broadcasts|clips)) @@ -1118,6 +1119,15 @@ def _real_extract(self, url): image_url = current_programme.get('image_url') if image_url: thumbnail = image_url.replace('{recipe}', 'raw') + tracklist = [] + for track in traverse_obj(preload_state, ("tracklist", "tracks")): + tracklist.append({ + "title": join_nonempty("primary", "secondary", "tertiary", delim=" - ", from_dict=track.get("titles")), + **traverse_obj(track, { + "start_time": ("offset", "start"), + "end_time": ("offset", "end"), + }), + }) return { 'id': programme_id, 'title': title, @@ -1128,6 +1138,7 @@ def _real_extract(self, url): 'uploader_id': network.get('id'), 'formats': formats, 'subtitles': subtitles, + 'chapters': tracklist, } bbc3_config = self._parse_json( From f0075983bd8cc20290ef427434b86bef0ce67307 Mon Sep 17 00:00:00 2001 From: garret Date: Mon, 7 Aug 2023 10:23:26 +0100 Subject: [PATCH 2/4] add BBC Sounds test to BBCIE, remove from BBCCoUKIE --- yt_dlp/extractor/bbc.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 2b79d9bd0bd..48518813130 
100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -219,20 +219,6 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - }, { - 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb', - 'note': 'Audio', - 'info_dict': { - 'id': 'm0007jz9', - 'ext': 'mp4', - 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra', - 'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.", - 'duration': 9840, - }, - 'params': { - # rtmp download - 'skip_download': True, - } }, { 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4', 'only_matching': True, @@ -845,6 +831,20 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE 'upload_date': '20190604', 'categories': ['Psychology'], }, + }, { + # BBC Sounds + 'url': 'https://www.bbc.co.uk/sounds/play/m001p2jp', + 'info_dict': { + 'id': 'm001p2jn', + 'ext': 'mp4', + 'title': 'Late Junction - Bonjo Iyabinghi Noah and GAIKA in session', + 'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0cgqwnb.jpg', + 'duration': 7200, + 'chapters': 'count:24', + 'description': 'md5:36f16179df6ee9992e80fea912d97ea8', + 'uploader': 'Radio 3', + 'uploader_id': 'bbc_radio_three', + }, }, { # onion routes 'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576', 'only_matching': True, From e7466286e3523bac8eba00ec79b5d1adaeba1af5 Mon Sep 17 00:00:00 2001 From: garret Date: Fri, 15 Sep 2023 01:42:19 +0100 Subject: [PATCH 3/4] bbc sounds tracklists: inline non-fatal version Co-Authored-By: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/bbc.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 48518813130..90b593ecc2f 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -1119,15 +1119,6 @@ def _real_extract(self, url): image_url = 
current_programme.get('image_url') if image_url: thumbnail = image_url.replace('{recipe}', 'raw') - tracklist = [] - for track in traverse_obj(preload_state, ("tracklist", "tracks")): - tracklist.append({ - "title": join_nonempty("primary", "secondary", "tertiary", delim=" - ", from_dict=track.get("titles")), - **traverse_obj(track, { - "start_time": ("offset", "start"), - "end_time": ("offset", "end"), - }), - }) return { 'id': programme_id, 'title': title, @@ -1138,7 +1129,13 @@ def _real_extract(self, url): 'uploader_id': network.get('id'), 'formats': formats, 'subtitles': subtitles, - 'chapters': tracklist, + 'chapters': traverse_obj(preload_state, ( + 'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), { + 'title': ('titles', {lambda x: join_nonempty( + 'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}), + 'start_time': ('offset', 'start', {float_or_none}), + 'end_time': ('offset', 'end', {float_or_none}), + })) or None, } bbc3_config = self._parse_json( From b648691d1db0c89893544a29a110b9f5819de16e Mon Sep 17 00:00:00 2001 From: garret Date: Fri, 15 Sep 2023 01:44:54 +0100 Subject: [PATCH 4/4] bbc sounds tracklists: update test old one expired; it's annoying because the ones that last the longest (World Service mostly) don't have tracklists --- yt_dlp/extractor/bbc.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 90b593ecc2f..d1d6e04faaf 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -833,16 +833,16 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE }, }, { # BBC Sounds - 'url': 'https://www.bbc.co.uk/sounds/play/m001p2jp', + 'url': 'https://www.bbc.co.uk/sounds/play/m001q78b', 'info_dict': { - 'id': 'm001p2jn', + 'id': 'm001q789', 'ext': 'mp4', - 'title': 'Late Junction - Bonjo Iyabinghi Noah and GAIKA in session', - 'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0cgqwnb.jpg', - 'duration':
7200, - 'chapters': 'count:24', - 'description': 'md5:36f16179df6ee9992e80fea912d97ea8', + 'title': 'The Night Tracks Mix - Music for the darkling hour', + 'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg', + 'chapters': 'count:8', + 'description': 'md5:815fb51cbdaa270040aab8145b3f1d67', 'uploader': 'Radio 3', + 'duration': 1800, 'uploader_id': 'bbc_radio_three', }, }, { # onion routes