Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added CBCPlayerPlaylistIE #7870

Merged
merged 9 commits into from
Aug 20, 2023
1 change: 1 addition & 0 deletions yt_dlp/extractor/_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@
from .cbc import (
CBCIE,
CBCPlayerIE,
CBCPlayerPlaylistIE,
CBCGemIE,
CBCGemPlaylistIE,
CBCGemLiveIE,
Expand Down
58 changes: 58 additions & 0 deletions yt_dlp/extractor/cbc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
)
from ..utils import (
ExtractorError,
get_element_by_id,
int_or_none,
join_nonempty,
js_to_json,
Expand Down Expand Up @@ -228,6 +229,63 @@ def _real_extract(self, url):
}


class CBCPlayerPlaylistIE(InfoExtractor):
    """Extract a cbc.ca/player category page as a playlist of CBCPlayer URLs.

    The page text inside the element with id ``initialStateDom`` contains a
    JSON object; its ``video.clipsByCategory`` mapping is keyed by category
    name, and each value lists that category's clips under ``items``. The
    category whose name equals the URL path (compared case-insensitively)
    supplies the playlist entries.
    """

    IE_NAME = 'cbc.ca:player:playlist'
    # The (?!play/) lookahead keeps single-video /player/play/ URLs out of this extractor.
    # NOTE(review): the 'cbcplayer:' alternative is handed to _download_webpage
    # unchanged in _real_extract - confirm such URLs are really meant to be accepted here.
    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/)(?!play/))(?P<id>.+)'
    _TESTS = [{
        'url': 'https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast',
        'playlist_mincount': 25,
        'info_dict': {
            'id': 'news/tv shows/the national/latest broadcast',
            'ie_key': 'CBCPlayer',
        }
    }, {
        'url': 'https://www.cbc.ca/player/news/Canada/North',
        'playlist_mincount': 25,
        'info_dict': {
            'id': 'news/canada/north',
            'ie_key': 'CBCPlayer',
        }
    }]

    def _real_extract(self, url):
        """Build the playlist for the category page at ``url``.

        Returns a ``playlist`` dict whose entries are ``url``-type results
        pointing at ``https://www.cbc.ca/player/play/<id>`` with the
        ``CBCPlayer`` extractor key. An unmatched category yields an empty
        playlist.

        Raises ExtractorError if no JSON object can be located in the
        ``initialStateDom`` element.
        """
        # Imported locally so this block does not depend on the module's import list.
        from urllib.parse import unquote

        # We have no other playlist id other than the URL, so use that.
        # _VALID_URL captures everything after the prefix, so cut off any query
        # string / fragment first, then decode every percent-escape (not just
        # %20) and drop a trailing slash so the id can equal a category name.
        raw_path = self._match_id(url)
        for delimiter in ('?', '#'):
            raw_path = raw_path.partition(delimiter)[0]
        playlist_id = unquote(raw_path).rstrip('/').lower()

        webpage = self._download_webpage(url, playlist_id)
        state_text = get_element_by_id('initialStateDom', webpage) or ''
        # The JSON object is surrounded by other text inside the element, so
        # keep only the span from the first '{' to the last '}'.
        json_start, json_end = state_text.find('{'), state_text.rfind('}')
        if json_start < 0 or json_end < json_start:
            raise ExtractorError('Unable to find the initial state JSON in the page')
        json_content = self._parse_json(state_text[json_start:json_end + 1], playlist_id)

        categories = (json_content.get('video') or {}).get('clipsByCategory') or {}
        # Category names are matched case-insensitively against the lowercased
        # id; only the first matching category is used.
        clips = next(
            ((listing or {}).get('items') or []
             for name, listing in categories.items()
             if name.lower() == playlist_id),
            [])

        playlist_items = []
        for video in clips:
            # The id is the only field needed to build the entry URL; every
            # other field is optional and must not abort the whole playlist.
            video_id = (video or {}).get('id')
            if not video_id:
                continue
            content_area = video.get('contentArea')
            playlist_items.append({
                '_type': 'url',
                'url': 'https://www.cbc.ca/player/play/%s' % video_id,
                'ie_key': 'CBCPlayer',
                'timestamp': video.get('airDate'),
                'categories': [content_area] if content_area else None,
                'description': video.get('description'),
                'duration': video.get('duration'),
                'id': video_id,
                'is_live': video.get('isLive'),
                'series': video.get('showName'),
                'thumbnail': video.get('thumbnail'),
                'title': video.get('title'),
            })

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'ie_key': 'CBCPlayer',
            'entries': playlist_items,
        }
trainman261 marked this conversation as resolved.
Show resolved Hide resolved


class CBCGemIE(InfoExtractor):
IE_NAME = 'gem.cbc.ca'
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'
Expand Down