Skip to content

Commit

Permalink
[ie/twitter:broadcast] Improve metadata extraction (#8383)
Browse files: browse the repository at this point in the history
Authored by: HitomaruKonpaku
  • Loading branch information
HitomaruKonpaku committed Nov 11, 2023
1 parent 10025b7 commit 7d337ca
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 5 deletions.
5 changes: 3 additions & 2 deletions yt_dlp/extractor/periscope.py
Expand Up @@ -24,12 +24,13 @@ def _parse_broadcast_data(self, broadcast, video_id):

thumbnails = [{
'url': broadcast[image],
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]
} for image in ('image_url', 'image_url_medium', 'image_url_small') if broadcast.get(image)]

return {
'id': broadcast.get('id') or video_id,
'title': title,
'timestamp': parse_iso8601(broadcast.get('created_at')),
'timestamp': parse_iso8601(broadcast.get('created_at')) or int_or_none(
broadcast.get('created_at_ms'), scale=1000),
'uploader': uploader,
'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
'thumbnails': thumbnails,
Expand Down
40 changes: 37 additions & 3 deletions yt_dlp/extractor/twitter.py
Expand Up @@ -1563,19 +1563,50 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
IE_NAME = 'twitter:broadcast'
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

_TEST = {
_TESTS = [{
# untitled Periscope video
'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
'info_dict': {
'id': '1yNGaQLWpejGj',
'ext': 'mp4',
'title': 'Andrea May Sahouri - Periscope Broadcast',
'uploader': 'Andrea May Sahouri',
'uploader_id': '1PXEdBZWpGwKe',
'uploader_id': 'andreamsahouri',
'uploader_url': 'https://twitter.com/andreamsahouri',
'timestamp': 1590973638,
'upload_date': '20200601',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
'view_count': int,
},
}
}, {
'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
'info_dict': {
'id': '1ZkKzeyrPbaxv',
'ext': 'mp4',
'title': 'Starship | SN10 | High-Altitude Flight Test',
'uploader': 'SpaceX',
'uploader_id': 'SpaceX',
'uploader_url': 'https://twitter.com/SpaceX',
'timestamp': 1614812942,
'upload_date': '20210303',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
'view_count': int,
},
}, {
'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
'info_dict': {
'id': '1OyKAVQrgzwGb',
'ext': 'mp4',
'title': 'Starship Flight Test',
'uploader': 'SpaceX',
'uploader_id': 'SpaceX',
'uploader_url': 'https://twitter.com/SpaceX',
'timestamp': 1681993964,
'upload_date': '20230420',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
'view_count': int,
},
}]

def _real_extract(self, url):
broadcast_id = self._match_id(url)
Expand All @@ -1585,6 +1616,9 @@ def _real_extract(self, url):
if not broadcast:
raise ExtractorError('Broadcast no longer exists', expected=True)
info = self._parse_broadcast_data(broadcast, broadcast_id)
info['title'] = broadcast.get('status') or info.get('title')
info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id')
info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None)
media_key = broadcast['media_key']
source = self._call_api(
f'live_video_stream/status/{media_key}', media_key)['source']
Expand Down

0 comments on commit 7d337ca

Please sign in to comment.