Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NUUM] Add extractors #8868

Merged
merged 11 commits into from
Feb 5, 2024
10 changes: 5 additions & 5 deletions yt_dlp/extractor/_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1317,6 +1317,11 @@
NYTimesArticleIE,
NYTimesCookingIE,
)
from .nuum import (
NuumLiveIE,
NuumTabIE,
NuumMediaIE,
)
from .nuvid import NuvidIE
from .nzherald import NZHeraldIE
from .nzonscreen import NZOnScreenIE
Expand Down Expand Up @@ -2271,11 +2276,6 @@
WashingtonPostIE,
WashingtonPostArticleIE,
)
from .wasdtv import (
WASDTVStreamIE,
WASDTVRecordIE,
WASDTVClipIE,
)
from .wat import WatIE
from .wdr import (
WDRIE,
Expand Down
200 changes: 200 additions & 0 deletions yt_dlp/extractor/nuum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
import functools

from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
UserNotLive,
int_or_none,
parse_iso8601,
str_or_none,
url_or_none,
)
from ..utils.traversal import traverse_obj


class NuumBaseIE(InfoExtractor):
    """Shared helpers for the nuum.ru extractors: API access and metadata parsing."""

    def _call_api(self, path, video_id, description, query=None):
        """Download ``https://nuum.ru/api/v2/<path>`` and return its ``result`` field.

        ``description`` is only used to build the progress and error notes.
        ``query`` is an optional mapping of URL query parameters.

        Raises ExtractorError when the response carries a truthy ``error`` field.
        """
        response = self._download_json(
            # ``query or {}`` avoids sharing one mutable default dict between calls
            f'https://nuum.ru/api/v2/{path}', video_id, query=query or {},
            note=f'Downloading {description} metadata',
            errnote=f'Unable to download {description} metadata')
        if error := response.get('error'):
            raise ExtractorError(f'API returned error: {error!r}')
        return response.get('result')

    def _get_channel_info(self, channel_name):
        """Return the ``broadcasts/public`` API result for ``channel_name``."""
        return self._call_api(
            'broadcasts/public', video_id=channel_name, description='channel',
            query={
                'with_extra': 'true',
                'channel_name': channel_name,
                'with_deleted': 'true',
            })

    def _parse_video_data(self, container):
        """Parse a media container into ``(media_url, info_dict)``.

        ``media_url`` is the first valid URL among ``media_archive_url`` and
        ``media_url`` of the first media entry of the first stream, or None.
        ``info_dict`` always has ``id`` and ``is_live``; the remaining fields
        are included only when present in ``container``.

        Raises KeyError when ``container`` has no ``media_container_id``.
        """
        # Containers without a stream or media entry yield None from traverse_obj;
        # fall back to empty dicts so the ``.get`` below cannot raise AttributeError
        stream = traverse_obj(container, ('media_container_streams', 0, {dict})) or {}
        media = traverse_obj(stream, ('stream_media', 0, {dict})) or {}
        media_url = traverse_obj(media, (
            'media_meta', ('media_archive_url', 'media_url'), {url_or_none}), get_all=False)

        return media_url, {
            'id': str(container['media_container_id']),
            'is_live': media.get('media_status') == 'RUNNING',
            **traverse_obj(container, {
                'title': ('media_container_name', {str}),
                'description': ('media_container_description', {str}),
                'timestamp': ('created_at', {parse_iso8601}),
                'channel': ('media_container_channel', 'channel_name', {str}),
                'channel_id': ('media_container_channel', 'channel_id', {str_or_none}),
            }),
            **traverse_obj(stream, {
                'view_count': ('stream_total_viewers', {int_or_none}),
                'concurrent_view_count': ('stream_current_viewers', {int_or_none}),
            }),
            **traverse_obj(media, {
                'duration': ('media_duration', {int_or_none}),
                'thumbnail': ('media_meta', ('media_preview_archive_url', 'media_preview_url'), {url_or_none}),
            }, get_all=False),
        }


class NuumMediaIE(NuumBaseIE):
    """Extractor for single nuum.ru stream, video and clip pages."""

    IE_NAME = 'nuum:media'
    _VALID_URL = r'https?://nuum\.ru/(?:streams|videos|clips)/(?P<id>[\d]+)'
    _TESTS = [{
        'url': 'https://nuum.ru/streams/1592713-7-days-to-die',
        'only_matching': True,
    }, {
        'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
        'md5': 'f1d9118a30403e32b702a204eb03aca3',
        'info_dict': {
            'id': '1567547',
            'ext': 'mp4',
            'title': 'Toxi$ - Hurtz',
            'description': '',
            'timestamp': 1702631651,
            'upload_date': '20231215',
            'thumbnail': r're:^https?://.+\.jpg',
            'view_count': int,
            'concurrent_view_count': int,
            'channel_id': '6911',
            'channel': 'toxis',
            'duration': 116,
        },
    }, {
        'url': 'https://nuum.ru/clips/1552564-pro-misu',
        'md5': 'b248ae1565b1e55433188f11beeb0ca1',
        'info_dict': {
            'id': '1552564',
            'ext': 'mp4',
            'title': 'Про Мису 🙃',
            'timestamp': 1701971828,
            'upload_date': '20231207',
            'thumbnail': r're:^https?://.+\.jpg',
            'view_count': int,
            'concurrent_view_count': int,
            'channel_id': '3320',
            'channel': 'Misalelik',
            'duration': 41,
        },
    }]

    def _real_extract(self, url):
        """Fetch the media container named by the URL's numeric id and build its info dict."""
        video_id = self._match_id(url)
        container = self._call_api(f'media-containers/{video_id}', video_id, 'media')
        media_url, metadata = self._parse_video_data(container)

        # The parsed ``is_live`` flag tells the m3u8 helper whether this is a live stream
        fmts, subs = self._extract_m3u8_formats_and_subtitles(
            media_url, video_id, 'mp4', live=metadata['is_live'])

        return {
            **metadata,
            'formats': fmts,
            'subtitles': subs,
        }


class NuumLiveIE(NuumBaseIE):
    """Extractor for the current live stream of a nuum.ru channel page."""

    IE_NAME = 'nuum:live'
    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://nuum.ru/channel/mts_live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the channel's live media container.

        Raises UserNotLive when the API explicitly reports ``channel_is_live`` as False.
        """
        channel_name = self._match_id(url)
        channel_info = self._get_channel_info(channel_name)

        # Only an explicit False counts as "not live"; a missing flag falls through
        live_flag = traverse_obj(channel_info, ('channel', 'channel_is_live'))
        if live_flag is False:
            raise UserNotLive(video_id=channel_name)

        media_url, info = self._parse_video_data(channel_info['media_container'])
        fmts, subs = self._extract_m3u8_formats_and_subtitles(media_url, channel_name, 'mp4', live=True)

        # Report the result as if it came from the media extractor's stream page
        result = {
            'formats': fmts,
            'subtitles': subs,
            'webpage_url': f'https://nuum.ru/streams/{info["id"]}',
            'extractor_key': NuumMediaIE.ie_key(),
            'extractor': NuumMediaIE.IE_NAME,
        }
        # Parsed metadata is applied last, so its keys win over the ones above
        result.update(info)
        return result


class NuumTabIE(NuumBaseIE):
    """Playlist extractor for the streams, videos and clips tabs of a nuum.ru channel."""

    IE_NAME = 'nuum:tab'
    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/(?P<type>streams|videos|clips)'
    _TESTS = [{
        'url': 'https://nuum.ru/channel/dankon_/clips',
        'info_dict': {
            'id': 'dankon__clips',
            'title': 'Dankon_',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'https://nuum.ru/channel/dankon_/videos',
        'info_dict': {
            'id': 'dankon__videos',
            'title': 'Dankon_',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://nuum.ru/channel/dankon_/streams',
        'info_dict': {
            'id': 'dankon__streams',
            'title': 'Dankon_',
        },
        'playlist_mincount': 5,
    }]

    # Number of media containers requested per API page
    _PAGE_SIZE = 50

    def _fetch_page(self, channel_id, tab_type, tab_id, page):
        """Yield url results for one zero-based ``page`` of the given channel tab."""
        # Maps the tab name from the URL to the API's media container types
        container_types = {
            'clips': ['SHORT_VIDEO', 'REVIEW_VIDEO'],
            'videos': ['LONG_VIDEO'],
            'streams': ['SINGLE'],
        }
        api_query = {
            'limit': self._PAGE_SIZE,
            'offset': page * self._PAGE_SIZE,
            'channel_id': channel_id,
            'media_container_status': 'STOPPED',
            'media_container_type': container_types[tab_type],
        }

        page_items = self._call_api(
            'media-containers', video_id=tab_id,
            description=f'{tab_type} tab page {page + 1}', query=api_query)

        for item in traverse_obj(page_items, (..., {dict})):
            # Only the metadata half is needed here; the media URL is discarded
            entry_info = self._parse_video_data(item)[1]
            yield self.url_result(
                f'https://nuum.ru/videos/{entry_info["id"]}', NuumMediaIE, **entry_info)

    def _real_extract(self, url):
        """Build a lazily paged playlist for the channel tab named in the URL."""
        mobj = self._match_valid_url(url)
        channel_name, tab_type = mobj.group('id', 'type')
        tab_id = f'{channel_name}_{tab_type}'
        channel_data = self._get_channel_info(channel_name)['channel']

        # Pages are fetched on demand; the page index is supplied by OnDemandPagedList
        page_fetcher = functools.partial(
            self._fetch_page, channel_data['channel_id'], tab_type, tab_id)
        entries = OnDemandPagedList(page_fetcher, self._PAGE_SIZE)

        return self.playlist_result(
            entries, playlist_id=tab_id, playlist_title=channel_data.get('channel_name'))
159 changes: 0 additions & 159 deletions yt_dlp/extractor/wasdtv.py

This file was deleted.