From 8815c92c14e30af39f17ba13c1451706ca47c5b5 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Thu, 1 Dec 2022 20:48:54 +0800 Subject: [PATCH 01/26] [extractor/niconicochannelplus] Add extractor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For VoD, live and list in channels on "Niconico Channel Plus" (Japanese: ニコニコチャンネルプラス). https://portal.nicochannel.jp/ Thanks to the following people who provided useful information: - lokilin, raised a request in issue 2537; - 雷兰濑 (railannad), raised a request in issue 4366 of streamlink/streamlink; - Trung0246, analyzed HTTP data in issue 4366 of streamlink/streamlink; - Lesmiscore, made cross-reference between the above two issues. --- README.md | 3 + yt_dlp/extractor/_extractors.py | 5 + yt_dlp/extractor/niconicochannelplus.py | 455 ++++++++++++++++++++++++ 3 files changed, 463 insertions(+) create mode 100644 yt_dlp/extractor/niconicochannelplus.py diff --git a/README.md b/README.md index fa55d130bb6..cdf64e29175 100644 --- a/README.md +++ b/README.md @@ -1774,6 +1774,9 @@ The following extractors use this feature: #### twitter * `force_graphql`: Force usage of the GraphQL API. 
By default it will only be used if login cookies are provided +#### niconicochannelplus +* `max_comments`: Limit the amount of comments to gather (default: `120`) + NOTE: These options may be changed/removed in the future without concern for backward compatibility diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 2fe15f6d286..e140eb123df 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1205,6 +1205,11 @@ NineCNineMediaIE, CPTwentyFourIE, ) +from .niconicochannelplus import ( + NiconicoChannelPlusIE, + NiconicoChannelPlusChannelVideosIE, + NiconicoChannelPlusChannelLivesIE, +) from .ninegag import NineGagIE from .ninenow import NineNowIE from .nintendo import NintendoIE diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py new file mode 100644 index 00000000000..76c88c760cc --- /dev/null +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -0,0 +1,455 @@ +import functools +import json + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + OnDemandPagedList, + UserNotLive, + filter_dict, + int_or_none, + parse_iso8601, + parse_qs, + traverse_obj, +) + + +class NiconicoChannelPlusBaseIE(InfoExtractor): + _WEBPAGE_BASE_URL = 'https://nicochannel.jp' + + def _call_api(self, path, item_id, *args, **kwargs): + return self._download_json( + f'https://nfc-api.nicochannel.jp/fc/{path}', video_id=item_id, *args, **kwargs) + + def _find_fanclub_site_id(self, channel_name): + fanclub_list_json = self._call_api( + 'content_providers/channels', item_id=f'channels/{channel_name}', + note='Fetching channel list', errnote='Unable to fetch channel list', + )['data']['content_providers'] + + for fanclub in fanclub_list_json: + if fanclub.get('domain') == f'{self._WEBPAGE_BASE_URL}/{channel_name}': + return fanclub['id'] + + raise ExtractorError(f'Non-existing channel: {channel_name}', expected=True) + + def _get_channel_info(self, fanclub_site_id): + 
return self._call_api( + f'fanclub_sites/{fanclub_site_id}/page_base_info', item_id=f'fanclub_sites/{fanclub_site_id}', + note='Fetching channel info', errnote='Unable to fetch channel info', + )['data']['fanclub_site'] + + +class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE): + IE_NAME = 'NiconicoChannelPlus' + IE_DESC = 'ニコニコチャンネルプラス' + _VALID_URL = r'https?://nicochannel\.jp/(?P<channel>[a-z\d\._-]+)/(?:video|live)/(?P<code>sm\w+)' + _TESTS = [{ + # real video url, normal channel name. + 'url': 'https://nicochannel.jp/kaorin/video/smsDd8EdFLcVZk9yyAhD6H7H', + 'info_dict': { + 'id': 'smsDd8EdFLcVZk9yyAhD6H7H', + 'title': '前田佳織里はニコ生がしたい!', + 'ext': 'mp4', + 'channel': '前田佳織里の世界攻略計画', + 'channel_id': 'kaorin', + 'channel_url': 'https://nicochannel.jp/kaorin', + 'live_status': 'not_live', + 'thumbnail': 'https://nicochannel.jp/public_html/contents/video_pages/74/thumbnail_path', + 'description': '2021年11月に放送された\n「前田佳織里はニコ生がしたい!」アーカイブになります。', + 'timestamp': 1641360276, + 'duration': 4097, + 'comment_count': int, + 'view_count': int, + 'tags': [], + 'upload_date': '20220105', + }, + 'params': { + 'skip_download': True, + }, + }, { + # real video url, numbers in channel name. + 'url': 'https://nicochannel.jp/dateno8noba/video/smVGqtKpdmva4Mcrw7rbeQ8Y', + 'only_matching': True, + }, { + # real video url, hyphens in channel name. + 'url': 'https://nicochannel.jp/owstv-plus/video/smUPTNizUxVspEu5YeDtV3VB', + 'only_matching': True, + }, { + # real video url, underscores in channel name. + 'url': 'https://nicochannel.jp/sakaguchi_kugimiya/video/smieBu2u2kDTYCvYZmLvUaUN', + 'only_matching': True, + }, { + # real video url, dots in channel name. + 'url': 'https://nicochannel.jp/kanase.ito/video/smWCdanZc5bJYMPYhpVp6Sn6', + 'only_matching': True, + }, { + # fake live url, normal channel name.
+ 'url': 'https://nicochannel.jp/example/live/sm3Xample', + 'info_dict': { + 'id': 'sm3Xample', + 'ext': 'mp4', + }, + 'params': { + 'skip_download': False, + }, + 'skip': '404 Not Found', + }, { + # was real live url, but 404 now. + 'url': 'https://nicochannel.jp/matsuda_shota/live/sm5VuVsRQSRqkyvLWFZtcou7', + 'info_dict': { + 'id': 'smpptPykLAjmZQchK4k4p93P', + 'ext': 'mp4', + }, + 'params': { + 'skip_download': True, + }, + 'skip': '404 Not Found', + }, { + # was real live url, but 404 now. + 'url': 'https://nicochannel.jp/ayapro/live/sm8CmA9tsXUsCjwiKE59xyb6', + 'info_dict': { + 'id': 'sm8CmA9tsXUsCjwiKE59xyb6', + 'ext': 'mp4', + }, + 'params': { + 'skip_download': True, + }, + 'skip': '404 Not Found', + }, { + # was real live url, but no video files for download. + 'url': 'https://nicochannel.jp/tasokosyo/live/smc7pjoBytehSmMrvT9CdA9f', + 'info_dict': { + 'id': 'smc7pjoBytehSmMrvT9CdA9f', + 'ext': 'mp4', + }, + 'params': { + 'skip_download': False, + }, + 'skip': 'The downloaded file is empty', + }] + + def _real_extract(self, url): + content_code, channel_id = self._match_valid_url(url).group('code', 'channel') + channel_name = self._get_channel_info( + self._find_fanclub_site_id(channel_id) + ).get('fanclub_site_name') + + data_json = self._call_api( + f'video_pages/{content_code}', item_id=content_code, + note='Fetching video page info', errnote='Unable to fetch video page info', + )['data']['video_page'] + + live_status, session_id = self._get_live_status_and_session_id(content_code, data_json) + + return { + # mandatory metafields + + 'id': content_code, + 'title': data_json['title'], + 'formats': self._extract_m3u8_formats( + # "authenticated_url" is a format string contains "{session_id}". 
+ m3u8_url=data_json['video_stream']['authenticated_url'].format(session_id=session_id), + video_id=content_code), + + # optional metafields + + 'ext': 'mp4', + '_format_sort_fields': ('tbr', 'vcodec', 'acodec'), + + 'channel': channel_name, + 'channel_id': channel_id, + 'channel_url': f'{self._WEBPAGE_BASE_URL}/{channel_id}', + + 'live_status': live_status, + + 'thumbnail': data_json.get('thumbnail_url'), + 'description': data_json.get('description'), + 'timestamp': parse_iso8601(data_json.get('released_at'), delimiter=' '), + 'duration': int_or_none(traverse_obj(data_json, ('active_video_filename', 'length'))), + 'comment_count': int_or_none(traverse_obj(data_json, ('video_aggregate_info', 'number_of_comments'))), + 'view_count': int_or_none(traverse_obj(data_json, ('video_aggregate_info', 'total_views'))), + 'tags': traverse_obj(data_json, ('video_tags', ..., 'tag')), + + '__post_extractor': self.extract_comments( + content_code=content_code, + comment_group_id=traverse_obj(data_json, ('video_comment_setting', 'comment_group_id'))), + } + + def _get_comments(self, content_code, comment_group_id): + item_id = f'{content_code}/comments' + + if not comment_group_id: + return None + + comment_access_token = self._call_api( + f'video_pages/{content_code}/comments_user_token', item_id, + note='Getting comment token', errnote='Unable to get comment token', + )['data']['access_token'] + + comment_list = self._download_json( + 'https://comm-api.sheeta.com/messages.history', video_id=item_id, + note='Fetching comments', errnote='Unable to fetch comments', + headers={'Content-Type': 'application/json'}, + query={ + 'sort_direction': 'asc', + 'limit': traverse_obj(self._configuration_arg('max_comments', [120]), (0, )), + }, + data=json.dumps({ + 'token': comment_access_token, + 'group_id': comment_group_id, + }).encode('ascii')) + + for comment in comment_list: + yield { + 'author': comment.get('nickname'), + 'author_id': comment.get('sender_id'), + 'id': comment.get('id'), 
+ 'text': comment.get('message'), + 'timestamp': int_or_none(traverse_obj(comment, 'updated_at', 'sent_at', 'created_at')), + 'author_is_uploader': comment.get('sender_id') == '-1', + } + + def _get_live_status_and_session_id(self, content_code, data_json): + video_type = data_json.get('type') + live_started_at = data_json.get('live_started_at') + live_finished_at = data_json.get('live_finished_at') + + if video_type == 'vod': + payload = {} + if live_finished_at: + live_status = 'was_live' + else: + live_status = 'not_live' + elif video_type == 'live': + if not live_started_at: + raise UserNotLive(video_id=content_code) + + if not live_finished_at: + live_status = 'is_live' + payload = {} + else: + live_status = 'was_live' + payload = {'broadcast_type': 'dvr'} + + # TODO: do ['video']['allow_dvr_flg'] and ['video']['convert_to_vod_flg'] affect DVR? + + video_allow_dvr_flg = traverse_obj(data_json, ('video', 'allow_dvr_flg')) + video_convert_to_vod_flg = traverse_obj(data_json, ('video', 'convert_to_vod_flg')) + + self.report_warning( + f'Live was ended, there might be no videos for download. ' + f'allow_dvr_flg = {video_allow_dvr_flg}, convert_to_vod_flg = {video_convert_to_vod_flg}.', + video_id=content_code) + else: + # new type appears, we will handle it soon. 
raise ExtractorError(f'Unknown type: {video_type}', video_id=content_code, expected=False) + + # help us to analyze when error occurs + self.to_screen(f'{content_code}: video_type={video_type}, live_status={live_status}') + + session_id = self._call_api( + f'video_pages/{content_code}/session_ids', item_id=f'{content_code}/session', + data=json.dumps(payload).encode('ascii'), headers={'Content-Type': 'application/json'}, + note='Getting session id', errnote='Unable to get session id', + )['data']['session_id'] + + return live_status, session_id + + +class NiconicoChannelPlusChannelBaseIE(NiconicoChannelPlusBaseIE): + _PAGE_SIZE = 12 + + def _fetch_paged_channel_video_list(self, path, query, channel_name, item_id, page): + item_list = self._call_api( + path, item_id, query={ + **query, + 'page': (page + 1), + 'per_page': self._PAGE_SIZE, + }, + note=f'Getting channel info (page {page + 1})', + errnote=f'Unable to get channel info (page {page + 1})', + )['data']['video_pages']['list'] + + for item in item_list: + content_code = item['content_code'] + + # "video/{code}" works for both VoD and live, but "live/{code}" doesn't work for VoD. + yield self.url_result( + f'{self._WEBPAGE_BASE_URL}/{channel_name}/video/{content_code}', NiconicoChannelPlusIE) + + +class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE): + IE_NAME = 'NiconicoChannelPlus:channel:videos' + IE_DESC = 'ニコニコチャンネルプラス - チャンネル - 動画リスト. nicochannel.jp/channel/videos' + _VALID_URL = r'https?://nicochannel\.jp/(?P<id>[a-z\d\._-]+)/videos(?:\?.*)?'
+ _TESTS = [{ + # query: None + 'url': 'https://nicochannel.jp/testman/videos', + 'info_dict': { + 'id': 'testman-videos', + 'title': '本番チャンネルプラステストマン-videos', + }, + 'playlist_mincount': 18, + }, { + # query: None + 'url': 'https://nicochannel.jp/testtarou/videos', + 'info_dict': { + 'id': 'testtarou-videos', + 'title': 'チャンネルプラステスト太郎-videos', + }, + 'playlist_mincount': 2, + }, { + # query: None + 'url': 'https://nicochannel.jp/testjirou/videos', + 'info_dict': { + 'id': 'testjirou-videos', + 'title': 'チャンネルプラステスト二郎-videos', + }, + 'playlist_mincount': 12, + }, { + # query: tag + 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8', + 'info_dict': { + 'id': 'testman-videos', + 'title': '本番チャンネルプラステストマン-videos', + }, + 'playlist_mincount': 6, + }, { + # query: vodType + 'url': 'https://nicochannel.jp/testman/videos?vodType=1', + 'info_dict': { + 'id': 'testman-videos', + 'title': '本番チャンネルプラステストマン-videos', + }, + 'playlist_mincount': 18, + }, { + # query: sort + 'url': 'https://nicochannel.jp/testman/videos?sort=-released_at', + 'info_dict': { + 'id': 'testman-videos', + 'title': '本番チャンネルプラステストマン-videos', + }, + 'playlist_mincount': 18, + }, { + # query: tag, vodType + 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1', + 'info_dict': { + 'id': 'testman-videos', + 'title': '本番チャンネルプラステストマン-videos', + }, + 'playlist_mincount': 6, + }, { + # query: tag, sort + 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&sort=-released_at', + 'info_dict': { + 'id': 'testman-videos', + 'title': '本番チャンネルプラステストマン-videos', + }, + 'playlist_mincount': 6, + }, { + # query: vodType, sort + 'url': 'https://nicochannel.jp/testman/videos?vodType=1&sort=-released_at', + 'info_dict': { + 'id': 'testman-videos', + 'title': '本番チャンネルプラステストマン-videos', + }, + 'playlist_mincount': 18, + }, { + # query: tag, vodType, sort + 'url': 
'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1&sort=-released_at', + 'info_dict': { + 'id': 'testman-videos', + 'title': '本番チャンネルプラステストマン-videos', + }, + 'playlist_mincount': 6, + }] + + def _real_extract(self, url): + """ + API parameters: + sort: + -released_at 公開日が新しい順 (newest to oldest) + released_at 公開日が古い順 (oldest to newest) + -number_of_vod_views 再生数が多い順 (most play count) + number_of_vod_views コメントが多い順 (most comments) + vod_type (is "vodType" in "url"): + 0 すべて (all) + 1 会員限定 (members only) + 2 一部無料 (partially free) + 3 レンタル (rental) + 4 生放送アーカイブ (live archives) + 5 アップロード動画 (uploaded videos) + """ + + channel_id = self._match_id(url) + fanclub_site_id = self._find_fanclub_site_id(channel_id) + channel_name = self._get_channel_info(fanclub_site_id).get('fanclub_site_name') + qs = parse_qs(url) + + return self.playlist_result( + OnDemandPagedList( + functools.partial( + self._fetch_paged_channel_video_list, f'fanclub_sites/{fanclub_site_id}/video_pages', + filter_dict({ + 'tag': traverse_obj(qs, ('tag', 0)), + 'sort': traverse_obj(qs, ('sort', 0), default='-released_at'), + 'vod_type': traverse_obj(qs, ('vodType', 0), default='0'), + }), + channel_id, f'{channel_id}/videos'), + self._PAGE_SIZE), + playlist_id=f'{channel_id}-videos', playlist_title=f'{channel_name}-videos') + + +class NiconicoChannelPlusChannelLivesIE(NiconicoChannelPlusChannelBaseIE): + IE_NAME = 'NiconicoChannelPlus:channel:lives' + IE_DESC = 'ニコニコチャンネルプラス - チャンネル - ライブリスト. 
nicochannel.jp/channel/lives' + _VALID_URL = r'https?://nicochannel\.jp/(?P<id>[a-z\d\._-]+)/lives' + _TESTS = [{ + 'url': 'https://nicochannel.jp/testman/lives', + 'info_dict': { + 'id': 'testman-lives', + 'title': '本番チャンネルプラステストマン-lives', + }, + 'playlist_mincount': 18, + }, { + 'url': 'https://nicochannel.jp/testtarou/lives', + 'info_dict': { + 'id': 'testtarou-lives', + 'title': 'チャンネルプラステスト太郎-lives', + }, + 'playlist_mincount': 2, + }, { + 'url': 'https://nicochannel.jp/testjirou/lives', + 'info_dict': { + 'id': 'testjirou-lives', + 'title': 'チャンネルプラステスト二郎-lives', + }, + 'playlist_mincount': 6, + }] + + def _real_extract(self, url): + """ + API parameters: + live_type: + 1 放送中 (on air) + 2 放送予定 (scheduled live streams, oldest to newest) + 3 過去の放送 - すべて (all ended live streams, newest to oldest) + 4 過去の放送 - 生放送アーカイブ (all archives for live streams, oldest to newest) + We use "4" instead of "3" because some recently ended live streams could not be downloaded. + """ + + channel_id = self._match_id(url) + fanclub_site_id = self._find_fanclub_site_id(channel_id) + channel_name = self._get_channel_info(fanclub_site_id).get('fanclub_site_name') + + return self.playlist_result( + OnDemandPagedList( + functools.partial( + self._fetch_paged_channel_video_list, f'fanclub_sites/{fanclub_site_id}/live_pages', + { + 'live_type': 4, + }, + channel_id, f'{channel_id}/lives'), + self._PAGE_SIZE), + playlist_id=f'{channel_id}-lives', playlist_title=f'{channel_name}-lives') From bb6be2533834139d358dac9daa898e308150fea6 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Fri, 2 Dec 2022 12:56:10 +0800 Subject: [PATCH 02/26] [extractor/niconicochannelplus] "ext" is also a mandatory metadata --- yt_dlp/extractor/niconicochannelplus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 76c88c760cc..86b1b2860e9 100644 ---
a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -151,10 +151,10 @@ def _real_extract(self, url): # "authenticated_url" is a format string contains "{session_id}". m3u8_url=data_json['video_stream']['authenticated_url'].format(session_id=session_id), video_id=content_code), + 'ext': 'mp4', # optional metafields - 'ext': 'mp4', '_format_sort_fields': ('tbr', 'vcodec', 'acodec'), 'channel': channel_name, From 71683c81ab1a535f217b89f53611d19ba1401ce8 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Tue, 13 Dec 2022 14:24:16 +0800 Subject: [PATCH 03/26] [extractor/niconicochannelplus] use "traverse_obj()" to find data with conditions --- yt_dlp/extractor/niconicochannelplus.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 86b1b2860e9..54aa31d4852 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -26,12 +26,12 @@ def _find_fanclub_site_id(self, channel_name): 'content_providers/channels', item_id=f'channels/{channel_name}', note='Fetching channel list', errnote='Unable to fetch channel list', )['data']['content_providers'] - - for fanclub in fanclub_list_json: - if fanclub.get('domain') == f'{self._WEBPAGE_BASE_URL}/{channel_name}': - return fanclub['id'] - - raise ExtractorError(f'Non-existing channel: {channel_name}', expected=True) + fanclub_id = traverse_obj(fanclub_list_json, ( + lambda _, v: v.get('domain') == f'{self._WEBPAGE_BASE_URL}/{channel_name}', 'id'), + get_all=False) + if not fanclub_id: + raise ExtractorError(f'Channel {channel_name} does not exist', expected=True) + return fanclub_id def _get_channel_info(self, fanclub_site_id): return self._call_api( From 074bdc19da426be3d8f35b40744ad0bbfe8ba544 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Tue, 13 Dec 2022 
18:54:25 +0800 Subject: [PATCH 04/26] [extractor/niconicochannelplus] DVR works if both "allow_dvr_flg" and "convert_to_vod_flg" are True The name of those fields is confusing. --- yt_dlp/extractor/niconicochannelplus.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 54aa31d4852..dd56dd1eba7 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -232,15 +232,14 @@ def _get_live_status_and_session_id(self, content_code, data_json): live_status = 'was_live' payload = {'broadcast_type': 'dvr'} - # TODO: do ['video']['allow_dvr_flg'] and ['video']['convert_to_vod_flg'] affect DVR? - video_allow_dvr_flg = traverse_obj(data_json, ('video', 'allow_dvr_flg')) video_convert_to_vod_flg = traverse_obj(data_json, ('video', 'convert_to_vod_flg')) - self.report_warning( - f'Live was ended, there might be no videos for download. ' - f'allow_dvr_flg = {video_allow_dvr_flg}, convert_to_vod_flg = {video_convert_to_vod_flg}.', - video_id=content_code) + self.write_debug(f'allow_dvr_flg = {video_allow_dvr_flg}, convert_to_vod_flg = {video_convert_to_vod_flg}.') + + if not (video_allow_dvr_flg and video_convert_to_vod_flg): + raise ExtractorError( + 'Live was ended, there is no video for download.', video_id=content_code, expected=True) else: # new type appears, we will handle it soon. 
raise ExtractorError(f'Unknown type: {video_type}', video_id=content_code, expected=False) From a6a95886343298b62fb3cc541e72986586105984 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Wed, 14 Dec 2022 13:46:11 +0800 Subject: [PATCH 05/26] [extractor/niconicochannelplus] age limit --- yt_dlp/extractor/niconicochannelplus.py | 36 +++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index dd56dd1eba7..8f1eccd9855 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -39,6 +39,13 @@ def _get_channel_info(self, fanclub_site_id): note='Fetching channel info', errnote='Unable to fetch channel info', )['data']['fanclub_site'] + def _get_channel_user_info(self, fanclub_site_id): + return self._call_api( + f'fanclub_sites/{fanclub_site_id}/user_info', item_id=f'fanclub_sites/{fanclub_site_id}', + note='Fetching channel user info', errnote='Unable to fetch channel user info', + data=json.dumps('null').encode('ascii'), + )['data']['fanclub_site'] + class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE): IE_NAME = 'NiconicoChannelPlus' @@ -54,6 +61,7 @@ class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE): 'channel': '前田佳織里の世界攻略計画', 'channel_id': 'kaorin', 'channel_url': 'https://nicochannel.jp/kaorin', + 'age_limit': None, 'live_status': 'not_live', 'thumbnail': 'https://nicochannel.jp/public_html/contents/video_pages/74/thumbnail_path', 'description': '2021年11月に放送された\n「前田佳織里はニコ生がしたい!」アーカイブになります。', @@ -67,6 +75,30 @@ class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE): 'params': { 'skip_download': True, }, + }, { + # real video url, normal channel name, age limited, test purpose channel. 
+ 'url': 'https://nicochannel.jp/testman/video/smDXbcrtyPNxLx9jc4BW69Ve', + 'info_dict': { + 'id': 'smDXbcrtyPNxLx9jc4BW69Ve', + 'title': 'test oshiro', + 'ext': 'mp4', + 'channel': '本番チャンネルプラステストマン', + 'channel_id': 'testman', + 'channel_url': 'https://nicochannel.jp/testman', + 'age_limit': 18, + 'live_status': 'was_live', + 'thumbnail': None, + 'description': None, + 'timestamp': 1666344616, + 'duration': 86465, + 'comment_count': int, + 'view_count': int, + 'tags': [], + 'upload_date': '20221021', + }, + 'params': { + 'skip_download': True, + }, }, { # real video url, numbers in channel name. 'url': 'https://nicochannel.jp/dateno8noba/video/smVGqtKpdmva4Mcrw7rbeQ8Y', @@ -134,6 +166,9 @@ def _real_extract(self, url): channel_name = self._get_channel_info( self._find_fanclub_site_id(channel_id) ).get('fanclub_site_name') + age_limit = traverse_obj(self._get_channel_user_info( + self._find_fanclub_site_id(channel_id) + ), ('content_provider', 'age_limit')) data_json = self._call_api( f'video_pages/{content_code}', item_id=content_code, @@ -161,6 +196,7 @@ def _real_extract(self, url): 'channel_id': channel_id, 'channel_url': f'{self._WEBPAGE_BASE_URL}/{channel_id}', + 'age_limit': age_limit, 'live_status': live_status, 'thumbnail': data_json.get('thumbnail_url'), From 6caaf7a1aebe5c2d82edf130ddb5daf095592440 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Wed, 14 Dec 2022 13:58:03 +0800 Subject: [PATCH 06/26] [extractor/niconicochannelplus] rename func for "page_base_info" --- yt_dlp/extractor/niconicochannelplus.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 8f1eccd9855..d557b811b08 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -33,10 +33,10 @@ def _find_fanclub_site_id(self, channel_name): raise ExtractorError(f'Channel {channel_name} does not 
exist', expected=True) return fanclub_id - def _get_channel_info(self, fanclub_site_id): + def _get_channel_base_info(self, fanclub_site_id): return self._call_api( f'fanclub_sites/{fanclub_site_id}/page_base_info', item_id=f'fanclub_sites/{fanclub_site_id}', - note='Fetching channel info', errnote='Unable to fetch channel info', + note='Fetching channel base info', errnote='Unable to fetch channel base info', )['data']['fanclub_site'] def _get_channel_user_info(self, fanclub_site_id): @@ -163,7 +163,7 @@ class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE): def _real_extract(self, url): content_code, channel_id = self._match_valid_url(url).group('code', 'channel') - channel_name = self._get_channel_info( + channel_name = self._get_channel_base_info( self._find_fanclub_site_id(channel_id) ).get('fanclub_site_name') age_limit = traverse_obj(self._get_channel_user_info( @@ -419,7 +419,7 @@ def _real_extract(self, url): channel_id = self._match_id(url) fanclub_site_id = self._find_fanclub_site_id(channel_id) - channel_name = self._get_channel_info(fanclub_site_id).get('fanclub_site_name') + channel_name = self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name') qs = parse_qs(url) return self.playlist_result( @@ -476,7 +476,7 @@ def _real_extract(self, url): channel_id = self._match_id(url) fanclub_site_id = self._find_fanclub_site_id(channel_id) - channel_name = self._get_channel_info(fanclub_site_id).get('fanclub_site_name') + channel_name = self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name') return self.playlist_result( OnDemandPagedList( From 131e646a7985e8d2f121566485ac0570fa832b34 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Wed, 14 Dec 2022 14:05:53 +0800 Subject: [PATCH 07/26] [extractor/niconicochannelplus] remove one-time variables --- yt_dlp/extractor/niconicochannelplus.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git 
a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index d557b811b08..2c5af4d2798 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -163,12 +163,7 @@ class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE): def _real_extract(self, url): content_code, channel_id = self._match_valid_url(url).group('code', 'channel') - channel_name = self._get_channel_base_info( - self._find_fanclub_site_id(channel_id) - ).get('fanclub_site_name') - age_limit = traverse_obj(self._get_channel_user_info( - self._find_fanclub_site_id(channel_id) - ), ('content_provider', 'age_limit')) + fanclub_site_id = self._find_fanclub_site_id(channel_id) data_json = self._call_api( f'video_pages/{content_code}', item_id=content_code, @@ -192,11 +187,11 @@ def _real_extract(self, url): '_format_sort_fields': ('tbr', 'vcodec', 'acodec'), - 'channel': channel_name, + 'channel': self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name'), 'channel_id': channel_id, 'channel_url': f'{self._WEBPAGE_BASE_URL}/{channel_id}', - 'age_limit': age_limit, + 'age_limit': traverse_obj(self._get_channel_user_info(fanclub_site_id), ('content_provider', 'age_limit')), 'live_status': live_status, 'thumbnail': data_json.get('thumbnail_url'), From d037d8081170f750183b7cc5c9b472a20f3c6da6 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Wed, 14 Dec 2022 23:38:17 +0800 Subject: [PATCH 08/26] [extractor/niconicochannelplus] remove None values from tests --- yt_dlp/extractor/niconicochannelplus.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 2c5af4d2798..25611b72b9f 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -61,7 +61,6 @@ class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE): 'channel': '前田佳織里の世界攻略計画', 'channel_id': 'kaorin', 'channel_url': 
'https://nicochannel.jp/kaorin', - 'age_limit': None, 'live_status': 'not_live', 'thumbnail': 'https://nicochannel.jp/public_html/contents/video_pages/74/thumbnail_path', 'description': '2021年11月に放送された\n「前田佳織里はニコ生がしたい!」アーカイブになります。', @@ -87,8 +86,6 @@ class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE): 'channel_url': 'https://nicochannel.jp/testman', 'age_limit': 18, 'live_status': 'was_live', - 'thumbnail': None, - 'description': None, 'timestamp': 1666344616, 'duration': 86465, 'comment_count': int, From 49288eb3ff8a0f043ad646aca43a55d7be36dd81 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Fri, 16 Dec 2022 21:04:05 +0800 Subject: [PATCH 09/26] [extractor/niconicochannelplus] remove the "ext" field from info_dict The "ext" field does not work with "formats" and will be ignored. --- yt_dlp/extractor/niconicochannelplus.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 25611b72b9f..374b6467fde 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -178,7 +178,6 @@ def _real_extract(self, url): # "authenticated_url" is a format string contains "{session_id}". 
m3u8_url=data_json['video_stream']['authenticated_url'].format(session_id=session_id), video_id=content_code), - 'ext': 'mp4', # optional metafields From 70eef1cc33a710d5adc8a4b5b2617400fdcadd4d Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Tue, 14 Mar 2023 22:36:42 +0800 Subject: [PATCH 10/26] [extractor/niconicochannelplus] add a header for video_pages link --- yt_dlp/extractor/niconicochannelplus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 374b6467fde..3d4666e3bca 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -163,7 +163,7 @@ def _real_extract(self, url): fanclub_site_id = self._find_fanclub_site_id(channel_id) data_json = self._call_api( - f'video_pages/{content_code}', item_id=content_code, + f'video_pages/{content_code}', item_id=content_code, headers={'fc_use_device': 'null'}, note='Fetching video page info', errnote='Unable to fetch video page info', )['data']['video_page'] From 6dadb6fd7091b3175537fb96781abae5b2a35052 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Thu, 4 May 2023 00:49:04 +0800 Subject: [PATCH 11/26] [extractor/niconicochannelplus] raise_no_formats for upcoming live Thanks for bashonly's comment! 
--- yt_dlp/extractor/niconicochannelplus.py | 28 +++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 3d4666e3bca..20314b030b1 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -5,7 +5,6 @@ from ..utils import ( ExtractorError, OnDemandPagedList, - UserNotLive, filter_dict, int_or_none, parse_iso8601, @@ -169,15 +168,31 @@ def _real_extract(self, url): live_status, session_id = self._get_live_status_and_session_id(content_code, data_json) + release_timestamp_str = data_json.get('live_scheduled_start_at') + + formats = [] + + if live_status == 'is_upcoming': + if release_timestamp_str: + self.raise_no_formats( + f'This live event will begin at {release_timestamp_str} UTC', expected=True, + video_id=content_code) + else: + # has not encountered this situation, but still give it a chance. + self.raise_no_formats( + 'This event has not started yet', expected=True, video_id=content_code) + else: + formats = self._extract_m3u8_formats( + # "authenticated_url" is a format string contains "{session_id}". + m3u8_url=data_json['video_stream']['authenticated_url'].format(session_id=session_id), + video_id=content_code) + return { # mandatory metafields 'id': content_code, 'title': data_json['title'], - 'formats': self._extract_m3u8_formats( - # "authenticated_url" is a format string contains "{session_id}". 
- m3u8_url=data_json['video_stream']['authenticated_url'].format(session_id=session_id), - video_id=content_code), + 'formats': formats, # optional metafields @@ -193,6 +208,7 @@ def _real_extract(self, url): 'thumbnail': data_json.get('thumbnail_url'), 'description': data_json.get('description'), 'timestamp': parse_iso8601(data_json.get('released_at'), delimiter=' '), + 'release_timestamp': parse_iso8601(release_timestamp_str, delimiter=' '), 'duration': int_or_none(traverse_obj(data_json, ('active_video_filename', 'length'))), 'comment_count': int_or_none(traverse_obj(data_json, ('video_aggregate_info', 'number_of_comments'))), 'view_count': int_or_none(traverse_obj(data_json, ('video_aggregate_info', 'total_views'))), @@ -250,7 +266,7 @@ def _get_live_status_and_session_id(self, content_code, data_json): live_status = 'not_live' elif video_type == 'live': if not live_started_at: - raise UserNotLive(video_id=content_code) + return 'is_upcoming', '' if not live_finished_at: live_status = 'is_live' From 8bdba71eba08f5166b74c95e2b95bfc82a7167cd Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sun, 4 Jun 2023 22:01:38 +0800 Subject: [PATCH 12/26] [extractor/niconicochannelplus] add headers for session_ids link --- yt_dlp/extractor/niconicochannelplus.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 20314b030b1..53818fc242c 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -292,7 +292,11 @@ def _get_live_status_and_session_id(self, content_code, data_json): session_id = self._call_api( f'video_pages/{content_code}/session_ids', item_id=f'{content_code}/session', - data=json.dumps(payload).encode('ascii'), headers={'Content-Type': 'application/json'}, + data=json.dumps(payload).encode('ascii'), headers={ + 'Content-Type': 'application/json', + 'fc_use_device': 'null', 
+ 'origin': 'https://nicochannel.jp', + }, note='Getting session id', errnote='Unable to get session id', )['data']['session_id'] From 6f51c4b5efa6f1a654e5f570801edeac0543191c Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sun, 4 Jun 2023 22:08:27 +0800 Subject: [PATCH 13/26] [extractor/niconicochannelplus] simplify regex as suggested by pukkandan --- yt_dlp/extractor/niconicochannelplus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 53818fc242c..c77aaf227fc 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -49,7 +49,7 @@ def _get_channel_user_info(self, fanclub_site_id): class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE): IE_NAME = 'NiconicoChannelPlus' IE_DESC = 'ニコニコチャンネルプラス' - _VALID_URL = r'https?://nicochannel\.jp/(?P[a-z\d\._-]+)/(?:video|live)/(?Psm\w+)' + _VALID_URL = r'https?://nicochannel\.jp/(?P[\w.-]+)/(?:video|live)/(?Psm\w+)' _TESTS = [{ # real video url, normal channel name. 'url': 'https://nicochannel.jp/kaorin/video/smsDd8EdFLcVZk9yyAhD6H7H', From 98afa3d45b93f7f7cbeb045f365b58e0a9619cee Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sun, 4 Jun 2023 22:16:08 +0800 Subject: [PATCH 14/26] [extractor/niconicochannelplus] remove un-testable tests They used to be useful. 
--- yt_dlp/extractor/niconicochannelplus.py | 63 +------------------------ 1 file changed, 1 insertion(+), 62 deletions(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index c77aaf227fc..8fd7aa4c0b1 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -51,7 +51,6 @@ class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE): IE_DESC = 'ニコニコチャンネルプラス' _VALID_URL = r'https?://nicochannel\.jp/(?P[\w.-]+)/(?:video|live)/(?Psm\w+)' _TESTS = [{ - # real video url, normal channel name. 'url': 'https://nicochannel.jp/kaorin/video/smsDd8EdFLcVZk9yyAhD6H7H', 'info_dict': { 'id': 'smsDd8EdFLcVZk9yyAhD6H7H', @@ -74,7 +73,7 @@ class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE): 'skip_download': True, }, }, { - # real video url, normal channel name, age limited, test purpose channel. + # age limited video; test purpose channel. 'url': 'https://nicochannel.jp/testman/video/smDXbcrtyPNxLx9jc4BW69Ve', 'info_dict': { 'id': 'smDXbcrtyPNxLx9jc4BW69Ve', @@ -95,66 +94,6 @@ class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE): 'params': { 'skip_download': True, }, - }, { - # real video url, numbers in channel name. - 'url': 'https://nicochannel.jp/dateno8noba/video/smVGqtKpdmva4Mcrw7rbeQ8Y', - 'only_matching': True, - }, { - # real video url, hyphens in channel name. - 'url': 'https://nicochannel.jp/owstv-plus/video/smUPTNizUxVspEu5YeDtV3VB', - 'only_matching': True, - }, { - # real video url, underscores in channel name. - 'url': 'https://nicochannel.jp/sakaguchi_kugimiya/video/smieBu2u2kDTYCvYZmLvUaUN', - 'only_matching': True, - }, { - # real video url, dots in channel name. - 'url': 'https://nicochannel.jp/kanase.ito/video/smWCdanZc5bJYMPYhpVp6Sn6', - 'only_matching': True, - }, { - # fake live url, normal channel name. 
- 'url': 'https://nicochannel.jp/example/live/sm3Xample', - 'info_dict': { - 'id': 'sm3Xample', - 'ext': 'mp4', - }, - 'params': { - 'skip_download': False, - }, - 'skip': '404 Not Found', - }, { - # was real live url, but 404 now. - 'url': 'https://nicochannel.jp/matsuda_shota/live/sm5VuVsRQSRqkyvLWFZtcou7', - 'info_dict': { - 'id': 'smpptPykLAjmZQchK4k4p93P', - 'ext': 'mp4', - }, - 'params': { - 'skip_download': True, - }, - 'skip': '404 Not Found', - }, { - # was real live url, but 404 now. - 'url': 'https://nicochannel.jp/ayapro/live/sm8CmA9tsXUsCjwiKE59xyb6', - 'info_dict': { - 'id': 'sm8CmA9tsXUsCjwiKE59xyb6', - 'ext': 'mp4', - }, - 'params': { - 'skip_download': True, - }, - 'skip': '404 Not Found', - }, { - # was real live url, but no video files for download. - 'url': 'https://nicochannel.jp/tasokosyo/live/smc7pjoBytehSmMrvT9CdA9f', - 'info_dict': { - 'id': 'smc7pjoBytehSmMrvT9CdA9f', - 'ext': 'mp4', - }, - 'params': { - 'skip_download': False, - }, - 'skip': 'The downloaded file is empty', }] def _real_extract(self, url): From e5ba9bbec3504c2412f00b0c4b405f7cca261073 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sun, 4 Jun 2023 22:33:16 +0800 Subject: [PATCH 15/26] [extractor/niconicochannelplus] add a header for video & live pages link --- yt_dlp/extractor/niconicochannelplus.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 8fd7aa4c0b1..cc89501c0d4 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -252,6 +252,7 @@ def _fetch_paged_channel_video_list(self, path, query, channel_name, item_id, pa 'page': (page + 1), 'per_page': self._PAGE_SIZE, }, + headers={'fc_use_device': 'null'}, note=f'Getting channel info (page {page + 1})', errnote=f'Unable to get channel info (page {page + 1})', )['data']['video_pages']['list'] From bbf19a66d15d9767036ad021b361a41ef466153d Mon Sep 
17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Fri, 22 Sep 2023 00:59:18 +0800 Subject: [PATCH 16/26] [extractor/niconicochannelplus] "traverse_obj" is non-fatal Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/niconicochannelplus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index cc89501c0d4..a13799a5160 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -26,7 +26,7 @@ def _find_fanclub_site_id(self, channel_name): note='Fetching channel list', errnote='Unable to fetch channel list', )['data']['content_providers'] fanclub_id = traverse_obj(fanclub_list_json, ( - lambda _, v: v.get('domain') == f'{self._WEBPAGE_BASE_URL}/{channel_name}', 'id'), + lambda _, v: v['domain'] == f'{self._WEBPAGE_BASE_URL}/{channel_name}', 'id'), get_all=False) if not fanclub_id: raise ExtractorError(f'Channel {channel_name} does not exist', expected=True) From 0c70647908f29d089bdb80726076044ea5b8dfcc Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Fri, 22 Sep 2023 01:04:11 +0800 Subject: [PATCH 17/26] [extractor/niconicochannelplus] simplify message in "raise_no_formats" Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/niconicochannelplus.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index a13799a5160..7155d80fea2 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -113,13 +113,10 @@ def _real_extract(self, url): if live_status == 'is_upcoming': if release_timestamp_str: - self.raise_no_formats( - f'This live event will begin at {release_timestamp_str} UTC', expected=True, - video_id=content_code) + msg = f'This 
live event will begin at {release_timestamp_str} UTC' else: - # has not encountered this situation, but still give it a chance. - self.raise_no_formats( - 'This event has not started yet', expected=True, video_id=content_code) + msg = 'This event has not started yet' + self.raise_no_formats(msg, expected=True, video_id=content_code) else: formats = self._extract_m3u8_formats( # "authenticated_url" is a format string contains "{session_id}". From 82ed6ec95120d96a61e0bda4259b7eb1b629a95c Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Fri, 22 Sep 2023 01:30:53 +0800 Subject: [PATCH 18/26] [extractor/niconicochannelplus] extraction for metadata should not be fatal Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/niconicochannelplus.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 7155d80fea2..74ad5515770 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -33,17 +33,17 @@ def _find_fanclub_site_id(self, channel_name): return fanclub_id def _get_channel_base_info(self, fanclub_site_id): - return self._call_api( + return traverse_obj(self._call_api( f'fanclub_sites/{fanclub_site_id}/page_base_info', item_id=f'fanclub_sites/{fanclub_site_id}', - note='Fetching channel base info', errnote='Unable to fetch channel base info', - )['data']['fanclub_site'] + note='Fetching channel base info', errnote='Unable to fetch channel base info', fatal=False, + ), ('data', 'fanclub_site', {dict})) or {} def _get_channel_user_info(self, fanclub_site_id): - return self._call_api( + return traverse_obj(self._call_api( f'fanclub_sites/{fanclub_site_id}/user_info', item_id=f'fanclub_sites/{fanclub_site_id}', - note='Fetching channel user info', errnote='Unable
to fetch channel user info', fatal=False, data=json.dumps('null').encode('ascii'), - )['data']['fanclub_site'] + ), ('data', 'fanclub_site', {dict})) or {} class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE): From 451fc91dc652352ce4f59c0ceb6a1b4531169e7c Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Fri, 22 Sep 2023 23:17:16 +0800 Subject: [PATCH 19/26] [extractor/niconicochannelplus] rm repetition Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/niconicochannelplus.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 74ad5515770..67276e90f8d 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -191,22 +191,20 @@ def _get_comments(self, content_code, comment_group_id): def _get_live_status_and_session_id(self, content_code, data_json): video_type = data_json.get('type') - live_started_at = data_json.get('live_started_at') live_finished_at = data_json.get('live_finished_at') + payload = {} if video_type == 'vod': - payload = {} if live_finished_at: live_status = 'was_live' else: live_status = 'not_live' elif video_type == 'live': - if not live_started_at: + if not data_json.get('live_started_at'): return 'is_upcoming', '' if not live_finished_at: live_status = 'is_live' - payload = {} else: live_status = 'was_live' payload = {'broadcast_type': 'dvr'} From e0aade752dcfb915d9924cb92c8c61a86752b47f Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sat, 23 Sep 2023 01:02:10 +0800 Subject: [PATCH 20/26] [extractor/niconicochannelplus] "traverse_obj" playlist items simply and safely Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/niconicochannelplus.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git 
a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 67276e90f8d..04cb188f352 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -241,7 +241,7 @@ class NiconicoChannelPlusChannelBaseIE(NiconicoChannelPlusBaseIE): _PAGE_SIZE = 12 def _fetch_paged_channel_video_list(self, path, query, channel_name, item_id, page): - item_list = self._call_api( + response = self._call_api( path, item_id, query={ **query, 'page': (page + 1), @@ -249,12 +249,9 @@ def _fetch_paged_channel_video_list(self, path, query, channel_name, item_id, pa }, headers={'fc_use_device': 'null'}, note=f'Getting channel info (page {page + 1})', - errnote=f'Unable to get channel info (page {page + 1})', - )['data']['video_pages']['list'] - - for item in item_list: - content_code = item['content_code'] + errnote=f'Unable to get channel info (page {page + 1})') + for content_code in traverse_obj(response, ('data', 'video_pages', 'list', ..., 'content_code')): # "video/{code}" works for both VoD and live, but "live/{code}" doesn't work for VoD. 
yield self.url_result( f'{self._WEBPAGE_BASE_URL}/{channel_name}/video/{content_code}', NiconicoChannelPlusIE) From 3e5c6b9acd2d28c0c30ef9bfd36507b3c0b02a94 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sat, 23 Sep 2023 02:45:54 +0800 Subject: [PATCH 21/26] [extractor/niconicochannelplus] "traverse_obj" data_json in one call Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/niconicochannelplus.py | 32 ++++++++++--------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 04cb188f352..d686fa23e39 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -7,9 +7,10 @@ OnDemandPagedList, filter_dict, int_or_none, - parse_iso8601, parse_qs, traverse_obj, + unified_timestamp, + url_or_none, ) @@ -124,32 +125,25 @@ def _real_extract(self, url): video_id=content_code) return { - # mandatory metafields - 'id': content_code, - 'title': data_json['title'], 'formats': formats, - - # optional metafields - '_format_sort_fields': ('tbr', 'vcodec', 'acodec'), - 'channel': self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name'), 'channel_id': channel_id, 'channel_url': f'{self._WEBPAGE_BASE_URL}/{channel_id}', - 'age_limit': traverse_obj(self._get_channel_user_info(fanclub_site_id), ('content_provider', 'age_limit')), 'live_status': live_status, - - 'thumbnail': data_json.get('thumbnail_url'), - 'description': data_json.get('description'), - 'timestamp': parse_iso8601(data_json.get('released_at'), delimiter=' '), - 'release_timestamp': parse_iso8601(release_timestamp_str, delimiter=' '), - 'duration': int_or_none(traverse_obj(data_json, ('active_video_filename', 'length'))), - 'comment_count': int_or_none(traverse_obj(data_json, ('video_aggregate_info', 'number_of_comments'))), - 'view_count': int_or_none(traverse_obj(data_json, 
('video_aggregate_info', 'total_views'))), - 'tags': traverse_obj(data_json, ('video_tags', ..., 'tag')), - + 'release_timestamp': unified_timestamp(release_timestamp_str), + **traverse_obj(data_json, { + 'title': ('title', {str}), + 'thumbnail': ('thumbnail_url', {url_or_none}), + 'description': ('description', {str}), + 'timestamp': ('released_at', {unified_timestamp}), + 'duration': ('active_video_filename', 'length', {int_or_none}), + 'comment_count': ('video_aggregate_info', 'number_of_comments', {int_or_none}), + 'view_count': ('video_aggregate_info', 'total_views', {int_or_none}), + 'tags': ('video_tags', ..., 'tag', {str}), + }), '__post_extractor': self.extract_comments( content_code=content_code, comment_group_id=traverse_obj(data_json, ('video_comment_setting', 'comment_group_id'))), From bb8207450778b5b72949594ba1fb3903f541d6ed Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sat, 23 Sep 2023 02:56:17 +0800 Subject: [PATCH 22/26] [extractor/niconicochannelplus] "traverse_obj" comment in one call Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/niconicochannelplus.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index d686fa23e39..02568be4823 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -8,6 +8,7 @@ filter_dict, int_or_none, parse_qs, + str_or_none, traverse_obj, unified_timestamp, url_or_none, @@ -173,15 +174,15 @@ def _get_comments(self, content_code, comment_group_id): 'group_id': comment_group_id, }).encode('ascii')) - for comment in comment_list: - yield { - 'author': comment.get('nickname'), - 'author_id': comment.get('sender_id'), - 'id': comment.get('id'), - 'text': comment.get('message'), - 'timestamp': int_or_none(traverse_obj(comment, 'updated_at', 'sent_at', 'created_at')), - 
'author_is_uploader': comment.get('sender_id') == '-1', - } + for comment in traverse_obj(comment_list, ...): + yield traverse_obj(comment, { + 'author': ('nickname', {str}), + 'author_id': ('sender_id', {str_or_none}), + 'id': ('id', {str_or_none}), + 'text': ('message', {str}), + 'timestamp': (('updated_at', 'sent_at', 'created_at'), {unified_timestamp}), + 'author_is_uploader': ('sender_id', {lambda x: x == '-1'}), + }, get_all=False) def _get_live_status_and_session_id(self, content_code, data_json): video_type = data_json.get('type') From 0ac68ff225dadcfbb860950da301a2a2a6a05861 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sat, 23 Sep 2023 10:29:29 +0800 Subject: [PATCH 23/26] [extractor/niconicochannelplus] "write_debug" debug info Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/niconicochannelplus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 02568be4823..4df0de66750 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -217,7 +217,7 @@ def _get_live_status_and_session_id(self, content_code, data_json): raise ExtractorError(f'Unknown type: {video_type}', video_id=content_code, expected=False) # help us to analyze when error occurs - self.to_screen(f'{content_code}: video_type={video_type}, live_status={live_status}') + self.write_debug(f'{content_code}: video_type={video_type}, live_status={live_status}') session_id = self._call_api( f'video_pages/{content_code}/session_ids', item_id=f'{content_code}/session', From 283152b1d0861ff0de0e004ae7f18153d228de6f Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sat, 23 Sep 2023 10:36:29 +0800 Subject: [PATCH 24/26] [extractor/niconicochannelplus] add sanity check for "--extractor-args" Co-authored-by: bashonly 
<88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/niconicochannelplus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 4df0de66750..5476225835a 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -167,7 +167,7 @@ def _get_comments(self, content_code, comment_group_id): headers={'Content-Type': 'application/json'}, query={ 'sort_direction': 'asc', - 'limit': traverse_obj(self._configuration_arg('max_comments', [120]), (0, )), + 'limit': int_or_none(self._configuration_arg('max_comments', [''])[0]) or 120, }, data=json.dumps({ 'token': comment_access_token, From 7fefca8fc949cdfbe13edf85614cca93f64b34f3 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 23 Sep 2023 22:11:52 +0000 Subject: [PATCH 25/26] comment cleanup --- yt_dlp/extractor/niconicochannelplus.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 5476225835a..89af3f7b53e 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -121,7 +121,7 @@ def _real_extract(self, url): self.raise_no_formats(msg, expected=True, video_id=content_code) else: formats = self._extract_m3u8_formats( - # "authenticated_url" is a format string contains "{session_id}". + # "authenticated_url" is a format string that contains "{session_id}". m3u8_url=data_json['video_stream']['authenticated_url'].format(session_id=session_id), video_id=content_code) @@ -213,10 +213,8 @@ def _get_live_status_and_session_id(self, content_code, data_json): raise ExtractorError( 'Live was ended, there is no video for download.', video_id=content_code, expected=True) else: - # new type appears, we will handle it soon. 
raise ExtractorError(f'Unknown type: {video_type}', video_id=content_code, expected=False) - # help us to analyze when error occurs self.write_debug(f'{content_code}: video_type={video_type}, live_status={live_status}') session_id = self._call_api( @@ -247,7 +245,7 @@ def _fetch_paged_channel_video_list(self, path, query, channel_name, item_id, pa errnote=f'Unable to get channel info (page {page + 1})') for content_code in traverse_obj(response, ('data', 'video_pages', 'list', ..., 'content_code')): - # "video/{code}" works for both VoD and live, but "live/{code}" doesn't work for VoD. + # "video/{content_code}" works for both VOD and live, but "live/{content_code}" doesn't work for VOD yield self.url_result( f'{self._WEBPAGE_BASE_URL}/{channel_name}/video/{content_code}', NiconicoChannelPlusIE) From f1db454ac2511e686054ae55773ed91665f04b41 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 23 Sep 2023 22:17:24 +0000 Subject: [PATCH 26/26] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ffb30764433..ee9857401aa 100644 --- a/README.md +++ b/README.md @@ -1846,7 +1846,7 @@ The following extractors use this feature: * `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv` #### niconicochannelplus -* `max_comments`: Limit the amount of comments to gather (default: `120`) +* `max_comments`: Maximum number of comments to extract - default is `120` #### tiktok * `api_hostname`: Hostname to use for mobile API requests, e.g. `api-h2.tiktokv.com`