# Lives in yt_dlp/downloader/niconico.py.  The same change set imports this
# class in yt_dlp/downloader/__init__.py next to NiconicoDmcFD/NiconicoLiveFD,
# registers it under the 'niconico_dms' protocol key, and maps 'niconico_dms'
# to the short name 'dms' in shorten_protocol_name().
class NiconicoDmsFD(FileDownloader):
    """Download niconico douga served through DMS.

    The format URL handed to this downloader is a 'niconico_dms:' pseudo-URL.
    It is resolved to a manifest URL through
    NiconicoIE._get_dms_manifest_url(), and the actual transfer is delegated
    to whichever downloader get_suitable_downloader() selects for the 'm3u8'
    protocol.
    """

    def real_download(self, filename, info_dict):
        """Resolve the DMS manifest and download it to *filename*.

        Returns the result of the delegated downloader's real_download(), or
        False when no manifest URL could be obtained.
        """
        # NOTE(review): imported inside the method as in the original -
        # presumably to avoid a circular import between the downloader and
        # extractor packages; confirm.
        from ..extractor.niconico import NiconicoIE

        self.to_screen('[%s] Downloading from DMS' % self.FD_NAME)
        manifest_url = NiconicoIE(self.ydl)._get_dms_manifest_url(info_dict)
        if not manifest_url:
            # Fail here with a clear message instead of passing url=None on
            # to the HLS downloader.
            self.report_error('Unable to obtain the DMS manifest URL')
            return False

        # Delegate with a shallow copy so the caller's dict keeps its original
        # 'niconico_dms:' pseudo-URL and protocol.
        hls_info = {**info_dict, 'url': manifest_url, 'protocol': 'm3u8'}
        fd = get_suitable_downloader(hls_info, params=self.params)(self.ydl, self.params)
        return fd.real_download(filename, hls_info)
# Extractor-side helper in yt_dlp/extractor/niconico.py; NiconicoDmsFD calls it
# on a NiconicoIE instance at download time.
def _get_dms_manifest_url(self, info_dict):
    """Request the manifest URL for a 'niconico_dms:' format.

    info_dict['url'] has the form
    'niconico_dms:<video id>/<video quality id>/<audio quality id>'; the two
    quality ids are posted to the access-rights endpoint as one output pair.
    info_dict['_api_data'] supplies the watch track id and the access-right
    key sent with the request.

    Returns the 'contentUrl' value from the response's 'data' object, or None
    when the response does not contain one.
    """
    video_id = info_dict['id']

    # Split on the first colon only, then drop the leading video id and keep
    # the [video quality id, audio quality id] pair.
    stream_spec = info_dict['url'].split(':', 1)[1]
    requested_outputs = stream_spec.split('/')[1:3]
    payload = json.dumps({'outputs': [requested_outputs]}).encode('utf-8')

    track_id = traverse_obj(info_dict, ('_api_data', 'client', 'watchTrackId'))
    access_right_key = traverse_obj(
        info_dict, ('_api_data', 'media', 'domand', 'accessRightKey'))

    response = self._download_json(
        f'https://nvapi.nicovideo.jp/v1/watch/{video_id}/access-rights/hls?actionTrackId={track_id}',
        video_id, note='Requesting Manifest Url', errnote='Unable to fetch data',
        data=payload,
        headers={
            # NOTE(review): this forces a brotli-only response; confirm that
            # it decodes on installations without brotli support.
            'Accept-Encoding': 'br',
            'Content-Type': 'application/json',
            'X-Request-With': 'https://www.nicovideo.jp',
            'X-Access-Right-Key': access_right_key,
            'X-Frontend-Id': '6',
            'X-Frontend-Version': '0',
        })

    # None-safe lookup: a response without 'data' yields None instead of an
    # AttributeError.
    return traverse_obj(response, ('data', 'contentUrl'))
# Extractor-side helper in yt_dlp/extractor/niconico.py; _real_extract() calls
# it for every audio/video pair listed under api_data['media']['domand'].
def _extract_dms_format_for_quality(self, video_id, audio_quality, video_quality):
    """Build the format dict for one DMS audio/video quality pair.

    Returns None when either quality is not flagged 'isAvailable' or has no
    'id'; otherwise returns a format dict whose 'url' is the
    'niconico_dms:<video id>/<video quality id>/<audio quality id>' pseudo-URL
    and whose 'protocol' is 'niconico_dms'.
    """
    if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
        return None

    video_format_id = video_quality.get('id')
    audio_format_id = audio_quality.get('id')
    if not video_format_id or not audio_format_id:
        # Without both ids neither the format id nor the pseudo-URL can be
        # built ('-'.join() would raise on None), so skip this pair.
        return None

    format_id = '-'.join((
        'dms',
        remove_start(video_format_id, 'video-'),
        remove_start(audio_format_id, 'audio-'),
        'hls',
    ))

    return {
        'url': f'niconico_dms:{video_id}/{video_format_id}/{audio_format_id}',
        'format_id': format_id,
        'format_note': join_nonempty('DMS', video_quality.get('label'), 'hls', delim=' '),
        # NOTE(review): container and codecs are hard-coded, mirroring the
        # DMC formats - confirm they hold for DMS streams.
        'ext': 'mp4',
        'acodec': 'aac',
        'vcodec': 'h264',
        'abr': float_or_none(audio_quality.get('bitRate'), scale=1000),
        'vbr': float_or_none(video_quality.get('bitRate'), scale=1000),
        'asr': int_or_none(audio_quality.get('samplingRate')),
        'height': video_quality.get('height'),
        'width': video_quality.get('width'),
        'quality': video_quality.get('qualityLevel'),
        'protocol': 'niconico_dms',
        'expected_protocol': 'hls',
        'http_headers': {
            'Origin': 'https://www.nicovideo.jp',
            'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
        },
    }
+ for (audio_quality, video_quality, protocol) in itertools.product(dmc_quality_info.get('audios', []), dmc_quality_info.get('videos', []), dmc_session_api_data.get('protocols', [])): + fmt = self._extract_dmc_format_for_quality(video_id, audio_quality, video_quality, protocol) + if fmt: + formats.append(fmt) + dms_quality_info = traverse_obj(api_data, ('media', 'domand'), {}) + for (audio_quality, video_quality) in itertools.product(dms_quality_info.get('audios', []), dms_quality_info.get('videos', [])): + fmt = self._extract_dms_format_for_quality(video_id, audio_quality, video_quality) if fmt: formats.append(fmt) @@ -440,8 +495,10 @@ def get_video_info(*items, get_first=True, **kwargs): thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp']) + actual_video_id = traverse_obj(api_data, ('video', 'id')) + return { - 'id': video_id, + 'id': actual_video_id, '_api_data': api_data, 'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None), 'formats': formats, @@ -466,8 +523,8 @@ def get_video_info(*items, get_first=True, **kwargs): 'duration': ( parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None)) or get_video_info('duration')), - 'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}', - 'subtitles': self.extract_subtitles(video_id, api_data), + 'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{actual_video_id}', + 'subtitles': self.extract_subtitles(actual_video_id, api_data), } def _get_subtitles(self, video_id, api_data):