Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[extractor,downloader/niconico] support new delivery server called Dwango Media Service (DMS) #8685

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
d5dd9bf
[extractor/niconico] Add support for DMS server
xpadev-net Nov 30, 2023
ebade16
[extractor/niconico] add v3_guest endpoint to fallback api
xpadev-net Nov 30, 2023
36af8e6
[extractor/niconico] feat method for niconico dms file downloader
xpadev-net Nov 30, 2023
61fbeee
[downloader/niconico:dms] feat file downloader
xpadev-net Nov 30, 2023
d5a624c
[extractor/niconico] fix to conform to yt-dlp coding conventions
xpadev-net Nov 30, 2023
5545d28
[extractor/niconico] fix: allow null in each quality info
xpadev-net Nov 30, 2023
d38ffc7
[extractor/niconico] fix: dist.get method usage
xpadev-net Nov 30, 2023
13f176d
[downloader/niconico:dms] fix: audio stream missing
xpadev-net Jan 10, 2024
dcad2b6
Merge branch 'master' into niconico
xpadev-net Feb 20, 2024
e7fb02f
fix merge commit
bashonly Feb 21, 2024
b88b9fc
[extractor/niconico] fix: add asr metadata to dms format
xpadev-net Feb 21, 2024
c8f53fb
[extractor/niconico] refactor: call float_or_none() with kwargs
pzhlkj6612 Feb 22, 2024
2128bfb
[extractor/niconico] fix: code style
xpadev-net Feb 22, 2024
8392541
Merge pull request #1 from pzhlkj6612/xpadev-net-niconico
xpadev-net Feb 22, 2024
ab24153
[extractor/niconico] fix: extract video bitrate by video_quality.bitRate
xpadev-net Feb 22, 2024
860c050
[extractor/niconico] fix: flake8 error
xpadev-net Feb 22, 2024
c14b60e
[extractor/niconico] fix: add asr metadata to dmc format
xpadev-net Feb 28, 2024
f3b369f
[extractor/niconico] fix: convert asr as int
xpadev-net Feb 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 3 additions & 1 deletion yt_dlp/downloader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
from .http import HttpFD
from .ism import IsmFD
from .mhtml import MhtmlFD
from .niconico import NiconicoDmcFD, NiconicoLiveFD
from .niconico import NiconicoDmcFD, NiconicoDmsFD, NiconicoLiveFD
from .rtmp import RtmpFD
from .rtsp import RtspFD
from .websocket import WebSocketFragmentFD
Expand All @@ -50,6 +50,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
'ism': IsmFD,
'mhtml': MhtmlFD,
'niconico_dmc': NiconicoDmcFD,
'niconico_dms': NiconicoDmsFD,
'niconico_live': NiconicoLiveFD,
'fc2_live': FC2LiveFD,
'websocket_frag': WebSocketFragmentFD,
Expand All @@ -66,6 +67,7 @@ def shorten_protocol_name(proto, simplify=False):
'http_dash_segments': 'dash',
'http_dash_segments_generator': 'dashG',
'niconico_dmc': 'dmc',
'niconico_dms': 'dms',
'websocket_frag': 'WSfrag',
}
if simplify:
Expand Down
19 changes: 19 additions & 0 deletions yt_dlp/downloader/niconico.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,25 @@ def heartbeat():
return success


class NiconicoDmsFD(FileDownloader):
    """Downloader for niconico videos delivered through DMS.

    The format's placeholder URL is swapped for the manifest URL obtained
    from ``NiconicoIE._get_dms_manifest_url`` and the actual transfer is
    handed to whichever downloader ``get_suitable_downloader`` selects for
    the ``m3u8`` protocol.
    """

    def real_download(self, filename, info_dict):
        """Resolve the DMS manifest and delegate the download.

        Note that ``info_dict`` is modified in place: its ``url`` and
        ``protocol`` entries are overwritten before delegation.

        Returns whatever the delegated downloader's ``real_download`` returns.
        """
        # NOTE(review): imported at call time, presumably to avoid an import
        # cycle between the downloader and extractor packages - confirm.
        from ..extractor.niconico import NiconicoIE

        self.to_screen('[%s] Downloading from DMS' % self.FD_NAME)

        extractor = NiconicoIE(self.ydl)
        info_dict['url'] = extractor._get_dms_manifest_url(info_dict)
        info_dict['protocol'] = 'm3u8'

        # Pick the downloader class only after the protocol has been rewritten.
        delegate_cls = get_suitable_downloader(info_dict, params=self.params)
        delegate = delegate_cls(self.ydl, self.params)
        return delegate.real_download(filename, info_dict)


class NiconicoLiveFD(FileDownloader):
""" Downloads niconico live without being stopped """

Expand Down
101 changes: 84 additions & 17 deletions yt_dlp/extractor/niconico.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,26 @@ def ping():

return info_dict, heartbeat_info_dict

def _extract_format_for_quality(self, video_id, audio_quality, video_quality, dmc_protocol):
def _get_dms_manifest_url(self, info_dict):
    """Request the HLS manifest URL for a DMS format.

    ``info_dict['url']`` is expected to look like
    ``niconico_dms:<video_id>/<video_quality_id>/<audio_quality_id>``;
    the two quality ids are sent to the access-rights endpoint as the
    requested output pair. ``info_dict['_api_data']`` supplies the watch
    track id and the access-right key.

    Returns the ``contentUrl`` string from the API response.
    Raises ExtractorError when the response carries no ``contentUrl``.
    """
    video_id = info_dict['id']
    # Drop the "niconico_dms:" scheme and the leading video id; what remains
    # is [video_quality_id, audio_quality_id].
    requested_output = info_dict['url'].split(':')[1].split('/')[1:3]
    payload = json.dumps({
        'outputs': [requested_output],
    }).encode('utf-8')

    response = self._download_json(
        'https://nvapi.nicovideo.jp/v1/watch/%s/access-rights/hls?actionTrackId=%s' % (
            video_id, traverse_obj(info_dict, ('_api_data', 'client', 'watchTrackId'))),
        video_id,
        note='Requesting Manifest Url', errnote='Unable to fetch data',
        data=payload,
        headers={
            # NOTE(review): this requests a Brotli-encoded response only;
            # confirm it can be decoded when no Brotli decoder is available.
            'Accept-Encoding': 'br',
            'Content-Type': 'application/json',
            'X-Request-With': 'https://www.nicovideo.jp',
            'X-Access-Right-Key': traverse_obj(info_dict, ('_api_data', 'media', 'domand', 'accessRightKey')),
            'X-Frontend-Id': '6',
            'X-Frontend-Version': '0',
        })

    # A missing "data" object previously surfaced as an AttributeError on
    # None, and a missing "contentUrl" was silently returned as None.
    manifest_url = traverse_obj(response, ('data', 'contentUrl'))
    if not manifest_url:
        raise ExtractorError('Unable to obtain the DMS manifest URL from the API response')
    return manifest_url

def _extract_dmc_format_for_quality(self, video_id, audio_quality, video_quality, dmc_protocol):

if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
return None
Expand Down Expand Up @@ -383,6 +402,42 @@ def extract_video_quality(video_quality):
}
}

def _extract_dms_format_for_quality(self, video_id, audio_quality, video_quality):
    """Build the format dict for one DMS audio/video quality pair.

    ``audio_quality`` and ``video_quality`` are quality entries taken from
    the API's ``media.domand`` section. Returns ``None`` when either entry
    is not marked ``isAvailable``; otherwise returns a format dict whose
    ``url`` is a ``niconico_dms:`` placeholder carrying the video id and the
    two quality ids, to be resolved at download time.
    """
    if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
        return None

    def bitrate_from_label(label):
        # Fallback: pull a figure such as "| 2.0M" out of the quality label.
        # An absent label is treated as an empty string so the search does
        # not fail on None.
        return parse_filesize('%sB' % self._search_regex(
            r'\| ([0-9]*\.?[0-9]*[MK])', label or '', 'vbr', default=''))

    format_id = '-'.join(
        ['dms', remove_start(video_quality.get('id'), 'video-'), remove_start(audio_quality.get('id'), 'audio-'), 'hls'])

    vid_qual_label = video_quality.get('label')
    video_bitrate = video_quality.get('bitRate')
    # bitRate may be null in the API data; comparing None with 0 would raise
    # TypeError, so missing and non-positive values both use the label.
    if not video_bitrate or video_bitrate <= 0:
        video_bitrate = bitrate_from_label(vid_qual_label)

    return {
        'url': 'niconico_dms:%s/%s/%s' % (video_id, video_quality.get('id'), audio_quality.get('id')),
        'format_id': format_id,
        'format_note': join_nonempty('DMS', vid_qual_label, 'hls', delim=' '),
        # NOTE(review): mp4/aac/h264 are hard-coded rather than read from the
        # API data - confirm DMS always serves these.
        'ext': 'mp4',
        'acodec': 'aac',
        'vcodec': 'h264',
        # scale=1000 divides the raw value by 1000 (presumably bit/s to kbit/s)
        'abr': float_or_none(audio_quality.get('bitRate'), scale=1000),
        'vbr': float_or_none(video_bitrate, scale=1000),
        'asr': float_or_none(audio_quality.get('samplingRate')),
        'height': video_quality.get('height'),
        'width': video_quality.get('width'),
        'quality': video_quality.get('qualityLevel'),
        'protocol': 'niconico_dms',
        'expected_protocol': 'hls',
        'http_headers': {
            'Origin': 'https://www.nicovideo.jp',
            'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
        },
    }

def _real_extract(self, url):
video_id = self._match_id(url)

Expand All @@ -401,25 +456,35 @@ def _real_extract(self, url):
'https://www.nicovideo.jp/api/watch/v3/%s?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_%d' % (video_id, round(time.time() * 1000)), video_id,
note='Downloading API JSON', errnote='Unable to fetch data')['data']
except ExtractorError:
if not isinstance(e.cause, HTTPError):
raise
webpage = e.cause.response.read().decode('utf-8', 'replace')
error_msg = self._html_search_regex(
r'(?s)<section\s+class="(?:(?:ErrorMessage|WatchExceptionPage-message)\s*)+">(.+?)</section>',
webpage, 'error reason', default=None)
if not error_msg:
raise
raise ExtractorError(re.sub(r'\s+', ' ', error_msg), expected=True)
try:
api_data = self._download_json(
'https://www.nicovideo.jp/api/watch/v3_guest/%s?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_%d' % (video_id, round(time.time() * 1000)), video_id,
note='Downloading API JSON', errnote='Unable to fetch data')['data']
Comment on lines +450 to +452
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion

Suggested change
api_data = self._download_json(
'https://www.nicovideo.jp/api/watch/v3_guest/%s?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_%d' % (video_id, round(time.time() * 1000)), video_id,
note='Downloading API JSON', errnote='Unable to fetch data')['data']
api_data = self._download_json(
'https://www.nicovideo.jp/api/watch/v3_guest/%s?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_%d' % (video_id, round(time.time() * 1000)), video_id,
note='Downloading API JSON', errnote='Unable to fetch data', expected_status=(400))['data']

This API returns HTTP 400, but the response body still contains some information about the video.

Why am I here

In #9338 (comment), I couldn't get the "api_data" JSON for some videos. No JSON means no metadata. The "v3_guest" API solved my problem.

As a guest, I tested anime episode so43309069 - 戦国妖狐 世直し姉弟編 #3「永禄七年」 アニメ/動画 - ニコニコ動画 without valid Cookies:

VIDEO_ID='so43309069'

curl --silent --get \
  --data '_frontendId=6' \
  --data '_frontendVersion=0' \
  --data "actionTrackId=$(mktemp -u XXXXXXXXXX)_$(date '+%s')000" \
  "https://www.nicovideo.jp/api/watch/v3_guest/${VIDEO_ID}" | \
jq '
  {
    "meta": .meta,
    "data": {
      "reason": .data.reasonCode,
      "duration": .data.data.video.duration,
      "comment_count": .data.data.video.count.comment
    }
  }
'

With Japan IP:

{
  "meta": {
    "status": 400,
    "errorCode": "FORBIDDEN"
  },
  "data": {
    "reason": "PPV_VIDEO",
    "duration": null,
    "comment_count": null
  }
}

Without Japan IP:

{
  "meta": {
    "status": 400,
    "errorCode": "FORBIDDEN"
  },
  "data": {
    "reason": "DOMESTIC_VIDEO",
    "duration": 1419,
    "comment_count": 3830
  }
}

"DOMESTIC_VIDEO" means geo-restriction. "PPV_VIDEO" means "premium_only".

For members-only videos in fanclubs:

{
  "meta": {
    "status": 400,
    "errorCode": "FORBIDDEN"
  },
  "data": {
    "reason": "PPV_OR_CHANNEL_MEMBER_VIDEO",
    "duration": null,
    "comment_count": null
  }
}
{
  "meta": {
    "status": 400,
    "errorCode": "FORBIDDEN"
  },
  "data": {
    "reason": "CHANNEL_MEMBER_ONLY",
    "duration": null,
    "comment_count": null
  }
}

In #9338 (comment), I wrote:

The fanclub of so43204345 might have mis-configured its payment method. I didn't see a PPV option, but only the "join" button.

Hence, both "PPV_OR_CHANNEL_MEMBER_VIDEO" and "CHANNEL_MEMBER_ONLY" mean "subscriber_only".

Conclusion

With the "v3_guest" API, we are able to extract more information. I think @xpadev-net should add this to the niconico IE by opening a new PR.

except ExtractorError:
if not isinstance(e.cause, HTTPError):
raise
webpage = e.cause.response.read().decode('utf-8', 'replace')
error_msg = self._html_search_regex(
r'(?s)<section\s+class="(?:(?:ErrorMessage|WatchExceptionPage-message)\s*)+">(.+?)</section>',
webpage, 'error reason', default=None)
if not error_msg:
raise
raise ExtractorError(re.sub(r'\s+', ' ', error_msg), expected=True)

formats = []

def get_video_info(*items, get_first=True, **kwargs):
return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs)

quality_info = api_data['media']['delivery']['movie']
session_api_data = quality_info['session']
for (audio_quality, video_quality, protocol) in itertools.product(quality_info['audios'], quality_info['videos'], session_api_data['protocols']):
fmt = self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol)
dmc_quality_info = traverse_obj(api_data, ('media', 'delivery', 'movie'), {})
dmc_session_api_data = dmc_quality_info.get('session', {})
for (audio_quality, video_quality, protocol) in itertools.product(dmc_quality_info.get('audios', []), dmc_quality_info.get('videos', []), dmc_session_api_data.get('protocols', [])):
fmt = self._extract_dmc_format_for_quality(video_id, audio_quality, video_quality, protocol)
if fmt:
formats.append(fmt)
dms_quality_info = traverse_obj(api_data, ('media', 'domand'), {})
for (audio_quality, video_quality) in itertools.product(dms_quality_info.get('audios', []), dms_quality_info.get('videos', [])):
fmt = self._extract_dms_format_for_quality(video_id, audio_quality, video_quality)
if fmt:
formats.append(fmt)

Expand All @@ -440,8 +505,10 @@ def get_video_info(*items, get_first=True, **kwargs):

thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp'])

actual_video_id = traverse_obj(api_data, ('video', 'id'))

Comment on lines +498 to +499
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this necessary? Did you encounter some problems with the video id?

return {
'id': video_id,
'id': actual_video_id,
'_api_data': api_data,
'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
'formats': formats,
Expand All @@ -466,8 +533,8 @@ def get_video_info(*items, get_first=True, **kwargs):
'duration': (
parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None))
or get_video_info('duration')),
'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}',
'subtitles': self.extract_subtitles(video_id, api_data),
'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{actual_video_id}',
'subtitles': self.extract_subtitles(actual_video_id, api_data),
}

def _get_subtitles(self, video_id, api_data):
Expand Down