Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[extractor/tencent]: Add more formats and info #5950

Merged
merged 12 commits into from Feb 17, 2023
89 changes: 68 additions & 21 deletions yt_dlp/extractor/tencent.py
Expand Up @@ -8,6 +8,7 @@
from ..aes import aes_cbc_encrypt_bytes
from ..utils import (
ExtractorError,
float_or_none,
determine_ext,
int_or_none,
js_to_json,
Expand All @@ -19,6 +20,13 @@
class TencentBaseIE(InfoExtractor):
"""Subclasses must set _API_URL, _APP_VERSION, _PLATFORM, _HOST, _REFERER"""

def _check_api_response(self, api_response):
if api_response.get('em') != 0 and api_response.get('exem') != 0:
if '您所在区域暂无此内容版权' in api_response.get('msg'):
self.raise_geo_restricted()
raise ExtractorError(f'Tencent said: {api_response.get("msg")}')
return None
Hill-98 marked this conversation as resolved.
Show resolved Hide resolved

def _get_ckey(self, video_id, url, guid):
ua = self.get_param('http_headers')['User-Agent']

Expand Down Expand Up @@ -47,6 +55,11 @@ def _get_video_api_response(self, video_url, video_id, series_id, subtitle_forma
'sphttps': '1', # Enable HTTPS
'otype': 'json',
'spwm': '1',
'hevclv': '28', # Enable HEVC
'drm': '40', # Enable DRM
# For HDR
'spvideo': '4',
'spsfrhdr': '100',
# For SHD
'host': self._HOST,
'referer': self._REFERER,
Expand All @@ -63,27 +76,45 @@ def _get_video_api_response(self, video_url, video_id, series_id, subtitle_forma

def _extract_video_formats_and_subtitles(self, api_response, video_id):
    """Build the formats and subtitles described by one video API response.

    Every URL entry under ``vl.vi[0].ul.ui`` becomes either a set of HLS
    formats (when the entry has an ``hls`` field or its URL has the m3u8
    extension) or a single direct mp4 format. The ``fl.fi`` entry whose
    ``br`` equals the video's ``br`` then supplies metadata that is
    applied to every format of this response.

    Returns a ``(formats, subtitles)`` tuple.
    """
    video_response = api_response['vl']['vi'][0]

    formats, subtitles = [], {}
    for video_format in video_response['ul']['ui']:
        if video_format.get('hls') or determine_ext(video_format['url']) == 'm3u8':
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                video_format['url'] + traverse_obj(video_format, ('hls', 'pt'), default=''),
                video_id, 'mp4', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)
        else:
            formats.append({
                'url': f'{video_format["url"]}{video_response["fn"]}?vkey={video_response["fvkey"]}',
                'ext': 'mp4',
            })

    # Find the quality descriptor that belongs to this response.
    identifier = video_response.get('br')
    format_response = traverse_obj(
        api_response, ('fl', 'fi', lambda _, v: v['br'] == identifier),
        expected_type=dict, get_all=False) or {}

    drm = format_response.get('drm')
    format_id = format_response.get('name')
    is_hdr = format_id == 'hdr10'

    # Only build a display name when a descriptor was found; otherwise the
    # f-string would produce the literal text 'None (None)'.
    format_name = None
    if format_response:
        format_name = f'{format_response.get("sname")} ({format_response.get("resolution")})'
        if is_hdr:
            format_name += ' HDR'

    # Width/height are set here once for all formats instead of per format.
    common_info = {
        'width': video_response.get('vw'),
        'height': video_response.get('vh'),
        'abr': float_or_none(format_response.get('audiobandwidth'), scale=1000),
        'vbr': float_or_none(format_response.get('bandwidth'), scale=1000),
        'fps': format_response.get('vfps'),
        'format': format_name,
        # The key previously carried a trailing space ('format_id '), so the
        # real format_id field was never populated.
        'format_id': format_id,
        'format_note': format_name,
        'dynamic_range': 'hdr10' if is_hdr else None,
        'has_drm': drm is not None and drm != 0,
    }
    for f in formats:
        f.update(common_info)

    return formats, subtitles

def _extract_video_native_subtitles(self, api_response, subtitles_format):
Expand All @@ -97,21 +128,29 @@ def _extract_video_native_subtitles(self, api_response, subtitles_format):

return subtitles

def _extract_all_video_formats_and_subtitles(self, url, video_id, series_id):
formats, subtitles = [], {}
for video_format, subtitle_format, video_quality in (
# '': 480p, 'shd': 720p, 'fhd': 1080p
('mp4', 'srt', ''), ('hls', 'vtt', 'shd'), ('hls', 'vtt', 'fhd')):
api_response = self._get_video_api_response(
url, video_id, series_id, subtitle_format, video_format, video_quality)
def _extract_all_video_quality(self, api_response):
quality = []
formats_response = api_response['fl']['fi']
for f in formats_response:
quality.append(f.get('name'))
return quality
Hill-98 marked this conversation as resolved.
Show resolved Hide resolved

if api_response.get('em') != 0 and api_response.get('exem') != 0:
if '您所在区域暂无此内容版权' in api_response.get('msg'):
self.raise_geo_restricted()
raise ExtractorError(f'Tencent said: {api_response.get("msg")}')
def _extract_all_video_formats_and_subtitles(self, url, video_id, series_id):
first_api_response = self._get_video_api_response(url, video_id, series_id, 'vtt', 'hls', 'hd')
self._check_api_response(first_api_response)
api_responses = [first_api_response]
quality = self._extract_all_video_quality(first_api_response)
for q in quality:
if q != 'sd' and q != 'hd':
api_response = self._get_video_api_response(
url, video_id, series_id, 'vtt', 'hls', q)
Hill-98 marked this conversation as resolved.
Show resolved Hide resolved
self._check_api_response(api_response)
api_responses.append(api_response)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current yt-dlp is able to get 480p direct links, now those are missing

yt-dlp 2022.10.04

[debug] Command-line config: ['https://wetv.vip/play/7csb8h9c79p8idi/a0041yw44yk', '-F', '--verbose']
[debug] Encodings: locale UTF-8, fs utf-8, pref UTF-8, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2022.10.04 [4e0511f27] (debian*) API
[debug] Python 3.10.7 (CPython 64bit) - Linux-5.19.0-26-generic-x86_64-with-glibc2.36 (glibc 2.36)
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
[debug] exe versions: ffmpeg N-108931-g4dda3b1653-20221104 (setts), ffprobe N-108931-g4dda3b1653-20221104, phantomjs 140337668638656, rtmpdump 2.4
[debug] Optional libraries: Cryptodome-3.11.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, pyxattr-0.7.2, secretstorage-3.3.3, sqlite3-2.6.0, websockets-10.2
[debug] Proxy map: {}
[debug] Loaded 1690 extractors
[debug] [wetv:episode] Extracting URL: https://wetv.vip/play/7csb8h9c79p8idi/a0041yw44yk
[wetv:episode] a0041yw44yk: Downloading webpage
[wetv:episode] a0041yw44yk: Downloading webpage
[wetv:episode] a0041yw44yk: Downloading webpage
[wetv:episode] a0041yw44yk: Downloading m3u8 information
[wetv:episode] a0041yw44yk: Downloading m3u8 information
[wetv:episode] a0041yw44yk: Downloading webpage
[wetv:episode] a0041yw44yk: Downloading m3u8 information
[wetv:episode] a0041yw44yk: Downloading m3u8 information
[debug] Formats sorted by: hasvid, ie_pref, lang, quality, res, fps, hdr:12(7), vcodec:vp9.2(10), channels, acodec, filesize, fs_approx, tbr, vbr, abr, asr, proto, vext, aext, hasaud, source, id
[info] Available formats for a0041yw44yk:
ID EXT RESOLUTION │ PROTO │ VCODEC  ACODEC
───────────────────────────────────────────
0  mp4 864x362    │ https │ unknown unknown
1  mp4 864x362    │ https │ unknown unknown
2  mp4 864x362    │ https │ unknown unknown
3  mp4 864x362    │ https │ unknown unknown
4  mp4 1280x536   │ m3u8  │ unknown unknown
5  mp4 1280x536   │ m3u8  │ unknown unknown
6  mp4 1280x536   │ m3u8  │ unknown unknown
7  mp4 1280x536   │ m3u8  │ unknown unknown

Your branch

[debug] Command-line config: ['https://wetv.vip/play/7csb8h9c79p8idi/a0041yw44yk', '--verbose', '-F']
[debug] User config: []
[debug] System config: []
[debug] Encodings: locale UTF-8, fs utf-8, pref UTF-8, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.01.02 [d83b0ad80] (source)
[debug] Lazy loading extractors is disabled
[debug] Git HEAD: 62bd9c276
[debug] Python 3.10.7 (CPython x86_64 64bit) - Linux-5.19.0-26-generic-x86_64-with-glibc2.36 (OpenSSL 3.0.5 5 Jul 2022, glibc 2.36)
[debug] exe versions: ffmpeg N-108931-g4dda3b1653-20221104 (setts), ffprobe N-108931-g4dda3b1653-20221104, phantomjs broken, rtmpdump 2.4
[debug] Optional libraries: Cryptodome-3.15.0, certifi-2022.09.24, mutagen-1.46.0, sqlite3-2.6.0, websockets-10.4
[debug] Proxy map: {}
[debug] Loaded 1756 extractors
[wetv:episode] Extracting URL: https://wetv.vip/play/7csb8h9c79p8idi/a0041yw44yk
[wetv:episode] a0041yw44yk: Downloading webpage
[wetv:episode] a0041yw44yk: Downloading webpage
[wetv:episode] a0041yw44yk: Downloading webpage
[wetv:episode] a0041yw44yk: Downloading webpage
[wetv:episode] a0041yw44yk: Downloading webpage
[wetv:episode] a0041yw44yk: Downloading m3u8 information
[wetv:episode] a0041yw44yk: Downloading m3u8 information
[wetv:episode] a0041yw44yk: Downloading m3u8 information
[wetv:episode] a0041yw44yk: Downloading m3u8 information
[wetv:episode] a0041yw44yk: Downloading m3u8 information
[wetv:episode] a0041yw44yk: Downloading m3u8 information
[wetv:episode] a0041yw44yk: Downloading m3u8 information
[wetv:episode] a0041yw44yk: Downloading m3u8 information
[debug] Formats sorted by: hasvid, ie_pref, lang, quality, res, fps, hdr:12(7), vcodec:vp9.2(10), channels, acodec, filesize, fs_approx, tbr, vbr, abr, asr, proto, vext, aext, hasaud, source, id
[info] Available formats for a0041yw44yk:
ID EXT RESOLUTION FPS │ PROTO │ VCODEC  ACODEC  MORE INFO
──────────────────────────────────────────────────────────
0  mp4 256x108      0 │ m3u8  │ unknown unknown ld (144P)
1  mp4 256x108      0 │ m3u8  │ unknown unknown ld (144P)
2  mp4 864x362      0 │ m3u8  │ unknown unknown hd (480P)
3  mp4 864x362      0 │ m3u8  │ unknown unknown hd (480P)
4  mp4 1280x536     0 │ m3u8  │ unknown unknown shd (720P)
5  mp4 1280x536     0 │ m3u8  │ unknown unknown shd (720P)
6  mp4 1280x536     0 │ m3u8  │ unknown unknown shd (720P)
7  mp4 1280x536     0 │ m3u8  │ unknown unknown shd (720P)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is expected because the list of qualities returned by the API will not include 4K and HDR if the API is requested without the hls format.

Copy link
Member

@pukkandan pukkandan Feb 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should call both APIs? Removing existing formats is a regression and should only be done when there is no other choice.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pukkandan, the extraction speed for those outside of China is pretty bad, especially if you are using a proxy to unblock content. In my daily use, with a proxy, it takes 15–30 seconds to extract all AVC formats, so extracting both AVC and HEVC would double that, right?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My opinion is to add all formats by default, and have an extractor-arg to limit it. I think the current video_codec is too limited. We still have to download multiple m3u8s for a single codec. Something like formats=hevc-hd,avc-fhd would be better imo

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, we'd better not fetch both AVC and HEVC. Even in China it's a burden, and too many calls to the API might get blocked.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright. What do you think of the suggested change to extractor-arg?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean change video_codec to formats? Looks good, but doesn't add much value, and I think very few people need to download videos with both encoders.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm confused. Calling the API is expensive, but an option to reduce the number of m3u8 downloads doesn't add value?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, my fault, I misunderstood. It does have value, and I'll consider it when I have time.

pukkandan marked this conversation as resolved.
Show resolved Hide resolved

formats, subtitles = [], {}
for api_response in api_responses:
fmts, subs = self._extract_video_formats_and_subtitles(api_response, video_id)
native_subtitles = self._extract_video_native_subtitles(api_response, subtitle_format)
native_subtitles = self._extract_video_native_subtitles(api_response, 'vtt')
Hill-98 marked this conversation as resolved.
Show resolved Hide resolved

formats.extend(fmts)
self._merge_subtitles(subs, native_subtitles, target=subtitles)
Expand Down Expand Up @@ -147,45 +186,53 @@ class VQQVideoIE(VQQBaseIE):

_TESTS = [{
'url': 'https://v.qq.com/x/page/q326831cny0.html',
'md5': '826ef93682df09e3deac4a6e6e8cdb6e',
'md5': '84568b3722e15e9cd023b5594558c4a7',
'info_dict': {
'id': 'q326831cny0',
'ext': 'mp4',
'title': '我是选手:雷霆裂阵,终极时刻',
'description': 'md5:e7ed70be89244017dac2a835a10aeb1e',
'thumbnail': r're:^https?://[^?#]+q326831cny0',
'format_id ': 'shd',
'has_drm ': False,
Hill-98 marked this conversation as resolved.
Show resolved Hide resolved
Hill-98 marked this conversation as resolved.
Show resolved Hide resolved
},
}, {
'url': 'https://v.qq.com/x/page/o3013za7cse.html',
'md5': 'b91cbbeada22ef8cc4b06df53e36fa21',
'md5': 'cc431c4f9114a55643893c2c8ebf5592',
'info_dict': {
'id': 'o3013za7cse',
'ext': 'mp4',
'title': '欧阳娜娜VLOG',
'description': 'md5:29fe847497a98e04a8c3826e499edd2e',
'thumbnail': r're:^https?://[^?#]+o3013za7cse',
'format_id ': 'shd',
'has_drm ': False,
},
}, {
'url': 'https://v.qq.com/x/cover/7ce5noezvafma27/a00269ix3l8.html',
'md5': '71459c5375c617c265a22f083facce67',
'md5': '87968df6238a65d2478f19c25adf850b',
'info_dict': {
'id': 'a00269ix3l8',
'ext': 'mp4',
'title': '鸡毛飞上天 第01集',
'description': 'md5:8cae3534327315b3872fbef5e51b5c5b',
'thumbnail': r're:^https?://[^?#]+7ce5noezvafma27',
'series': '鸡毛飞上天',
'format_id ': 'shd',
'has_drm ': False,
},
}, {
'url': 'https://v.qq.com/x/cover/mzc00200p29k31e/s0043cwsgj0.html',
'md5': '96b9fd4a189fdd4078c111f21d7ac1bc',
'md5': 'fadd10bf88aec3420f06f19ee1d24c5b',
'info_dict': {
'id': 's0043cwsgj0',
'ext': 'mp4',
'title': '第1集:如何快乐吃糖?',
'description': 'md5:1d8c3a0b8729ae3827fa5b2d3ebd5213',
'thumbnail': r're:^https?://[^?#]+s0043cwsgj0',
'series': '青年理工工作者生活研究所',
'format_id ': 'shd',
'has_drm ': False,
},
}, {
# Geo-restricted to China
Expand Down