Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[extractor/Douyin] fix Douyin #9239

Merged
merged 11 commits on Feb 28, 2024
30 changes: 11 additions & 19 deletions yt_dlp/extractor/tiktok.py
Expand Up @@ -6,7 +6,7 @@
import time

from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
from ..compat import compat_urllib_parse_urlparse
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
Expand All @@ -15,7 +15,6 @@
UserNotLive,
determine_ext,
format_field,
get_first,
int_or_none,
join_nonempty,
merge_dicts,
Expand Down Expand Up @@ -242,7 +241,7 @@ def extract_addr(addr, add_meta={}):
'format_id': 'play_addr',
'format_note': 'Direct video',
'vcodec': 'h265' if traverse_obj(
video_info, 'is_bytevc1', 'is_h265') else 'h264', # TODO: Check for "direct iOS" videos, like https://www.tiktok.com/@cookierun_dev/video/7039716639834656002
video_info, 'is_bytevc1', 'is_h265') else 'h264', # TODO: Check for "direct iOS" videos, like https://www.tiktok.com/@cookierun_dev/video/7039716639834656002
bashonly marked this conversation as resolved.
Show resolved Hide resolved
'width': video_info.get('width'),
'height': video_info.get('height'),
}))
Expand Down Expand Up @@ -767,7 +766,7 @@ def _video_entries_api(self, webpage, user_id, username):
'max_cursor': 0,
'min_cursor': 0,
'retry_type': 'no_retry',
'device_id': ''.join(random.choices(string.digits, k=19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api.
'device_id': ''.join(random.choices(string.digits, k=19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api.
bashonly marked this conversation as resolved.
Show resolved Hide resolved
}

for page in itertools.count(1):
Expand Down Expand Up @@ -1035,24 +1034,17 @@ class DouyinIE(TikTokBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)

try:
return self._extract_aweme_app(video_id)
except ExtractorError as e:
e.expected = True
self.to_screen(f'{e}; trying with webpage')

webpage = self._download_webpage(url, video_id)
render_data = self._search_json(
r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>', webpage, 'render data', video_id,
contains_pattern=r'%7B(?s:.+)%7D', fatal=False, transform_source=compat_urllib_parse_unquote)
if not render_data:
detail = traverse_obj(self._download_json(
'https://www.douyin.com/aweme/v1/web/aweme/detail/', video_id,
'Downloading web detail JSON', 'Failed to download web detail JSON',
query={'aweme_id': video_id}, fatal=False), ('aweme_detail', {dict}))
if not detail:
# TODO: Run verification challenge code to generate signature cookies
cookies = self._get_cookies(self._WEBPAGE_HOST)
expected = not cookies.get('s_v_web_id') or not cookies.get('ttwid')
raise ExtractorError(
'Fresh cookies (not necessarily logged in) are needed', expected=expected)
'Fresh cookies (not necessarily logged in) are needed',
expected=not self._get_cookies(self._WEBPAGE_HOST).get('s_v_web_id'))

return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url, video_id)
return self._parse_aweme_video_app(detail)


class TikTokVMIE(InfoExtractor):
Expand Down