New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ie/tiktok:user] Fix extractor #9661
base: master
Are you sure you want to change the base?
Conversation
Authored by: bashonly
Authored by: bashonly
|
||
old_cursor = cursor | ||
cursor = traverse_obj( | ||
response, ('itemList', -1, 'createTime', {lambda x: x * 1E3}, {int_or_none})) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
response, ('itemList', -1, 'createTime', {lambda x: x * 1E3}, {int_or_none})) | |
response, ('itemList', -1, 'createTime', {lambda x: int_or_none(x, invscale=1E3)})) |
or actually, just
response, ('itemList', -1, 'createTime', {lambda x: x * 1E3}, {int_or_none})) | |
response, ('itemList', -1, 'createTime', {lambda x: int(x * 1E3)})) |
cursor = traverse_obj( | ||
response, ('itemList', -1, 'createTime', {lambda x: x * 1E3}, {int_or_none})) | ||
if not cursor: | ||
cursor = old_cursor - 604800000 # jump 1 week back in time |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cursor = old_cursor - 604800000 # jump 1 week back in time | |
cursor = old_cursor - 7 * 86_400_000 # jump 1 week back in time |
This is more readable, IMO. As for the comment, it may be more useful to explain why the jump is 1 week.
_WORKING = False | ||
_VALID_URL = [ | ||
r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])', | ||
r'tiktokuser:(?P<id>MS4wLjABAAAA[\w-]{64})', |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could do something like this if you think it's useful
r'tiktokuser:(?P<id>MS4wLjABAAAA[\w-]{64})', | |
r'tiktokuser:(?P<id>MS4wLjABAAAA[\w-]{64})(?:@(?P<username>[\w.-]+))?', |
_VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])' | ||
_WORKING = False | ||
_VALID_URL = [ | ||
r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])', |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])', | |
r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w.-]+)/?(?:$|[#?])', |
_TESTS = [{ | ||
'url': 'https://tiktok.com/@corgibobaa?lang=en', | ||
'playlist_mincount': 45, | ||
'info_dict': { | ||
'id': '6935371178089399301', | ||
'id': 'MS4wLjABAAAAepiJKgwWhulvCpSuUVsp7sgVVsFJbbNaLeQ6OQ0oAJERGDUIXhb2yxxHZedsItgT', |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As I said on Discord, this isn't necessary to get `tiktokuser:` working, but I leave it to your discretion.
'secUid': sec_uid, | ||
'type': '1', # pagination type: 0 == oldest-to-newest, 1 == newest-to-oldest | ||
'tz_name': 'UTC', | ||
'verifyFp': 'verify_%s' % ''.join(random.choices(string.hexdigits, k=7)), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
'verifyFp': 'verify_%s' % ''.join(random.choices(string.hexdigits, k=7)), | |
'verifyFp': f'verify_{"".join(random.choices(string.hexdigits, k=7))}', |
return traverse_obj( | ||
self._get_universal_data(webpage, user_name), | ||
('webapp.user-detail', 'userInfo', 'user', 'secUid', {str})) or traverse_obj( | ||
self._get_sigi_state(webpage, user_name), | ||
('LiveRoom', 'liveRoomUserInfo', 'user', 'secUid'), | ||
('UserModule', 'users', ..., 'secUid'), | ||
get_all=False, expected_type=str) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Personally, I try to avoid this type of indenting, where it's not obvious where one traverse_obj ends and the next begins. But it's not a big deal.
return traverse_obj( | |
self._get_universal_data(webpage, user_name), | |
('webapp.user-detail', 'userInfo', 'user', 'secUid', {str})) or traverse_obj( | |
self._get_sigi_state(webpage, user_name), | |
('LiveRoom', 'liveRoomUserInfo', 'user', 'secUid'), | |
('UserModule', 'users', ..., 'secUid'), | |
get_all=False, expected_type=str) | |
return (traverse_obj(self._get_universal_data(webpage, user_name), | |
('webapp.user-detail', 'userInfo', 'user', 'secUid', {str})) | |
or traverse_obj(self._get_sigi_state(webpage, user_name), | |
('LiveRoom', 'liveRoomUserInfo', 'user', 'secUid', {str}), | |
('UserModule', 'users', ..., 'secUid', {str}, {any}))) |
user_name, sec_uid = None, None | ||
if url.startswith('tiktokuser:'): | ||
sec_uid = self._match_id(url) | ||
else: | ||
user_name = self._match_id(url) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
user_name, sec_uid = None, None | |
if url.startswith('tiktokuser:'): | |
sec_uid = self._match_id(url) | |
else: | |
user_name = self._match_id(url) | |
if url.startswith('tiktokuser:'): | |
sec_uid, user_name = self._match_id(url), None | |
else: | |
sec_uid, user_name = None, self._match_id(url) |
if not sec_uid: | ||
for user_url, msg in ( | ||
(self._UPLOADER_URL_FORMAT % user_name, 'user'), | ||
(self._UPLOADER_URL_FORMAT % f'{user_name}/live', 'live'), | ||
): | ||
sec_uid = self._get_sec_uid(user_url, user_name, msg) | ||
if sec_uid: | ||
break |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if not sec_uid: | |
for user_url, msg in ( | |
(self._UPLOADER_URL_FORMAT % user_name, 'user'), | |
(self._UPLOADER_URL_FORMAT % f'{user_name}/live', 'live'), | |
): | |
sec_uid = self._get_sec_uid(user_url, user_name, msg) | |
if sec_uid: | |
break | |
for user_url, msg in ( | |
(self._UPLOADER_URL_FORMAT % user_name, 'user'), | |
(self._UPLOADER_URL_FORMAT % f'{user_name}/live', 'live'), | |
): | |
if sec_uid: | |
break | |
sec_uid = self._get_sec_uid(user_url, user_name, msg) |
WIP, TODO:
https://www.tiktok.com/@SEC_UID
Closes #3776
Template
Before submitting a pull request make sure you have:
In order to be accepted and merged into yt-dlp each piece of code must be in public domain or released under Unlicense. Check all of the following options that apply:
What is the purpose of your pull request?