Skip to content

Commit

Permalink
[ie/weibo] Fix extraction (#8463)
Browse files Browse the repository at this point in the history
Closes #8445
Authored by: c-basalt
  • Loading branch information
c-basalt committed Nov 11, 2023
1 parent 312a2d1 commit 15b252d
Showing 1 changed file with 19 additions and 9 deletions.
28 changes: 19 additions & 9 deletions yt_dlp/extractor/weibo.py
@@ -1,3 +1,4 @@
import json
import random
import itertools
import urllib.parse
Expand All @@ -18,24 +19,33 @@


class WeiboBaseIE(InfoExtractor):
def _update_visitor_cookies(self, video_id):
def _update_visitor_cookies(self, visitor_url, video_id):
headers = {'Referer': visitor_url}
chrome_ver = self._search_regex(
r'Chrome/(\d+)', self.get_param('http_headers')['User-Agent'], 'user agent version', default='90')
visitor_data = self._download_json(
'https://passport.weibo.com/visitor/genvisitor', video_id,
note='Generating first-visit guest request',
transform_source=strip_jsonp,
headers=headers, transform_source=strip_jsonp,
data=urlencode_postdata({
'cb': 'gen_callback',
'fp': '{"os":"2","browser":"Gecko57,0,0,0","fonts":"undefined","screenInfo":"1440*900*24","plugins":""}',
}))
'fp': json.dumps({
'os': '1',
'browser': f'Chrome{chrome_ver},0,0,0',
'fonts': 'undefined',
'screenInfo': '1920*1080*24',
'plugins': ''
}, separators=(',', ':'))}))['data']

self._download_webpage(
'https://passport.weibo.com/visitor/visitor', video_id,
note='Running first-visit callback to get guest cookies',
query={
headers=headers, query={
'a': 'incarnate',
't': visitor_data['data']['tid'],
'w': 2,
'c': '%03d' % visitor_data['data']['confidence'],
't': visitor_data['tid'],
'w': 3 if visitor_data.get('new_tid') else 2,
'c': f'{visitor_data.get("confidence", 100):03d}',
'gc': '',
'cb': 'cross_domain',
'from': 'weibo',
'_rand': random.random(),
Expand All @@ -44,7 +54,7 @@ def _update_visitor_cookies(self, video_id):
def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs):
webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs)
if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com':
self._update_visitor_cookies(video_id)
self._update_visitor_cookies(urlh.url, video_id)
webpage = self._download_webpage(url, video_id, *args, fatal=fatal, note=note, **kwargs)
return self._parse_json(webpage, video_id, fatal=fatal)

Expand Down

0 comments on commit 15b252d

Please sign in to comment.