Skip to content

Commit

Permalink
[extractor/iprima] Fix extractor (relax nuxt function regex, add js_to_json hack)
Browse files Browse the repository at this point in the history
  • Loading branch information
std-move committed Jun 3, 2023
1 parent 2fb35f6 commit 5782255
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
2 changes: 1 addition & 1 deletion yt_dlp/extractor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1672,7 +1672,7 @@ def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
rectx = re.escape(context_name)
FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){(?:.*?)return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
js, arg_keys, arg_vals = self._search_regex(
(rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
Expand Down
3 changes: 3 additions & 0 deletions yt_dlp/utils/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3233,8 +3233,11 @@ def fix_kv(m):

def create_map(mobj):
    """Convert the captured argument of a `new Map(...)` match into a JSON object string."""
    # Group 1 is optional in the pattern; fall back to an empty list literal when it is unset
    entries_js = mobj.group(1) or '[]'
    entries = json.loads(js_to_json(entries_js, vars=vars))
    return json.dumps(dict(entries))
def create_array(mobj):
    """Rebuild a matched line with its `Array(...)` arguments converted to a JSON array.

    Group 1 and group 3 (the text on either side of the call) are passed through
    unchanged; group 2 (the call arguments) is wrapped in brackets and converted.
    """
    prefix, elements, suffix = mobj.group(1, 2, 3)
    converted = js_to_json(f'[{elements}]', vars=vars)
    return prefix + converted + suffix

code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
code = re.sub(r'^(.*?)(?:new\s+)?Array\((.*?)\)(.*?)$', create_array, code)
if not strict:
code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
Expand Down

0 comments on commit 5782255

Please sign in to comment.