Skip to content

Commit

Permalink
[core] Fix regex for sanitizing date constructor in js_to_json
Browse files Browse the repository at this point in the history
Given the following JavaScript code as a string:

    [new Date("foobar"), '("baz")']

The greedy quantifier in the old pattern (`".+"`) matches everything up to the last quote in the input instead of just the string literal inside the parentheses. This patch fixes that by using the `STRING_RE` regex to match string literals properly.

Also updated tests.
  • Loading branch information
awalgarg committed Oct 7, 2023
1 parent b095fd3 commit 927f919
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 2 deletions.
7 changes: 6 additions & 1 deletion test/test_utils.py
Expand Up @@ -1209,6 +1209,9 @@ def test_js_to_json_edgecases(self):
on = js_to_json('\'"\\""\'')
self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped')

on = js_to_json('[new Date("spam"), \'("eggs")\']')
self.assertEqual(json.loads(on), ['spam', '("eggs")'], msg='Date regex should match a single string')

def test_js_to_json_malformed(self):
self.assertEqual(js_to_json('42a1'), '42"a1"')
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
Expand All @@ -1220,11 +1223,13 @@ def test_js_to_json_template_literal(self):
self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
self.assertEqual(js_to_json('`${name}`', {}), '"name"')

def test_js_to_json_map_array_constructors(self):
def test_js_to_json_common_constructors(self):
self.assertEqual(json.loads(js_to_json('new Map([["a", 5]])')), {'a': 5})
self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10])
self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5])
self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5})
self.assertEqual(json.loads(js_to_json('new Date("123")')), "123")
self.assertEqual(json.loads(js_to_json('new Date(\'2023-10-19\')')), "2023-10-19")

def test_extract_attributes(self):
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
Expand Down
2 changes: 1 addition & 1 deletion yt_dlp/utils/_utils.py
Expand Up @@ -2744,7 +2744,7 @@ def create_map(mobj):
code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code)
code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
if not strict:
code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
code = re.sub(rf'new Date\(({STRING_RE})\)', r'\g<1>', code)
code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code)
code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code)
Expand Down

0 comments on commit 927f919

Please sign in to comment.