Skip to content

Commit 3121512

Browse files
Grub4Kpukkandan
authored andcommitted
[core] Change how Cookie headers are handled
Cookies are now saved and loaded under `cookies` key in the info dict instead of `http_headers.Cookie`. Cookies passed in headers are auto-scoped to the input URLs with a warning. Ref: GHSA-v8mc-9377-rwjj Authored by: Grub4K
1 parent f8b4bcc commit 3121512

File tree

3 files changed

+139
-4
lines changed

3 files changed

+139
-4
lines changed

Diff for: test/test_YoutubeDL.py

+56
Original file line numberDiff line numberDiff line change
@@ -1213,6 +1213,62 @@ def _real_extract(self, url):
12131213
self.assertEqual(downloaded['extractor'], 'Video')
12141214
self.assertEqual(downloaded['extractor_key'], 'Video')
12151215

1216+
def test_header_cookies(self):
1217+
from http.cookiejar import Cookie
1218+
1219+
ydl = FakeYDL()
1220+
ydl.report_warning = lambda *_, **__: None
1221+
1222+
def cookie(name, value, version=None, domain='', path='', secure=False, expires=None):
1223+
return Cookie(
1224+
version or 0, name, value, None, False,
1225+
domain, bool(domain), bool(domain), path, bool(path),
1226+
secure, expires, False, None, None, rest={})
1227+
1228+
_test_url = 'https://yt.dlp/test'
1229+
1230+
def test(encoded_cookies, cookies, headers=False, round_trip=None, error=None):
1231+
def _test():
1232+
ydl.cookiejar.clear()
1233+
ydl._load_cookies(encoded_cookies, from_headers=headers)
1234+
if headers:
1235+
ydl._apply_header_cookies(_test_url)
1236+
data = {'url': _test_url}
1237+
ydl._calc_headers(data)
1238+
self.assertCountEqual(
1239+
map(vars, ydl.cookiejar), map(vars, cookies),
1240+
'Extracted cookiejar.Cookie is not the same')
1241+
if not headers:
1242+
self.assertEqual(
1243+
data.get('cookies'), round_trip or encoded_cookies,
1244+
'Cookie is not the same as round trip')
1245+
ydl.__dict__['_YoutubeDL__header_cookies'] = []
1246+
1247+
with self.subTest(msg=encoded_cookies):
1248+
if not error:
1249+
_test()
1250+
return
1251+
with self.assertRaisesRegex(Exception, error):
1252+
_test()
1253+
1254+
test('test=value; Domain=.yt.dlp', [cookie('test', 'value', domain='.yt.dlp')])
1255+
test('test=value', [cookie('test', 'value')], error='Unscoped cookies are not allowed')
1256+
test('cookie1=value1; Domain=.yt.dlp; Path=/test; cookie2=value2; Domain=.yt.dlp; Path=/', [
1257+
cookie('cookie1', 'value1', domain='.yt.dlp', path='/test'),
1258+
cookie('cookie2', 'value2', domain='.yt.dlp', path='/')])
1259+
test('test=value; Domain=.yt.dlp; Path=/test; Secure; Expires=9999999999', [
1260+
cookie('test', 'value', domain='.yt.dlp', path='/test', secure=True, expires=9999999999)])
1261+
test('test="value; "; path=/test; domain=.yt.dlp', [
1262+
cookie('test', 'value; ', domain='.yt.dlp', path='/test')],
1263+
round_trip='test="value\\073 "; Domain=.yt.dlp; Path=/test')
1264+
test('name=; Domain=.yt.dlp', [cookie('name', '', domain='.yt.dlp')],
1265+
round_trip='name=""; Domain=.yt.dlp')
1266+
1267+
test('test=value', [cookie('test', 'value', domain='.yt.dlp')], headers=True)
1268+
test('cookie1=value; Domain=.yt.dlp; cookie2=value', [], headers=True, error='Invalid syntax')
1269+
ydl.deprecated_feature = ydl.report_error
1270+
test('test=value', [], headers=True, error='Passing cookies as a header is a potential security risk')
1271+
12161272

12171273
if __name__ == '__main__':
12181274
unittest.main()

Diff for: yt_dlp/YoutubeDL.py

+77-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import collections
22
import contextlib
3+
import copy
34
import datetime
45
import errno
56
import fileinput
67
import functools
8+
import http.cookiejar
79
import io
810
import itertools
911
import json
@@ -25,7 +27,7 @@
2527
from .cache import Cache
2628
from .compat import urllib # isort: split
2729
from .compat import compat_os_name, compat_shlex_quote
28-
from .cookies import load_cookies
30+
from .cookies import LenientSimpleCookie, load_cookies
2931
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
3032
from .downloader.rtmp import rtmpdump_version
3133
from .extractor import gen_extractor_classes, get_info_extractor
@@ -673,6 +675,9 @@ def process_color_policy(stream):
673675
if auto_init and auto_init != 'no_verbose_header':
674676
self.print_debug_header()
675677

678+
self.__header_cookies = []
679+
self._load_cookies(traverse_obj(self.params.get('http_headers'), 'cookie', casesense=False)) # compat
680+
676681
def check_deprecated(param, option, suggestion):
677682
if self.params.get(param) is not None:
678683
self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
@@ -1625,8 +1630,60 @@ def progress(msg):
16251630
self.to_screen('')
16261631
raise
16271632

1633+
def _load_cookies(self, data, *, from_headers=True):
1634+
"""Loads cookies from a `Cookie` header
1635+
1636+
This tries to work around the security vulnerability of passing cookies to every domain.
1637+
See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1638+
The unscoped cookies are saved for later to be stored in the jar with a limited scope.
1639+
1640+
@param data The Cookie header as string to load the cookies from
1641+
@param from_headers If `False`, allows Set-Cookie syntax in the cookie string (at least a domain will be required)
1642+
"""
1643+
for cookie in LenientSimpleCookie(data).values():
1644+
if from_headers and any(cookie.values()):
1645+
raise ValueError('Invalid syntax in Cookie Header')
1646+
1647+
domain = cookie.get('domain') or ''
1648+
expiry = cookie.get('expires')
1649+
if expiry == '': # 0 is valid
1650+
expiry = None
1651+
prepared_cookie = http.cookiejar.Cookie(
1652+
cookie.get('version') or 0, cookie.key, cookie.value, None, False,
1653+
domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
1654+
cookie.get('secure') or False, expiry, False, None, None, {})
1655+
1656+
if domain:
1657+
self.cookiejar.set_cookie(prepared_cookie)
1658+
elif from_headers:
1659+
self.deprecated_feature(
1660+
'Passing cookies as a header is a potential security risk; '
1661+
'they will be scoped to the domain of the downloaded urls. '
1662+
'Please consider loading cookies from a file or browser instead.')
1663+
self.__header_cookies.append(prepared_cookie)
1664+
else:
1665+
self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1666+
tb=False, is_error=False)
1667+
1668+
def _apply_header_cookies(self, url):
1669+
"""Applies stray header cookies to the provided url
1670+
1671+
This loads header cookies and scopes them to the domain provided in `url`.
1672+
While this is not ideal, it helps reduce the risk of them being sent
1673+
to an unintended destination while mostly maintaining compatibility.
1674+
"""
1675+
parsed = urllib.parse.urlparse(url)
1676+
if not parsed.hostname:
1677+
return
1678+
1679+
for cookie in map(copy.copy, self.__header_cookies):
1680+
cookie.domain = f'.{parsed.hostname}'
1681+
self.cookiejar.set_cookie(cookie)
1682+
16281683
@_handle_extraction_exceptions
16291684
def __extract_info(self, url, ie, download, extra_info, process):
1685+
self._apply_header_cookies(url)
1686+
16301687
try:
16311688
ie_result = ie.extract(url)
16321689
except UserNotLive as e:
@@ -2414,9 +2471,24 @@ def _calc_headers(self, info_dict):
24142471
if 'Youtubedl-No-Compression' in res: # deprecated
24152472
res.pop('Youtubedl-No-Compression', None)
24162473
res['Accept-Encoding'] = 'identity'
2417-
cookies = self.cookiejar.get_cookie_header(info_dict['url'])
2474+
cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
24182475
if cookies:
2419-
res['Cookie'] = cookies
2476+
encoder = LenientSimpleCookie()
2477+
values = []
2478+
for cookie in cookies:
2479+
_, value = encoder.value_encode(cookie.value)
2480+
values.append(f'{cookie.name}={value}')
2481+
if cookie.domain:
2482+
values.append(f'Domain={cookie.domain}')
2483+
if cookie.path:
2484+
values.append(f'Path={cookie.path}')
2485+
if cookie.secure:
2486+
values.append('Secure')
2487+
if cookie.expires:
2488+
values.append(f'Expires={cookie.expires}')
2489+
if cookie.version:
2490+
values.append(f'Version={cookie.version}')
2491+
info_dict['cookies'] = '; '.join(values)
24202492

24212493
if 'X-Forwarded-For' not in res:
24222494
x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
@@ -3423,6 +3495,8 @@ def download_with_info_file(self, info_filename):
34233495
infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
34243496
for info in variadic(json.loads('\n'.join(f)))]
34253497
for info in infos:
3498+
self._load_cookies(info.get('cookies'), from_headers=False)
3499+
self._load_cookies(traverse_obj(info.get('http_headers'), 'Cookie', casesense=False)) # compat
34263500
try:
34273501
self.__download_wrapper(self.process_ie_result)(info, download=True)
34283502
except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:

Diff for: yt_dlp/downloader/common.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
timetuple_from_msec,
3333
try_call,
3434
)
35+
from ..utils.traversal import traverse_obj
3536

3637

3738
class FileDownloader:
@@ -419,7 +420,6 @@ def download(self, filename, info_dict, subtitle=False):
419420
"""Download to a filename using the info from info_dict
420421
Return True on success and False otherwise
421422
"""
422-
423423
nooverwrites_and_exists = (
424424
not self.params.get('overwrites', True)
425425
and os.path.exists(encodeFilename(filename))
@@ -453,6 +453,11 @@ def download(self, filename, info_dict, subtitle=False):
453453
self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
454454
time.sleep(sleep_interval)
455455

456+
# Filter the `Cookie` header from the info_dict to prevent leaks.
457+
# See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
458+
info_dict['http_headers'] = dict(traverse_obj(info_dict, (
459+
'http_headers', {dict.items}, lambda _, pair: pair[0].lower() != 'cookie'))) or None
460+
456461
ret = self.real_download(filename, info_dict)
457462
self._finish_multiline_status()
458463
return ret, True

0 commit comments

Comments
 (0)