|
1 | 1 | import collections |
2 | 2 | import contextlib |
| 3 | +import copy |
3 | 4 | import datetime |
4 | 5 | import errno |
5 | 6 | import fileinput |
6 | 7 | import functools |
| 8 | +import http.cookiejar |
7 | 9 | import io |
8 | 10 | import itertools |
9 | 11 | import json |
|
25 | 27 | from .cache import Cache |
26 | 28 | from .compat import urllib # isort: split |
27 | 29 | from .compat import compat_os_name, compat_shlex_quote |
28 | | -from .cookies import load_cookies |
| 30 | +from .cookies import LenientSimpleCookie, load_cookies |
29 | 31 | from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name |
30 | 32 | from .downloader.rtmp import rtmpdump_version |
31 | 33 | from .extractor import gen_extractor_classes, get_info_extractor |
@@ -673,6 +675,9 @@ def process_color_policy(stream): |
673 | 675 | if auto_init and auto_init != 'no_verbose_header': |
674 | 676 | self.print_debug_header() |
675 | 677 |
|
| 678 | + self.__header_cookies = [] |
| 679 | + self._load_cookies(traverse_obj(self.params.get('http_headers'), 'cookie', casesense=False)) # compat |
| 680 | + |
676 | 681 | def check_deprecated(param, option, suggestion): |
677 | 682 | if self.params.get(param) is not None: |
678 | 683 | self.report_warning(f'{option} is deprecated. Use {suggestion} instead') |
@@ -1625,8 +1630,60 @@ def progress(msg): |
1625 | 1630 | self.to_screen('') |
1626 | 1631 | raise |
1627 | 1632 |
|
def _load_cookies(self, data, *, from_headers=True):
    """Parse a cookie string and register the cookies it contains

    Part of the workaround for cookies being passed to every domain.
    See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj

    Each cookie parsed by `LenientSimpleCookie` is converted into an
    `http.cookiejar.Cookie`. Cookies that carry a domain are stored in
    `self.cookiejar` right away. Cookies without a domain are either
    kept aside in the header-cookie list so they can be given a limited
    scope later (`from_headers=True`), or rejected through
    `report_error` (`from_headers=False`).

    @param data          The cookie string to load the cookies from
    @param from_headers  If true, `data` is treated as a plain `Cookie` header and
                         any cookie attribute in it raises ValueError.
                         If false, Set-Cookie syntax is accepted, but a cookie
                         without a domain is reported as an error
    """
    for morsel in LenientSimpleCookie(data).values():
        # A morsel's values are its attributes (domain, path, ...);
        # a plain Cookie header must not set any of them
        if from_headers and any(morsel.values()):
            raise ValueError('Invalid syntax in Cookie Header')

        domain = morsel.get('domain') or ''
        path = morsel.get('path')
        raw_expiry = morsel.get('expires')
        # Only the empty string means "no expiry"; 0 is valid
        expiry = None if raw_expiry == '' else raw_expiry

        jar_cookie = http.cookiejar.Cookie(
            version=morsel.get('version') or 0,
            name=morsel.key,
            value=morsel.value,
            port=None,
            port_specified=False,
            domain=domain,
            domain_specified=True,
            domain_initial_dot=True,
            path=path or '',
            path_specified=bool(path),
            secure=morsel.get('secure') or False,
            expires=expiry,
            discard=False,
            comment=None,
            comment_url=None,
            rest={})

        if domain:
            # Already scoped: safe to put into the jar directly
            self.cookiejar.set_cookie(jar_cookie)
            continue

        if not from_headers:
            self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
                              tb=False, is_error=False)
            continue

        # Unscoped header cookie: keep it aside until a domain is known
        self.deprecated_feature(
            'Passing cookies as a header is a potential security risk; '
            'they will be scoped to the domain of the downloaded urls. '
            'Please consider loading cookies from a file or browser instead.')
        self.__header_cookies.append(jar_cookie)
| 1667 | + |
| 1668 | + def _apply_header_cookies(self, url): |
| 1669 | + """Applies stray header cookies to the provided url |
| 1670 | +
|
| 1671 | + This loads header cookies and scopes them to the domain provided in `url`. |
| 1672 | + While this is not ideal, it helps reduce the risk of them being sent |
| 1673 | + to an unintended destination while mostly maintaining compatibility. |
| 1674 | + """ |
| 1675 | + parsed = urllib.parse.urlparse(url) |
| 1676 | + if not parsed.hostname: |
| 1677 | + return |
| 1678 | + |
| 1679 | + for cookie in map(copy.copy, self.__header_cookies): |
| 1680 | + cookie.domain = f'.{parsed.hostname}' |
| 1681 | + self.cookiejar.set_cookie(cookie) |
| 1682 | + |
1628 | 1683 | @_handle_extraction_exceptions |
1629 | 1684 | def __extract_info(self, url, ie, download, extra_info, process): |
| 1685 | + self._apply_header_cookies(url) |
| 1686 | + |
1630 | 1687 | try: |
1631 | 1688 | ie_result = ie.extract(url) |
1632 | 1689 | except UserNotLive as e: |
@@ -2414,9 +2471,24 @@ def _calc_headers(self, info_dict): |
2414 | 2471 | if 'Youtubedl-No-Compression' in res: # deprecated |
2415 | 2472 | res.pop('Youtubedl-No-Compression', None) |
2416 | 2473 | res['Accept-Encoding'] = 'identity' |
2417 | | - cookies = self.cookiejar.get_cookie_header(info_dict['url']) |
| 2474 | + cookies = self.cookiejar.get_cookies_for_url(info_dict['url']) |
2418 | 2475 | if cookies: |
2419 | | - res['Cookie'] = cookies |
| 2476 | + encoder = LenientSimpleCookie() |
| 2477 | + values = [] |
| 2478 | + for cookie in cookies: |
| 2479 | + _, value = encoder.value_encode(cookie.value) |
| 2480 | + values.append(f'{cookie.name}={value}') |
| 2481 | + if cookie.domain: |
| 2482 | + values.append(f'Domain={cookie.domain}') |
| 2483 | + if cookie.path: |
| 2484 | + values.append(f'Path={cookie.path}') |
| 2485 | + if cookie.secure: |
| 2486 | + values.append('Secure') |
| 2487 | + if cookie.expires: |
| 2488 | + values.append(f'Expires={cookie.expires}') |
| 2489 | + if cookie.version: |
| 2490 | + values.append(f'Version={cookie.version}') |
| 2491 | + info_dict['cookies'] = '; '.join(values) |
2420 | 2492 |
|
2421 | 2493 | if 'X-Forwarded-For' not in res: |
2422 | 2494 | x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip') |
@@ -3423,6 +3495,8 @@ def download_with_info_file(self, info_filename): |
3423 | 3495 | infos = [self.sanitize_info(info, self.params.get('clean_infojson', True)) |
3424 | 3496 | for info in variadic(json.loads('\n'.join(f)))] |
3425 | 3497 | for info in infos: |
| 3498 | + self._load_cookies(info.get('cookies'), from_headers=False) |
| 3499 | + self._load_cookies(traverse_obj(info.get('http_headers'), 'Cookie', casesense=False)) # compat |
3426 | 3500 | try: |
3427 | 3501 | self.__download_wrapper(self.process_ie_result)(info, download=True) |
3428 | 3502 | except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e: |
|
0 commit comments