Skip to content

Commit 1ceb657

Browse files
bashonly authored and pukkandan committed
[fd/external] Scope cookies
- ffmpeg: Calculate cookies from cookiejar and pass with `-cookies` arg instead of `-headers`
- aria2c, curl, wget: Write cookiejar to file and use external FD built-in cookiejar support
- httpie: Calculate cookies from cookiejar instead of `http_headers`
- axel: Calculate cookies from cookiejar and disable http redirection if cookies are passed
  - May break redirects, but axel simply doesn't have proper cookie support

Ref: GHSA-v8mc-9377-rwjj

Authored by: bashonly, coletdjnz
1 parent ad8902f commit 1ceb657

File tree

3 files changed

+179
-2
lines changed

3 files changed

+179
-2
lines changed

Diff for: test/test_downloader_external.py

+133
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#!/usr/bin/env python3
2+
3+
# Allow direct execution
4+
import os
5+
import sys
6+
import unittest
7+
8+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9+
10+
import http.cookiejar
11+
12+
from test.helper import FakeYDL
13+
from yt_dlp.downloader.external import (
14+
Aria2cFD,
15+
AxelFD,
16+
CurlFD,
17+
FFmpegFD,
18+
HttpieFD,
19+
WgetFD,
20+
)
21+
22+
# Keyword arguments for `http.cookiejar.Cookie`: a version-0 cookie
# `test=ytdlp` for domain `.example.com` and path `/`, with no expiry set.
TEST_COOKIE = {
    'version': 0,
    'name': 'test',
    'value': 'ytdlp',
    'port': None,
    'port_specified': False,
    'domain': '.example.com',
    'domain_specified': True,
    # Kept in sync with 'domain' above, which begins with a dot
    'domain_initial_dot': True,
    'path': '/',
    'path_specified': True,
    'secure': False,
    'expires': None,
    'discard': False,
    'comment': None,
    'comment_url': None,
    'rest': {},
}

# Minimal info dict handed to the downloaders; its URL is on a host that
# falls under the cookie's `.example.com` domain.
TEST_INFO = {'url': 'http://www.example.com/'}
42+
43+
44+
class TestHttpieFD(unittest.TestCase):
    """Checks the command line built by `HttpieFD._make_cmd`."""

    def test_make_cmd(self):
        base_cmd = ['http', '--download', '--output', 'test', 'http://www.example.com/']

        with FakeYDL() as ydl:
            fd = HttpieFD(ydl, {})

            # Before any cookie is set: only the plain download command
            self.assertEqual(fd._make_cmd('test', TEST_INFO), base_cmd)

            # Once the test cookie is in the jar, a trailing `Cookie:` argument is expected
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            self.assertEqual(
                fd._make_cmd('test', TEST_INFO),
                [*base_cmd, 'Cookie:test=ytdlp'])
57+
58+
59+
class TestAxelFD(unittest.TestCase):
    """Checks the command line built by `AxelFD._make_cmd`."""

    def test_make_cmd(self):
        url_args = ['--', 'http://www.example.com/']

        with FakeYDL() as ydl:
            fd = AxelFD(ydl, {})

            # Before any cookie is set: output file and URL only
            self.assertEqual(
                fd._make_cmd('test', TEST_INFO),
                ['axel', '-o', 'test', *url_args])

            # Once the test cookie is in the jar, the cookie header and the
            # redirect limit are expected ahead of the `--` separator.
            # NOTE(review): the header argument is expected here without a
            # preceding `-H` flag, unlike the `-H` usage for other headers in
            # the downloader - confirm whether axel accepts this form.
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            self.assertEqual(
                fd._make_cmd('test', TEST_INFO),
                ['axel', '-o', 'test', 'Cookie: test=ytdlp', '--max-redirect=0', *url_args])
72+
73+
74+
class TestWgetFD(unittest.TestCase):
    """Checks that `WgetFD._make_cmd` passes `--load-cookies` only when a cookie applies."""

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            downloader = WgetFD(ydl, {})
            self.assertNotIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))

            # Test cookiejar tempfile arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            try:
                self.assertIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))
            finally:
                # Building the command writes the cookies to a temporary file
                # that is otherwise only removed by `real_download`, which this
                # test never calls - delete it here so the test leaves nothing behind
                cookies_file = getattr(downloader, '_cookies_tempfile', None)
                if cookies_file and os.path.exists(cookies_file):
                    os.remove(cookies_file)
82+
83+
84+
class TestCurlFD(unittest.TestCase):
    """Checks that `CurlFD._make_cmd` passes a cookie file only when a cookie applies."""

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            downloader = CurlFD(ydl, {})
            self.assertNotIn('--cookie-jar', downloader._make_cmd('test', TEST_INFO))

            # Test cookiejar tempfile arg is added
            # NOTE(review): curl documents `--cookie-jar` as the file cookies are
            # written to after the transfer and `--cookie` as the option for
            # sending cookies - confirm `--cookie-jar` is the intended flag.
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            try:
                self.assertIn('--cookie-jar', downloader._make_cmd('test', TEST_INFO))
            finally:
                # Building the command writes the cookies to a temporary file
                # that is otherwise only removed by `real_download`, which this
                # test never calls - delete it here so the test leaves nothing behind
                cookies_file = getattr(downloader, '_cookies_tempfile', None)
                if cookies_file and os.path.exists(cookies_file):
                    os.remove(cookies_file)
92+
93+
94+
class TestAria2cFD(unittest.TestCase):
    """Checks that `Aria2cFD._make_cmd` writes and passes a cookie file only when a cookie applies."""

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            downloader = Aria2cFD(ydl, {})
            downloader._make_cmd('test', TEST_INFO)
            # Without cookies no temporary cookie file may have been created
            self.assertFalse(hasattr(downloader, '_cookies_tempfile'))

            # Test cookiejar tempfile arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            try:
                cmd = downloader._make_cmd('test', TEST_INFO)
                self.assertIn(f'--load-cookies={downloader._cookies_tempfile}', cmd)
            finally:
                # Building the command writes the cookies to a temporary file
                # that is otherwise only removed by `real_download`, which this
                # test never calls - delete it here so the test leaves nothing behind
                cookies_file = getattr(downloader, '_cookies_tempfile', None)
                if cookies_file and os.path.exists(cookies_file):
                    os.remove(cookies_file)
105+
106+
107+
@unittest.skipUnless(FFmpegFD.available(), 'ffmpeg not found')
class TestFFmpegFD(unittest.TestCase):
    """Checks the argument list `FFmpegFD._call_downloader` reports to `_debug_cmd`."""

    # Last argument list captured by `_test_cmd`
    _args = []

    def _test_cmd(self, args):
        """Replacement for the downloader's `_debug_cmd`: remember the arguments it was given."""
        self._args = args

    def test_make_cmd(self):
        leading_args = ['ffmpeg', '-y', '-hide_banner']
        input_and_output_args = [
            '-i', 'http://www.example.com/', '-c', 'copy', '-f', 'mp4', 'file:test']

        with FakeYDL() as ydl:
            fd = FFmpegFD(ydl, {})
            fd._debug_cmd = self._test_cmd

            # Before any cookie is set: no `-cookies` argument
            fd._call_downloader('test', {**TEST_INFO, 'ext': 'mp4'})
            self.assertEqual(self._args, [*leading_args, *input_and_output_args])

            # Once the test cookie is in the jar, `-cookies` is expected ahead of the input
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            fd._call_downloader('test', {**TEST_INFO, 'ext': 'mp4'})
            self.assertEqual(self._args, [
                *leading_args,
                '-cookies', 'test=ytdlp; path=/; domain=.example.com;\r\n',
                *input_and_output_args])
130+
131+
132+
if __name__ == '__main__':
133+
unittest.main()

Diff for: yt_dlp/cookies.py

+7
Original file line numberDiff line numberDiff line change
@@ -1327,6 +1327,13 @@ def get_cookie_header(self, url):
13271327
self.add_cookie_header(cookie_req)
13281328
return cookie_req.get_header('Cookie')
13291329

1330+
def get_cookies_for_url(self, url):
    """Return the result of `_cookies_for_request` for a request to the given url"""
    # `_cookies_for_request` needs the `_now` attribute of the policy to be set beforehand
    # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
    current_time = int(time.time())
    self._policy._now = current_time
    self._now = current_time

    # The url is sanitized and escaped before it is wrapped in a Request object
    cleaned_url = escape_url(sanitize_url(url))
    request = urllib.request.Request(cleaned_url)
    return self._cookies_for_request(request)
1336+
13301337
def clear(self, *args, **kwargs):
13311338
with contextlib.suppress(KeyError):
13321339
return super().clear(*args, **kwargs)

Diff for: yt_dlp/downloader/external.py

+39-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import enum
22
import json
3-
import os.path
3+
import os
44
import re
55
import subprocess
66
import sys
7+
import tempfile
78
import time
89
import uuid
910

@@ -42,6 +43,7 @@ class ExternalFD(FragmentFD):
4243
def real_download(self, filename, info_dict):
4344
self.report_destination(filename)
4445
tmpfilename = self.temp_name(filename)
46+
self._cookies_tempfile = None
4547

4648
try:
4749
started = time.time()
@@ -54,6 +56,9 @@ def real_download(self, filename, info_dict):
5456
# should take place
5557
retval = 0
5658
self.to_screen('[%s] Interrupted by user' % self.get_basename())
59+
finally:
60+
if self._cookies_tempfile:
61+
self.try_remove(self._cookies_tempfile)
5762

5863
if retval == 0:
5964
status = {
@@ -125,6 +130,16 @@ def _configuration_args(self, keys=None, *args, **kwargs):
125130
self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
126131
keys, *args, **kwargs)
127132

133+
def _write_cookies(self):
    """Save the cookiejar to disk and return the path it was written to.

    If the cookiejar has a filename, the jar is saved to that file and its
    path is returned. Otherwise the jar is saved to a temporary file whose
    path is stored in `self._cookies_tempfile`; `real_download` removes that
    file once the download has finished.
    """
    cookiejar = self.ydl.cookiejar
    if cookiejar.filename:
        # save() without a path writes to cookiejar.filename
        cookiejar.save(None)
        return cookiejar.filename

    # `_cookies_tempfile` is only initialised by real_download, so it may not
    # exist yet when a command is built on its own. A file created earlier is
    # reused rather than creating (and orphaning) another one.
    if not getattr(self, '_cookies_tempfile', None):
        # delete=False: the file has to outlive this handle so that the
        # external downloader can read it
        with tempfile.NamedTemporaryFile(suffix='.cookies', delete=False) as tmp_cookies:
            self._cookies_tempfile = tmp_cookies.name
        self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"')

    cookiejar.save(self._cookies_tempfile)
    return self._cookies_tempfile
142+
128143
def _call_downloader(self, tmpfilename, info_dict):
129144
""" Either overwrite this or implement _make_cmd """
130145
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
@@ -184,6 +199,8 @@ class CurlFD(ExternalFD):
184199

185200
def _make_cmd(self, tmpfilename, info_dict):
186201
cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
202+
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
203+
cmd += ['--cookie-jar', self._write_cookies()]
187204
if info_dict.get('http_headers') is not None:
188205
for key, val in info_dict['http_headers'].items():
189206
cmd += ['--header', f'{key}: {val}']
@@ -214,6 +231,9 @@ def _make_cmd(self, tmpfilename, info_dict):
214231
if info_dict.get('http_headers') is not None:
215232
for key, val in info_dict['http_headers'].items():
216233
cmd += ['-H', f'{key}: {val}']
234+
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
235+
if cookie_header:
236+
cmd += [f'Cookie: {cookie_header}', '--max-redirect=0']
217237
cmd += self._configuration_args()
218238
cmd += ['--', info_dict['url']]
219239
return cmd
@@ -223,7 +243,9 @@ class WgetFD(ExternalFD):
223243
AVAILABLE_OPT = '--version'
224244

225245
def _make_cmd(self, tmpfilename, info_dict):
226-
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto']
246+
cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
247+
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
248+
cmd += ['--load-cookies', self._write_cookies()]
227249
if info_dict.get('http_headers') is not None:
228250
for key, val in info_dict['http_headers'].items():
229251
cmd += ['--header', f'{key}: {val}']
@@ -279,6 +301,8 @@ def _make_cmd(self, tmpfilename, info_dict):
279301
else:
280302
cmd += ['--min-split-size', '1M']
281303

304+
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
305+
cmd += [f'--load-cookies={self._write_cookies()}']
282306
if info_dict.get('http_headers') is not None:
283307
for key, val in info_dict['http_headers'].items():
284308
cmd += ['--header', f'{key}: {val}']
@@ -417,6 +441,14 @@ def _make_cmd(self, tmpfilename, info_dict):
417441
if info_dict.get('http_headers') is not None:
418442
for key, val in info_dict['http_headers'].items():
419443
cmd += [f'{key}:{val}']
444+
445+
# httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
446+
# If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
447+
# 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
448+
# 2: https://httpie.io/docs/cli/sessions
449+
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
450+
if cookie_header:
451+
cmd += [f'Cookie:{cookie_header}']
420452
return cmd
421453

422454

@@ -527,6 +559,11 @@ def _call_downloader(self, tmpfilename, info_dict):
527559

528560
selected_formats = info_dict.get('requested_formats') or [info_dict]
529561
for i, fmt in enumerate(selected_formats):
562+
cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url'])
563+
if cookies:
564+
args.extend(['-cookies', ''.join(
565+
f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n'
566+
for cookie in cookies)])
530567
if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']):
531568
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
532569
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.

0 commit comments

Comments
 (0)