Skip to content

Commit

Permalink
[cookies] Move YoutubeDLCookieJar to cookies module (#7091)
Browse files Browse the repository at this point in the history
Authored by: coletdjnz
  • Loading branch information
coletdjnz committed May 27, 2023
1 parent 08916a4 commit b87e01c
Show file tree
Hide file tree
Showing 6 changed files with 157 additions and 137 deletions.
8 changes: 7 additions & 1 deletion test/test_YoutubeDLCookieJar.py
Expand Up @@ -11,7 +11,7 @@
import re
import tempfile

from yt_dlp.utils import YoutubeDLCookieJar
from yt_dlp.cookies import YoutubeDLCookieJar


class TestYoutubeDLCookieJar(unittest.TestCase):
Expand Down Expand Up @@ -47,6 +47,12 @@ def test_malformed_cookies(self):
# will be ignored
self.assertFalse(cookiejar._cookies)

def test_get_cookie_header(self):
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
cookiejar.load(ignore_discard=True, ignore_expires=True)
header = cookiejar.get_cookie_header('https://www.foobar.foobar')
self.assertIn('HTTPONLY_COOKIE', header)


if __name__ == '__main__':
unittest.main()
7 changes: 3 additions & 4 deletions yt_dlp/YoutubeDL.py
Expand Up @@ -2404,7 +2404,7 @@ def _calc_headers(self, info_dict):
if 'Youtubedl-No-Compression' in res: # deprecated
res.pop('Youtubedl-No-Compression', None)
res['Accept-Encoding'] = 'identity'
cookies = self._calc_cookies(info_dict['url'])
cookies = self.cookiejar.get_cookie_header(info_dict['url'])
if cookies:
res['Cookie'] = cookies

Expand All @@ -2416,9 +2416,8 @@ def _calc_headers(self, info_dict):
return res

def _calc_cookies(self, url):
pr = sanitized_Request(url)
self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie')
self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
return self.cookiejar.get_cookie_header(url)

def _sort_thumbnails(self, thumbnails):
thumbnails.sort(key=lambda t: (
Expand Down
144 changes: 143 additions & 1 deletion yt_dlp/cookies.py
@@ -1,7 +1,9 @@
import base64
import collections
import contextlib
import http.cookiejar
import http.cookies
import io
import json
import os
import re
Expand All @@ -11,6 +13,7 @@
import sys
import tempfile
import time
import urllib.request
from datetime import datetime, timedelta, timezone
from enum import Enum, auto
from hashlib import pbkdf2_hmac
Expand All @@ -29,11 +32,14 @@
from .minicurses import MultilinePrinter, QuietMultilinePrinter
from .utils import (
Popen,
YoutubeDLCookieJar,
error_to_str,
escape_url,
expand_path,
is_path_like,
sanitize_url,
str_or_none,
try_call,
write_string,
)

CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
Expand Down Expand Up @@ -1091,3 +1097,139 @@ def load(self, data):

else:
morsel = None


class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
"""
See [1] for cookie file format.
1. https://curl.haxx.se/docs/http-cookies.html
"""
_HTTPONLY_PREFIX = '#HttpOnly_'
_ENTRY_LEN = 7
_HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.
'''
_CookieFileEntry = collections.namedtuple(
'CookieFileEntry',
('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

def __init__(self, filename=None, *args, **kwargs):
super().__init__(None, *args, **kwargs)
if is_path_like(filename):
filename = os.fspath(filename)
self.filename = filename

@staticmethod
def _true_or_false(cndn):
return 'TRUE' if cndn else 'FALSE'

@contextlib.contextmanager
def open(self, file, *, write=False):
if is_path_like(file):
with open(file, 'w' if write else 'r', encoding='utf-8') as f:
yield f
else:
if write:
file.truncate(0)
yield file

def _really_save(self, f, ignore_discard=False, ignore_expires=False):
now = time.time()
for cookie in self:
if (not ignore_discard and cookie.discard
or not ignore_expires and cookie.is_expired(now)):
continue
name, value = cookie.name, cookie.value
if value is None:
# cookies.txt regards 'Set-Cookie: foo' as a cookie
# with no name, whereas http.cookiejar regards it as a
# cookie with no value.
name, value = '', name
f.write('%s\n' % '\t'.join((
cookie.domain,
self._true_or_false(cookie.domain.startswith('.')),
cookie.path,
self._true_or_false(cookie.secure),
str_or_none(cookie.expires, default=''),
name, value
)))

def save(self, filename=None, *args, **kwargs):
"""
Save cookies to a file.
Code is taken from CPython 3.6
https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

if filename is None:
if self.filename is not None:
filename = self.filename
else:
raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

# Store session cookies with `expires` set to 0 instead of an empty string
for cookie in self:
if cookie.expires is None:
cookie.expires = 0

with self.open(filename, write=True) as f:
f.write(self._HEADER)
self._really_save(f, *args, **kwargs)

def load(self, filename=None, ignore_discard=False, ignore_expires=False):
"""Load cookies from a file."""
if filename is None:
if self.filename is not None:
filename = self.filename
else:
raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

def prepare_line(line):
if line.startswith(self._HTTPONLY_PREFIX):
line = line[len(self._HTTPONLY_PREFIX):]
# comments and empty lines are fine
if line.startswith('#') or not line.strip():
return line
cookie_list = line.split('\t')
if len(cookie_list) != self._ENTRY_LEN:
raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
cookie = self._CookieFileEntry(*cookie_list)
if cookie.expires_at and not cookie.expires_at.isdigit():
raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
return line

cf = io.StringIO()
with self.open(filename) as f:
for line in f:
try:
cf.write(prepare_line(line))
except http.cookiejar.LoadError as e:
if f'{line.strip()} '[0] in '[{"':
raise http.cookiejar.LoadError(
'Cookies file must be Netscape formatted, not JSON. See '
'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
continue
cf.seek(0)
self._really_load(cf, filename, ignore_discard, ignore_expires)
# Session cookies are denoted by either `expires` field set to
# an empty string or 0. MozillaCookieJar only recognizes the former
# (see [1]). So we need force the latter to be recognized as session
# cookies on our own.
# Session cookies may be important for cookies-based authentication,
# e.g. usually, when user does not check 'Remember me' check box while
# logging in on a site, some important cookies are stored as session
# cookies so that not recognizing them will result in failed login.
# 1. https://bugs.python.org/issue17164
for cookie in self:
# Treat `expires=0` cookies as session cookies
if cookie.expires == 0:
cookie.expires = None
cookie.discard = True

def get_cookie_header(self, url):
"""Generate a Cookie HTTP header for a given url"""
cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
self.add_cookie_header(cookie_req)
return cookie_req.get_header('Cookie')
2 changes: 1 addition & 1 deletion yt_dlp/extractor/common.py
Expand Up @@ -3444,7 +3444,7 @@ def _set_cookie(self, domain, name, value, expire_time=None, port=None,

def _get_cookies(self, url):
""" Return a http.cookies.SimpleCookie with the cookies for the url """
return LenientSimpleCookie(self._downloader._calc_cookies(url))
return LenientSimpleCookie(self._downloader.cookiejar.get_cookie_header(url))

def _apply_first_set_cookie_header(self, url_handle, cookie):
"""
Expand Down
3 changes: 3 additions & 0 deletions yt_dlp/utils/_legacy.py
Expand Up @@ -10,6 +10,9 @@
from .traversal import traverse_obj
from ..dependencies import certifi, websockets

# isort: split
from ..cookies import YoutubeDLCookieJar # noqa: F401

has_certifi = bool(certifi)
has_websockets = bool(websockets)

Expand Down
130 changes: 0 additions & 130 deletions yt_dlp/utils/_utils.py
Expand Up @@ -1518,136 +1518,6 @@ def is_path_like(f):
return isinstance(f, (str, bytes, os.PathLike))


class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
"""
See [1] for cookie file format.
1. https://curl.haxx.se/docs/http-cookies.html
"""
_HTTPONLY_PREFIX = '#HttpOnly_'
_ENTRY_LEN = 7
_HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.
'''
_CookieFileEntry = collections.namedtuple(
'CookieFileEntry',
('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

def __init__(self, filename=None, *args, **kwargs):
super().__init__(None, *args, **kwargs)
if is_path_like(filename):
filename = os.fspath(filename)
self.filename = filename

@staticmethod
def _true_or_false(cndn):
return 'TRUE' if cndn else 'FALSE'

@contextlib.contextmanager
def open(self, file, *, write=False):
if is_path_like(file):
with open(file, 'w' if write else 'r', encoding='utf-8') as f:
yield f
else:
if write:
file.truncate(0)
yield file

def _really_save(self, f, ignore_discard=False, ignore_expires=False):
now = time.time()
for cookie in self:
if (not ignore_discard and cookie.discard
or not ignore_expires and cookie.is_expired(now)):
continue
name, value = cookie.name, cookie.value
if value is None:
# cookies.txt regards 'Set-Cookie: foo' as a cookie
# with no name, whereas http.cookiejar regards it as a
# cookie with no value.
name, value = '', name
f.write('%s\n' % '\t'.join((
cookie.domain,
self._true_or_false(cookie.domain.startswith('.')),
cookie.path,
self._true_or_false(cookie.secure),
str_or_none(cookie.expires, default=''),
name, value
)))

def save(self, filename=None, *args, **kwargs):
"""
Save cookies to a file.
Code is taken from CPython 3.6
https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

if filename is None:
if self.filename is not None:
filename = self.filename
else:
raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

# Store session cookies with `expires` set to 0 instead of an empty string
for cookie in self:
if cookie.expires is None:
cookie.expires = 0

with self.open(filename, write=True) as f:
f.write(self._HEADER)
self._really_save(f, *args, **kwargs)

def load(self, filename=None, ignore_discard=False, ignore_expires=False):
"""Load cookies from a file."""
if filename is None:
if self.filename is not None:
filename = self.filename
else:
raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

def prepare_line(line):
if line.startswith(self._HTTPONLY_PREFIX):
line = line[len(self._HTTPONLY_PREFIX):]
# comments and empty lines are fine
if line.startswith('#') or not line.strip():
return line
cookie_list = line.split('\t')
if len(cookie_list) != self._ENTRY_LEN:
raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
cookie = self._CookieFileEntry(*cookie_list)
if cookie.expires_at and not cookie.expires_at.isdigit():
raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
return line

cf = io.StringIO()
with self.open(filename) as f:
for line in f:
try:
cf.write(prepare_line(line))
except http.cookiejar.LoadError as e:
if f'{line.strip()} '[0] in '[{"':
raise http.cookiejar.LoadError(
'Cookies file must be Netscape formatted, not JSON. See '
'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
continue
cf.seek(0)
self._really_load(cf, filename, ignore_discard, ignore_expires)
# Session cookies are denoted by either `expires` field set to
# an empty string or 0. MozillaCookieJar only recognizes the former
# (see [1]). So we need force the latter to be recognized as session
# cookies on our own.
# Session cookies may be important for cookies-based authentication,
# e.g. usually, when user does not check 'Remember me' check box while
# logging in on a site, some important cookies are stored as session
# cookies so that not recognizing them will result in failed login.
# 1. https://bugs.python.org/issue17164
for cookie in self:
# Treat `expires=0` cookies as session cookies
if cookie.expires == 0:
cookie.expires = None
cookie.discard = True


class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
def __init__(self, cookiejar=None):
urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
Expand Down

0 comments on commit b87e01c

Please sign in to comment.