Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[cookies] Move YoutubeDLCookieJar to cookies.py #7091

Merged
merged 5 commits into from May 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 7 additions & 1 deletion test/test_YoutubeDLCookieJar.py
Expand Up @@ -11,7 +11,7 @@
import re
import tempfile

from yt_dlp.utils import YoutubeDLCookieJar
from yt_dlp.cookies import YoutubeDLCookieJar


class TestYoutubeDLCookieJar(unittest.TestCase):
Expand Down Expand Up @@ -47,6 +47,12 @@ def test_malformed_cookies(self):
# will be ignored
self.assertFalse(cookiejar._cookies)

def test_get_cookie_header(self):
    """The Cookie header built for a URL includes the fixture's HTTPONLY_COOKIE entry."""
    jar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
    # Load everything, including session (discard) and expired entries,
    # so the assertion does not depend on the fixture's expiry timestamps.
    jar.load(ignore_discard=True, ignore_expires=True)
    self.assertIn('HTTPONLY_COOKIE', jar.get_cookie_header('https://www.foobar.foobar'))


if __name__ == '__main__':
unittest.main()
7 changes: 3 additions & 4 deletions yt_dlp/YoutubeDL.py
Expand Up @@ -2383,7 +2383,7 @@ def _calc_headers(self, info_dict):
if 'Youtubedl-No-Compression' in res: # deprecated
res.pop('Youtubedl-No-Compression', None)
res['Accept-Encoding'] = 'identity'
cookies = self._calc_cookies(info_dict['url'])
cookies = self.cookiejar.get_cookie_header(info_dict['url'])
if cookies:
res['Cookie'] = cookies

Expand All @@ -2395,9 +2395,8 @@ def _calc_headers(self, info_dict):
return res

def _calc_cookies(self, url):
    """Deprecated: return the ``Cookie`` header value for ``url``.

    Emits a deprecation warning and delegates to
    ``self.cookiejar.get_cookie_header``, which is the replacement API.

    @param url  URL the cookie header should be generated for
    @returns    Whatever ``self.cookiejar.get_cookie_header(url)`` returns
    """
    # The warning must run before the delegation; previously it sat after an
    # unconditional `return` and could never fire.
    self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
    return self.cookiejar.get_cookie_header(url)

def _sort_thumbnails(self, thumbnails):
thumbnails.sort(key=lambda t: (
Expand Down
144 changes: 143 additions & 1 deletion yt_dlp/cookies.py
@@ -1,7 +1,9 @@
import base64
import collections
import contextlib
import http.cookiejar
import http.cookies
import io
import json
import os
import re
Expand All @@ -11,6 +13,7 @@
import sys
import tempfile
import time
import urllib.request
from datetime import datetime, timedelta, timezone
from enum import Enum, auto
from hashlib import pbkdf2_hmac
Expand All @@ -29,11 +32,14 @@
from .minicurses import MultilinePrinter, QuietMultilinePrinter
from .utils import (
Popen,
YoutubeDLCookieJar,
error_to_str,
escape_url,
expand_path,
is_path_like,
sanitize_url,
str_or_none,
try_call,
write_string,
)

CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
Expand Down Expand Up @@ -1091,3 +1097,139 @@ def load(self, data):

else:
morsel = None


class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    Netscape-format cookie jar that can read from / write to either a path
    or an already opened file-like object.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix marking HttpOnly cookies in cookies.txt; stripped before parsing
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookie line
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        """
        @param filename  Path-like object, file-like object or None.
                         Path-like values are normalized with os.fspath;
                         anything else is stored as given.
        Remaining arguments are forwarded to MozillaCookieJar.
        """
        # The parent is always given None; the filename is set here instead
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        """Return the cookies.txt boolean literal ('TRUE'/'FALSE') for a condition"""
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """
        Context manager yielding a text stream for `file`.

        @param file   Path-like (opened as UTF-8 and closed on exit) or a
                      file-like object (yielded as-is and left open)
        @param write  Open for writing; existing content is discarded
        """
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                # truncate() does not move the stream position. Rewind first,
                # otherwise a stream that was previously read or written
                # would be NUL-padded up to its old offset on the next write
                file.seek(0)
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        """
        Write every eligible cookie to `f`, one tab-separated line each.

        @param f               Writable text stream
        @param ignore_discard  Also write cookies flagged as discard
        @param ignore_expires  Also write cookies that are already expired
        """
        now = time.time()
        for cookie in self:
            if ((not ignore_discard and cookie.discard)
                    or (not ignore_expires and cookie.is_expired(now))):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117

        @param filename  Target path or file-like object; defaults to self.filename
        Remaining arguments (ignore_discard, ignore_expires) are forwarded
        to _really_save.
        @raises ValueError  If neither `filename` nor self.filename is set

        NB: Cookies whose `expires` is None are modified in place to `expires=0`.
        """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Load cookies from a file.

        Malformed entries are skipped with a warning. A malformed line that
        looks like JSON aborts the load instead.

        @param filename        Source path or file-like object; defaults to self.filename
        @param ignore_discard  Forwarded to _really_load
        @param ignore_expires  Forwarded to _really_load
        @raises ValueError                 If neither `filename` nor self.filename is set
        @raises http.cookiejar.LoadError   If the file appears to be JSON
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Validate a single line; returns it with any HttpOnly prefix removed
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Sanitized copy of the input that is handed to the stock parser
        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    # First non-blank character; the appended space keeps the
                    # index valid even if the stripped line is empty
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True

    def get_cookie_header(self, url):
        """Generate a Cookie HTTP header for a given url"""
        # Let the jar populate a throwaway request, then read the header back
        cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
        self.add_cookie_header(cookie_req)
        return cookie_req.get_header('Cookie')
2 changes: 1 addition & 1 deletion yt_dlp/extractor/common.py
Expand Up @@ -3440,7 +3440,7 @@ def _set_cookie(self, domain, name, value, expire_time=None, port=None,

def _get_cookies(self, url):
    """ Return a http.cookies.SimpleCookie with the cookies for the url """
    # Query the cookie jar directly; going through the deprecated
    # YoutubeDL._calc_cookies wrapper would emit a deprecation warning
    return LenientSimpleCookie(self._downloader.cookiejar.get_cookie_header(url))

def _apply_first_set_cookie_header(self, url_handle, cookie):
"""
Expand Down
3 changes: 3 additions & 0 deletions yt_dlp/utils/_legacy.py
Expand Up @@ -10,6 +10,9 @@
from .traversal import traverse_obj
from ..dependencies import certifi, websockets

# isort: split
from ..cookies import YoutubeDLCookieJar # noqa: F401

has_certifi = bool(certifi)
has_websockets = bool(websockets)

Expand Down
130 changes: 0 additions & 130 deletions yt_dlp/utils/_utils.py
Expand Up @@ -1511,136 +1511,6 @@ def is_path_like(f):
return isinstance(f, (str, bytes, os.PathLike))


class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    Netscape-format cookie jar that can read from / write to either a path
    or an already opened file-like object.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix marking HttpOnly cookies in cookies.txt; stripped before parsing
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookie line
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        """
        @param filename  Path-like object, file-like object or None.
                         Path-like values are normalized with os.fspath;
                         anything else is stored as given.
        Remaining arguments are forwarded to MozillaCookieJar.
        """
        # The parent is always given None; the filename is set here instead
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        """Return the cookies.txt boolean literal ('TRUE'/'FALSE') for a condition"""
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """
        Context manager yielding a text stream for `file`.

        @param file   Path-like (opened as UTF-8 and closed on exit) or a
                      file-like object (yielded as-is and left open)
        @param write  Open for writing; existing content is discarded
        """
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                # truncate() does not move the stream position. Rewind first,
                # otherwise a stream that was previously read or written
                # would be NUL-padded up to its old offset on the next write
                file.seek(0)
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        """
        Write every eligible cookie to `f`, one tab-separated line each.

        @param f               Writable text stream
        @param ignore_discard  Also write cookies flagged as discard
        @param ignore_expires  Also write cookies that are already expired
        """
        now = time.time()
        for cookie in self:
            if ((not ignore_discard and cookie.discard)
                    or (not ignore_expires and cookie.is_expired(now))):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117

        @param filename  Target path or file-like object; defaults to self.filename
        Remaining arguments (ignore_discard, ignore_expires) are forwarded
        to _really_save.
        @raises ValueError  If neither `filename` nor self.filename is set

        NB: Cookies whose `expires` is None are modified in place to `expires=0`.
        """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Load cookies from a file.

        Malformed entries are skipped with a warning. A malformed line that
        looks like JSON aborts the load instead.

        @param filename        Source path or file-like object; defaults to self.filename
        @param ignore_discard  Forwarded to _really_load
        @param ignore_expires  Forwarded to _really_load
        @raises ValueError                 If neither `filename` nor self.filename is set
        @raises http.cookiejar.LoadError   If the file appears to be JSON
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Validate a single line; returns it with any HttpOnly prefix removed
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Sanitized copy of the input that is handed to the stock parser
        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    # First non-blank character; the appended space keeps the
                    # index valid even if the stripped line is empty
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True


class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
def __init__(self, cookiejar=None):
urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
Expand Down