Skip to content

Commit

Permalink
more refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
coletdjnz committed Jun 11, 2022
1 parent 1e9ae1c commit c3dd16b
Show file tree
Hide file tree
Showing 8 changed files with 160 additions and 159 deletions.
2 changes: 1 addition & 1 deletion test/test_YoutubeDLCookieJar.py
Expand Up @@ -10,7 +10,7 @@
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from yt_dlp.networking.common import YoutubeDLCookieJar
from yt_dlp.cookies import YoutubeDLCookieJar


class TestYoutubeDLCookieJar(unittest.TestCase):
Expand Down
2 changes: 1 addition & 1 deletion yt_dlp/YoutubeDL.py
Expand Up @@ -47,9 +47,9 @@
from .networking.common import (
Request,
RequestHandlerBroker,
make_std_headers,
HEADRequest
)
from .networking.utils import make_std_headers

from .networking.utils import has_certifi, get_cookie_header

Expand Down
2 changes: 1 addition & 1 deletion yt_dlp/__init__.py
Expand Up @@ -58,7 +58,7 @@
)
from .YoutubeDL import YoutubeDL

from .networking.common import make_std_headers
from .networking.utils import make_std_headers


def get_urls(urls, batchfile, verbose):
Expand Down
124 changes: 123 additions & 1 deletion yt_dlp/cookies.py
@@ -1,16 +1,23 @@
from __future__ import unicode_literals, annotations

import collections
import contextlib
import ctypes
import io
import json
import os
import shutil
import struct
import subprocess
import sys
import tempfile
import time
from datetime import datetime, timedelta, timezone
from enum import Enum, auto
from hashlib import pbkdf2_hmac

from .compat import compat_cookiejar, compat_str

from .aes import (
aes_cbc_decrypt_bytes,
aes_gcm_decrypt_and_verify_bytes,
Expand All @@ -23,8 +30,8 @@
from .utils import (
expand_path,
Popen,
write_string
)
from .networking.common import YoutubeDLCookieJar

try:
import sqlite3
Expand Down Expand Up @@ -956,3 +963,118 @@ def _parse_browser_specification(browser_name, profile=None, keyring=None):
if profile is not None and _is_path(profile):
profile = os.path.expanduser(profile)
return browser_name, profile, keyring


class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    Netscape/Mozilla cookie file jar that reads and writes UTF-8 files.

    Session cookies are written with `expires` set to 0 and are turned back
    into session cookies when loaded. Entries prefixed with `#HttpOnly_` are
    accepted, and malformed entries are skipped with a warning instead of
    aborting the whole load.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.
'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8.

        The cookies held by the jar are not modified: a session cookie
        (`expires` is None) is written with `expires` 0, but keeps
        `expires = None` in memory so it is never mistaken for an expired
        cookie, neither by the `ignore_expires` check below nor by later
        users of the jar.

        @param filename         Destination path; defaults to `self.filename`
        @param ignore_discard   Also write cookies marked as `discard`
                                (i.e. session cookies)
        @param ignore_expires   Also write cookies that have already expired
        @raises ValueError      If no filename is given and none was set on
                                the jar
        """
        if filename is None:
            if self.filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = 'TRUE' if cookie.secure else 'FALSE'
                initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
                # Store session cookies with `expires` set to 0 instead of an
                # empty string. This is computed locally rather than by
                # assigning to cookie.expires, which would make the live
                # cookie look expired (epoch 0).
                expires = '0' if cookie.expires is None else str(cookie.expires)
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name, value = '', cookie.name
                else:
                    name, value = cookie.name, cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Load cookies from a file.

        Each line is validated first; entries with the wrong number of
        tab-separated fields or a non-numeric `expires` field are reported
        on stderr and skipped. The remaining lines are handed to
        MozillaCookieJar._really_load().

        @param filename         Source path; defaults to `self.filename`
        @param ignore_discard   Passed through to _really_load()
        @param ignore_expires   Passed through to _really_load()
        @raises ValueError      If no filename is given and none was set on
                                the jar
        """
        if filename is None:
            if self.filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        def prepare_line(line):
            # Strip the HttpOnly marker so the entry parses as a normal cookie
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Build a sanitized in-memory copy of the file for the stdlib parser
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as
        # session cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
3 changes: 2 additions & 1 deletion yt_dlp/extractor/common.py
Expand Up @@ -33,7 +33,8 @@
get_base_url,
remove_encrypted_media,
)
from ..networking.common import Request, update_request
from ..networking.common import Request
from ..networking.utils import update_request
from ..networking.utils import get_cookie_header
from ..utils import (
age_restricted,
Expand Down
3 changes: 2 additions & 1 deletion yt_dlp/networking/_urllib.py
Expand Up @@ -19,7 +19,8 @@
compat_urlparse, compat_HTTPError, compat_brotli
)

from .common import Response, BackendRH, Request, make_std_headers
from .common import Response, BackendRH, Request
from .utils import make_std_headers
from .socksproxy import sockssocket
from .utils import handle_youtubedl_headers, socks_create_proxy_args, ssl_load_certs, select_proxy
from ..utils import (
Expand Down
153 changes: 1 addition & 152 deletions yt_dlp/networking/common.py
@@ -1,29 +1,21 @@
from __future__ import unicode_literals
from __future__ import annotations

import collections
import email.policy
import inspect
import io
import ssl
import sys
import time
import typing
import urllib.parse
from email.message import Message
from http import HTTPStatus
import urllib.request
import urllib.response
from typing import Union, Type, List

from ..compat import compat_cookiejar, compat_str
from typing import Union

from ..utils import (
extract_basic_auth,
escape_url,
sanitize_url,
write_string,
std_headers,
update_url_query,
bug_reports_message,
YoutubeDLError,
Expand All @@ -32,8 +24,6 @@
UnsupportedRequest
)

from .utils import random_user_agent

if typing.TYPE_CHECKING:
from ..YoutubeDL import YoutubeDL

Expand Down Expand Up @@ -175,18 +165,6 @@ def method(self):
return 'PUT'


def update_request(req: Request, url: str = None, data=None,
                   headers: typing.Mapping = None, query: dict = None):
    """
    Creates a copy of the request and updates relevant fields

    The passed request is left untouched; all changes are applied to the
    copy that is returned. A falsy `url` or `data` keeps the value of the
    copied request, `headers` are merged into the existing headers, and
    `query` is applied to the resulting URL through update_url_query().
    """
    updated = req.copy()
    updated.data = data if data else updated.data
    updated.headers.update(headers if headers else {})
    target_url = url if url else updated.url
    updated.url = update_url_query(target_url, query if query else {})
    return updated


class Response(io.IOBase):
"""
Abstract base class for HTTP response adapters.
Expand Down Expand Up @@ -418,132 +396,3 @@ def send(self, request: Union[Request, str, urllib.request.Request]) -> Response
assert isinstance(res, Response)
return res
raise YoutubeDLError('No request handlers configured that could handle this request.')


class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    Netscape/Mozilla cookie file jar that reads and writes UTF-8 files.

    Session cookies are written with `expires` set to 0 and are turned back
    into session cookies when loaded. Entries prefixed with `#HttpOnly_` are
    accepted, and malformed entries are skipped with a warning instead of
    aborting the whole load.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.
'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8.

        The cookies held by the jar are not modified: a session cookie
        (`expires` is None) is written with `expires` 0, but keeps
        `expires = None` in memory so it is never mistaken for an expired
        cookie, neither by the `ignore_expires` check below nor by later
        users of the jar.

        @param filename         Destination path; defaults to `self.filename`
        @param ignore_discard   Also write cookies marked as `discard`
                                (i.e. session cookies)
        @param ignore_expires   Also write cookies that have already expired
        @raises ValueError      If no filename is given and none was set on
                                the jar
        """
        if filename is None:
            if self.filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = 'TRUE' if cookie.secure else 'FALSE'
                initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
                # Store session cookies with `expires` set to 0 instead of an
                # empty string. This is computed locally rather than by
                # assigning to cookie.expires, which would make the live
                # cookie look expired (epoch 0).
                expires = '0' if cookie.expires is None else str(cookie.expires)
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name, value = '', cookie.name
                else:
                    name, value = cookie.name, cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Load cookies from a file.

        Each line is validated first; entries with the wrong number of
        tab-separated fields or a non-numeric `expires` field are reported
        on stderr and skipped. The remaining lines are handed to
        MozillaCookieJar._really_load().

        @param filename         Source path; defaults to `self.filename`
        @param ignore_discard   Passed through to _really_load()
        @param ignore_expires   Passed through to _really_load()
        @raises ValueError      If no filename is given and none was set on
                                the jar
        """
        if filename is None:
            if self.filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        def prepare_line(line):
            # Strip the HttpOnly marker so the entry parses as a normal cookie
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Build a sanitized in-memory copy of the file for the stdlib parser
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as
        # session cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True


# Module-level default request headers.
# The User-Agent value comes from a single random_user_agent() call made
# when this statement is executed.
# Use make_std_headers() to get a copy of these
_std_headers = CaseInsensitiveDict({
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
})


# Get a copy of std headers, while also retaining backwards compat with utils.std_headers
def make_std_headers():
    """
    Return a new CaseInsensitiveDict built from _std_headers and
    utils.std_headers (passed in that order).

    NOTE(review): which of the two wins on a duplicate header depends on
    CaseInsensitiveDict's constructor - presumably the later argument; confirm.
    """
    combined = CaseInsensitiveDict(_std_headers, std_headers)
    return combined

0 comments on commit c3dd16b

Please sign in to comment.