Skip to content

Commit

Permalink
Revert "Add impersonate headers blacklist"
Browse files Browse the repository at this point in the history
This reverts commit 7729201.
  • Loading branch information
coletdjnz committed Mar 15, 2024
1 parent eb76328 commit 4286b81
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 37 deletions.
36 changes: 21 additions & 15 deletions test/test_networking.py
Original file line number Diff line number Diff line change
Expand Up @@ -785,21 +785,6 @@ def test_supported_impersonate_targets(self, handler):
assert res.status == 200
assert std_headers['user-agent'].lower() not in res.read().decode().lower()

# Check the impersonation header blacklist: for every header named in
# rh._IMPERSONATE_HEADERS_BLACKLIST, a caller-supplied value must be absent
# from the /headers response when impersonating and present when not.
@pytest.mark.parametrize('impersonate', [True, False])
def test_headers_blacklist(self, handler, impersonate):
with handler() as rh:
for header in rh._IMPERSONATE_HEADERS_BLACKLIST:
# Use the first target the handler reports as supported
supported_target = rh.supported_targets[0]
# The 'impersonate' extension is only set for the impersonate=True case
res = validate_and_send(rh, Request(
f'http://127.0.0.1:{self.http_port}/headers',
headers={header: 'testvalue'}, extensions={'impersonate': supported_target} if impersonate else {}))
assert res.status == 200
# presumably the /headers endpoint echoes the request headers it received - confirm against the test server
sent_headers = res.read().decode()
# Both sides are lowercased, so the comparison is case-insensitive
if impersonate:
assert f'{header}: testvalue'.lower() not in sent_headers.lower()
else:
assert f'{header}: testvalue'.lower() in sent_headers.lower()


class TestRequestHandlerMisc:
"""Misc generic tests for request handlers, not related to request or validation testing"""
Expand Down Expand Up @@ -995,6 +980,27 @@ def test_impersonate(self, handler, params, extensions):
# Check that user agent is added over ours
assert 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36' in res

# Check how std_headers given to the handler interact with impersonation:
# unchanged std_headers values must not be sent when impersonating, headers
# the request overrides or adds must be kept, and without impersonation all
# std_headers must be sent.
@pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True)
def test_headers(self, handler):
with handler(headers=std_headers) as rh:
# Ensure curl-impersonate overrides our standard headers (passed to the handler above) when impersonating
# The response body is lowercased so all checks below are case-insensitive
res = validate_and_send(
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={
'impersonate': ImpersonateTarget('safari')}, headers={'x-custom': 'test', 'sec-fetch-mode': 'custom'})).read().decode().lower()

# The std_headers values themselves must not appear anywhere in the response
assert std_headers['user-agent'].lower() not in res
assert std_headers['accept-language'].lower() not in res
assert std_headers['sec-fetch-mode'].lower() not in res
# other than UA, custom headers that differ from std_headers should be kept
assert 'sec-fetch-mode: custom' in res
assert 'x-custom: test' in res
# but when not impersonating don't remove std_headers
res = validate_and_send(
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'x-custom': 'test'})).read().decode().lower()
# std_headers should be present
for k, v in std_headers.items():
assert f'{k}: {v}'.lower() in res

@pytest.mark.parametrize('raised,expected,match', [
(lambda: curl_cffi.requests.errors.RequestsError(
'', code=curl_cffi.const.CurlECode.PARTIAL_FILE), IncompleteRead, None),
Expand Down
27 changes: 5 additions & 22 deletions yt_dlp/networking/impersonate.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .exceptions import UnsupportedRequest
from ..compat.types import NoneType
from ..utils import classproperty
from ..utils.networking import std_headers


@dataclass(order=True, frozen=True)
Expand Down Expand Up @@ -69,26 +70,6 @@ class ImpersonateRequestHandler(RequestHandler, ABC):
"""
_SUPPORTED_IMPERSONATE_TARGET_MAP: dict[ImpersonateTarget, Any] = {}

# Header names that _get_impersonate_headers pops from the merged headers
# whenever _get_request_target(request) is not None.
_IMPERSONATE_HEADERS_BLACKLIST = [
# Headers to remove from provided headers when impersonating.
# In the networking framework, the provided headers are intended
# to give a consistent user agent across request handlers.
# However, it is intended that the impersonation implementation will add the required headers to mimic a client.
# So we need to remove provided headers that may interfere with this behaviour.
# TODO(future): Add a method of excluding headers from this blacklist, such as User-Agent in certain cases.
# TODO(future): "Accept" should be included here, however it is currently required for some sites.
'User-Agent',
'Accept-Language',
'Sec-Fetch-Mode',
'Sec-Fetch-Site',
'Sec-Fetch-User',
'Sec-Fetch-Dest',
'Upgrade-Insecure-Requests',
'Sec-Ch-Ua',
'Sec-Ch-Ua-Mobile',
'Sec-Ch-Ua-Platform',
]

def __init__(self, *, impersonate: ImpersonateTarget = None, **kwargs):
super().__init__(**kwargs)
self.impersonate = impersonate
Expand Down Expand Up @@ -141,8 +122,10 @@ def _get_mapped_request_target(self, request):
# Build the headers to send for a request: merge the request headers via
# self._merge_headers and, when the request has an impersonate target, drop
# headers that should be left to the impersonation implementation.
# NOTE(review): this diff hunk appears to interleave the removed blacklist loop
# with the added std_headers loop, without +/- markers - confirm against the commit.
def _get_impersonate_headers(self, request):
headers = self._merge_headers(request.headers)
# Only strip headers when a target was resolved for this request
if self._get_request_target(request) is not None:
# blacklist variant: drop every blacklisted header regardless of its value
for header in self._IMPERSONATE_HEADERS_BLACKLIST:
headers.pop(header, None)
# std_headers variant: remove only headers whose value is still identical to the
# std_headers value; headers overridden with a different value are kept
for header in std_headers:
if header in headers and std_headers[header] == headers[header]:
headers.pop(header, None)
return headers


Expand Down

0 comments on commit 4286b81

Please sign in to comment.