Skip to content

Commit

Permalink
Merge pull request #49 from zalando-incubator/optimize-blacklist
Browse files Browse the repository at this point in the history
Read the blacklist once
  • Loading branch information
tortila committed Apr 29, 2019
2 parents c23f291 + afb067b commit 8b12d5c
Show file tree
Hide file tree
Showing 10 changed files with 113 additions and 54 deletions.
17 changes: 17 additions & 0 deletions docs/Changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,23 @@ The format is based on `Keep a Changelog`_, and this project adheres to
:local:
:depth: 1

.. _v1.1.3:

v1.1.3
======

- Release date: 2019-04-26 16:44

- Diff__.

__ https://github.com/zalando-incubator/transformer/compare/v1.1.2...v1.1.3

Changed
-------

Blacklisting mechanism now opens the `.urlignore` file once per execution of the program,
instead of once per :class:`Request <transformer.request.Request>`.

.. _v1.1.2:

v1.1.2
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
# The short X.Y version
version = "1.1"
# The full version, including alpha/beta/rc tags
release = "1.1.2"
release = "1.1.3"


# -- General configuration ---------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "har-transformer"
version = "1.1.2"
version = "1.1.3"
description = "A tool to convert HAR files into a locustfile."
authors = [
"Serhii Cherniavskyi <serhii.cherniavskyi@zalando.de>",
Expand Down
37 changes: 22 additions & 15 deletions transformer/blacklist.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,32 @@
import os
import logging
import os
from typing import Set

Blacklist = Set[str]

def on_blacklist(url):
"""
Checks for matching URLs in an ignore file (blacklist)
from user's current directory.
"""
blacklist_file = f"{os.getcwd()}/.urlignore"
try:
with open(blacklist_file) as file:
blacklist = [line.rstrip("\n") for line in file if len(line) > 1]

for blacklist_item in blacklist:
if blacklist_item in url:
return True
def get_empty() -> Blacklist:
return set()

return False

def from_file() -> Blacklist:
blacklist_file = f"{os.getcwd()}/.urlignore"
try:
with open(blacklist_file) as file:
return set(filter(None, [line.rstrip() for line in file]))
except OSError as err:
logging.debug(
"Could not read blacklist file %s. Reason: %s", blacklist_file, err
)
return False
return get_empty()


def on_blacklist(blacklist: Blacklist, url: str) -> bool:
"""
Checks for matching URLs in an ignore file (blacklist)
from user's current directory.
"""
for blacklist_item in blacklist:
if blacklist_item in url:
return True
return False
29 changes: 25 additions & 4 deletions transformer/scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from dataclasses import dataclass

import transformer.plugins as plug
from transformer.blacklist import Blacklist, get_empty
from transformer.naming import to_identifier
from transformer.plugins.contracts import Plugin
from transformer.request import Request
Expand Down Expand Up @@ -123,6 +124,7 @@ def from_path(
plugins: Sequence[Plugin] = (),
ts_plugins: Sequence[Plugin] = (),
short_name: bool = False,
blacklist: Optional[Blacklist] = None,
) -> "Scenario":
"""
Makes a :class:`Scenario` (possibly containing sub-scenarios) out of
Expand All @@ -145,14 +147,26 @@ def from_path(
but *True* when generating sub-scenarios (:attr:`children`) from
a directory *path* (because then the names are "scoped" by
the parent directory).
:param blacklist: a set of urls to be blacklisted
"""
if blacklist is None:
blacklist = get_empty()

if path.is_dir():
return cls.from_dir(
path, plugins, ts_plugins=ts_plugins, short_name=short_name
path,
plugins=plugins,
ts_plugins=ts_plugins,
short_name=short_name,
blacklist=blacklist,
)
else:
return cls.from_har_file(
path, plugins, ts_plugins=ts_plugins, short_name=short_name
path,
plugins,
ts_plugins=ts_plugins,
short_name=short_name,
blacklist=blacklist,
)

@classmethod
Expand All @@ -162,6 +176,7 @@ def from_dir(
plugins: Sequence[Plugin],
ts_plugins: Sequence[Plugin],
short_name: bool,
blacklist: Blacklist,
) -> "Scenario":
"""
Makes a :class:`Scenario` out of the provided directory *path*.
Expand Down Expand Up @@ -200,6 +215,7 @@ def from_dir(
that class name is guaranteed to be unique across all TaskSets of the
locustfile, but this is generally not necessary and results in less
readable class names.
:param blacklist: a sequence of urls to be blacklisted
:raise SkippableScenarioError: if the directory contains dangling weight
files or no sub-scenarios.
"""
Expand All @@ -218,7 +234,11 @@ def from_dir(
continue
try:
scenario = cls.from_path(
child, plugins, ts_plugins=ts_plugins, short_name=True
child,
plugins,
ts_plugins=ts_plugins,
short_name=True,
blacklist=blacklist,
)
except SkippableScenarioError as err:
logging.warning(
Expand Down Expand Up @@ -284,6 +304,7 @@ def from_har_file(
plugins: Sequence[Plugin],
ts_plugins: Sequence[Plugin],
short_name: bool,
blacklist: Blacklist,
) -> "Scenario":
"""
Creates a Scenario given a HAR file.
Expand All @@ -294,7 +315,7 @@ def from_har_file(
with path.open() as file:
har = json.load(file)
requests = Request.all_from_har(har)
tasks = Task.from_requests(requests)
tasks = Task.from_requests(requests, blacklist)

# TODO: Remove this when Contract.OnTaskSequence is removed.
tasks = plug.apply(ts_plugins, tasks)
Expand Down
10 changes: 7 additions & 3 deletions transformer/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
from dataclasses import dataclass

import transformer.python as py
from transformer.blacklist import on_blacklist
from transformer.blacklist import on_blacklist, Blacklist, get_empty
from transformer.helpers import zip_kv_pairs
from transformer.request import HttpMethod, Request, QueryPair

Expand Down Expand Up @@ -293,13 +293,17 @@ class Task(NamedTuple):
global_code_blocks: Mapping[str, Sequence[str]] = MappingProxyType({})

@classmethod
def from_requests(cls, requests: Iterable[Request]) -> Iterator["Task"]:
def from_requests(
cls, requests: Iterable[Request], blacklist: Optional[Blacklist] = None
) -> Iterator["Task"]:
"""
Generates a set of Tasks from a given set of Requests.
"""
if blacklist is None:
blacklist = get_empty()

for req in sorted(requests, key=lambda r: r.timestamp):
if on_blacklist(req.url.netloc):
if on_blacklist(blacklist, req.url.netloc):
continue
else:
yield cls(name=req.task_name(), request=req)
Expand Down
25 changes: 16 additions & 9 deletions transformer/test_blacklist.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from unittest.mock import patch

from transformer.blacklist import on_blacklist
from transformer.blacklist import on_blacklist, from_file as read_blacklist


class TestBlacklist:
Expand All @@ -16,30 +16,37 @@ def test_it_returns_false_and_logs_error_if_the_blacklist_does_not_exist(
):
mock_open.side_effect = FileNotFoundError
caplog.set_level(logging.DEBUG)
assert on_blacklist("") is False
blacklist = read_blacklist()
assert len(blacklist) == 0
assert on_blacklist(blacklist, "whatever") is False
assert f"Could not read blacklist file {os.getcwd()}/.urlignore" in caplog.text

@patch("builtins.open")
def test_it_returns_false_if_the_blacklist_is_empty(self, mock_open):
mock_open.return_value = io.StringIO("")
assert on_blacklist("") is False
assert on_blacklist(read_blacklist(), "") is False

@patch("builtins.open")
def test_it_returns_false_if_url_is_not_on_blacklist(self, mock_open):
mock_open.return_value = io.StringIO("www.amazon.com")
assert on_blacklist("www.zalando.de") is False
assert on_blacklist(read_blacklist(), "www.zalando.de") is False

@patch("builtins.open")
def test_it_returns_true_if_url_is_on_blacklist(self, mock_open):
mock_open.return_value = io.StringIO("www.google.com\nwww.amazon.com")
assert on_blacklist("www.amazon.com") is True
assert on_blacklist(read_blacklist(), "www.amazon.com") is True

@patch("builtins.open")
def test_it_returns_true_if_a_partial_match_is_found(self, mock_open):
mock_open.return_value = io.StringIO("www.amazon.com")
assert on_blacklist("http://www.amazon.com/") is True
assert on_blacklist(read_blacklist(), "http://www.amazon.com/") is True

@patch("builtins.open")
def test_it_ignores_empty_lines(self, mock_open):
mock_open.return_value = io.StringIO("\nwww.amazon.com")
assert on_blacklist("www.zalando.de") is False
def test_it_ignores_whitespace_only_lines(self, mock_open):
mock_open.return_value = io.StringIO(" \n \r\nwww.amazon.com")
assert on_blacklist(read_blacklist(), "www.zalando.de") is False

@patch("builtins.open")
def test_it_removes_duplicate_entries(self, mock_open):
mock_open.return_value = io.StringIO("\nwww.amazon.com" * 3)
assert len(read_blacklist()) == 1
18 changes: 4 additions & 14 deletions transformer/test_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,24 +86,14 @@ def test_it_returns_a_request_with_a_query_given_a_delete_request_with_a_query(

def test_it_records_har_entry(self):
entry = {
"request": {
"method": "GET",
"url": "localhost"
},
"response": {
"status": 200,
"statusText": "OK",
},
"request": {"method": "GET", "url": "localhost"},
"response": {"status": 200, "statusText": "OK"},
"cache": {},
"timings": {
"connect": 22,
"wait": 46,
"receive": 0
},
"timings": {"connect": 22, "wait": 46, "receive": 0},
"startedDateTime": "2018-01-01",
"time": 116,
"_securityState": "secure",
"connection": "443"
"connection": "443",
}
request = Request.from_har_entry(entry)
assert isinstance(request, Request)
Expand Down
12 changes: 8 additions & 4 deletions transformer/test_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from hypothesis import given
from hypothesis.strategies import composite, sampled_from, booleans

from transformer import python as py
from transformer import python as py, blacklist
from transformer.request import Header, QueryPair
from transformer.task import (
Task,
Expand Down Expand Up @@ -45,7 +45,7 @@ def test_it_doesnt_create_a_task_if_the_url_is_on_the_blacklist(
request = MagicMock()
request.url = MagicMock()
request.url.netloc = "www.amazon.com"
task = Task.from_requests([request])
task = Task.from_requests([request], blacklist=blacklist.from_file())
assert len(list(task)) == 0

@patch("builtins.open")
Expand Down Expand Up @@ -364,7 +364,11 @@ def test_it_uses_the_custom_name_if_provided(self):
url = "http://abc.de"
name = "my-req"
r = Request(
name=name, timestamp=MagicMock(), method=HttpMethod.GET, url=urlparse(url), har_entry={"entry": "data"}
name=name,
timestamp=MagicMock(),
method=HttpMethod.GET,
url=urlparse(url),
har_entry={"entry": "data"},
)
assert req_to_expr(r) == py.FunctionCall(
name="self.client.get",
Expand Down Expand Up @@ -560,7 +564,7 @@ def test_it_uses_the_custom_name_if_provided(self):
timestamp=MagicMock(),
method=HttpMethod.GET,
url=urlparse(url),
har_entry={"entry": "data"}
har_entry={"entry": "data"},
)
)
assert lreq_to_expr(r) == py.FunctionCall(
Expand Down
15 changes: 12 additions & 3 deletions transformer/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from transformer.plugins import sanitize_headers, Contract
from transformer.plugins.contracts import Plugin
from transformer.scenario import Scenario
from transformer import blacklist

DEFAULT_PLUGINS = (sanitize_headers.plugin,)

Expand All @@ -36,7 +37,13 @@ def transform(
warnings.warn(DeprecationWarning("transform: use dump or dumps instead"))
if with_default_plugins:
plugins = (*DEFAULT_PLUGINS, *plugins)
return locustfile([Scenario.from_path(Path(scenarios_path), plugins)])
return locustfile(
[
Scenario.from_path(
Path(scenarios_path), plugins, blacklist=blacklist.from_file()
)
]
)


LaxPath = Union[str, Path]
Expand Down Expand Up @@ -96,10 +103,12 @@ def _dump_as_lines(
plugins = (*DEFAULT_PLUGINS, *plugins)

plugins_for = plug.group_by_contract(plugins)

scenarios = [
Scenario.from_path(
path, plugins_for[Contract.OnTask], plugins_for[Contract.OnTaskSequence]
path,
plugins_for[Contract.OnTask],
plugins_for[Contract.OnTaskSequence],
blacklist=blacklist.from_file(),
).apply_plugins(plugins_for[Contract.OnScenario])
for path in scenario_paths
]
Expand Down

0 comments on commit 8b12d5c

Please sign in to comment.