Commit

Compute z-score (#77)
dianaclarke committed May 27, 2021
1 parent 2674bd3 commit bfbf663
Showing 6 changed files with 182 additions and 113 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/actions.yml
@@ -42,7 +42,7 @@ jobs:
          flake8
      - name: Run tests
        run: |
-          coverage run --source conbench -m pytest conbench/tests/
+          coverage run --source conbench -m pytest -v conbench/tests/
        env:
          DB_USERNAME: postgres
      - name: Publish coverage
16 changes: 7 additions & 9 deletions conbench/api/compare.py
@@ -5,6 +5,7 @@
 from ..api._comparator import BenchmarkComparator, BenchmarkListComparator
 from ..api._endpoint import ApiEndpoint
 from ..entities._entity import NotFound
+from ..entities.distribution import set_z_scores
 from ..entities.summary import Summary
 from ..hacks import set_display_batch, set_display_name

@@ -19,7 +20,7 @@ def _compare_entity(summary):
"benchmark": summary.display_name,
"batch": summary.display_batch,
"tags": summary.case.tags,
"z_score": 0.0, # TODO
"z_score": summary.z_score,
}


@@ -29,6 +30,7 @@ def _get(self, benchmark_id):
             summary = Summary.one(id=benchmark_id)
         except NotFound:
             self.abort_404_not_found()
+        set_z_scores([summary])
         return summary

def get(self, compare_ids):
@@ -103,12 +105,10 @@ def get(self, compare_ids):

 class CompareBatchesAPI(ApiEndpoint):
     def _get(self, batch_id):
-        try:
-            summaries = Summary.all(batch_id=batch_id)
-        except NotFound:
-            self.abort_404_not_found()
+        summaries = Summary.all(batch_id=batch_id)
         if not summaries:
             self.abort_404_not_found()
+        set_z_scores(summaries)
         return summaries

     def get(self, compare_ids):
@@ -205,12 +205,10 @@ def _add_pair(self, pairs, summary, kind):

 class CompareRunsAPI(CompareBatchesAPI):
     def _get(self, run_id):
-        try:
-            summaries = Summary.all(run_id=run_id)
-        except NotFound:
-            self.abort_404_not_found()
+        summaries = Summary.all(run_id=run_id)
         if not summaries:
             self.abort_404_not_found()
+        set_z_scores(summaries)
         return summaries
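
Both list endpoints now follow the same shape: Summary.all returns a plain (possibly empty) list, so a missing batch or run is signalled by emptiness rather than a NotFound exception, and a single set_z_scores call decorates the whole list with one Distribution query instead of one lookup per benchmark. A minimal sketch of the flow (hypothetical glue code mirroring the endpoints above, not standalone):

    # Hypothetical glue: fetch, 404 on empty, decorate, serialize.
    summaries = Summary.all(run_id=run_id)   # one query for the whole run
    if not summaries:
        self.abort_404_not_found()           # empty list -> 404, no exception
    set_z_scores(summaries)                  # one Distribution query for all
    return [_compare_entity(summary) for summary in summaries]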


81 changes: 35 additions & 46 deletions conbench/entities/distribution.py
@@ -148,50 +148,39 @@ def update_distribution(repository, sha, summary, limit):
     conn.commit()


-q = """SELECT
-    run_id,
-    summary.case_id,
-    summary.machine_id,
-    machine.name AS machine_name,
-    summary.unit,
-    summary.time_unit,
-    summary.min,
-    summary.max,
-    summary.mean,
-    summary.median,
-    commit.timestamp AS commit_timestamp,
-    commit.repository AS commit_repository,
-    (summary.mean - distribution.mean_mean) / distribution.mean_sd AS mean_z,
-    (summary.min - distribution.min_mean) / distribution.min_sd AS min_z,
-    (summary.max - distribution.max_mean) / distribution.max_sd AS max_z,
-    (summary.median - distribution.median_mean) / distribution.median_sd AS median_z,
-    commit.timestamp AS commit_timestamp,
-    commit.repository AS commit_repository,
-    distribution.*
-FROM summary
-INNER JOIN run
-    ON summary.run_id = run.id
-INNER JOIN commit
-    ON commit.id = run.commit_id
-INNER JOIN machine
-    ON summary.machine_id = machine.id
-LEFT JOIN distribution
-    ON summary.case_id = distribution.case_id AND machine.name = distribution.machine_name
-WHERE run.name = 'commit: {{to_compare_sha}}'"""
-
-
-def get_z_score(repository, sha, case_id, context_id, machine_hash, mean):
-    result = list(
-        Session.query(Distribution.mean_mean, Distribution.mean_sd).filter(
-            Distribution.repository == repository,
-            Distribution.sha == sha,
-            Distribution.case_id == case_id,
-            Distribution.context_id == context_id,
-            Distribution.machine_hash == machine_hash,
-        )
-    )
-    if result:
-        distribution_mean = result[0]["mean_mean"]
-        distribution_sd = result[0]["mean_sd"]
-        return (mean - distribution_mean) / distribution_sd
-    return None
+def set_z_scores(summaries):
+    if not summaries:
+        return
+
+    first = summaries[0]
+    repository = first.run.commit.repository
+    sha = first.run.commit.sha
+    machine_hash = first.machine.hash
+
+    where = [
+        Distribution.repository == repository,
+        Distribution.sha == sha,
+        Distribution.machine_hash == machine_hash,
+    ]
+    if len(summaries) == 1:
+        where.extend(
+            [
+                Distribution.case_id == first.case_id,
+                Distribution.context_id == first.context_id,
+            ]
+        )
+
+    cols = [
+        Distribution.case_id,
+        Distribution.context_id,
+        Distribution.mean_mean,
+        Distribution.mean_sd,
+    ]
+    distributions = Session.query(*cols).filter(*where).all()
+    lookup = {f"{d.case_id}-{d.context_id}": d for d in distributions}
+
+    for summary in summaries:
+        summary.z_score = 0
+        d = lookup.get(f"{summary.case_id}-{summary.context_id}")
+        if d and d.mean_sd:
+            summary.z_score = (summary.mean - d.mean_mean) / d.mean_sd
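
For intuition, the z-score here is the familiar (value − mean) / standard deviation, taken against the distribution of historical means for the same case, context, and machine. The expected values in the updated test below can be reproduced by hand, assuming the distribution covers all eleven summaries the test creates (ten history runs with results [1, 2, 3], mean 2.0, plus one regression run with [4, 5, 6], mean 5.0) and that mean_sd is a sample standard deviation:

    import statistics

    # Per-run means feeding the distribution: ten baselines plus the regression.
    means = [2.0] * 10 + [5.0]
    mean_mean = statistics.mean(means)  # 2.2727...
    mean_sd = statistics.stdev(means)   # 0.9045... (sample standard deviation)

    baseline_z = (2.0 - mean_mean) / mean_sd   # -0.3015... -> "-0.302"
    contender_z = (5.0 - mean_mean) / mean_sd  #  3.0151... ->  "3.015"

    # The reported change is the plain percent difference of the two means:
    change = (5.0 - 2.0) / 2.0                 # 1.5 -> "150.000%"
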
106 changes: 78 additions & 28 deletions conbench/tests/api/test_compare.py
@@ -1,8 +1,10 @@
 import copy
+import datetime
 import uuid

 from ...api._examples import _api_compare_entity, _api_compare_list
 from ...entities.summary import Summary
+from ...runner import Conbench
 from ...tests.api import _asserts
 from ...tests.api.test_benchmarks import VALID_PAYLOAD

@@ -15,13 +17,23 @@ def __init__(self, _id):
         self.id = _id


-def create_benchmark_summary(name, batch_id=None, run_id=None):
+def create_benchmark_summary(name, batch_id=None, run_id=None, results=None):
     data = copy.deepcopy(VALID_PAYLOAD)
     data["tags"]["name"] = name
     if batch_id:
         data["stats"]["batch_id"] = batch_id
     if run_id:
         data["stats"]["run_id"] = run_id
+    if results is not None:
+        conbench = Conbench()
+        run_id = data["stats"]["run_id"]
+        run_name = data["stats"]["run_name"]
+        batch_id = data["stats"]["batch_id"]
+        now = datetime.datetime.now(datetime.timezone.utc)
+        data["stats"] = conbench._stats(
+            results, "s", [], "s", now.isoformat(), run_id, run_name
+        )
+        data["stats"]["batch_id"] = batch_id
     summary = Summary.create(data)
     return summary

@@ -30,21 +42,30 @@ class TestCompareBenchmarksGet(_asserts.GetEnforcer):
url = "/api/compare/benchmarks/{}/"
public = True

def _create(self, with_ids=False):
summary = create_benchmark_summary("read")
entity = FakeEntity(f"{summary.id}...{summary.id}")
def _create(self, name=None, with_ids=False):
if name is None:
name = uuid.uuid4().hex

# create a distribution history
for _ in range(10):
summary_1 = create_benchmark_summary(name, results=[1, 2, 3])

# create a regression
summary_2 = create_benchmark_summary(name, results=[4, 5, 6])

entity = FakeEntity(f"{summary_1.id}...{summary_2.id}")
if with_ids:
return summary.id, entity
return summary_1.id, summary_2.id, entity
else:
return entity

def test_compare(self, client):
self.authenticate(client)
new_id, compare = self._create(with_ids=True)
name = uuid.uuid4().hex
id_1, id_2, compare = self._create(name, with_ids=True)
response = client.get(f"/api/compare/benchmarks/{compare.id}/")

# cheating by comparing benchmark to same benchmark
benchmark_ids = [new_id, new_id]
benchmark_ids = [id_1, id_2]
batch_ids = [
"7b2fdd9f929d47b9960152090d47f8e6",
"7b2fdd9f929d47b9960152090d47f8e6",
@@ -57,17 +78,28 @@ def test_compare(self, client):
             benchmark_ids,
             batch_ids,
             run_ids,
-            "read",
+            name,
             CASE,
             tags={
                 "dataset": "nyctaxi_sample",
                 "cpu_count": 2,
                 "file_type": "parquet",
                 "input_type": "arrow",
                 "compression": "snappy",
-                "name": "read",
+                "name": name,
             },
         )
+        expected.update(
+            {
+                "baseline": "2.000 s",
+                "contender": "5.000 s",
+                "change": "150.000%",
+                "regression": True,
+                "baseline_z_score": "-0.302",
+                "contender_z_score": "3.015",
+                "contender_regression_z": True,
+            }
+        )
         self.assert_200_ok(response, expected)

     def test_compare_unknown_compare_ids(self, client):
@@ -80,11 +112,19 @@ class TestCompareBatchesGet(_asserts.GetEnforcer):
url = "/api/compare/batches/{}/"
public = True

def _create(self, with_ids=False, batch_id=None):
def _create(self, with_ids=False, run_id=None, batch_id=None):
if batch_id is None:
batch_id = uuid.uuid4().hex
summary1 = create_benchmark_summary("read", batch_id=batch_id)
summary2 = create_benchmark_summary("write", batch_id=batch_id)
summary1 = create_benchmark_summary(
"read",
run_id=run_id,
batch_id=batch_id,
)
summary2 = create_benchmark_summary(
"write",
run_id=run_id,
batch_id=batch_id,
)
entity = FakeEntity(f"{batch_id}...{batch_id}")
if with_ids:
return [summary1.id, summary2.id], entity
@@ -93,16 +133,17 @@ def _create(self, with_ids=False, batch_id=None):

     def test_compare(self, client):
         self.authenticate(client)
-        batch_id = uuid.uuid4().hex
-        new_ids, compare = self._create(with_ids=True, batch_id=batch_id)
+        run_id, batch_id = uuid.uuid4().hex, uuid.uuid4().hex
+        new_ids, compare = self._create(
+            with_ids=True,
+            run_id=run_id,
+            batch_id=batch_id,
+        )
         response = client.get(f"/api/compare/batches/{compare.id}/")

         # cheating by comparing batch to same batch
         batch_ids = [batch_id, batch_id]
-        run_ids = [
-            "2a5709d179f349cba69ed242be3e6321",
-            "2a5709d179f349cba69ed242be3e6321",
-        ]
+        run_ids = [run_id, run_id]
         batches = ["read", "write"]
         benchmarks = [CASE, CASE]
         expected = _api_compare_list(
@@ -143,11 +184,19 @@ class TestCompareRunsGet(_asserts.GetEnforcer):
url = "/api/compare/runs/{}/"
public = True

def _create(self, with_ids=False, run_id=None):
def _create(self, with_ids=False, run_id=None, batch_id=None):
if run_id is None:
run_id = uuid.uuid4().hex
summary1 = create_benchmark_summary("read", run_id=run_id)
summary2 = create_benchmark_summary("write", run_id=run_id)
summary1 = create_benchmark_summary(
"read",
run_id=run_id,
batch_id=batch_id,
)
summary2 = create_benchmark_summary(
"write",
run_id=run_id,
batch_id=batch_id,
)
entity = FakeEntity(f"{run_id}...{run_id}")
if with_ids:
return [summary1.id, summary2.id], entity
@@ -156,16 +205,17 @@ def _create(self, with_ids=False, run_id=None):

     def test_compare(self, client):
         self.authenticate(client)
-        run_id = uuid.uuid4().hex
-        new_ids, compare = self._create(with_ids=True, run_id=run_id)
+        run_id, batch_id = uuid.uuid4().hex, uuid.uuid4().hex
+        new_ids, compare = self._create(
+            with_ids=True,
+            run_id=run_id,
+            batch_id=batch_id,
+        )
         response = client.get(f"/api/compare/runs/{compare.id}/")

         # cheating by comparing run to same run
         run_ids = [run_id, run_id]
-        batch_ids = [
-            "7b2fdd9f929d47b9960152090d47f8e6",
-            "7b2fdd9f929d47b9960152090d47f8e6",
-        ]
+        batch_ids = [batch_id, batch_id]
         batches = ["read", "write"]
         benchmarks = [CASE, CASE]
         expected = _api_compare_list(
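
The results=... path in create_benchmark_summary above hands raw timings to Conbench._stats so the payload carries real aggregate statistics instead of the canned VALID_PAYLOAD numbers. A rough stand-in for what that helper computes (field names illustrative, not Conbench's actual schema):

    import statistics

    def stats_sketch(results, unit="s"):
        # Aggregate raw timings into the summary statistics a benchmark
        # summary is expected to carry; e.g. [4, 5, 6] -> mean 5.0.
        return {
            "data": results,
            "unit": unit,
            "mean": statistics.mean(results),
            "median": statistics.median(results),
            "min": min(results),
            "max": max(results),
            "stdev": statistics.stdev(results),
        }
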
4 changes: 4 additions & 0 deletions conbench/tests/conftest.py
@@ -5,6 +5,10 @@
 from ..db import Session, configure_engine, create_all, drop_all


+pytest.register_assert_rewrite("conbench.tests.api._asserts")
+pytest.register_assert_rewrite("conbench.tests.app._asserts")
+
+
 @pytest.fixture(scope="session", autouse=True)
 def create_db():
     configure_engine(TestConfig.SQLALCHEMY_DATABASE_URI)
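
By default pytest rewrites assert statements only in the test modules it collects, so assertions living in shared helper modules such as these _asserts modules would fail with bare AssertionErrors. Registering them here, before they are imported, opts them into rewriting so failures report introspected values. A sketch of the kind of helper that benefits (assuming an assertion helper along the lines of the assert_200_ok used in the tests above):

    # conbench/tests/api/_asserts.py -- a helper, not itself a test module.
    def assert_200_ok(response, expected):
        # With register_assert_rewrite, a failure here shows the actual
        # status code and payload, not just a bare "AssertionError".
        assert response.status_code == 200
        assert response.json == expected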
