Commit

Compute z-score (#77)
dianaclarke committed May 27, 2021
1 parent 2674bd3 commit bfbf663
Showing 6 changed files with 182 additions and 113 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/actions.yml
@@ -42,7 +42,7 @@ jobs:
          flake8
      - name: Run tests
        run: |
-          coverage run --source conbench -m pytest conbench/tests/
+          coverage run --source conbench -m pytest -v conbench/tests/
        env:
          DB_USERNAME: postgres
      - name: Publish coverage
16 changes: 7 additions & 9 deletions conbench/api/compare.py
@@ -5,6 +5,7 @@
 from ..api._comparator import BenchmarkComparator, BenchmarkListComparator
 from ..api._endpoint import ApiEndpoint
 from ..entities._entity import NotFound
+from ..entities.distribution import set_z_scores
 from ..entities.summary import Summary
 from ..hacks import set_display_batch, set_display_name

@@ -19,7 +20,7 @@ def _compare_entity(summary):
"benchmark": summary.display_name,
"batch": summary.display_batch,
"tags": summary.case.tags,
"z_score": 0.0, # TODO
"z_score": summary.z_score,
}


@@ -29,6 +30,7 @@ def _get(self, benchmark_id):
             summary = Summary.one(id=benchmark_id)
         except NotFound:
             self.abort_404_not_found()
+        set_z_scores([summary])
         return summary

def get(self, compare_ids):
@@ -103,12 +105,10 @@ def get(self, compare_ids):

 class CompareBatchesAPI(ApiEndpoint):
     def _get(self, batch_id):
-        try:
-            summaries = Summary.all(batch_id=batch_id)
-        except NotFound:
-            self.abort_404_not_found()
+        summaries = Summary.all(batch_id=batch_id)
         if not summaries:
             self.abort_404_not_found()
+        set_z_scores(summaries)
         return summaries

     def get(self, compare_ids):
@@ -205,12 +205,10 @@ def _add_pair(self, pairs, summary, kind):

 class CompareRunsAPI(CompareBatchesAPI):
     def _get(self, run_id):
-        try:
-            summaries = Summary.all(run_id=run_id)
-        except NotFound:
-            self.abort_404_not_found()
+        summaries = Summary.all(run_id=run_id)
         if not summaries:
             self.abort_404_not_found()
+        set_z_scores(summaries)
         return summaries
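
Both list endpoints now follow the same shape: Summary.all returns a plain (possibly empty) list, so a missing batch or run is signalled by emptiness rather than a NotFound exception, and a single set_z_scores call decorates the whole list with one Distribution query instead of one lookup per benchmark. A minimal sketch of the flow (hypothetical glue code mirroring the endpoints above, not standalone):

    # Hypothetical glue: fetch, 404 on empty, decorate, serialize.
    summaries = Summary.all(run_id=run_id)   # one query for the whole run
    if not summaries:
        self.abort_404_not_found()           # empty list -> 404, no exception
    set_z_scores(summaries)                  # one Distribution query for all
    return [_compare_entity(summary) for summary in summaries]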


81 changes: 35 additions & 46 deletions conbench/entities/distribution.py
@@ -148,50 +148,39 @@ def update_distribution(repository, sha, summary, limit):
     conn.commit()


-q = """SELECT
-    run_id,
-    summary.case_id,
-    summary.machine_id,
-    machine.name AS machine_name,
-    summary.unit,
-    summary.time_unit,
-    summary.min,
-    summary.max,
-    summary.mean,
-    summary.median,
-    commit.timestamp AS commit_timestamp,
-    commit.repository AS commit_repository,
-    (summary.mean - distribution.mean_mean) / distribution.mean_sd AS mean_z,
-    (summary.min - distribution.min_mean) / distribution.min_sd AS min_z,
-    (summary.max - distribution.max_mean) / distribution.max_sd AS max_z,
-    (summary.median - distribution.median_mean) / distribution.median_sd AS median_z,
-    commit.timestamp AS commit_timestamp,
-    commit.repository AS commit_repository,
-    distribution.*
-FROM summary
-INNER JOIN run
-    ON summary.run_id = run.id
-INNER JOIN commit
-    ON commit.id = run.commit_id
-INNER JOIN machine
-    ON summary.machine_id = machine.id
-LEFT JOIN distribution
-    ON summary.case_id = distribution.case_id AND machine.name = distribution.machine_name
-WHERE run.name = 'commit: {{to_compare_sha}}'"""
-
-
-def get_z_score(repository, sha, case_id, context_id, machine_hash, mean):
-    result = list(
-        Session.query(Distribution.mean_mean, Distribution.mean_sd).filter(
-            Distribution.repository == repository,
-            Distribution.sha == sha,
-            Distribution.case_id == case_id,
-            Distribution.context_id == context_id,
-            Distribution.machine_hash == machine_hash,
-        )
-    )
-    if result:
-        distribution_mean = result[0]["mean_mean"]
-        distribution_sd = result[0]["mean_sd"]
-        return (mean - distribution_mean) / distribution_sd
-    return None
+def set_z_scores(summaries):
+    if not summaries:
+        return
+
+    first = summaries[0]
+    repository = first.run.commit.repository
+    sha = first.run.commit.sha
+    machine_hash = first.machine.hash
+
+    where = [
+        Distribution.repository == repository,
+        Distribution.sha == sha,
+        Distribution.machine_hash == machine_hash,
+    ]
+    if len(summaries) == 1:
+        where.extend(
+            [
+                Distribution.case_id == first.case_id,
+                Distribution.context_id == first.context_id,
+            ]
+        )
+
+    cols = [
+        Distribution.case_id,
+        Distribution.context_id,
+        Distribution.mean_mean,
+        Distribution.mean_sd,
+    ]
+    distributions = Session.query(*cols).filter(*where).all()
+    lookup = {f"{d.case_id}-{d.context_id}": d for d in distributions}
+
+    for summary in summaries:
+        summary.z_score = 0
+        d = lookup.get(f"{summary.case_id}-{summary.context_id}")
+        if d and d.mean_sd:
+            summary.z_score = (summary.mean - d.mean_mean) / d.mean_sd
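
For intuition, the z-score here is the familiar (value − mean) / standard deviation, taken against the distribution of historical means for the same case, context, and machine. The expected values in the updated test below can be reproduced by hand, assuming the distribution covers all eleven summaries the test creates (ten history runs with results [1, 2, 3], mean 2.0, plus one regression run with [4, 5, 6], mean 5.0) and that mean_sd is a sample standard deviation:

    import statistics

    # Per-run means feeding the distribution: ten baselines plus the regression.
    means = [2.0] * 10 + [5.0]
    mean_mean = statistics.mean(means)  # 2.2727...
    mean_sd = statistics.stdev(means)   # 0.9045... (sample standard deviation)

    baseline_z = (2.0 - mean_mean) / mean_sd   # -0.3015... -> "-0.302"
    contender_z = (5.0 - mean_mean) / mean_sd  #  3.0151... ->  "3.015"

    # The reported change is the plain percent difference of the two means:
    change = (5.0 - 2.0) / 2.0                 # 1.5 -> "150.000%"
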
106 changes: 78 additions & 28 deletions conbench/tests/api/test_compare.py
@@ -1,8 +1,10 @@
 import copy
+import datetime
 import uuid

 from ...api._examples import _api_compare_entity, _api_compare_list
 from ...entities.summary import Summary
+from ...runner import Conbench
 from ...tests.api import _asserts
 from ...tests.api.test_benchmarks import VALID_PAYLOAD

@@ -15,13 +17,23 @@ def __init__(self, _id):
         self.id = _id


-def create_benchmark_summary(name, batch_id=None, run_id=None):
+def create_benchmark_summary(name, batch_id=None, run_id=None, results=None):
     data = copy.deepcopy(VALID_PAYLOAD)
     data["tags"]["name"] = name
     if batch_id:
         data["stats"]["batch_id"] = batch_id
     if run_id:
         data["stats"]["run_id"] = run_id
+    if results is not None:
+        conbench = Conbench()
+        run_id = data["stats"]["run_id"]
+        run_name = data["stats"]["run_name"]
+        batch_id = data["stats"]["batch_id"]
+        now = datetime.datetime.now(datetime.timezone.utc)
+        data["stats"] = conbench._stats(
+            results, "s", [], "s", now.isoformat(), run_id, run_name
+        )
+        data["stats"]["batch_id"] = batch_id
     summary = Summary.create(data)
     return summary

@@ -30,21 +42,30 @@ class TestCompareBenchmarksGet(_asserts.GetEnforcer):
url = "/api/compare/benchmarks/{}/"
public = True

def _create(self, with_ids=False):
summary = create_benchmark_summary("read")
entity = FakeEntity(f"{summary.id}...{summary.id}")
def _create(self, name=None, with_ids=False):
if name is None:
name = uuid.uuid4().hex

# create a distribution history
for _ in range(10):
summary_1 = create_benchmark_summary(name, results=[1, 2, 3])

# create a regression
summary_2 = create_benchmark_summary(name, results=[4, 5, 6])

entity = FakeEntity(f"{summary_1.id}...{summary_2.id}")
if with_ids:
return summary.id, entity
return summary_1.id, summary_2.id, entity
else:
return entity

def test_compare(self, client):
self.authenticate(client)
new_id, compare = self._create(with_ids=True)
name = uuid.uuid4().hex
id_1, id_2, compare = self._create(name, with_ids=True)
response = client.get(f"/api/compare/benchmarks/{compare.id}/")

# cheating by comparing benchmark to same benchmark
benchmark_ids = [new_id, new_id]
benchmark_ids = [id_1, id_2]
batch_ids = [
"7b2fdd9f929d47b9960152090d47f8e6",
"7b2fdd9f929d47b9960152090d47f8e6",
@@ -57,17 +78,28 @@ def test_compare(self, client):
             benchmark_ids,
             batch_ids,
             run_ids,
-            "read",
+            name,
             CASE,
             tags={
                 "dataset": "nyctaxi_sample",
                 "cpu_count": 2,
                 "file_type": "parquet",
                 "input_type": "arrow",
                 "compression": "snappy",
-                "name": "read",
+                "name": name,
             },
         )
+        expected.update(
+            {
+                "baseline": "2.000 s",
+                "contender": "5.000 s",
+                "change": "150.000%",
+                "regression": True,
+                "baseline_z_score": "-0.302",
+                "contender_z_score": "3.015",
+                "contender_regression_z": True,
+            }
+        )
         self.assert_200_ok(response, expected)

     def test_compare_unknown_compare_ids(self, client):
@@ -80,11 +112,19 @@ class TestCompareBatchesGet(_asserts.GetEnforcer):
url = "/api/compare/batches/{}/"
public = True

def _create(self, with_ids=False, batch_id=None):
def _create(self, with_ids=False, run_id=None, batch_id=None):
if batch_id is None:
batch_id = uuid.uuid4().hex
summary1 = create_benchmark_summary("read", batch_id=batch_id)
summary2 = create_benchmark_summary("write", batch_id=batch_id)
summary1 = create_benchmark_summary(
"read",
run_id=run_id,
batch_id=batch_id,
)
summary2 = create_benchmark_summary(
"write",
run_id=run_id,
batch_id=batch_id,
)
entity = FakeEntity(f"{batch_id}...{batch_id}")
if with_ids:
return [summary1.id, summary2.id], entity
@@ -93,16 +133,17 @@ def _create(self, with_ids=False, batch_id=None):

     def test_compare(self, client):
         self.authenticate(client)
-        batch_id = uuid.uuid4().hex
-        new_ids, compare = self._create(with_ids=True, batch_id=batch_id)
+        run_id, batch_id = uuid.uuid4().hex, uuid.uuid4().hex
+        new_ids, compare = self._create(
+            with_ids=True,
+            run_id=run_id,
+            batch_id=batch_id,
+        )
         response = client.get(f"/api/compare/batches/{compare.id}/")

         # cheating by comparing batch to same batch
         batch_ids = [batch_id, batch_id]
-        run_ids = [
-            "2a5709d179f349cba69ed242be3e6321",
-            "2a5709d179f349cba69ed242be3e6321",
-        ]
+        run_ids = [run_id, run_id]
         batches = ["read", "write"]
         benchmarks = [CASE, CASE]
         expected = _api_compare_list(
@@ -143,11 +184,19 @@ class TestCompareRunsGet(_asserts.GetEnforcer):
url = "/api/compare/runs/{}/"
public = True

def _create(self, with_ids=False, run_id=None):
def _create(self, with_ids=False, run_id=None, batch_id=None):
if run_id is None:
run_id = uuid.uuid4().hex
summary1 = create_benchmark_summary("read", run_id=run_id)
summary2 = create_benchmark_summary("write", run_id=run_id)
summary1 = create_benchmark_summary(
"read",
run_id=run_id,
batch_id=batch_id,
)
summary2 = create_benchmark_summary(
"write",
run_id=run_id,
batch_id=batch_id,
)
entity = FakeEntity(f"{run_id}...{run_id}")
if with_ids:
return [summary1.id, summary2.id], entity
@@ -156,16 +205,17 @@ def _create(self, with_ids=False, run_id=None):

     def test_compare(self, client):
         self.authenticate(client)
-        run_id = uuid.uuid4().hex
-        new_ids, compare = self._create(with_ids=True, run_id=run_id)
+        run_id, batch_id = uuid.uuid4().hex, uuid.uuid4().hex
+        new_ids, compare = self._create(
+            with_ids=True,
+            run_id=run_id,
+            batch_id=batch_id,
+        )
         response = client.get(f"/api/compare/runs/{compare.id}/")

         # cheating by comparing run to same run
         run_ids = [run_id, run_id]
-        batch_ids = [
-            "7b2fdd9f929d47b9960152090d47f8e6",
-            "7b2fdd9f929d47b9960152090d47f8e6",
-        ]
+        batch_ids = [batch_id, batch_id]
         batches = ["read", "write"]
         benchmarks = [CASE, CASE]
         expected = _api_compare_list(
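
The results=... path in create_benchmark_summary above hands raw timings to Conbench._stats so the payload carries real aggregate statistics instead of the canned VALID_PAYLOAD numbers. A rough stand-in for what that helper computes (field names illustrative, not Conbench's actual schema):

    import statistics

    def stats_sketch(results, unit="s"):
        # Aggregate raw timings into the summary statistics a benchmark
        # summary is expected to carry; e.g. [4, 5, 6] -> mean 5.0.
        return {
            "data": results,
            "unit": unit,
            "mean": statistics.mean(results),
            "median": statistics.median(results),
            "min": min(results),
            "max": max(results),
            "stdev": statistics.stdev(results),
        }
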
4 changes: 4 additions & 0 deletions conbench/tests/conftest.py
@@ -5,6 +5,10 @@
 from ..db import Session, configure_engine, create_all, drop_all


+pytest.register_assert_rewrite("conbench.tests.api._asserts")
+pytest.register_assert_rewrite("conbench.tests.app._asserts")
+
+
 @pytest.fixture(scope="session", autouse=True)
 def create_db():
     configure_engine(TestConfig.SQLALCHEMY_DATABASE_URI)
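
By default pytest rewrites assert statements only in the test modules it collects, so assertions living in shared helper modules such as these _asserts modules would fail with bare AssertionErrors. Registering them here, before they are imported, opts them into rewriting so failures report introspected values. A sketch of the kind of helper that benefits (assuming an assertion helper along the lines of the assert_200_ok used in the tests above):

    # conbench/tests/api/_asserts.py -- a helper, not itself a test module.
    def assert_200_ok(response, expected):
        # With register_assert_rewrite, a failure here shows the actual
        # status code and payload, not just a bare "AssertionError".
        assert response.status_code == 200
        assert response.json == expected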
