Skip to content

Commit

Permalink
Merge 683e15a into 25d7485
Browse files Browse the repository at this point in the history
  • Loading branch information
dianaclarke committed May 10, 2021
2 parents 25d7485 + 683e15a commit 45b9607
Show file tree
Hide file tree
Showing 9 changed files with 753 additions and 9 deletions.
2 changes: 1 addition & 1 deletion conbench/api/_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def _api_machine_entity(machine_id, links=True):
def _api_run_entity(run_id, commit_id, machine_id, now, baseline_id):
result = {
"id": run_id,
"name": "pull request: 9564",
"name": "commit: 02addad336ba19a654f9c857ede546331be7b631",
"timestamp": now,
"commit": _api_commit_entity(commit_id),
"machine": _api_machine_entity(machine_id, links=False),
Expand Down
4 changes: 4 additions & 0 deletions conbench/entities/_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ class EntityMixin:
def __repr__(self):
    """Return a short debug representation of the form ``<ClassName id>``."""
    class_name = self.__class__.__name__
    return f"<{class_name} {self.id}>"

@classmethod
def count(cls):
    """Return the number of rows the session finds for this entity class."""
    query = Session.query(cls)
    return query.count()

@classmethod
def distinct(cls, column, filters):
q = Session.query(distinct(column))
Expand Down
136 changes: 136 additions & 0 deletions conbench/entities/distribution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import sqlalchemy as s
from sqlalchemy import func
from sqlalchemy import CheckConstraint as check
from sqlalchemy.dialects.postgresql import insert

from ..db import Session
from ..entities._entity import (
Base,
EntityMixin,
generate_uuid,
NotNull,
Nullable,
)
from ..entities.commit import Commit
from ..entities.run import Run


class Distribution(Base, EntityMixin):
    """Aggregated summary statistics for one sha, case, context and machine.

    Each row stores the average (``*_mean``) and standard deviation
    (``*_sd``) of the mean, min, max and median values, together with the
    time span and the number of observations they were computed from.
    """

    __tablename__ = "distribution"

    # Primary key, generated on insert.
    id = NotNull(s.String(50), primary_key=True, default=generate_uuid)

    # What the distribution describes.
    sha = NotNull(s.String(50))
    repository = NotNull(s.String(100))
    case_id = NotNull(s.String(50), s.ForeignKey("case.id", ondelete="CASCADE"))
    context_id = NotNull(s.String(50), s.ForeignKey("context.id", ondelete="CASCADE"))
    machine_id = NotNull(s.String(50), s.ForeignKey("machine.id", ondelete="CASCADE"))
    unit = NotNull(s.Text)

    # Aggregates; every one is nullable and constrained to be non-negative.
    mean_mean = Nullable(s.Numeric, s.CheckConstraint("mean_mean>=0"))
    mean_sd = Nullable(s.Numeric, s.CheckConstraint("mean_sd>=0"))
    min_mean = Nullable(s.Numeric, s.CheckConstraint("min_mean>=0"))
    min_sd = Nullable(s.Numeric, s.CheckConstraint("min_sd>=0"))
    max_mean = Nullable(s.Numeric, s.CheckConstraint("max_mean>=0"))
    max_sd = Nullable(s.Numeric, s.CheckConstraint("max_sd>=0"))
    median_mean = Nullable(s.Numeric, s.CheckConstraint("median_mean>=0"))
    median_sd = Nullable(s.Numeric, s.CheckConstraint("median_sd>=0"))

    # Time span covered and how many observations were aggregated (>= 1).
    first_timestamp = NotNull(s.DateTime(timezone=False))
    last_timestamp = NotNull(s.DateTime(timezone=False))
    observations = NotNull(s.Integer, s.CheckConstraint("observations>=1"))


# Unique composite index: at most one distribution row per
# (sha, case_id, context_id, machine_id). update_distribution() names these
# same four columns as the conflict target of its upsert.
s.Index(
"distribution_index",
Distribution.sha,
Distribution.case_id,
Distribution.context_id,
Distribution.machine_id,
unique=True,
)
# Non-unique single-column indexes on the individual columns.
s.Index("distribution_sha_index", Distribution.sha)
s.Index("distribution_repository_index", Distribution.repository)
s.Index("distribution_case_id_index", Distribution.case_id)
s.Index("distribution_context_id_index", Distribution.context_id)
s.Index("distribution_machine_id_index", Distribution.machine_id)


def get_commit_index(repository):
    """Build a query that numbers a repository's commits, newest first.

    Args:
        repository: Value matched against ``Commit.repository``.

    Returns:
        A query yielding ``id``, ``sha`` and ``timestamp`` for each commit of
        the repository plus a ``row_number`` column, where row 1 is the
        commit with the latest timestamp.
    """
    ordered = (
        Session.query(Commit.id, Commit.sha, Commit.timestamp)
        .filter(Commit.repository == repository)
        .order_by(Commit.timestamp.desc())
    ).cte("ordered_commits")
    # An empty OVER () numbers rows in whatever order they reach the window,
    # and the CTE's ORDER BY is not guaranteed to carry over to the outer
    # select. Ordering the window itself pins the numbering to the timestamps.
    newest_first = ordered.c.timestamp.desc()
    return Session.query(
        ordered,
        func.row_number().over(order_by=newest_first).label("row_number"),
    )


def get_sha_row_number(repository, sha):
    """Return a query selecting the commit-index row number of ``sha``.

    Args:
        repository: Repository whose commit index is searched.
        sha: Commit sha to look up.
    """
    commit_index = get_commit_index(repository).subquery().alias("commit_index")
    columns = commit_index.c
    return Session.query(columns.row_number).filter(columns.sha == sha)


def get_commits_up(repository, sha, limit):
    """Return a query for the commit-index rows starting at ``sha``.

    Selects the rows of the repository's commit index whose ``row_number``
    is greater than or equal to the row number of ``sha``, capped at
    ``limit`` rows.

    Args:
        repository: Repository whose commit index is searched.
        sha: Commit sha marking the first row to include.
        limit: Maximum number of rows to return.

    Returns:
        A query yielding the commit-index columns (``id``, ``sha``,
        ``timestamp``, ``row_number``) in ascending ``row_number`` order.
    """
    index = get_commit_index(repository).subquery().alias("commit_index")
    n = Session.query(index.c.row_number).filter(index.c.sha == sha).scalar_subquery()
    return (
        Session.query(index)
        .filter(index.c.row_number >= n)
        # Without an ORDER BY the database may keep any `limit` matching
        # rows; ordering by row number keeps the ones closest to `sha`.
        .order_by(index.c.row_number)
        .limit(limit)
    )


def get_distribution(repository, sha, case_id, context_id, machine_id, limit):
    """Build the query that aggregates summaries into one distribution row.

    Summaries are joined to their runs and to the commits returned by
    ``get_commits_up(repository, sha, limit)``. Only runs whose name matches
    ``"commit: %"`` and summaries with the given case, context and machine
    are aggregated.

    Args:
        repository: Repository name, echoed into the ``repository`` column.
        sha: Commit sha, echoed into the ``sha`` column.
        case_id: Case the summaries must belong to.
        context_id: Context the summaries must belong to.
        machine_id: Machine the summaries must belong to.
        limit: Maximum number of commits taken from ``get_commits_up``.

    Returns:
        A query whose columns are labelled to match the ``Distribution``
        table columns (everything except ``id``).
    """
    # Imported here rather than at module level: the summary module imports
    # update_distribution from this module.
    from ..entities.summary import Summary

    commits_up = get_commits_up(repository, sha, limit).subquery().alias("commits_up")

    identity_columns = (
        func.text(repository).label("repository"),
        func.text(sha).label("sha"),
        Summary.case_id,
        Summary.context_id,
        Summary.machine_id,
        func.max(Summary.unit).label("unit"),
    )
    statistic_columns = (
        func.avg(Summary.mean).label("mean_mean"),
        func.stddev(Summary.mean).label("mean_sd"),
        func.avg(Summary.min).label("min_mean"),
        func.stddev(Summary.min).label("min_sd"),
        func.avg(Summary.max).label("max_mean"),
        func.stddev(Summary.max).label("max_sd"),
        func.avg(Summary.median).label("median_mean"),
        func.stddev(Summary.median).label("median_sd"),
    )
    coverage_columns = (
        func.min(commits_up.c.timestamp).label("first_timestamp"),
        func.max(commits_up.c.timestamp).label("last_timestamp"),
        func.count(Summary.mean).label("observations"),
    )

    query = Session.query(*identity_columns, *statistic_columns, *coverage_columns)
    query = query.join(Run, Run.id == Summary.run_id)
    query = query.join(commits_up, commits_up.c.id == Run.commit_id)
    query = query.filter(
        Run.name.like("commit: %"),
        Summary.case_id == case_id,
        Summary.context_id == context_id,
        Summary.machine_id == machine_id,
    )
    return query.group_by(Summary.case_id, Summary.context_id, Summary.machine_id)


def update_distribution(repository, sha, summary, limit):
    """Recompute and upsert the distribution row matching ``summary``.

    Runs ``get_distribution`` for the summary's case, context and machine.
    If it returns no row nothing is written; otherwise the row is inserted
    into the distribution table, or the existing row with the same
    (sha, case_id, context_id, machine_id) is overwritten.

    Args:
        repository: Repository the commit belongs to.
        sha: Commit sha the distribution is keyed on.
        summary: Object providing ``case_id``, ``context_id`` and
            ``machine_id``.
        limit: Maximum number of commits to aggregate over.
    """
    from ..db import engine

    query = get_distribution(
        repository,
        sha,
        summary.case_id,
        summary.context_id,
        summary.machine_id,
        limit,
    )
    row = query.first()
    if not row:
        return

    values = dict(row)
    upsert = insert(Distribution.__table__).values(values)
    upsert = upsert.on_conflict_do_update(
        index_elements=["sha", "case_id", "context_id", "machine_id"],
        set_=values,
    )
    with engine.connect() as conn:
        conn.execute(upsert)
        conn.commit()
11 changes: 7 additions & 4 deletions conbench/entities/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@
)
from ..entities.case import Case
from ..entities.context import Context
from ..entities.data import Data
from ..entities.commit import Commit, parse_commit
from ..entities.data import Data
from ..entities.distribution import update_distribution
from ..entities.machine import Machine, MachineSchema
from ..entities.run import Run
from ..entities.time import Time
Expand All @@ -29,12 +30,12 @@ class Summary(Base, EntityMixin):
__tablename__ = "summary"
id = NotNull(s.String(50), primary_key=True, default=generate_uuid)
case_id = NotNull(s.String(50), s.ForeignKey("case.id"))
machine_id = NotNull(s.String(50), s.ForeignKey("machine.id"))
context_id = NotNull(s.String(50), s.ForeignKey("context.id"))
machine_id = NotNull(s.String(50), s.ForeignKey("machine.id"))
run_id = NotNull(s.Text, s.ForeignKey("run.id"))
case = relationship("Case", lazy="joined")
machine = relationship("Machine", lazy="select")
context = relationship("Context", lazy="select")
machine = relationship("Machine", lazy="select")
run = relationship("Run", lazy="select")
data = relationship(
"Data",
Expand Down Expand Up @@ -131,8 +132,8 @@ def create(data):
)

stats["case_id"] = case.id
stats["machine_id"] = machine.id
stats["context_id"] = context.id
stats["machine_id"] = machine.id
summary = Summary(**stats)
summary.save()

Expand All @@ -148,6 +149,8 @@ def create(data):
bulk.append(Time(result=x, summary_id=summary.id, iteration=i + 1))
Time.bulk_save_objects(bulk)

update_distribution(repository, sha, summary, 1000)

return summary


Expand Down
6 changes: 3 additions & 3 deletions conbench/tests/api/_expected_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@
"os_name": "macOS",
"os_version": "10.15.7",
},
"name": "pull request: 9564",
"name": "commit: 02addad336ba19a654f9c857ede546331be7b631",
"timestamp": "2021-02-04T17:22:05.225583",
}
}
Expand Down Expand Up @@ -527,7 +527,7 @@
"os_name": "macOS",
"os_version": "10.15.7",
},
"name": "pull request: 9564",
"name": "commit: 02addad336ba19a654f9c857ede546331be7b631",
"timestamp": "2021-02-04T17:22:05.225583",
},
{
Expand Down Expand Up @@ -565,7 +565,7 @@
"os_name": "macOS",
"os_version": "10.15.7",
},
"name": "pull request: 9564",
"name": "commit: 02addad336ba19a654f9c857ede546331be7b631",
"timestamp": "2021-03-04T17:18:05.715583",
},
]
Expand Down
57 changes: 56 additions & 1 deletion conbench/tests/api/test_benchmarks.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import copy
import decimal
import uuid

import pytest

from ...api._examples import _api_benchmark_entity
from ...entities._entity import NotFound
from ...entities.distribution import Distribution
from ...entities.summary import Summary
from ...tests.api import _asserts

Expand Down Expand Up @@ -40,7 +43,7 @@
"stats": {
"batch_id": "7b2fdd9f929d47b9960152090d47f8e6",
"run_id": "2a5709d179f349cba69ed242be3e6321",
"run_name": "pull request: 9564",
"run_name": "commit: 02addad336ba19a654f9c857ede546331be7b631",
"data": [
"0.099094",
"0.037129",
Expand Down Expand Up @@ -232,3 +235,55 @@ def test_nested_schema_validation(self, client):
},
}
self.assert_400_bad_request(response, message)

def test_create_benchmark_distribution(self, client):
    """Posting the same benchmark twice keeps a single distribution row.

    After the first post the row has one observation and no standard
    deviations; after the second it has two observations and standard
    deviations of zero.
    """
    expected_means = {
        "mean": decimal.Decimal("0.03636900000000000000"),
        "min": decimal.Decimal("0.00473300000000000000"),
        "max": decimal.Decimal("0.14889600000000000000"),
        "median": decimal.Decimal("0.00898800000000000000"),
    }

    def post_benchmark(payload):
        # Create a benchmark and check the 201 response against the stored summary.
        response = client.post("/api/benchmarks/", json=payload)
        new_id = response.json["id"]
        summary = Summary.one(id=new_id)
        location = "http://localhost/api/benchmarks/%s/" % new_id
        self.assert_201_created(response, _expected_entity(summary), location)
        return summary

    def check_distribution(case_id, observations, expected_sd):
        # Exactly one distribution row for the case, with the expected aggregates.
        distributions = Distribution.search(filters=[Distribution.case_id == case_id])
        assert len(distributions) == 1
        row = distributions[0]
        assert row.unit == "s"
        assert row.observations == observations
        for stat, expected_mean in expected_means.items():
            assert getattr(row, f"{stat}_mean") == expected_mean
            if expected_sd is None:
                assert getattr(row, f"{stat}_sd") is None
            else:
                assert getattr(row, f"{stat}_sd") == expected_sd

    self.authenticate(client)
    data = copy.deepcopy(self.valid_payload)
    data["tags"]["name"] = uuid.uuid4().hex

    # first result
    summary_1 = post_benchmark(data)
    case_id = summary_1.case_id

    # after one result
    check_distribution(case_id, 1, None)

    # second result
    summary_2 = post_benchmark(data)
    assert summary_1.case_id == summary_2.case_id
    assert summary_1.context_id == summary_2.context_id
    assert summary_1.machine_id == summary_2.machine_id
    assert summary_1.run.commit_id == summary_2.run.commit_id

    # after two results
    check_distribution(case_id, 2, decimal.Decimal("0"))

0 comments on commit 45b9607

Please sign in to comment.