Skip to content

Commit

Permalink
Merge c691125 into 843ad94
Browse files Browse the repository at this point in the history
  • Loading branch information
dianaclarke committed Jul 30, 2021
2 parents 843ad94 + c691125 commit 3f01768
Show file tree
Hide file tree
Showing 15 changed files with 211 additions and 452 deletions.
1 change: 0 additions & 1 deletion conbench/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from .commits import * # noqa
from .compare import * # noqa
from .contexts import * # noqa
from .distribution import * # noqa
from .history import * # noqa
from .index import * # noqa
from .machines import * # noqa
Expand Down
2 changes: 0 additions & 2 deletions conbench/api/_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ def _201_created(example, schema=None):
spec.components.response("CompareList", _200_ok(ex.COMPARE_LIST))
spec.components.response("ContextEntity", _200_ok(ex.CONTEXT_ENTITY))
spec.components.response("ContextList", _200_ok([ex.CONTEXT_ENTITY]))
spec.components.response("DistributionList", _200_ok([ex.DISTRIBUTION_ENTITY]))
spec.components.response("HistoryList", _200_ok([ex.HISTORY_ENTITY]))
spec.components.response("MachineEntity", _200_ok(ex.MACHINE_ENTITY))
spec.components.response("MachineList", _200_ok([ex.MACHINE_ENTITY]))
Expand All @@ -89,7 +88,6 @@ def _201_created(example, schema=None):
{"name": "Commits", "description": "Benchmarked commits"},
{"name": "Comparisons", "description": "Benchmark comparisons"},
{"name": "Contexts", "description": "Benchmark contexts"},
{"name": "Distribution", "description": "Benchmark distribution"},
{"name": "History", "description": "Benchmark history"},
{"name": "Machines", "description": "Benchmark machines"},
{"name": "Runs", "description": "Benchmark runs"},
Expand Down
31 changes: 2 additions & 29 deletions conbench/api/_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,29 +224,6 @@ def _api_context_entity(context_id, links=True):
return result


def _api_distribution_entity(
distribution_id,
case_id,
context_id,
commit_id,
):
result = {
"id": distribution_id,
"case_id": case_id,
"context_id": context_id,
"commit_id": commit_id,
"machine_hash": "diana-2-4-17179869184",
"unit": "s",
"mean_mean": "0.036369",
"mean_sd": "0.000000",
"repository": "https://github.com/apache/arrow",
"sha": "02addad336ba19a654f9c857ede546331be7b631",
"first_timestamp": "2021-02-25T01:02:51",
"last_timestamp": "2021-02-25T01:02:51",
}
return result


def _api_history_entity(benchmark_id, case_id, context_id):
return {
"benchmark_id": benchmark_id,
Expand All @@ -255,6 +232,8 @@ def _api_history_entity(benchmark_id, case_id, context_id):
"machine_hash": "diana-2-4-17179869184",
"unit": "s",
"mean": "0.036369",
"distribution_mean": "0.036369",
"distribution_stdev": "0.000000",
"repository": "https://github.com/apache/arrow",
"sha": "02addad336ba19a654f9c857ede546331be7b631",
"timestamp": "2021-02-25T01:02:51",
Expand Down Expand Up @@ -362,12 +341,6 @@ def _api_run_entity(run_id, commit_id, machine_id, now, baseline_id):
],
)
CONTEXT_ENTITY = _api_context_entity("some-context-uuid-1")
DISTRIBUTION_ENTITY = _api_distribution_entity(
"some-distribution-uuid-1",
"some-case-uuid-1",
"some-context-uuid-1",
"some-commit-uuid-1",
)
HISTORY_ENTITY = _api_history_entity(
"some-benchmark-uuid-1",
"some-case-uuid-1",
Expand Down
48 changes: 0 additions & 48 deletions conbench/api/distribution.py

This file was deleted.

29 changes: 7 additions & 22 deletions conbench/app/_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@
class TimeSeriesPlotMixin:
def _get_history_plot(self, benchmark):
history = self._get_history(benchmark)
distribution = self._get_distribution(benchmark)
return json.dumps(
bokeh.embed.json_item(
time_series_plot(history, distribution, benchmark["id"]),
time_series_plot(history, benchmark["id"]),
"plot-history",
)
)
Expand All @@ -24,13 +23,6 @@ def _get_history(self, benchmark):
return []
return response.json

def _get_distribution(self, benchmark):
response = self.api_get("api.distribution", benchmark_id=benchmark["id"])
if response.status_code != 200:
self.flash("Error getting distribution.")
return []
return response.json


def get_display_unit(unit):
if unit == "s":
Expand Down Expand Up @@ -101,17 +93,10 @@ def simple_bar_plot(benchmarks, height=400, width=400):
return p


def time_series_plot(history, distribution, benchmark_id, height=250, width=1000):
dist_by_sha = {d["sha"]: d for d in distribution}
for h in history:
dist = dist_by_sha.get(h["sha"])
if dist:
h["mean_mean"] = dist["mean_mean"]
h["mean_sd"] = dist["mean_sd"]

def time_series_plot(history, benchmark_id, height=250, width=1000):
unit = get_display_unit(history[0]["unit"])
current = [h for h in history if h["benchmark_id"] == benchmark_id]
with_dist = [h for h in history if h.get("mean_mean")]
with_dist = [h for h in history if h["distribution_mean"]]

times = [h["mean"] for h in history]
commits = [h["message"] for h in history]
Expand All @@ -121,15 +106,15 @@ def time_series_plot(history, distribution, benchmark_id, height=250, width=1000
commits_x = [c["message"] for c in current]
dates_x = [dateutil.parser.isoparse(c["timestamp"]) for c in current]

times_mean = [w["mean_mean"] for w in with_dist]
times_mean = [w["distribution_mean"] for w in with_dist]
commits_mean = [w["message"] for w in with_dist]
dates_mean = [dateutil.parser.isoparse(w["timestamp"]) for w in with_dist]

alert_min, alert_max = [], []
for w in with_dist:
alert = 5 * float(w["mean_sd"])
alert_min.append(float(w["mean_mean"]) - alert)
alert_max.append(float(w["mean_mean"]) + alert)
alert = 5 * float(w["distribution_stdev"])
alert_min.append(float(w["distribution_mean"]) - alert)
alert_max.append(float(w["distribution_mean"]) + alert)

source_data = dict(x=dates, y=times, commit=commits)
source = bokeh.models.ColumnDataSource(data=source_data)
Expand Down
128 changes: 35 additions & 93 deletions conbench/entities/distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from ..entities._entity import (
Base,
EntityMixin,
EntitySerializer,
generate_uuid,
NotNull,
Nullable,
Expand All @@ -21,11 +20,9 @@
class Distribution(Base, EntityMixin):
__tablename__ = "distribution"
id = NotNull(s.String(50), primary_key=True, default=generate_uuid)
sha = NotNull(s.String(50))
repository = NotNull(s.String(100))
case_id = NotNull(s.String(50), s.ForeignKey("case.id", ondelete="CASCADE"))
context_id = NotNull(s.String(50), s.ForeignKey("context.id", ondelete="CASCADE"))
commit_id = Nullable(s.String(50), s.ForeignKey("commit.id", ondelete="CASCADE"))
commit_id = NotNull(s.String(50), s.ForeignKey("commit.id", ondelete="CASCADE"))
machine_hash = NotNull(s.String(250))
unit = NotNull(s.Text)
mean_mean = Nullable(s.Numeric, check("mean_mean>=0"))
Expand All @@ -44,71 +41,12 @@ class Distribution(Base, EntityMixin):

s.Index(
"distribution_index",
Distribution.sha,
Distribution.case_id,
Distribution.context_id,
Distribution.commit_id,
Distribution.machine_hash,
unique=True,
)
s.Index("distribution_sha_index", Distribution.sha)
s.Index("distribution_repository_index", Distribution.repository)
s.Index("distribution_case_id_index", Distribution.case_id)
s.Index("distribution_context_id_index", Distribution.context_id)
s.Index("distribution_commit_id_index", Distribution.commit_id)
s.Index("distribution_machine_hash_index", Distribution.machine_hash)


class _Serializer(EntitySerializer):
decimal_fmt = "{:.6f}"

def _dump(self, distribution):
standard_deviation = distribution.mean_sd if distribution.mean_sd else 0
result = {
"id": distribution.id,
"sha": distribution.sha,
"repository": distribution.repository,
"case_id": distribution.case_id,
"context_id": distribution.context_id,
"commit_id": distribution.commit_id,
"machine_hash": distribution.machine_hash,
"unit": distribution.unit,
"mean_mean": self.decimal_fmt.format(distribution.mean_mean),
"mean_sd": self.decimal_fmt.format(standard_deviation),
"first_timestamp": distribution.first_timestamp.isoformat(),
"last_timestamp": distribution.last_timestamp.isoformat(),
}
return result


class DistributionSerializer:
one = _Serializer()
many = _Serializer(many=True)


def get_distribution_history(case_id, context_id, machine_hash):
    """Return all Distribution rows for one case/context/machine triple.

    Selects the full set of Distribution columns, filtered to the given
    case id, context id, and machine hash, ordered oldest-first by
    ``first_timestamp`` so callers get the history in chronological order.

    NOTE: indentation was lost in transit; reconstructed with standard
    formatting, tokens unchanged. Relies on module-level ``Session`` and
    ``Distribution`` — presumably SQLAlchemy session and model; confirm
    against the original module.
    """
    return (
        Session.query(
            Distribution.id,
            Distribution.repository,
            Distribution.sha,
            Distribution.case_id,
            Distribution.context_id,
            Distribution.commit_id,
            Distribution.machine_hash,
            Distribution.unit,
            Distribution.mean_mean,
            Distribution.mean_sd,
            Distribution.first_timestamp,
            Distribution.last_timestamp,
        )
        .filter(
            Distribution.case_id == case_id,
            Distribution.context_id == context_id,
            Distribution.machine_hash == machine_hash,
        )
        .order_by(Distribution.first_timestamp.asc())
        .all()
    )


def get_commit_index(repository):
Expand All @@ -131,19 +69,24 @@ def get_commits_up(repository, sha, limit):
return Session.query(index).filter(index.c.row_number >= n).limit(limit)


def get_distribution(
repository, sha, case_id, context_id, commit_id, machine_hash, limit
):
def get_distribution(summary, limit):
from ..entities.summary import Summary

commits_up = get_commits_up(repository, sha, limit).subquery().alias("commits_up")
commits_up = (
get_commits_up(
summary.run.commit.repository,
summary.run.commit.sha,
limit,
)
.subquery()
.alias("commits_up")
)

return (
Session.query(
func.text(repository).label("repository"),
func.text(sha).label("sha"),
func.text(case_id).label("case_id"),
func.text(context_id).label("context_id"),
func.text(commit_id).label("commit_id"),
func.text(summary.case_id).label("case_id"),
func.text(summary.context_id).label("context_id"),
func.text(summary.run.commit_id).label("commit_id"),
Machine.hash,
func.max(Summary.unit).label("unit"),
func.avg(Summary.mean).label("mean_mean"),
Expand Down Expand Up @@ -171,25 +114,17 @@ def get_distribution(
.join(commits_up, commits_up.c.id == Run.commit_id)
.filter(
Run.name.like("commit: %"),
Summary.case_id == case_id,
Summary.context_id == context_id,
Machine.hash == machine_hash,
Summary.case_id == summary.case_id,
Summary.context_id == summary.context_id,
Machine.hash == summary.run.machine.hash,
)
)


def update_distribution(repository, sha, summary, limit):
def update_distribution(summary, limit):
from ..db import engine

distribution = get_distribution(
repository,
sha,
summary.case_id,
summary.context_id,
summary.run.commit_id,
summary.run.machine.hash,
limit,
).first()
distribution = get_distribution(summary, limit).first()

if not distribution:
return
Expand All @@ -204,7 +139,7 @@ def update_distribution(repository, sha, summary, limit):
insert(Distribution.__table__)
.values(values)
.on_conflict_do_update(
index_elements=["sha", "case_id", "context_id", "machine_hash"],
index_elements=["case_id", "context_id", "commit_id", "machine_hash"],
set_=values,
)
)
Expand All @@ -215,15 +150,21 @@ def set_z_scores(summaries):
if not summaries:
return

for summary in summaries:
summary.z_score = 0

first = summaries[0]
repository = first.run.commit.repository
sha = first.run.commit.parent
machine_hash = first.run.machine.hash
parent_commit = Commit.first(
sha=first.run.commit.parent,
repository=first.run.commit.repository,
)

if not parent_commit:
return

where = [
Distribution.repository == repository,
Distribution.sha == sha,
Distribution.machine_hash == machine_hash,
Distribution.commit_id == parent_commit.id,
Distribution.machine_hash == first.run.machine.hash,
]
if len(summaries) == 1:
where.extend(
Expand All @@ -239,6 +180,7 @@ def set_z_scores(summaries):
Distribution.mean_mean,
Distribution.mean_sd,
]

distributions = Session.query(*cols).filter(*where).all()
lookup = {f"{d.case_id}-{d.context_id}": d for d in distributions}

Expand Down

0 comments on commit 3f01768

Please sign in to comment.