Skip to content

Commit

Permalink
Z-score API scaffold (#70)
Browse files Browse the repository at this point in the history
  • Loading branch information
dianaclarke committed May 20, 2021
1 parent a60fafa commit 3852549
Show file tree
Hide file tree
Showing 6 changed files with 546 additions and 22 deletions.
99 changes: 88 additions & 11 deletions conbench/api/_comparator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from ..units import formatter_for_unit


THRESHOLD = 5 # percent
THRESHOLD = 5.0 # percent
DEVIATIONS = 2.0 # standard deviations


def fmt(value):
Expand All @@ -15,7 +16,18 @@ def change_fmt(value):


class BenchmarkResult:
def __init__(self, id, batch_id, run_id, unit, value, batch, benchmark, tags):
def __init__(
self,
id,
batch_id,
run_id,
unit,
value,
batch,
benchmark,
tags,
z_score,
):
self.id = id
self.batch_id = batch_id
self.run_id = run_id
Expand All @@ -24,13 +36,15 @@ def __init__(self, id, batch_id, run_id, unit, value, batch, benchmark, tags):
self.benchmark = benchmark
self.value = decimal.Decimal(value)
self.tags = tags
self.z_score = decimal.Decimal(z_score)


class BenchmarkComparator:
def __init__(self, baseline, contender, threshold=None):
def __init__(self, baseline, contender, threshold=None, deviations=None):
self.baseline = BenchmarkResult(**baseline) if baseline else None
self.contender = BenchmarkResult(**contender) if contender else None
self.threshold = threshold if threshold is not None else THRESHOLD
self.threshold = float(threshold) if threshold is not None else THRESHOLD
self.deviations = float(deviations) if deviations is not None else DEVIATIONS

@property
def batch(self):
Expand Down Expand Up @@ -89,6 +103,42 @@ def improvement(self):
adjusted_change = -change if self.less_is_better else change
return adjusted_change * 100 > self.threshold

@property
def baseline_z_score(self):
    """Z-score of the baseline result, or 0.0 when there is no baseline."""
    result = self.baseline
    return result.z_score if result is not None else 0.0

@property
def contender_z_score(self):
    """Z-score of the contender result, or 0.0 when there is no contender."""
    result = self.contender
    return result.z_score if result is not None else 0.0

@property
def baseline_regression_z(self):
    """Whether the baseline z-score signals a regression.

    The score is used as-is when less is better and negated otherwise, then
    compared against ``self.deviations``; only a strictly greater value counts.
    """
    score = self.baseline_z_score
    if not self.less_is_better:
        score = -score
    return score > self.deviations

@property
def baseline_improvement_z(self):
    """Whether the baseline z-score signals an improvement.

    The score is negated when less is better and used as-is otherwise, then
    compared against ``self.deviations``; only a strictly greater value counts.
    """
    score = self.baseline_z_score
    if self.less_is_better:
        score = -score
    return score > self.deviations

@property
def contender_regression_z(self):
    """Whether the contender z-score signals a regression.

    The score is used as-is when less is better and negated otherwise, then
    compared against ``self.deviations``; only a strictly greater value counts.
    """
    score = self.contender_z_score
    if not self.less_is_better:
        score = -score
    return score > self.deviations

@property
def contender_improvement_z(self):
    """Whether the contender z-score signals an improvement.

    The score is negated when less is better and used as-is otherwise, then
    compared against ``self.deviations``; only a strictly greater value counts.
    """
    score = self.contender_z_score
    if self.less_is_better:
        score = -score
    return score > self.deviations

@property
def tags(self):
if self.baseline is not None:
Expand All @@ -98,17 +148,25 @@ def tags(self):
return "unknown"

def formatted(self):
fmt = formatter_for_unit(self.unit)
fmt_unit = formatter_for_unit(self.unit)
baseline = self.baseline.value if self.baseline else None
contender = self.contender.value if self.contender else None
return {
"batch": self.batch,
"benchmark": self.benchmark,
"change": change_fmt(self.change),
"threshold": fmt(self.threshold) + "%",
"regression": self.regression,
"improvement": self.improvement,
"baseline": fmt(baseline, self.unit),
"contender": fmt(contender, self.unit),
"deviations": fmt(self.deviations),
"baseline_z_score": fmt(self.baseline_z_score),
"contender_z_score": fmt(self.contender_z_score),
"baseline_regression_z": self.baseline_regression_z,
"baseline_improvement_z": self.baseline_improvement_z,
"contender_regression_z": self.contender_regression_z,
"contender_improvement_z": self.contender_improvement_z,
"baseline": fmt_unit(baseline, self.unit),
"contender": fmt_unit(contender, self.unit),
"baseline_id": self.baseline.id if self.baseline else None,
"contender_id": self.contender.id if self.contender else None,
"baseline_batch_id": self.baseline.batch_id if self.baseline else None,
Expand All @@ -127,8 +185,16 @@ def compare(self):
"batch": self.batch,
"benchmark": self.benchmark,
"change": fmt(self.change * 100),
"threshold": fmt(self.threshold),
"regression": self.regression,
"improvement": self.improvement,
"deviations": fmt(self.deviations),
"baseline_z_score": fmt(self.baseline_z_score),
"contender_z_score": fmt(self.contender_z_score),
"baseline_regression_z": self.baseline_regression_z,
"baseline_improvement_z": self.baseline_improvement_z,
"contender_regression_z": self.contender_regression_z,
"contender_improvement_z": self.contender_improvement_z,
"baseline": fmt(baseline),
"contender": fmt(contender),
"baseline_id": self.baseline.id if self.baseline else None,
Expand All @@ -144,16 +210,27 @@ def compare(self):


class BenchmarkListComparator:
def __init__(self, pairs, threshold=None):
def __init__(self, pairs, threshold=None, deviations=None):
self.pairs = pairs
self.threshold = threshold if threshold is not None else THRESHOLD
self.threshold = float(threshold) if threshold is not None else THRESHOLD
self.deviations = float(deviations) if deviations is not None else DEVIATIONS

def formatted(self):
for pair in self.pairs.values():
baseline, contender = pair.get("baseline"), pair.get("contender")
yield BenchmarkComparator(baseline, contender, self.threshold).formatted()
yield BenchmarkComparator(
baseline,
contender,
self.threshold,
self.deviations,
).formatted()

def compare(self):
for pair in self.pairs.values():
baseline, contender = pair.get("baseline"), pair.get("contender")
yield BenchmarkComparator(baseline, contender, self.threshold).compare()
yield BenchmarkComparator(
baseline,
contender,
self.threshold,
self.deviations,
).compare()
36 changes: 30 additions & 6 deletions conbench/api/_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,21 @@ def _api_compare_entity(benchmark_ids, batch_ids, run_ids, batch, benchmark, tag
"batch": batch,
"benchmark": benchmark,
"change": "0.000%",
"threshold": "5.000%",
"regression": False,
"improvement": False,
"deviations": "2.000",
"baseline_z_score": "0.000",
"contender_z_score": "0.000",
"baseline_regression_z": False,
"baseline_improvement_z": False,
"contender_regression_z": False,
"contender_improvement_z": False,
"contender": "0.036 s",
"contender_id": benchmark_ids[1],
"contender_batch_id": batch_ids[1],
"contender_run_id": run_ids[1],
"less_is_better": True,
"regression": False,
"improvement": False,
"unit": "s",
"tags": tags,
}
Expand All @@ -141,13 +149,21 @@ def _api_compare_list(
"batch": batches[0],
"benchmark": benchmarks[0],
"change": "0.000%",
"threshold": "5.000%",
"regression": False,
"improvement": False,
"deviations": "2.000",
"baseline_z_score": "0.000",
"contender_z_score": "0.000",
"baseline_regression_z": False,
"baseline_improvement_z": False,
"contender_regression_z": False,
"contender_improvement_z": False,
"contender": "0.036 s",
"contender_id": contender_ids[0],
"contender_batch_id": batch_ids[1],
"contender_run_id": run_ids[1],
"less_is_better": True,
"regression": False,
"improvement": False,
"unit": "s",
"tags": tags[0],
},
Expand All @@ -159,13 +175,21 @@ def _api_compare_list(
"batch": batches[1],
"benchmark": benchmarks[1],
"change": "0.000%",
"threshold": "5.000%",
"regression": False,
"improvement": False,
"deviations": "2.000",
"baseline_z_score": "0.000",
"contender_z_score": "0.000",
"baseline_regression_z": False,
"baseline_improvement_z": False,
"contender_regression_z": False,
"contender_improvement_z": False,
"contender": "0.036 s",
"contender_id": contender_ids[1],
"contender_batch_id": batch_ids[1],
"contender_run_id": run_ids[1],
"less_is_better": True,
"regression": False,
"improvement": False,
"unit": "s",
"tags": tags[1],
},
Expand Down
45 changes: 41 additions & 4 deletions conbench/api/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def _compare_entity(summary):
"benchmark": summary.display_name,
"batch": summary.display_batch,
"tags": summary.case.tags,
"z_score": 0.0, # TODO
}


Expand Down Expand Up @@ -52,14 +53,23 @@ def get(self, compare_ids):
name: threshold
schema:
type: integer
- in: query
name: deviations
schema:
type: integer
tags:
- Compare
"""
raw = f.request.args.get("raw", "false").lower() in ["true", "1"]

threshold = f.request.args.get("threshold")
if threshold is not None:
threshold = int(threshold)

deviations = f.request.args.get("deviations")
if deviations is not None:
deviations = int(deviations)

try:
baseline_id, contender_id = compare_ids.split("...", 1)
except ValueError:
Expand All @@ -76,9 +86,19 @@ def get(self, compare_ids):
contender = _compare_entity(contender_summary)

if raw:
return BenchmarkComparator(baseline, contender, threshold).compare()
return BenchmarkComparator(
baseline,
contender,
threshold,
deviations,
).compare()
else:
return BenchmarkComparator(baseline, contender, threshold).formatted()
return BenchmarkComparator(
baseline,
contender,
threshold,
deviations,
).formatted()


class CompareBatchesAPI(ApiEndpoint):
Expand Down Expand Up @@ -113,14 +133,23 @@ def get(self, compare_ids):
name: threshold
schema:
type: integer
- in: query
name: deviations
schema:
type: integer
tags:
- Compare
"""
raw = f.request.args.get("raw", "false").lower() in ["true", "1"]

threshold = f.request.args.get("threshold")
if threshold is not None:
threshold = int(threshold)

deviations = f.request.args.get("deviations")
if deviations is not None:
deviations = int(deviations)

try:
baseline_id, contender_id = compare_ids.split("...", 1)
except ValueError:
Expand All @@ -140,9 +169,17 @@ def get(self, compare_ids):
self._add_pair(pairs, summary, "contender")

if raw:
result = BenchmarkListComparator(pairs, threshold).compare()
result = BenchmarkListComparator(
pairs,
threshold,
deviations,
).compare()
else:
result = BenchmarkListComparator(pairs, threshold).formatted()
result = BenchmarkListComparator(
pairs,
threshold,
deviations,
).formatted()

return f.jsonify(list(result))

Expand Down
49 changes: 49 additions & 0 deletions conbench/entities/distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,52 @@ def update_distribution(repository, sha, summary, limit):
)
)
conn.commit()


# SQL that joins each summary to its run, commit, machine and (optionally)
# distribution, and derives z-scores for mean/min/max/median.
#
# - Each z-score divides by NULLIF(<sd>, 0) so a zero standard deviation gives
#   a NULL z-score instead of failing the whole query with a division error.
# - commit_timestamp / commit_repository are selected once; the original text
#   listed both columns twice.
# - NOTE(review): ``{{to_compare_sha}}`` looks like a template placeholder that
#   must be substituted before the query is executed; confirm with the caller.
q = """SELECT
    run_id,
    summary.case_id,
    summary.machine_id,
    machine.name AS machine_name,
    summary.unit,
    summary.time_unit,
    summary.min,
    summary.max,
    summary.mean,
    summary.median,
    commit.timestamp AS commit_timestamp,
    commit.repository AS commit_repository,
    (summary.mean - distribution.mean_mean) / NULLIF(distribution.mean_sd, 0) AS mean_z,
    (summary.min - distribution.min_mean) / NULLIF(distribution.min_sd, 0) AS min_z,
    (summary.max - distribution.max_mean) / NULLIF(distribution.max_sd, 0) AS max_z,
    (summary.median - distribution.median_mean) / NULLIF(distribution.median_sd, 0) AS median_z,
    distribution.*
FROM summary
INNER JOIN run
    ON summary.run_id = run.id
INNER JOIN commit
    ON commit.id = run.commit_id
INNER JOIN machine
    ON summary.machine_id = machine.id
LEFT JOIN distribution
    ON summary.case_id = distribution.case_id AND machine.name = distribution.machine_name
WHERE run.name = 'commit: {{to_compare_sha}}'"""


def get_z_score(repository, sha, case_id, context_id, machine_hash, mean):
    """Return the z-score of ``mean`` against the matching stored distribution.

    Looks up ``mean_mean`` and ``mean_sd`` on the ``Distribution`` row that
    matches the given repository, sha, case, context and machine hash, and
    computes ``(mean - mean_mean) / mean_sd``.

    Returns:
        The z-score, or ``None`` when no matching distribution exists or when
        its standard deviation is missing or zero (the z-score is undefined).
    """
    row = (
        Session.query(Distribution.mean_mean, Distribution.mean_sd)
        .filter(
            Distribution.repository == repository,
            Distribution.sha == sha,
            Distribution.case_id == case_id,
            Distribution.context_id == context_id,
            Distribution.machine_hash == machine_hash,
        )
        .first()
    )
    if row is None:
        return None

    # Positional unpacking follows the column order given to query().
    distribution_mean, distribution_sd = row

    # A NULL or zero standard deviation would make the division raise, so
    # report "no z-score" instead.
    if not distribution_sd:
        return None
    return (mean - distribution_mean) / distribution_sd

0 comments on commit 3852549

Please sign in to comment.