Commit 90ffff0: Merge 4731623 into bfbf663
dianaclarke committed May 27, 2021
2 parents bfbf663 + 4731623
Showing 20 changed files with 823 additions and 262 deletions.
27 changes: 15 additions & 12 deletions conbench/api/_examples.py
@@ -66,6 +66,9 @@ def _api_benchmark_entity(
  "q3": "0.036942",
  "stdev": "0.049194",
  "timestamp": "2020-11-25T21:02:42.706806",
+ "z_score": "0.000000",
+ "z_regression": False,
+ "z_improvement": False,
  },
  "tags": {
  "id": case_id,
@@ -117,10 +120,10 @@ def _api_compare_entity(benchmark_ids, batch_ids, run_ids, batch, benchmark, tag
  "deviations": "2.000",
  "baseline_z_score": "0.000",
  "contender_z_score": "0.000",
- "baseline_regression_z": False,
- "baseline_improvement_z": False,
- "contender_regression_z": False,
- "contender_improvement_z": False,
+ "baseline_z_regression": False,
+ "baseline_z_improvement": False,
+ "contender_z_regression": False,
+ "contender_z_improvement": False,
  "contender": "0.036 s",
  "contender_id": benchmark_ids[1],
  "contender_batch_id": batch_ids[1],
@@ -155,10 +158,10 @@ def _api_compare_list(
  "deviations": "2.000",
  "baseline_z_score": "0.000",
  "contender_z_score": "0.000",
- "baseline_regression_z": False,
- "baseline_improvement_z": False,
- "contender_regression_z": False,
- "contender_improvement_z": False,
+ "baseline_z_regression": False,
+ "baseline_z_improvement": False,
+ "contender_z_regression": False,
+ "contender_z_improvement": False,
  "contender": "0.036 s",
  "contender_id": contender_ids[0],
  "contender_batch_id": batch_ids[1],
@@ -181,10 +184,10 @@ def _api_compare_list(
  "deviations": "2.000",
  "baseline_z_score": "0.000",
  "contender_z_score": "0.000",
- "baseline_regression_z": False,
- "baseline_improvement_z": False,
- "contender_regression_z": False,
- "contender_improvement_z": False,
+ "baseline_z_regression": False,
+ "baseline_z_improvement": False,
+ "contender_z_regression": False,
+ "contender_z_improvement": False,
  "contender": "0.036 s",
  "contender_id": contender_ids[1],
  "contender_batch_id": batch_ids[1],
13 changes: 13 additions & 0 deletions conbench/api/benchmarks.py
@@ -7,6 +7,7 @@
  from ..api._endpoint import ApiEndpoint
  from ..entities._entity import NotFound
  from ..entities.case import Case
+ from ..entities.distribution import set_z_scores
  from ..entities.summary import BenchmarkFacadeSchema, Summary, SummarySerializer


@@ -42,6 +43,7 @@ def get(self, benchmark_id):
  - Benchmarks
  """
  summary = self._get(benchmark_id)
+ set_z_scores([summary])
  return self.serializer.one.dump(summary)

  @flask_login.login_required
@@ -101,16 +103,26 @@ def get(self):
  filters=[Case.name == name],
  joins=[Case],
  )
+ # TODO: cannot currently compute z_score on an arbitrary
+ # list of summaries - assumes same machine/sha/repository.
+ for summary in summaries:
+ summary.z_score = 0
  elif batch_id:
  summaries = Summary.search(
  filters=[Summary.batch_id == batch_id],
  )
+ set_z_scores(summaries)
  elif run_id:
  summaries = Summary.search(
  filters=[Summary.run_id == run_id],
  )
+ set_z_scores(summaries)
  else:
  summaries = Summary.all(order_by=Summary.timestamp.desc(), limit=500)
+ # TODO: cannot currently compute z_score on an arbitrary
+ # list of summaries - assumes same machine/sha/repository.
+ for summary in summaries:
+ summary.z_score = 0
  return self.serializer.many.dump(summaries)

  @flask_login.login_required
@@ -131,6 +143,7 @@ def post(self):
  """
  data = self.validate_benchmark(self.schema.create)
  summary = Summary.create(data)
+ set_z_scores([summary])
  return self.response_201_created(self.serializer.one.dump(summary))
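For a sense of what this change means to API consumers: summaries filtered by batch_id or run_id now carry real z-scores, while name-filtered or unfiltered listings fall back to a z-score of 0. The sketch below is illustrative only; the local URL, ids, and parameter values are placeholders, not part of this commit.

import requests

# Hypothetical local conbench instance; the URL and ids below are placeholders.
base = "http://127.0.0.1:5000/api/benchmarks/"

# Summaries from one batch share a machine/commit context, so the endpoint
# calls set_z_scores() and each result carries a meaningful stats z_score.
by_batch = requests.get(base, params={"batch_id": "some-batch-id"}).json()

# An arbitrary name filter can mix machines and commits, so the endpoint
# sets z_score to 0 for every summary (see the TODO in the hunk above).
by_name = requests.get(base, params={"name": "file-write"}).json()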
2 changes: 1 addition & 1 deletion conbench/api/compare.py
@@ -2,8 +2,8 @@


  from ..api import rule
- from ..api._comparator import BenchmarkComparator, BenchmarkListComparator
  from ..api._endpoint import ApiEndpoint
+ from ..entities._comparator import BenchmarkComparator, BenchmarkListComparator
  from ..entities._entity import NotFound
  from ..entities.distribution import set_z_scores
  from ..entities.summary import Summary
4 changes: 0 additions & 4 deletions conbench/app/compare.py
@@ -124,10 +124,6 @@ def _compare(self, params):
  compare = f'{c["baseline_batch_id"]}...{c["contender_batch_id"]}'
  c["compare_batches_url"] = f.url_for(view, compare_ids=compare)

- c["change"] = float(c["change"][:-1])
- if c["less_is_better"] and c["change"] != 0:
- c["change"] = c["change"] * -1
-
  if c["regression"]:
  regressions += 1
  if c["improvement"]:
74 changes: 40 additions & 34 deletions conbench/api/_comparator.py → conbench/entities/_comparator.py
@@ -15,6 +15,22 @@ def change_fmt(value):
  return "{:.3%}".format(value)


+ def _less_is_better(unit):
+ if unit in ["B/s", "i/s"]:
+ return False
+ return True
+
+
+ def z_regression(z_score, deviations=None):
+ deviations = deviations if deviations else DEVIATIONS
+ return -z_score > deviations
+
+
+ def z_improvement(z_score, deviations=None):
+ deviations = deviations if deviations else DEVIATIONS
+ return z_score > deviations
+
+
  class BenchmarkResult:
  def __init__(
  self,
@@ -72,9 +88,7 @@ def unit(self):

  @property
  def less_is_better(self):
- if self.unit in ["B/s", "i/s"]:
- return False
- return True
+ return _less_is_better(self.unit)

  @property
  def change(self):
@@ -89,19 +103,19 @@ def change(self):
  if old == 0:
  return 0.0

- return (new - old) / abs(old)
+ result = (new - old) / abs(old)
+ if self.less_is_better and result != 0:
+ result = result * -1
+
+ return result

  @property
  def regression(self):
- change = self.change
- adjusted_change = change if self.less_is_better else -change
- return adjusted_change * 100 > self.threshold
+ return -self.change * 100 > self.threshold

  @property
  def improvement(self):
- change = self.change
- adjusted_change = -change if self.less_is_better else change
- return adjusted_change * 100 > self.threshold
+ return self.change * 100 > self.threshold

  @property
  def baseline_z_score(self):
@@ -116,28 +130,20 @@ def contender_z_score(self):
  return self.contender.z_score

  @property
- def baseline_regression_z(self):
- z_score = self.baseline_z_score
- adjusted_z_score = z_score if self.less_is_better else -z_score
- return adjusted_z_score > self.deviations
+ def baseline_z_regression(self):
+ return z_regression(self.baseline_z_score, self.deviations)

  @property
- def baseline_improvement_z(self):
- z_score = self.baseline_z_score
- adjusted_z_score = -z_score if self.less_is_better else z_score
- return adjusted_z_score > self.deviations
+ def baseline_z_improvement(self):
+ return z_improvement(self.baseline_z_score, self.deviations)

  @property
- def contender_regression_z(self):
- z_score = self.contender_z_score
- adjusted_z_score = z_score if self.less_is_better else -z_score
- return adjusted_z_score > self.deviations
+ def contender_z_regression(self):
+ return z_regression(self.contender_z_score, self.deviations)

  @property
- def contender_improvement_z(self):
- z_score = self.contender_z_score
- adjusted_z_score = -z_score if self.less_is_better else z_score
- return adjusted_z_score > self.deviations
+ def contender_z_improvement(self):
+ return z_improvement(self.contender_z_score, self.deviations)

  @property
  def tags(self):
@@ -161,10 +167,10 @@ def formatted(self):
  "deviations": fmt(self.deviations),
  "baseline_z_score": fmt(self.baseline_z_score),
  "contender_z_score": fmt(self.contender_z_score),
- "baseline_regression_z": self.baseline_regression_z,
- "baseline_improvement_z": self.baseline_improvement_z,
- "contender_regression_z": self.contender_regression_z,
- "contender_improvement_z": self.contender_improvement_z,
+ "baseline_z_regression": self.baseline_z_regression,
+ "baseline_z_improvement": self.baseline_z_improvement,
+ "contender_z_regression": self.contender_z_regression,
+ "contender_z_improvement": self.contender_z_improvement,
  "baseline": fmt_unit(baseline, self.unit),
  "contender": fmt_unit(contender, self.unit),
  "baseline_id": self.baseline.id if self.baseline else None,
@@ -191,10 +197,10 @@ def compare(self):
  "deviations": fmt(self.deviations),
  "baseline_z_score": fmt(self.baseline_z_score),
  "contender_z_score": fmt(self.contender_z_score),
- "baseline_regression_z": self.baseline_regression_z,
- "baseline_improvement_z": self.baseline_improvement_z,
- "contender_regression_z": self.contender_regression_z,
- "contender_improvement_z": self.contender_improvement_z,
+ "baseline_z_regression": self.baseline_z_regression,
+ "baseline_z_improvement": self.baseline_z_improvement,
+ "contender_z_regression": self.contender_z_regression,
+ "contender_z_improvement": self.contender_z_improvement,
  "baseline": fmt(baseline),
  "contender": fmt(contender),
  "baseline_id": self.baseline.id if self.baseline else None,
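To make the new sign conventions concrete, here is a small worked sketch of the helpers introduced above. The DEVIATIONS value of 5.0 is an assumed placeholder for illustration; the real constant is defined elsewhere in _comparator.py and is not shown in this diff.

# Standalone sketch of the helpers moved/added in conbench/entities/_comparator.py.
# DEVIATIONS = 5.0 is an assumed placeholder, not the project's actual constant.
DEVIATIONS = 5.0


def _less_is_better(unit):
    # Throughput units (bytes/sec, items/sec) are better when larger;
    # everything else (e.g. seconds) is better when smaller.
    if unit in ["B/s", "i/s"]:
        return False
    return True


def z_regression(z_score, deviations=None):
    deviations = deviations if deviations else DEVIATIONS
    return -z_score > deviations


def z_improvement(z_score, deviations=None):
    deviations = deviations if deviations else DEVIATIONS
    return z_score > deviations


# With z-scores already sign-normalized by set_z_scores (positive = better),
# a contender 6 standard deviations worse than the baseline distribution:
assert z_regression(-6.0) is True
assert z_improvement(-6.0) is False
# ...and one 6 standard deviations better:
assert z_improvement(6.0) is True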
3 changes: 3 additions & 0 deletions conbench/entities/distribution.py
@@ -11,6 +11,7 @@
  NotNull,
  Nullable,
  )
+ from ..entities._comparator import _less_is_better
  from ..entities.commit import Commit
  from ..entities.machine import Machine
  from ..entities.run import Run
@@ -184,3 +185,5 @@ def set_z_scores(summaries):
  d = lookup.get(f"{summary.case_id}-{summary.context_id}")
  if d and d.mean_sd:
  summary.z_score = (summary.mean - d.mean_mean) / d.mean_sd
+ if _less_is_better(summary.unit) and summary.z_score != 0:
+ summary.z_score = summary.z_score * -1
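To make the sign flip concrete, a small worked example of the arithmetic added above; the distribution numbers are hypothetical.

# Hypothetical values for a timing benchmark (unit "s", so less is better):
dist_mean, dist_sd, new_mean = 0.030, 0.002, 0.036

z_score = (new_mean - dist_mean) / dist_sd   # (0.036 - 0.030) / 0.002 = 3.0
# The run got slower, so the sign is flipped for less-is-better units.
# After the flip, "positive z-score = improvement" holds for every unit.
z_score = z_score * -1                       # -3.0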
4 changes: 4 additions & 0 deletions conbench/entities/summary.py
@@ -16,6 +16,7 @@
  NotNull,
  Nullable,
  )
+ from ..entities._comparator import z_improvement, z_regression
  from ..entities.case import Case
  from ..entities.context import Context
  from ..entities.commit import Commit, parse_commit
@@ -215,6 +216,9 @@ def _dump(self, summary):
  "q3": self.decimal_fmt.format(summary.q3),
  "iqr": self.decimal_fmt.format(summary.iqr),
  "timestamp": summary.timestamp.isoformat(),
+ "z_score": self.decimal_fmt.format(summary.z_score),
+ "z_regression": z_regression(summary.z_score),
+ "z_improvement": z_improvement(summary.z_score),
  },
  "links": {
  "list": f.url_for("api.benchmarks", _external=True),
8 changes: 5 additions & 3 deletions conbench/runner.py
@@ -149,6 +149,7 @@ def record(self, result, name, tags, context, github, options, output=None):
  result.get("time_unit", "s"),
  timestamp,
  run_id,
+ self.batch_id,
  run_name,
  )
  benchmark = {
@@ -224,7 +225,8 @@ def _get_timing_options(self, options):
  "iterations": options.get("iterations", 1),
  }

- def _stats(self, data, unit, times, time_unit, timestamp, run_id, run_name):
+ @staticmethod
+ def _stats(data, unit, times, time_unit, timestamp, run_id, batch_id, run_name):
  fmt = "{:.6f}"

  def _format(f, data, min_length=0):
@@ -236,7 +238,7 @@ def _format(f, data, min_length=0):
  q1, q3 = np.percentile(data, [25, 75])

  if not run_id:
- run_id = self.batch_id
+ run_id = batch_id

  result = {
  "data": [fmt.format(x) for x in data],
@@ -245,7 +247,7 @@ def _format(f, data, min_length=0):
  "time_unit": time_unit,
  "iterations": len(data),
  "timestamp": timestamp,
- "batch_id": self.batch_id,
+ "batch_id": batch_id,
  "run_id": run_id,
  "mean": _format(statistics.mean, data),
  "median": _format(statistics.median, data),
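With _stats now a @staticmethod that takes batch_id explicitly, the summary statistics it reports are straightforward to reproduce in isolation. A minimal, self-contained sketch follows; the timing values are made up, and only the fields visible in the hunks above are mirrored.

import statistics

import numpy as np

# Made-up timings in seconds, standing in for one benchmark's iterations.
data = [0.099094, 0.037129, 0.036381, 0.148896, 0.008104, 0.005496]

q1, q3 = np.percentile(data, [25, 75])
fmt = "{:.6f}"

result = {
    "data": [fmt.format(x) for x in data],
    "unit": "s",
    "time_unit": "s",
    "iterations": len(data),
    "mean": fmt.format(statistics.mean(data)),
    "median": fmt.format(statistics.median(data)),
    "q1": fmt.format(q1),
    "q3": fmt.format(q3),
}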
2 changes: 2 additions & 0 deletions conbench/templates/batch.html
@@ -60,6 +60,7 @@
  <th scope="col">Batch</th>
  <th scope="col">Benchmark</th>
  <th scope="col">Mean</th>
+ <th scope="col">Z-Score</th>
  </tr>
  </thead>
  <tbody>
@@ -73,6 +74,7 @@
  <div>{{ benchmark.display_name }}</div>
  </a></td>
  <td>{{ benchmark.display_mean }}</td>
+ <td>{{ benchmark.stats.z_score }}</td>
  </tr>
  {% endfor %}
  </tbody>
12 changes: 8 additions & 4 deletions conbench/templates/benchmark-entity.html
@@ -58,10 +58,6 @@
  </li>
  {% endif %}
  {% endfor %}
- <li class="list-group-item" style="overflow-y: auto;">
- <b>&nbsp;</b>
- <div align="right" style="display:inline-block; float: right;">&nbsp;</div>
- </li>
  <li class="list-group-item active">Tags</li>
  {% for k,v in benchmark.tags.items() %}
  <li class="list-group-item" style="overflow-y: auto;">
@@ -109,6 +105,14 @@
  <div align="right" style="display:inline-block; float: right;">{{ v }}</div>
  </li>
  {% endfor %}
+ <li class="list-group-item" style="overflow-y: auto;">
+ <b>&nbsp;</b>
+ <div align="right" style="display:inline-block; float: right;">&nbsp;</div>
+ </li>
+ <li class="list-group-item" style="overflow-y: auto;">
+ <b>&nbsp;</b>
+ <div align="right" style="display:inline-block; float: right;">&nbsp;</div>
+ </li>
  <li class="list-group-item active" >Context</li>
  {% for k,v in benchmark.context.items() %}
  <li class="list-group-item" style="overflow-y: auto;">
1 change: 1 addition & 0 deletions conbench/templates/benchmark-list.html
@@ -60,6 +60,7 @@
  var table = $('#benchmarks').dataTable( {
  "responsive": true,
  "order": [[0, 'desc']],
+ "columnDefs": [{ "orderable": false, "targets": [3] }]
  } );
  {% else %}
  var table = $('#benchmarks').dataTable( {
