Skip to content

Commit

Permalink
Tweak z-score UI (#79)
Browse files Browse the repository at this point in the history
  • Loading branch information
dianaclarke committed Jun 7, 2021
1 parent cbf1522 commit 8226f82
Show file tree
Hide file tree
Showing 9 changed files with 146 additions and 139 deletions.
6 changes: 3 additions & 3 deletions conbench/api/_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def _api_compare_entity(benchmark_ids, batch_ids, run_ids, batch, benchmark, tag
"threshold": "5.000%",
"regression": False,
"improvement": False,
"deviations": "2.000",
"threshold_z": "2.000",
"baseline_z_score": "0.000",
"contender_z_score": "0.000",
"baseline_z_regression": False,
Expand Down Expand Up @@ -155,7 +155,7 @@ def _api_compare_list(
"threshold": "5.000%",
"regression": False,
"improvement": False,
"deviations": "2.000",
"threshold_z": "2.000",
"baseline_z_score": "0.000",
"contender_z_score": "0.000",
"baseline_z_regression": False,
Expand All @@ -181,7 +181,7 @@ def _api_compare_list(
"threshold": "5.000%",
"regression": False,
"improvement": False,
"deviations": "2.000",
"threshold_z": "2.000",
"baseline_z_score": "0.000",
"contender_z_score": "0.000",
"baseline_z_regression": False,
Expand Down
24 changes: 12 additions & 12 deletions conbench/api/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def get(self, compare_ids):
schema:
type: integer
- in: query
name: deviations
name: threshold_z
schema:
type: integer
tags:
Expand All @@ -68,9 +68,9 @@ def get(self, compare_ids):
if threshold is not None:
threshold = int(threshold)

deviations = f.request.args.get("deviations")
if deviations is not None:
deviations = int(deviations)
threshold_z = f.request.args.get("threshold_z")
if threshold_z is not None:
threshold_z = int(threshold_z)

try:
baseline_id, contender_id = compare_ids.split("...", 1)
Expand All @@ -92,14 +92,14 @@ def get(self, compare_ids):
baseline,
contender,
threshold,
deviations,
threshold_z,
).compare()
else:
return BenchmarkComparator(
baseline,
contender,
threshold,
deviations,
threshold_z,
).formatted()


Expand Down Expand Up @@ -134,7 +134,7 @@ def get(self, compare_ids):
schema:
type: integer
- in: query
name: deviations
name: threshold_z
schema:
type: integer
tags:
Expand All @@ -146,9 +146,9 @@ def get(self, compare_ids):
if threshold is not None:
threshold = int(threshold)

deviations = f.request.args.get("deviations")
if deviations is not None:
deviations = int(deviations)
threshold_z = f.request.args.get("threshold_z")
if threshold_z is not None:
threshold_z = int(threshold_z)

try:
baseline_id, contender_id = compare_ids.split("...", 1)
Expand All @@ -172,13 +172,13 @@ def get(self, compare_ids):
result = BenchmarkListComparator(
pairs,
threshold,
deviations,
threshold_z,
).compare()
else:
result = BenchmarkListComparator(
pairs,
threshold,
deviations,
threshold_z,
).formatted()

return f.jsonify(list(result))
Expand Down
4 changes: 2 additions & 2 deletions conbench/app/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,9 @@ def _compare(self, params):
compare = f'{c["baseline_batch_id"]}...{c["contender_batch_id"]}'
c["compare_batches_url"] = f.url_for(view, compare_ids=compare)

if c["regression"]:
if c["contender_z_regression"]:
regressions += 1
if c["improvement"]:
if c["contender_z_improvement"]:
improvements += 1

return comparisons, regressions, improvements
Expand Down
44 changes: 22 additions & 22 deletions conbench/entities/_comparator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from ..units import formatter_for_unit


THRESHOLD = 5.0 # percent
DEVIATIONS = 2.0 # standard deviations
CHANGE = 5.0 # percent changed threshold
Z_SCORE = 2.0 # z-score threshold


def fmt(value):
Expand All @@ -21,14 +21,14 @@ def _less_is_better(unit):
return True


def z_regression(z_score, deviations=None):
deviations = deviations if deviations else DEVIATIONS
return -z_score > deviations
def z_regression(z_score, threshold_z=None):
    """Return whether ``z_score`` counts as a regression.

    A z-score is a regression when its negation is strictly greater than
    the threshold, i.e. when it lies more than ``threshold_z`` below zero.

    Args:
        z_score: The z-score to classify.
        threshold_z: Z-score threshold. ``None`` selects the module default
            ``Z_SCORE``.

    Returns:
        ``True`` if ``-z_score > threshold_z``, otherwise ``False``.
    """
    # Only None means "use the default". An explicit 0 is a valid threshold
    # and must not be replaced by a truthiness check; the comparator
    # constructors in this module distinguish None from 0 the same way.
    if threshold_z is None:
        threshold_z = Z_SCORE
    return -z_score > threshold_z


def z_improvement(z_score, deviations=None):
deviations = deviations if deviations else DEVIATIONS
return z_score > deviations
def z_improvement(z_score, threshold_z=None):
    """Return whether ``z_score`` counts as an improvement.

    A z-score is an improvement when it is strictly greater than the
    threshold.

    Args:
        z_score: The z-score to classify.
        threshold_z: Z-score threshold. ``None`` selects the module default
            ``Z_SCORE``.

    Returns:
        ``True`` if ``z_score > threshold_z``, otherwise ``False``.
    """
    # Only None means "use the default". An explicit 0 is a valid threshold
    # and must not be replaced by a truthiness check; the comparator
    # constructors in this module distinguish None from 0 the same way.
    if threshold_z is None:
        threshold_z = Z_SCORE
    return z_score > threshold_z


class BenchmarkResult:
Expand Down Expand Up @@ -56,11 +56,11 @@ def __init__(


class BenchmarkComparator:
def __init__(self, baseline, contender, threshold=None, deviations=None):
def __init__(self, baseline, contender, threshold=None, threshold_z=None):
self.baseline = BenchmarkResult(**baseline) if baseline else None
self.contender = BenchmarkResult(**contender) if contender else None
self.threshold = float(threshold) if threshold is not None else THRESHOLD
self.deviations = float(deviations) if deviations is not None else DEVIATIONS
self.threshold = float(threshold) if threshold is not None else CHANGE
self.threshold_z = float(threshold_z) if threshold_z is not None else Z_SCORE

@property
def batch(self):
Expand Down Expand Up @@ -131,19 +131,19 @@ def contender_z_score(self):

@property
def baseline_z_regression(self):
return z_regression(self.baseline_z_score, self.deviations)
return z_regression(self.baseline_z_score, self.threshold_z)

@property
def baseline_z_improvement(self):
return z_improvement(self.baseline_z_score, self.deviations)
return z_improvement(self.baseline_z_score, self.threshold_z)

@property
def contender_z_regression(self):
return z_regression(self.contender_z_score, self.deviations)
return z_regression(self.contender_z_score, self.threshold_z)

@property
def contender_z_improvement(self):
return z_improvement(self.contender_z_score, self.deviations)
return z_improvement(self.contender_z_score, self.threshold_z)

@property
def tags(self):
Expand All @@ -164,7 +164,7 @@ def formatted(self):
"threshold": fmt(self.threshold) + "%",
"regression": self.regression,
"improvement": self.improvement,
"deviations": fmt(self.deviations),
"threshold_z": fmt(self.threshold_z),
"baseline_z_score": fmt(self.baseline_z_score),
"contender_z_score": fmt(self.contender_z_score),
"baseline_z_regression": self.baseline_z_regression,
Expand Down Expand Up @@ -194,7 +194,7 @@ def compare(self):
"threshold": fmt(self.threshold),
"regression": self.regression,
"improvement": self.improvement,
"deviations": fmt(self.deviations),
"threshold_z": fmt(self.threshold_z),
"baseline_z_score": fmt(self.baseline_z_score),
"contender_z_score": fmt(self.contender_z_score),
"baseline_z_regression": self.baseline_z_regression,
Expand All @@ -216,10 +216,10 @@ def compare(self):


class BenchmarkListComparator:
def __init__(self, pairs, threshold=None, deviations=None):
def __init__(self, pairs, threshold=None, threshold_z=None):
self.pairs = pairs
self.threshold = float(threshold) if threshold is not None else THRESHOLD
self.deviations = float(deviations) if deviations is not None else DEVIATIONS
self.threshold = float(threshold) if threshold is not None else CHANGE
self.threshold_z = float(threshold_z) if threshold_z is not None else Z_SCORE

def formatted(self):
for pair in self.pairs.values():
Expand All @@ -228,7 +228,7 @@ def formatted(self):
baseline,
contender,
self.threshold,
self.deviations,
self.threshold_z,
).formatted()

def compare(self):
Expand All @@ -238,5 +238,5 @@ def compare(self):
baseline,
contender,
self.threshold,
self.deviations,
self.threshold_z,
).compare()
31 changes: 10 additions & 21 deletions conbench/templates/compare-entity.html
Original file line number Diff line number Diff line change
Expand Up @@ -39,46 +39,35 @@
<caption>{% include 'units-tooltip.html' %}</caption>
<thead>
<tr>
<th width="25%" scope="col">Z-Score</th>
<th width="25%" scope="col">Change</th>
<th width="25%" scope="col">Baseline</th>
<th width="25%" scope="col">Contender</th>
<th width="25%" scope="col">Change</th>
</tr>
</thead>
<tbody>
{% for c in comparisons %}
<tr>
<td>{{ c.baseline }}</td>
<td>{{ c.contender }}</td>
<td>
{{ c.change }}
{% if c.regression %}
<span class="glyphicon glyphicon-arrow-down"></span></b>
{% endif %}
{% if c.improvement %}
<span class="glyphicon glyphicon-arrow-up"></span></b>
{% endif %}
</td>
</tr>
<tr>
<td>
{{ c.baseline_z_score }} z
{% if c.baseline_z_regression %}
{{ c.contender_z_score }} z
{% if c.contender_z_regression %}
<span class="glyphicon glyphicon-arrow-down"></span></b>
{% endif %}
{% if c.baseline_z_improvement %}
{% if c.contender_z_improvement %}
<span class="glyphicon glyphicon-arrow-up"></span></b>
{% endif %}
</td>
<td>
{{ c.contender_z_score }} z
{% if c.contender_z_regression %}
{{ c.change }}
{% if c.regression %}
<span class="glyphicon glyphicon-arrow-down"></span></b>
{% endif %}
{% if c.contender_z_improvement %}
{% if c.improvement %}
<span class="glyphicon glyphicon-arrow-up"></span></b>
{% endif %}
</td>
<td>&nbsp;</td>
<td>{{ c.baseline }}</td>
<td>{{ c.contender }}</td>
</tr>
{% endfor %}
</tbody>
Expand Down
42 changes: 25 additions & 17 deletions conbench/templates/compare-list.html
Original file line number Diff line number Diff line change
Expand Up @@ -51,28 +51,43 @@
<div class="col-md-12">
<table id="benchmarks" class="table table-striped table-bordered table-hover">
<caption>
Comparisons
{% if comparisons %}
<span id="comparisons-tooltip" data-toggle="tooltip" data-html="true" data-placement="bottom" title="
Based on the z-score,
{{(100 * regressions / comparisons|length) | round(2) }}%
of these {{comparisons|length}} benchmarks were regressions,
and {{(100 * improvements / comparisons|length) | round(2) }}% were improvements."
>
Comparisons
<span class="glyphicon glyphicon-arrow-down"></span> <b>{{(100 * regressions / comparisons|length) | int}}%</b>
<span class="glyphicon glyphicon-arrow-up"></span> <b>{{(100 * improvements / comparisons|length) | int}}%</b>
</span>
{% else %}
Comparisons
{% endif %}
{% include 'units-tooltip.html' %}
</caption>
<thead>
<tr>
<th scope="col">Z-Score</th>
<th scope="col">Change</th>
<th scope="col">Batch</th>
<th scope="col">Benchmark</th>
<th scope="col">Baseline Z-Score</th>
<th scope="col">Contender Z-Score</th>
<th scope="col">Baseline</th>
<th scope="col">Contender</th>
</tr>
</thead>
<tbody>
{% for c in comparisons %}
<tr>
{% if c.contender is none %}
<td>---</td>
{% else %}
<td>{{ c.contender_z_score }}</td>
{% endif %}

<td>{{ c.change }}</td>

<td>
{% if type == "batch" %}
<div>{{ c.batch }}</div>
Expand All @@ -86,6 +101,7 @@
{% endif %}
{% endif %}
</td>

<td>
{% if c.contender is not none and c.baseline is not none %}
<a href="{{ c.compare_benchmarks_url }}">
Expand All @@ -96,18 +112,6 @@
{% endif %}
</td>

{% if c.baseline is none %}
<td>---</td>
{% else %}
<td>{{ c.baseline_z_score }}</td>
{% endif %}

{% if c.contender is none %}
<td>---</td>
{% else %}
<td>{{ c.contender_z_score }}</td>
{% endif %}

{% if c.baseline is none %}
<td>---</td>
{% else %}
Expand All @@ -134,12 +138,16 @@
var table = $('#benchmarks').dataTable( {
"responsive": true,
"order": [[0, 'asc']],
"columnDefs": [{ "orderable": false, "targets": [5, 6] }]
"columnDefs": [{ "orderable": false, "targets": [4, 5] }]
} );
new $.fn.dataTable.FixedHeader( table );

$(document).ready(function() {
$('#unit-tooltip').tooltip()
});
});

$(document).ready(function() {
$('#comparisons-tooltip').tooltip()
});
</script>
{% endblock %}
10 changes: 10 additions & 0 deletions conbench/templates/units-tooltip.html
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
<span id="unit-tooltip" data-toggle="tooltip" data-html="true" data-placement="bottom" title="
<ul style='list-style-type: none; padding: 0; margin: 0;'>
<li>Percent changed compares the baseline mean with the contender mean.
</ul>
<br>
<ul style='list-style-type: none; padding: 0; margin: 0;'>
<li>Z-score compares the contender mean with the entire distribution history of a given benchmark.
</ul>
<br>
<ul style='list-style-type: none; padding: 0; margin: 0;'>
<li>A negative percent changed or z-score is always a regression regardless of unit.
</ul>
Expand Down

0 comments on commit 8226f82

Please sign in to comment.