Tweak z-score UI (#79)

conbench · Jun 7, 2021 · 8226f82 · 8226f82
1 parent cbf1522
commit 8226f82
Show file tree

Hide file tree

Showing 9 changed files with 146 additions and 139 deletions.
diff --git a/conbench/api/_examples.py b/conbench/api/_examples.py
@@ -117,7 +117,7 @@ def _api_compare_entity(benchmark_ids, batch_ids, run_ids, batch, benchmark, tag
         "threshold": "5.000%",
         "regression": False,
         "improvement": False,
-        "deviations": "2.000",
+        "threshold_z": "2.000",
         "baseline_z_score": "0.000",
         "contender_z_score": "0.000",
         "baseline_z_regression": False,
@@ -155,7 +155,7 @@ def _api_compare_list(
             "threshold": "5.000%",
             "regression": False,
             "improvement": False,
-            "deviations": "2.000",
+            "threshold_z": "2.000",
             "baseline_z_score": "0.000",
             "contender_z_score": "0.000",
             "baseline_z_regression": False,
@@ -181,7 +181,7 @@ def _api_compare_list(
             "threshold": "5.000%",
             "regression": False,
             "improvement": False,
-            "deviations": "2.000",
+            "threshold_z": "2.000",
             "baseline_z_score": "0.000",
             "contender_z_score": "0.000",
             "baseline_z_regression": False,

diff --git a/conbench/api/compare.py b/conbench/api/compare.py
@@ -56,7 +56,7 @@ def get(self, compare_ids):
             schema:
               type: integer
           - in: query
-            name: deviations
+            name: threshold_z
             schema:
               type: integer
         tags:
@@ -68,9 +68,9 @@ def get(self, compare_ids):
         if threshold is not None:
             threshold = int(threshold)
 
-        deviations = f.request.args.get("deviations")
-        if deviations is not None:
-            deviations = int(deviations)
+        threshold_z = f.request.args.get("threshold_z")
+        if threshold_z is not None:
+            threshold_z = int(threshold_z)
 
         try:
             baseline_id, contender_id = compare_ids.split("...", 1)
@@ -92,14 +92,14 @@ def get(self, compare_ids):
                 baseline,
                 contender,
                 threshold,
-                deviations,
+                threshold_z,
             ).compare()
         else:
             return BenchmarkComparator(
                 baseline,
                 contender,
                 threshold,
-                deviations,
+                threshold_z,
             ).formatted()
 
 
@@ -134,7 +134,7 @@ def get(self, compare_ids):
             schema:
               type: integer
           - in: query
-            name: deviations
+            name: threshold_z
             schema:
               type: integer
         tags:
@@ -146,9 +146,9 @@ def get(self, compare_ids):
         if threshold is not None:
             threshold = int(threshold)
 
-        deviations = f.request.args.get("deviations")
-        if deviations is not None:
-            deviations = int(deviations)
+        threshold_z = f.request.args.get("threshold_z")
+        if threshold_z is not None:
+            threshold_z = int(threshold_z)
 
         try:
             baseline_id, contender_id = compare_ids.split("...", 1)
@@ -172,13 +172,13 @@ def get(self, compare_ids):
             result = BenchmarkListComparator(
                 pairs,
                 threshold,
-                deviations,
+                threshold_z,
             ).compare()
         else:
             result = BenchmarkListComparator(
                 pairs,
                 threshold,
-                deviations,
+                threshold_z,
             ).formatted()
 
         return f.jsonify(list(result))

diff --git a/conbench/app/compare.py b/conbench/app/compare.py
@@ -124,9 +124,9 @@ def _compare(self, params):
                 compare = f'{c["baseline_batch_id"]}...{c["contender_batch_id"]}'
                 c["compare_batches_url"] = f.url_for(view, compare_ids=compare)
 
-                if c["regression"]:
+                if c["contender_z_regression"]:
                     regressions += 1
-                if c["improvement"]:
+                if c["contender_z_improvement"]:
                     improvements += 1
 
         return comparisons, regressions, improvements

diff --git a/conbench/entities/_comparator.py b/conbench/entities/_comparator.py
@@ -3,8 +3,8 @@
 from ..units import formatter_for_unit
 
 
-THRESHOLD = 5.0  # percent
-DEVIATIONS = 2.0  # standard deviations
+CHANGE = 5.0  # percent changed threshold
+Z_SCORE = 2.0  # z-score threshold
 
 
 def fmt(value):
@@ -21,14 +21,14 @@ def _less_is_better(unit):
     return True
 
 
-def z_regression(z_score, deviations=None):
-    deviations = deviations if deviations else DEVIATIONS
-    return -z_score > deviations
+def z_regression(z_score, threshold_z=None):
+    threshold_z = Z_SCORE if threshold_z is None else threshold_z  # 0 is valid
+    return -z_score > threshold_z  # regression: z-score below -threshold
 
 
-def z_improvement(z_score, deviations=None):
-    deviations = deviations if deviations else DEVIATIONS
-    return z_score > deviations
+def z_improvement(z_score, threshold_z=None):
+    threshold_z = Z_SCORE if threshold_z is None else threshold_z  # 0 is valid
+    return z_score > threshold_z  # improvement: z-score above +threshold
 
 
 class BenchmarkResult:
@@ -56,11 +56,11 @@ def __init__(
 
 
 class BenchmarkComparator:
-    def __init__(self, baseline, contender, threshold=None, deviations=None):
+    def __init__(self, baseline, contender, threshold=None, threshold_z=None):
         self.baseline = BenchmarkResult(**baseline) if baseline else None
         self.contender = BenchmarkResult(**contender) if contender else None
-        self.threshold = float(threshold) if threshold is not None else THRESHOLD
-        self.deviations = float(deviations) if deviations is not None else DEVIATIONS
+        self.threshold = float(threshold) if threshold is not None else CHANGE
+        self.threshold_z = float(threshold_z) if threshold_z is not None else Z_SCORE
 
     @property
     def batch(self):
@@ -131,19 +131,19 @@ def contender_z_score(self):
 
     @property
     def baseline_z_regression(self):
-        return z_regression(self.baseline_z_score, self.deviations)
+        return z_regression(self.baseline_z_score, self.threshold_z)
 
     @property
     def baseline_z_improvement(self):
-        return z_improvement(self.baseline_z_score, self.deviations)
+        return z_improvement(self.baseline_z_score, self.threshold_z)
 
     @property
     def contender_z_regression(self):
-        return z_regression(self.contender_z_score, self.deviations)
+        return z_regression(self.contender_z_score, self.threshold_z)
 
     @property
     def contender_z_improvement(self):
-        return z_improvement(self.contender_z_score, self.deviations)
+        return z_improvement(self.contender_z_score, self.threshold_z)
 
     @property
     def tags(self):
@@ -164,7 +164,7 @@ def formatted(self):
             "threshold": fmt(self.threshold) + "%",
             "regression": self.regression,
             "improvement": self.improvement,
-            "deviations": fmt(self.deviations),
+            "threshold_z": fmt(self.threshold_z),
             "baseline_z_score": fmt(self.baseline_z_score),
             "contender_z_score": fmt(self.contender_z_score),
             "baseline_z_regression": self.baseline_z_regression,
@@ -194,7 +194,7 @@ def compare(self):
             "threshold": fmt(self.threshold),
             "regression": self.regression,
             "improvement": self.improvement,
-            "deviations": fmt(self.deviations),
+            "threshold_z": fmt(self.threshold_z),
             "baseline_z_score": fmt(self.baseline_z_score),
             "contender_z_score": fmt(self.contender_z_score),
             "baseline_z_regression": self.baseline_z_regression,
@@ -216,10 +216,10 @@ def compare(self):
 
 
 class BenchmarkListComparator:
-    def __init__(self, pairs, threshold=None, deviations=None):
+    def __init__(self, pairs, threshold=None, threshold_z=None):
         self.pairs = pairs
-        self.threshold = float(threshold) if threshold is not None else THRESHOLD
-        self.deviations = float(deviations) if deviations is not None else DEVIATIONS
+        self.threshold = float(threshold) if threshold is not None else CHANGE
+        self.threshold_z = float(threshold_z) if threshold_z is not None else Z_SCORE
 
     def formatted(self):
         for pair in self.pairs.values():
@@ -228,7 +228,7 @@ def formatted(self):
                 baseline,
                 contender,
                 self.threshold,
-                self.deviations,
+                self.threshold_z,
             ).formatted()
 
     def compare(self):
@@ -238,5 +238,5 @@ def compare(self):
                 baseline,
                 contender,
                 self.threshold,
-                self.deviations,
+                self.threshold_z,
             ).compare()
diff --git a/conbench/templates/compare-entity.html b/conbench/templates/compare-entity.html
@@ -39,46 +39,35 @@
             <caption>{% include 'units-tooltip.html' %}</caption>
                 <thead>
                     <tr>
+                        <th width="25%" scope="col">Z-Score</th>
+                        <th width="25%" scope="col">Change</th>
                         <th width="25%" scope="col">Baseline</th>
                         <th width="25%" scope="col">Contender</th>
-                        <th width="25%" scope="col">Change</th>
                     </tr>
                 </thead>
                 <tbody>
                     {% for c in comparisons %}
-                    <tr>
-                         <td>{{ c.baseline }}</td>
-                         <td>{{ c.contender }}</td>
-                         <td>
-                           {{ c.change }}
-                           {% if c.regression %}
-                           <span class="glyphicon glyphicon-arrow-down"></span></b>
-                           {% endif %}
-                           {% if c.improvement %}
-                           <span class="glyphicon glyphicon-arrow-up"></span></b>
-                           {% endif %}
-                         </td>
-                    </tr>
                     <tr>
                          <td>
-                           {{ c.baseline_z_score }} z
-                           {% if c.baseline_z_regression %}
+                           {{ c.contender_z_score }} z
+                           {% if c.contender_z_regression %}
                            <span class="glyphicon glyphicon-arrow-down"></span></b>
                            {% endif %}
-                           {% if c.baseline_z_improvement %}
+                           {% if c.contender_z_improvement %}
                            <span class="glyphicon glyphicon-arrow-up"></span></b>
                            {% endif %}
                          </td>
                          <td>
-                           {{ c.contender_z_score }} z
-                           {% if c.contender_z_regression %}
+                           {{ c.change }}
+                           {% if c.regression %}
                            <span class="glyphicon glyphicon-arrow-down"></span></b>
                            {% endif %}
-                           {% if c.contender_z_improvement %}
+                           {% if c.improvement %}
                            <span class="glyphicon glyphicon-arrow-up"></span></b>
                            {% endif %}
                          </td>
-                         <td>&nbsp;</td>
+                         <td>{{ c.baseline }}</td>
+                         <td>{{ c.contender }}</td>
                     </tr>
                     {% endfor %}
                 </tbody>

diff --git a/conbench/templates/compare-list.html b/conbench/templates/compare-list.html
@@ -51,28 +51,43 @@
         <div class="col-md-12">
             <table id="benchmarks" class="table table-striped table-bordered table-hover">
               <caption>
-                Comparisons
                 {% if comparisons %}
+                <span id="comparisons-tooltip" data-toggle="tooltip" data-html="true" data-placement="bottom" title="
+                Based on the z-score,
+                {{(100 * regressions / comparisons|length) | round(2) }}%
+                of these {{comparisons|length}} benchmarks were regressions,
+                and {{(100 * improvements / comparisons|length) | round(2) }}% were improvements."
+                >
+                Comparisons
                 <span class="glyphicon glyphicon-arrow-down"></span> <b>{{(100 * regressions / comparisons|length) | int}}%</b>
                 <span class="glyphicon glyphicon-arrow-up"></span> <b>{{(100 * improvements / comparisons|length) | int}}%</b>
+                </span>
+                {% else %}
+                Comparisons
                 {% endif %}
                 {% include 'units-tooltip.html' %}
               </caption>
                 <thead>
                     <tr>
+                        <th scope="col">Z-Score</th>
                         <th scope="col">Change</th>
                         <th scope="col">Batch</th>
                         <th scope="col">Benchmark</th>
-                        <th scope="col">Baseline Z-Score</th>
-                        <th scope="col">Contender Z-Score</th>
                         <th scope="col">Baseline</th>
                         <th scope="col">Contender</th>
                     </tr>
                 </thead>
                 <tbody>
                     {% for c in comparisons %}
                     <tr>
+                         {% if c.contender is none %}
+                           <td>---</td>
+                         {% else %}
+                           <td>{{ c.contender_z_score }}</td>
+                         {% endif %}
+
                          <td>{{ c.change }}</td>
+
                          <td>
                            {% if type == "batch" %}
                              <div>{{ c.batch }}</div>
@@ -86,6 +101,7 @@
                              {% endif %}
                            {% endif %}
                          </td>
+
                          <td>
                            {% if c.contender is not none and c.baseline is not none %}
                              <a href="{{ c.compare_benchmarks_url }}">
@@ -96,18 +112,6 @@
                            {% endif %}
                          </td>
 
-                         {% if c.baseline is none %}
-                           <td>---</td>
-                         {% else %}
-                           <td>{{ c.baseline_z_score }}</td>
-                         {% endif %}
-
-                         {% if c.contender is none %}
-                           <td>---</td>
-                         {% else %}
-                           <td>{{ c.contender_z_score }}</td>
-                         {% endif %}
-
                          {% if c.baseline is none %}
                            <td>---</td>
                          {% else %}
@@ -134,12 +138,16 @@
   var table = $('#benchmarks').dataTable( {
       "responsive": true,
       "order": [[0, 'asc']],
-      "columnDefs": [{ "orderable": false, "targets": [5, 6] }]
+      "columnDefs": [{ "orderable": false, "targets": [4, 5] }]
   } );
   new $.fn.dataTable.FixedHeader( table );
 
   $(document).ready(function() {
     $('#unit-tooltip').tooltip()
-   });
+  });
+
+  $(document).ready(function() {
+    $('#comparisons-tooltip').tooltip()
+  });
   </script>
 {% endblock %}
diff --git a/conbench/templates/units-tooltip.html b/conbench/templates/units-tooltip.html
@@ -1,4 +1,14 @@
 <span id="unit-tooltip" data-toggle="tooltip" data-html="true" data-placement="bottom" title="
+<ul style='list-style-type: none; padding: 0; margin: 0;'>
+<li>Percent changed compares the baseline mean with the contender mean.
+</ul>
+<br>
+
+<ul style='list-style-type: none; padding: 0; margin: 0;'>
+<li>Z-score compares the contender mean with the entire distribution history of a given benchmark.
+</ul>
+<br>
+
 <ul style='list-style-type: none; padding: 0; margin: 0;'>
 <li>A negative percent changed or z-score is always a regression regardless of unit.
 </ul>