From dc4ebd9ce03cbd183ee9f12d41dde744fecaf433 Mon Sep 17 00:00:00 2001
From: Joseph Berry <joberry@redhat.com>
Date: Thu, 27 Nov 2025 16:49:47 +0200
Subject: [PATCH 1/4] fix html rendering when multiple percentiles collapse to
 the same value

Signed-off-by: Joseph Berry <joberry@redhat.com>
---
 src/guidellm/benchmark/outputs/html.py   |  49 ++++++-
 tests/unit/benchmark/test_html_output.py | 162 +++++++++++++++++++++++
 2 files changed, 210 insertions(+), 1 deletion(-)
 create mode 100644 tests/unit/benchmark/test_html_output.py

diff --git a/src/guidellm/benchmark/outputs/html.py b/src/guidellm/benchmark/outputs/html.py
index 34cf71073..909a85da8 100644
--- a/src/guidellm/benchmark/outputs/html.py
+++ b/src/guidellm/benchmark/outputs/html.py
@@ -29,7 +29,7 @@
     GenerativeBenchmark,
     GenerativeBenchmarksReport,
 )
-from guidellm.schemas import DistributionSummary
+from guidellm.schemas import DistributionSummary, Percentiles
 from guidellm.settings import settings
 from guidellm.utils import camelize_str, recursive_key_update
 from guidellm.utils.text import load_text
@@ -190,6 +190,24 @@ def percentile_rows(self) -> list[dict[str, str | float]]:
             filter(lambda row: row["percentile"] in ["p50", "p90", "p95", "p99"], rows)
         )
 
+    def model_dump(self, **kwargs) -> dict:
+        """
+        Override model_dump to filter duplicate consecutive percentile values.
+
+        This prevents visualization errors when distributions have limited data
+        points causing multiple percentiles to collapse to the same value.
+
+        :param kwargs: Arguments to pass to parent model_dump
+        :return: Dictionary with filtered percentiles
+        """
+        data = super().model_dump(**kwargs)
+
+        if "percentiles" in data and data["percentiles"]:
+            filtered_percentiles = _filter_duplicate_percentiles(data["percentiles"])
+            data["percentiles"] = filtered_percentiles
+
+        return data
+
     @classmethod
     def from_distribution_summary(
         cls, distribution: DistributionSummary
@@ -222,6 +240,35 @@ def _create_html_report(js_data: dict[str, str], output_path: Path) -> Path:
     return output_path
 
 
+def _filter_duplicate_percentiles(percentiles: dict[str, float]) -> dict[str, float]:
+    """
+    Filter out consecutive duplicate percentile values.
+
+    When distributions have very few data points, multiple percentiles can have
+    the same value, which causes visualization libraries to fail. This function
+    keeps only the first occurrence of consecutive duplicate values.
+
+    :param percentiles: Dictionary of percentile names to values
+    :return: Filtered percentiles dictionary with no consecutive duplicates
+    """
+    if not percentiles:
+        return percentiles
+    
+    percentile_order = list(Percentiles.model_fields.keys())
+
+    filtered = {}
+    previous_value = None
+
+    for key in percentile_order:
+        if key in percentiles:
+            current_value = percentiles[key]
+            if previous_value is None or current_value != previous_value:
+                filtered[key] = current_value
+                previous_value = current_value
+
+    return filtered
+
+
 def _inject_data(js_data: dict[str, str], html: str) -> str:
     """
     Inject JavaScript data into HTML head section.
diff --git a/tests/unit/benchmark/test_html_output.py b/tests/unit/benchmark/test_html_output.py
new file mode 100644
index 000000000..67bda8a64
--- /dev/null
+++ b/tests/unit/benchmark/test_html_output.py
@@ -0,0 +1,162 @@
+from guidellm.benchmark.outputs.html import _filter_duplicate_percentiles
+from guidellm.schemas import Percentiles
+
+
+def test_filter_all_same_values():
+    """Test filtering when all percentiles have the same value."""
+    percentiles = {
+        "p001": 15.288091352804853,
+        "p01": 15.288091352804853,
+        "p05": 15.288091352804853,
+        "p10": 15.288091352804853,
+        "p25": 15.288091352804853,
+        "p50": 15.288091352804853,
+        "p75": 15.288091352804853,
+        "p90": 15.288091352804853,
+        "p95": 15.288091352804853,
+        "p99": 15.288091352804853,
+        "p999": 15.288091352804853,
+    }
+
+    filtered = _filter_duplicate_percentiles(percentiles)
+
+    # Should only keep the first one
+    assert filtered == {"p001": 15.288091352804853}
+
+
+def test_filter_consecutive_duplicates():
+    """Test filtering when some consecutive percentiles have the same value."""
+    percentiles = {
+        "p001": 15.288091352804853,
+        "p01": 15.288091352804853,
+        "p05": 15.288091352804853,
+        "p10": 15.288091352804853,
+        "p25": 15.288091352804853,
+        "p50": 16.41327511776994,  # Different value
+        "p75": 16.41327511776994,
+        "p90": 17.03541629998259,  # Different value
+        "p95": 17.03541629998259,
+        "p99": 17.03541629998259,
+        "p999": 17.03541629998259,
+    }
+
+    filtered = _filter_duplicate_percentiles(percentiles)
+
+    # Should keep first of each group
+    assert filtered == {
+        "p001": 15.288091352804853,
+        "p50": 16.41327511776994,
+        "p90": 17.03541629998259,
+    }
+
+
+def test_no_duplicates():
+    """Test that unique values are kept while duplicates at both ends collapse."""
+    percentiles = {
+        "p001": 13.181080445834912,
+        "p01": 13.181080445834912,  # Same as p001
+        "p05": 13.530595573836457,  # Different
+        "p10": 13.843972502554365,
+        "p25": 14.086376978251748,
+        "p50": 14.403258051191058,
+        "p75": 14.738608817056042,
+        "p90": 15.18136631856698,
+        "p95": 15.7213110894772,
+        "p99": 15.7213110894772,  # Same as p95
+        "p999": 15.7213110894772,  # Same as p99
+    }
+
+    filtered = _filter_duplicate_percentiles(percentiles)
+
+    assert filtered == {
+        "p001": 13.181080445834912,
+        "p05": 13.530595573836457,
+        "p10": 13.843972502554365,
+        "p25": 14.086376978251748,
+        "p50": 14.403258051191058,
+        "p75": 14.738608817056042,
+        "p90": 15.18136631856698,
+        "p95": 15.7213110894772,
+    }
+
+
+def test_empty_percentiles():
+    """Test with empty percentiles dictionary."""
+    filtered = _filter_duplicate_percentiles({})
+    assert filtered == {}
+
+
+def test_single_percentile():
+    """Test with only one percentile."""
+    percentiles = {"p50": 14.403258051191058}
+    filtered = _filter_duplicate_percentiles(percentiles)
+    assert filtered == {"p50": 14.403258051191058}
+
+
+def test_two_different_values():
+    """Test with two different values."""
+    percentiles = {
+        "p25": 14.086376978251748,
+        "p50": 14.403258051191058,
+    }
+    filtered = _filter_duplicate_percentiles(percentiles)
+    assert filtered == percentiles
+
+
+def test_partial_percentiles():
+    """Test that order is maintained even with partial percentiles."""
+    percentiles = {
+        "p50": 16.41327511776994,
+        "p10": 15.288091352804853,
+        "p90": 17.03541629998259,
+    }
+
+    filtered = _filter_duplicate_percentiles(percentiles)
+
+    # Should maintain order from percentile_order list
+    assert list(filtered.keys()) == ["p10", "p50", "p90"]
+
+
+def test_model_dump_filters_duplicates():
+    """Test that model_dump applies percentile filtering."""
+    from guidellm.benchmark.outputs.html import _TabularDistributionSummary
+
+    # Create a distribution with duplicate percentiles (typical of small datasets)
+    dist = _TabularDistributionSummary(
+        mean=15.5,
+        median=15.288091352804853,
+        mode=15.288091352804853,
+        variance=0.1,
+        std_dev=0.316,
+        min=15.288091352804853,
+        max=17.03541629998259,
+        count=3,
+        total_sum=46.5,
+        percentiles=Percentiles(
+            p001=15.288091352804853,
+            p01=15.288091352804853,
+            p05=15.288091352804853,
+            p10=15.288091352804853,
+            p25=15.288091352804853,
+            p50=16.41327511776994,
+            p75=16.41327511776994,
+            p90=17.03541629998259,
+            p95=17.03541629998259,
+            p99=17.03541629998259,
+            p999=17.03541629998259,
+        ),
+    )
+
+    data = dist.model_dump()
+
+    # Check that percentiles were filtered
+    assert data["percentiles"] == {
+        "p001": 15.288091352804853,
+        "p50": 16.41327511776994,
+        "p90": 17.03541629998259,
+    }
+
+    # Ensure other fields remain unchanged
+    assert data["mean"] == 15.5
+    assert data["median"] == 15.288091352804853
+    assert data["count"] == 3

From 92389345ea2d926a1f28e9304d62c883746f0b5d Mon Sep 17 00:00:00 2001
From: Joseph Berry <joberry@redhat.com>
Date: Thu, 27 Nov 2025 22:52:03 +0200
Subject: [PATCH 2/4] add AI authorship comment to test_html_output.py

Signed-off-by: Joseph Berry <joberry@redhat.com>
---
 tests/unit/benchmark/test_html_output.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/unit/benchmark/test_html_output.py b/tests/unit/benchmark/test_html_output.py
index 67bda8a64..efd6727f9 100644
--- a/tests/unit/benchmark/test_html_output.py
+++ b/tests/unit/benchmark/test_html_output.py
@@ -1,3 +1,4 @@
+## WRITTEN BY AI ##
 from guidellm.benchmark.outputs.html import _filter_duplicate_percentiles
 from guidellm.schemas import Percentiles
 

From 175611c9500c78d384b9023afd463a52ec661078 Mon Sep 17 00:00:00 2001
From: Joseph Berry <joberry@redhat.com>
Date: Sun, 7 Dec 2025 10:50:59 +0200
Subject: [PATCH 3/4] fix: update duplicate percentile filtering to retain
 largest values for accuracy

Signed-off-by: Joseph Berry <joberry@redhat.com>
---
 src/guidellm/benchmark/outputs/html.py   | 12 ++++++++----
 tests/unit/benchmark/test_html_output.py | 25 ++++++++++++------------
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/src/guidellm/benchmark/outputs/html.py b/src/guidellm/benchmark/outputs/html.py
index 909a85da8..318d9d4de 100644
--- a/src/guidellm/benchmark/outputs/html.py
+++ b/src/guidellm/benchmark/outputs/html.py
@@ -246,27 +246,31 @@ def _filter_duplicate_percentiles(percentiles: dict[str, float]) -> dict[str, fl
 
     When distributions have very few data points, multiple percentiles can have
     the same value, which causes visualization libraries to fail. This function
-    keeps only the first occurrence of consecutive duplicate values.
+    keeps only the highest percentile in each run of consecutive duplicate
+    values, so each retained key marks the upper end of the percentile range
+    that shares that value.
 
     :param percentiles: Dictionary of percentile names to values
     :return: Filtered percentiles dictionary with no consecutive duplicates
     """
     if not percentiles:
         return percentiles
-    
+
     percentile_order = list(Percentiles.model_fields.keys())
 
+    # Iterate in reverse to keep the largest percentile for each value
     filtered = {}
     previous_value = None
 
-    for key in percentile_order:
+    for key in reversed(percentile_order):
         if key in percentiles:
             current_value = percentiles[key]
             if previous_value is None or current_value != previous_value:
                 filtered[key] = current_value
                 previous_value = current_value
 
-    return filtered
+    # Restore original order
+    return {key: filtered[key] for key in percentile_order if key in filtered}
 
 
 def _inject_data(js_data: dict[str, str], html: str) -> str:
diff --git a/tests/unit/benchmark/test_html_output.py b/tests/unit/benchmark/test_html_output.py
index efd6727f9..f5ce146c8 100644
--- a/tests/unit/benchmark/test_html_output.py
+++ b/tests/unit/benchmark/test_html_output.py
@@ -21,8 +21,8 @@ def test_filter_all_same_values():
 
     filtered = _filter_duplicate_percentiles(percentiles)
 
-    # Should only keep the first one
-    assert filtered == {"p001": 15.288091352804853}
+    # Should only keep the largest (p999) for mathematical accuracy
+    assert filtered == {"p999": 15.288091352804853}
 
 
 def test_filter_consecutive_duplicates():
@@ -43,11 +43,11 @@ def test_filter_consecutive_duplicates():
 
     filtered = _filter_duplicate_percentiles(percentiles)
 
-    # Should keep first of each group
+    # Should keep largest of each group for mathematical accuracy
     assert filtered == {
-        "p001": 15.288091352804853,
-        "p50": 16.41327511776994,
-        "p90": 17.03541629998259,
+        "p25": 15.288091352804853,
+        "p75": 16.41327511776994,
+        "p999": 17.03541629998259,
     }
 
 
@@ -69,15 +69,16 @@ def test_no_duplicates():
 
     filtered = _filter_duplicate_percentiles(percentiles)
 
+    # Should keep largest of each duplicate group (p01 instead of p001, p999 instead of p95)
     assert filtered == {
-        "p001": 13.181080445834912,
+        "p01": 13.181080445834912,
         "p05": 13.530595573836457,
         "p10": 13.843972502554365,
         "p25": 14.086376978251748,
         "p50": 14.403258051191058,
         "p75": 14.738608817056042,
         "p90": 15.18136631856698,
-        "p95": 15.7213110894772,
+        "p999": 15.7213110894772,
     }
 
 
@@ -150,11 +151,11 @@ def test_model_dump_filters_duplicates():
 
     data = dist.model_dump()
 
-    # Check that percentiles were filtered
+    # Check that percentiles were filtered, keeping largest of each group
     assert data["percentiles"] == {
-        "p001": 15.288091352804853,
-        "p50": 16.41327511776994,
-        "p90": 17.03541629998259,
+        "p25": 15.288091352804853,
+        "p75": 16.41327511776994,
+        "p999": 17.03541629998259,
     }
 
     # Ensure other fields remain unchanged

From 65a60233de329000d9b7ac3802d1ba8397029d52 Mon Sep 17 00:00:00 2001
From: Joseph Berry <joberry@redhat.com>
Date: Sun, 7 Dec 2025 12:04:31 +0200
Subject: [PATCH 4/4] fix: line length E501

Signed-off-by: Joseph Berry <joberry@redhat.com>
---
 tests/unit/benchmark/test_html_output.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/benchmark/test_html_output.py b/tests/unit/benchmark/test_html_output.py
index f5ce146c8..39c46a763 100644
--- a/tests/unit/benchmark/test_html_output.py
+++ b/tests/unit/benchmark/test_html_output.py
@@ -69,7 +69,7 @@ def test_no_duplicates():
 
     filtered = _filter_duplicate_percentiles(percentiles)
 
-    # Should keep largest of each duplicate group (p01 instead of p001, p999 instead of p95)
+    # Should keep largest of each duplicate group (e.g. p999 instead of p95)
     assert filtered == {
         "p01": 13.181080445834912,
         "p05": 13.530595573836457,