Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
7bde782
Initial plan
Copilot Nov 19, 2025
1e08fd0
Implement fixes for vllm bench sweep serve parameters
Copilot Nov 19, 2025
96ed69e
Exclude 'name' field from command generation and as_text output
Copilot Nov 19, 2025
fa852b4
Address code review feedback
Copilot Nov 19, 2025
cb81309
Use _benchmark_name instead of name and validate uniqueness
Copilot Nov 19, 2025
dec4fe1
Improve nested dict test with compilation_config examples
Copilot Nov 19, 2025
61ee392
format
ProExpertProg Nov 19, 2025
f8e5e9a
Add support for !=
ProExpertProg Nov 19, 2025
74a5ab0
Add comprehensive tests for plot filters including 'inf' edge case
Copilot Nov 19, 2025
a211e91
Add --fig-name parameter to control figure output filename
Copilot Nov 19, 2025
ae1650b
Change fig name behavior: use FIGURE as default and append group data…
Copilot Nov 19, 2025
fc1fc66
Make fig_name non-optional with default "FIGURE"
Copilot Nov 19, 2025
cee7dfa
Address code review feedback
Copilot Nov 24, 2025
01864b8
Simplify inf/nan conversion and remove default values from internal f…
Copilot Nov 25, 2025
e2ea79e
Add CLI args for error bars, figure width, and resolution
Copilot Nov 27, 2025
10eb288
Change default DPI to 300 and sort curve names alphabetically
Copilot Nov 27, 2025
b0bdf28
Remove lazy imports from test_plot_filters.py and rename fig_width to…
Copilot Nov 28, 2025
59d9982
Remove string inf/nan test cases since values are coerced to float
Copilot Nov 28, 2025
1de67c5
Apply suggestions from code review
ProExpertProg Dec 2, 2025
3b51e40
Use pytest.raises instead of assert False for exception testing
Copilot Dec 2, 2025
6c6bc34
Merge branch 'main' into copilot/add-bench-params-dict-support
ProExpertProg Dec 2, 2025
a371c9b
Apply suggestion from @ProExpertProg
ProExpertProg Dec 2, 2025
9d1e39e
Add support for !=
ProExpertProg Dec 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
257 changes: 257 additions & 0 deletions tests/benchmarks/test_param_sweep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json
import tempfile
from pathlib import Path

import pytest

from vllm.benchmarks.sweep.param_sweep import ParameterSweep, ParameterSweepItem


class TestParameterSweepItem:
"""Test ParameterSweepItem functionality."""

@pytest.mark.parametrize(
"input_dict,expected",
[
(
{"compilation_config.use_inductor_graph_partition": False},
"--compilation-config.use_inductor_graph_partition=false",
),
(
{"compilation_config.use_inductor_graph_partition": True},
"--compilation-config.use_inductor_graph_partition=true",
),
(
{"compilation_config.use_inductor": False},
"--compilation-config.use_inductor=false",
),
(
{"compilation_config.use_inductor": True},
"--compilation-config.use_inductor=true",
),
],
)
def test_nested_boolean_params(self, input_dict, expected):
"""Test that nested boolean params use =true/false syntax."""
item = ParameterSweepItem.from_record(input_dict)
cmd = item.apply_to_cmd(["vllm", "serve", "model"])
assert expected in cmd

@pytest.mark.parametrize(
"input_dict,expected",
[
({"enable_prefix_caching": False}, "--no-enable-prefix-caching"),
({"enable_prefix_caching": True}, "--enable-prefix-caching"),
({"disable_log_stats": False}, "--no-disable-log-stats"),
({"disable_log_stats": True}, "--disable-log-stats"),
],
)
def test_non_nested_boolean_params(self, input_dict, expected):
"""Test that non-nested boolean params use --no- prefix."""
item = ParameterSweepItem.from_record(input_dict)
cmd = item.apply_to_cmd(["vllm", "serve", "model"])
assert expected in cmd

@pytest.mark.parametrize(
"compilation_config",
[
{"cudagraph_mode": "full", "mode": 2, "use_inductor_graph_partition": True},
{
"cudagraph_mode": "piecewise",
"mode": 3,
"use_inductor_graph_partition": False,
},
],
)
def test_nested_dict_value(self, compilation_config):
"""Test that nested dict values are serialized as JSON."""
item = ParameterSweepItem.from_record(
{"compilation_config": compilation_config}
)
cmd = item.apply_to_cmd(["vllm", "serve", "model"])
assert "--compilation-config" in cmd
# The dict should be JSON serialized
idx = cmd.index("--compilation-config")
assert json.loads(cmd[idx + 1]) == compilation_config

@pytest.mark.parametrize(
"input_dict,expected_key,expected_value",
[
({"model": "test-model"}, "--model", "test-model"),
({"max_tokens": 100}, "--max-tokens", "100"),
({"temperature": 0.7}, "--temperature", "0.7"),
],
)
def test_string_and_numeric_values(self, input_dict, expected_key, expected_value):
"""Test that string and numeric values are handled correctly."""
item = ParameterSweepItem.from_record(input_dict)
cmd = item.apply_to_cmd(["vllm", "serve"])
assert expected_key in cmd
assert expected_value in cmd

@pytest.mark.parametrize(
"input_dict,expected_key,key_idx_offset",
[
({"max_tokens": 200}, "--max-tokens", 1),
({"enable_prefix_caching": False}, "--no-enable-prefix-caching", 0),
],
)
def test_replace_existing_parameter(self, input_dict, expected_key, key_idx_offset):
"""Test that existing parameters in cmd are replaced."""
item = ParameterSweepItem.from_record(input_dict)

if key_idx_offset == 1:
# Key-value pair
cmd = item.apply_to_cmd(["vllm", "serve", "--max-tokens", "100", "model"])
assert expected_key in cmd
idx = cmd.index(expected_key)
assert cmd[idx + 1] == "200"
assert "100" not in cmd
else:
# Boolean flag
cmd = item.apply_to_cmd(
["vllm", "serve", "--enable-prefix-caching", "model"]
)
assert expected_key in cmd
assert "--enable-prefix-caching" not in cmd


class TestParameterSweep:
"""Test ParameterSweep functionality."""

def test_from_records_list(self):
"""Test creating ParameterSweep from a list of records."""
records = [
{"max_tokens": 100, "temperature": 0.7},
{"max_tokens": 200, "temperature": 0.9},
]
sweep = ParameterSweep.from_records(records)
assert len(sweep) == 2
assert sweep[0]["max_tokens"] == 100
assert sweep[1]["max_tokens"] == 200

def test_read_from_dict(self):
"""Test creating ParameterSweep from a dict format."""
data = {
"experiment1": {"max_tokens": 100, "temperature": 0.7},
"experiment2": {"max_tokens": 200, "temperature": 0.9},
}
sweep = ParameterSweep.read_from_dict(data)
assert len(sweep) == 2

# Check that items have the _benchmark_name field
names = {item["_benchmark_name"] for item in sweep}
assert names == {"experiment1", "experiment2"}

# Check that parameters are preserved
for item in sweep:
if item["_benchmark_name"] == "experiment1":
assert item["max_tokens"] == 100
assert item["temperature"] == 0.7
elif item["_benchmark_name"] == "experiment2":
assert item["max_tokens"] == 200
assert item["temperature"] == 0.9

def test_read_json_list_format(self):
"""Test reading JSON file with list format."""
records = [
{"max_tokens": 100, "temperature": 0.7},
{"max_tokens": 200, "temperature": 0.9},
]

with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
json.dump(records, f)
temp_path = Path(f.name)

try:
sweep = ParameterSweep.read_json(temp_path)
assert len(sweep) == 2
assert sweep[0]["max_tokens"] == 100
assert sweep[1]["max_tokens"] == 200
finally:
temp_path.unlink()

def test_read_json_dict_format(self):
"""Test reading JSON file with dict format."""
data = {
"experiment1": {"max_tokens": 100, "temperature": 0.7},
"experiment2": {"max_tokens": 200, "temperature": 0.9},
}

with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
json.dump(data, f)
temp_path = Path(f.name)

try:
sweep = ParameterSweep.read_json(temp_path)
assert len(sweep) == 2

# Check that items have the _benchmark_name field
names = {item["_benchmark_name"] for item in sweep}
assert names == {"experiment1", "experiment2"}
finally:
temp_path.unlink()

def test_unique_benchmark_names_validation(self):
"""Test that duplicate _benchmark_name values raise an error."""
# Test with duplicate names in list format
records = [
{"_benchmark_name": "exp1", "max_tokens": 100},
{"_benchmark_name": "exp1", "max_tokens": 200},
]

with pytest.raises(ValueError, match="Duplicate _benchmark_name values"):
ParameterSweep.from_records(records)

def test_unique_benchmark_names_multiple_duplicates(self):
"""Test validation with multiple duplicate names."""
records = [
{"_benchmark_name": "exp1", "max_tokens": 100},
{"_benchmark_name": "exp1", "max_tokens": 200},
{"_benchmark_name": "exp2", "max_tokens": 300},
{"_benchmark_name": "exp2", "max_tokens": 400},
]

with pytest.raises(ValueError, match="Duplicate _benchmark_name values"):
ParameterSweep.from_records(records)

def test_no_benchmark_names_allowed(self):
"""Test that records without _benchmark_name are allowed."""
records = [
{"max_tokens": 100, "temperature": 0.7},
{"max_tokens": 200, "temperature": 0.9},
]
sweep = ParameterSweep.from_records(records)
assert len(sweep) == 2

def test_mixed_benchmark_names_allowed(self):
"""Test that mixing records with and without _benchmark_name is allowed."""
records = [
{"_benchmark_name": "exp1", "max_tokens": 100},
{"max_tokens": 200, "temperature": 0.9},
]
sweep = ParameterSweep.from_records(records)
assert len(sweep) == 2


class TestParameterSweepItemKeyNormalization:
"""Test key normalization in ParameterSweepItem."""

def test_underscore_to_hyphen_conversion(self):
"""Test that underscores are converted to hyphens in CLI."""
item = ParameterSweepItem.from_record({"max_tokens": 100})
cmd = item.apply_to_cmd(["vllm", "serve"])
assert "--max-tokens" in cmd

def test_nested_key_preserves_suffix(self):
"""Test that nested keys preserve the suffix format."""
# The suffix after the dot should preserve underscores
item = ParameterSweepItem.from_record(
{"compilation_config.some_nested_param": "value"}
)
cmd = item.apply_to_cmd(["vllm", "serve"])
# The prefix (compilation_config) gets converted to hyphens,
# but the suffix (some_nested_param) is preserved
assert any("compilation-config.some_nested_param" in arg for arg in cmd)
Loading