Merged
22 commits
ace9f11
Updates for scenarios and benchmarking entrypoints to reenable them f…
markurtz Oct 16, 2025
6a93c56
Initial state for new progress output
markurtz Oct 16, 2025
081363e
Intermediate working state for refactoring progress and output
markurtz Oct 27, 2025
32b1cf2
Updated and functional state with e2e working for new stats calculati…
markurtz Oct 31, 2025
c5ae813
Fixes from reviews and types/style
markurtz Nov 4, 2025
74cf8ea
Fixes from reviews
markurtz Nov 4, 2025
9791b90
update pylock
markurtz Nov 4, 2025
d3c1780
Update src/guidellm/benchmark/entrypoints.py
markurtz Nov 4, 2025
c51528c
fix typing, remove dataset preprocessing until it's migrated
markurtz Nov 4, 2025
55ca8d0
Further fixes for precommit and formatting/typing
markurtz Nov 4, 2025
52ca42a
Fix unit tests for refactor and schemas package
markurtz Nov 5, 2025
b07d068
minor fix from review
markurtz Nov 5, 2025
f04d3c9
Fixes for non streaming request pathways
markurtz Nov 6, 2025
d2a8faf
Rework how warmup, cooldown, and rampup works due to issues identifie…
markurtz Nov 7, 2025
106f9c9
update lock file
markurtz Nov 7, 2025
b9aff65
Lock with linux platform set
jaredoconnell Nov 7, 2025
52b6fe2
Fixes for issues with output overrides as well as errors in cooldown,…
markurtz Nov 12, 2025
df6c6e5
Fix for output_path override not being respected
markurtz Nov 12, 2025
3591fd7
Fixes for request formatter not being overwritten correctly from scen…
markurtz Nov 12, 2025
4520425
e2e test fixes from refactor
markurtz Nov 12, 2025
5e5b2c1
Bug fix for request-formatter-kwargs cli ignored if request-type not …
markurtz Nov 12, 2025
1f5edff
updates from review for more thorough comments and typo fixes
markurtz Nov 13, 2025
2,856 changes: 371 additions & 2,485 deletions pylock.toml

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
@@ -69,6 +69,7 @@ dependencies = [
"pyyaml>=6.0.0",
"rich",
"sanic",
"tabulate",
"transformers",
"uvloop>=0.18",
"torch",
@@ -129,6 +130,7 @@ dev = [
"mdformat-gfm~=0.3.6",

# type-checking
"pandas-stubs",
"types-PyYAML~=6.0.1",
"types-requests~=2.32.0",
"types-toml",
…
273 changes: 84 additions & 189 deletions src/guidellm/__main__.py
@@ -45,26 +45,12 @@
reimport_benchmarks_report,
)
from guidellm.mock_server import MockServer, MockServerConfig
from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
from guidellm.scheduler import StrategyType
from guidellm.schemas import GenerativeRequestType
from guidellm.settings import print_config
from guidellm.utils import Console, DefaultGroupHandler, get_literal_vals
from guidellm.utils import cli as cli_tools

__all__ = [
"STRATEGY_PROFILE_CHOICES",
"benchmark",
"cli",
"config",
"dataset",
"decode_escaped_str",
"from_file",
"mock_server",
"preprocess",
"run",
]

STRATEGY_PROFILE_CHOICES: list[str] = list(get_literal_vals(ProfileType | StrategyType))
"""Available strategy and profile type choices for benchmark execution."""

@@ -157,9 +143,8 @@ def benchmark():
)
@click.option(
"--rate",
type=str,
callback=cli_tools.parse_list_floats,
multiple=False,
multiple=True,
default=BenchmarkGenerativeTextArgs.get_default("rate"),
help=(
"Benchmark rate(s) to test. Meaning depends on profile: "
@@ -256,7 +241,7 @@ def benchmark():
help="Number of worker processes for data loading.",
)
@click.option(
"--dataloader_kwargs",
"--dataloader-kwargs",
default=BenchmarkGenerativeTextArgs.get_default("dataloader_kwargs"),
callback=cli_tools.parse_json,
help="JSON string of arguments to pass to the dataloader constructor.",
@@ -269,58 +254,85 @@
)
# Output configuration
@click.option(
"--output-path",
type=click.Path(),
default=BenchmarkGenerativeTextArgs.get_default("output_path"),
help=(
"Path to save output files. Can be a directory or file. "
"If a file, saves that format; mismatched formats save to parent directory."
),
"--output-dir",
type=click.Path(file_okay=False, dir_okay=True, path_type=Path),
default=BenchmarkGenerativeTextArgs.get_default("output_dir"),
help="The directory path to save file output types in",
)
@click.option(
"--output-formats",
"--outputs",
callback=cli_tools.parse_list,
multiple=True,
type=str,
default=BenchmarkGenerativeTextArgs.get_default("output_formats"),
help="Output formats for results (e.g., console, json, html, csv).",
default=BenchmarkGenerativeTextArgs.get_default("outputs"),
help=(
"The filename.ext for each of the outputs to create or the "
"alises (json, csv, html) for the output files to create with "
"their default file names (benchmark.[EXT])"
),
)
@click.option(
"--disable-console-outputs",
is_flag=True,
help="Disable console output.",
"--output-path",
type=click.Path(),
default=None,
help=(
"Legacy parameter for the output path to save the output result to. "
"Resolves to fill in output-dir and outputs based on input path."
),
)
# Updates configuration
@click.option(
"--disable-progress",
"--disable-console",
"--disable-console-outputs", # legacy alias
"disable_console",
is_flag=True,
help="Disable progress updates to the console.",
help=(
"Disable all outputs to the console (updates, interactive progress, results)."
),
)
@click.option(
"--display-scheduler-stats",
"--disable-console-interactive",
"--disable-progress", # legacy alias
"disable_console_interactive",
is_flag=True,
help="Display scheduler process statistics.",
help="Disable interactive console progress updates.",
)
# Aggregators configuration
@click.option(
"--warmup",
"--warmup-percent", # legacy alias
"warmup",
type=float,
default=BenchmarkGenerativeTextArgs.get_default("warmup"),
callback=cli_tools.parse_json,
help=(
"Warmup specification: if in (0,1) = percent, if >=1 = number of "
"requests/seconds (depends on active constraint)."
"Warmup specification: int, float, or dict as string "
"(json or key=value). "
"Controls time or requests before measurement starts. "
"Numeric in (0, 1): percent of duration or request count. "
"Numeric >=1: duration in seconds or request count. "
"Advanced config: see TransientPhaseConfig schema."
),
)
@click.option(
"--cooldown",
"--cooldown-percent", # legacy alias
"cooldown",
type=float,
default=BenchmarkGenerativeTextArgs.get_default("cooldown"),
callback=cli_tools.parse_json,
help=(
"Cooldown specification: if in (0,1) = percent, if >=1 = number of "
"requests/seconds (depends on active constraint)."
"Cooldown specification: int, float, or dict as string "
"(json or key=value). "
"Controls time or requests after measurement ends. "
"Numeric in (0, 1): percent of duration or request count. "
"Numeric >=1: duration in seconds or request count. "
"Advanced config: see TransientPhaseConfig schema."
),
)
@click.option(
"--rampup",
type=float,
default=BenchmarkGenerativeTextArgs.get_default("rampup"),
help=(
"The time, in seconds, to ramp up the request rate over. "
"Only applicable for Throughput/Concurrent strategies"
),
)
@click.option(
@@ -371,32 +383,39 @@ def benchmark():
help="Maximum global error rate across all benchmarks.",
)
def run(**kwargs):
# Only set CLI args that differ from click defaults
kwargs = cli_tools.set_if_not_default(click.get_current_context(), **kwargs)

# Handle remapping for request params
request_type = kwargs.pop("request_type", None)
request_formatter_kwargs = kwargs.pop("request_formatter_kwargs", None)
kwargs["data_request_formatter"] = (
request_type
if not request_formatter_kwargs
else {"request_type": request_type, **request_formatter_kwargs}
)
kwargs["data"] = cli_tools.format_list_arg(
kwargs.get("data"), default=[], simplify_single=False
)
kwargs["data_args"] = cli_tools.format_list_arg(
kwargs.get("data_args"), default=[], simplify_single=False
)
kwargs["rate"] = cli_tools.format_list_arg(
kwargs.get("rate"), default=None, simplify_single=False
if request_type is not None:
kwargs["data_request_formatter"] = (
request_type
if not request_formatter_kwargs
else {"request_type": request_type, **request_formatter_kwargs}
)
elif request_formatter_kwargs is not None:
kwargs["data_request_formatter"] = request_formatter_kwargs

# Handle output path remapping
if (output_path := kwargs.pop("output_path", None)) is not None:
path = Path(output_path)
if path.is_dir():
kwargs["output_dir"] = path
else:
kwargs["output_dir"] = path.parent
kwargs["outputs"] = (path.name,)

# Handle console options
disable_console = kwargs.pop("disable_console", False)
disable_console_interactive = (
kwargs.pop("disable_console_interactive", False) or disable_console
)

disable_console_outputs = kwargs.pop("disable_console_outputs", False)
display_scheduler_stats = kwargs.pop("display_scheduler_stats", False)
disable_progress = kwargs.pop("disable_progress", False)

try:
# Only set CLI args that differ from click defaults
new_kwargs = cli_tools.set_if_not_default(click.get_current_context(), **kwargs)
args = BenchmarkGenerativeTextArgs.create(
scenario=new_kwargs.pop("scenario", None), **new_kwargs
scenario=kwargs.pop("scenario", None), **kwargs
)
except ValidationError as err:
# Translate pydantic validation error to click argument error
Expand All @@ -412,13 +431,11 @@ def run(**kwargs):
benchmark_generative_text(
args=args,
progress=(
GenerativeConsoleBenchmarkerProgress(
display_scheduler_stats=display_scheduler_stats
)
if not disable_progress
GenerativeConsoleBenchmarkerProgress()
if not disable_console_interactive
else None
),
console=Console() if not disable_console_outputs else None,
console=Console() if not disable_console else None,
)
)

@@ -469,128 +486,6 @@ def preprocess():
"""Dataset preprocessing utilities."""


@preprocess.command(
"dataset",
help=(
"Process a dataset to have specific prompt and output token sizes. "
"Supports multiple strategies for handling prompts and optional "
"Hugging Face Hub upload.\n\n"
"DATA: Path to the input dataset or dataset ID.\n\n"
"OUTPUT_PATH: Path to save the processed dataset, including file suffix."
),
context_settings={"auto_envvar_prefix": "GUIDELLM"},
)
@click.argument(
"data",
type=str,
required=True,
)
@click.argument(
"output_path",
type=click.Path(file_okay=True, dir_okay=False, writable=True, resolve_path=True),
required=True,
)
@click.option(
"--processor",
type=str,
required=True,
help="Processor or tokenizer name for calculating token counts.",
)
@click.option(
"--processor-args",
default=None,
callback=cli_tools.parse_json,
help="JSON string of arguments to pass to the processor constructor.",
)
@click.option(
"--data-args",
callback=cli_tools.parse_json,
help="JSON string of arguments to pass to dataset creation.",
)
@click.option(
"--short-prompt-strategy",
type=click.Choice([s.value for s in ShortPromptStrategy]),
default=ShortPromptStrategy.IGNORE.value,
show_default=True,
help="Strategy for handling prompts shorter than target length.",
)
@click.option(
"--pad-char",
type=str,
default="",
callback=decode_escaped_str,
help="Character to pad short prompts with when using 'pad' strategy.",
)
@click.option(
"--concat-delimiter",
type=str,
default="",
help=(
"Delimiter for concatenating short prompts (used with 'concatenate' strategy)."
),
)
@click.option(
"--prompt-tokens",
type=str,
default=None,
help="Prompt tokens configuration (JSON, YAML file, or key=value string).",
)
@click.option(
"--output-tokens",
type=str,
default=None,
help="Output tokens configuration (JSON, YAML file, or key=value string).",
)
@click.option(
"--push-to-hub",
is_flag=True,
help="Push the processed dataset to Hugging Face Hub.",
)
@click.option(
"--hub-dataset-id",
type=str,
default=None,
help=("Hugging Face Hub dataset ID for upload (required if --push-to-hub is set)."),
)
@click.option(
"--random-seed",
type=int,
default=42,
show_default=True,
help="Random seed for reproducible token sampling.",
)
def dataset(
data,
output_path,
processor,
processor_args,
data_args,
short_prompt_strategy,
pad_char,
concat_delimiter,
prompt_tokens,
output_tokens,
push_to_hub,
hub_dataset_id,
random_seed,
):
process_dataset(
data=data,
output_path=output_path,
processor=processor,
prompt_tokens=prompt_tokens,
output_tokens=output_tokens,
processor_args=processor_args,
data_args=data_args,
short_prompt_strategy=short_prompt_strategy,
pad_char=pad_char,
concat_delimiter=concat_delimiter,
push_to_hub=push_to_hub,
hub_dataset_id=hub_dataset_id,
random_seed=random_seed,
)


@cli.command(
"mock-server",
help=(
…
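
For orientation, here is a minimal sketch of driving the reworked entrypoint programmatically, mirroring what `run()` assembles above. It assumes `BenchmarkGenerativeTextArgs`, `benchmark_generative_text`, and `GenerativeConsoleBenchmarkerProgress` are exported from `guidellm.benchmark` (as the imports in `__main__.py` suggest) and that `create()` accepts the CLI option names as keywords; the target URL, dataset, and warmup/cooldown values are illustrative assumptions, not part of this PR:

```python
# Hypothetical usage sketch of the refactored benchmark entrypoint; all
# argument values below are illustrative assumptions, not code from this PR.
import asyncio
from pathlib import Path

from guidellm.benchmark import (
    BenchmarkGenerativeTextArgs,
    GenerativeConsoleBenchmarkerProgress,
    benchmark_generative_text,
)
from guidellm.utils import Console

args = BenchmarkGenerativeTextArgs.create(
    scenario=None,
    target="http://localhost:8000",  # assumed server URL
    data=["prompts.jsonl"],          # assumed dataset; data is now always a list
    rate=[1.0, 4.0],                 # --rate is multiple=True after this change
    output_dir=Path("results"),      # replaces the legacy --output-path
    outputs=("json", "csv"),         # aliases expand to benchmark.[EXT]
    warmup=0.1,                      # in (0, 1): percent of duration/requests
    cooldown=30,                     # >= 1: seconds or request count
)

asyncio.run(
    benchmark_generative_text(
        args=args,
        progress=GenerativeConsoleBenchmarkerProgress(),
        console=Console(),
    )
)
```

Per the remapping in `run()` above, passing the legacy `--output-path results/report.json` on the CLI would resolve to `output_dir=Path("results")` and `outputs=("report.json",)`.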
3 changes: 1 addition & 2 deletions src/guidellm/backends/backend.py
@@ -102,9 +102,8 @@ def requests_limit(self) -> int | None:
return None

@abstractmethod
async def default_model(self) -> str | None:
async def default_model(self) -> str:
"""
:return: The default model name or identifier for generation requests,
None if no default model is available
"""
...
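
Since `default_model` now returns a plain `str`, a backend that cannot determine a model has to raise instead of returning `None`. A minimal standalone sketch of the tightened contract (the class and attribute names are illustrative assumptions, and the real `Backend` has further abstract methods not shown here):

```python
# Standalone sketch of the tightened default_model contract; the class and
# attribute names are illustrative assumptions, not code from this PR.
import asyncio


class SketchBackend:
    def __init__(self, model: str | None = None) -> None:
        self._model = model

    async def default_model(self) -> str:
        # Returning None is no longer allowed by the signature, so a missing
        # model is surfaced as an error at the call site instead.
        if self._model is None:
            raise ValueError("no default model configured for this backend")
        return self._model


print(asyncio.run(SketchBackend("example-model").default_model()))
```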