docs/mkdocs/hooks/generate_argparse.py (2 changes: 1 addition & 1 deletion)

@@ -167,5 +167,5 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
         doc_path = ARGPARSE_DOC_DIR / f"{stem}.md"
         # Specify encoding for building on Windows
         with open(doc_path, "w", encoding="utf-8") as f:
-            f.write(parser.format_help())
+            f.write(super(type(parser), parser).format_help())
         logger.info("Argparse generated: %s", doc_path.relative_to(ROOT_DIR))
vllm/engine/arg_utils.py (4 changes: 2 additions & 2 deletions)

@@ -156,8 +156,8 @@ def is_online_quantization(quantization: Any) -> bool:


 NEEDS_HELP = (
-    "--help" in (argv := sys.argv)  # vllm SUBCOMMAND --help
-    or (argv0 := argv[0]).endswith("mkdocs")  # mkdocs SUBCOMMAND
+    any("--help" in arg for arg in sys.argv)  # vllm SUBCOMMAND --help
+    or (argv0 := sys.argv[0]).endswith("mkdocs")  # mkdocs SUBCOMMAND
     or argv0.endswith("mkdocs/__main__.py")  # python -m mkdocs SUBCOMMAND
 )

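The membership test was the bug here: `"--help" in sys.argv` only matches a bare `--help` argument, while the new sectioned form arrives as a single token such as `--help=ModelConfig`. The substring scan catches both. A quick illustration (the argv values are made up):

```python
argv = ["vllm", "serve", "--help=ModelConfig"]

print("--help" in argv)                      # False: no element equals "--help"
print(any("--help" in arg for arg in argv))  # True: substring match per token
```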
vllm/entrypoints/cli/benchmark/main.py (13 changes: 5 additions & 8 deletions)

@@ -8,8 +8,7 @@

 from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase
 from vllm.entrypoints.cli.types import CLISubcommand
-from vllm.entrypoints.utils import (VLLM_SUBCMD_PARSER_EPILOG,
-                                    show_filtered_argument_or_group_from_help)
+from vllm.entrypoints.utils import VLLM_SUBCMD_PARSER_EPILOG

 if typing.TYPE_CHECKING:
     from vllm.utils import FlexibleArgumentParser

@@ -33,9 +32,8 @@ def subparser_init(
             subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
         bench_parser = subparsers.add_parser(
             self.name,
-            help=self.help,
             description=self.help,
-            usage="vllm bench <bench_type> [options]")
+            usage=f"vllm {self.name} <bench_type> [options]")
         bench_subparsers = bench_parser.add_subparsers(required=True,
                                                        dest="bench_type")

@@ -44,13 +42,12 @@ def subparser_init(
                 cmd_cls.name,
                 help=cmd_cls.help,
                 description=cmd_cls.help,
-                usage=f"vllm bench {cmd_cls.name} [options]",
+                usage=f"vllm {self.name} {cmd_cls.name} [options]",
             )
             cmd_subparser.set_defaults(dispatch_function=cmd_cls.cmd)
             cmd_cls.add_cli_args(cmd_subparser)
-            show_filtered_argument_or_group_from_help(cmd_subparser,
-                                                      ["bench", cmd_cls.name])
-            cmd_subparser.epilog = VLLM_SUBCMD_PARSER_EPILOG
+            cmd_subparser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format(
+                subcmd=f"{self.name} {cmd_cls.name}")
         return bench_parser

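With the epilog now a `str.format` template (see the new `VLLM_SUBCMD_PARSER_EPILOG` in `vllm/entrypoints/utils.py` below), each nested bench subcommand gets a tip that names its own invocation path. Roughly how it composes, using the template's first two lines:

```python
VLLM_SUBCMD_PARSER_EPILOG = (
    "For full list: vllm {subcmd} --help=all\n"
    "For a section: vllm {subcmd} --help=ModelConfig (case-insensitive)\n")

print(VLLM_SUBCMD_PARSER_EPILOG.format(subcmd="bench latency"))
# For full list: vllm bench latency --help=all
# For a section: vllm bench latency --help=ModelConfig (case-insensitive)
```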
vllm/entrypoints/cli/main.py (2 changes: 1 addition & 1 deletion)

@@ -30,7 +30,7 @@ def main():

     parser = FlexibleArgumentParser(
         description="vLLM CLI",
-        epilog=VLLM_SUBCMD_PARSER_EPILOG,
+        epilog=VLLM_SUBCMD_PARSER_EPILOG.format(subcmd="[subcommand]"),
     )
     parser.add_argument(
         '-v',

vllm/entrypoints/cli/run_batch.py (10 changes: 4 additions & 6 deletions)

@@ -9,8 +9,7 @@
 import typing

 from vllm.entrypoints.cli.types import CLISubcommand
-from vllm.entrypoints.utils import (VLLM_SUBCMD_PARSER_EPILOG,
-                                    show_filtered_argument_or_group_from_help)
+from vllm.entrypoints.utils import VLLM_SUBCMD_PARSER_EPILOG
 from vllm.logger import init_logger

 if typing.TYPE_CHECKING:

@@ -50,7 +49,7 @@ def subparser_init(
         from vllm.entrypoints.openai.run_batch import make_arg_parser

         run_batch_parser = subparsers.add_parser(
-            "run-batch",
+            self.name,
             help="Run batch prompts and write results to file.",
             description=(
                 "Run batch prompts using vLLM's OpenAI-compatible API.\n"

@@ -59,9 +58,8 @@ def subparser_init(
             "vllm run-batch -i INPUT.jsonl -o OUTPUT.jsonl --model <model>",
         )
         run_batch_parser = make_arg_parser(run_batch_parser)
-        show_filtered_argument_or_group_from_help(run_batch_parser,
-                                                  ["run-batch"])
-        run_batch_parser.epilog = VLLM_SUBCMD_PARSER_EPILOG
+        run_batch_parser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format(
+            subcmd=self.name)
         return run_batch_parser

vllm/entrypoints/cli/serve.py (20 changes: 13 additions & 7 deletions)

@@ -14,8 +14,7 @@
                                     setup_server)
 from vllm.entrypoints.openai.cli_args import (make_arg_parser,
                                               validate_parsed_serve_args)
-from vllm.entrypoints.utils import (VLLM_SUBCMD_PARSER_EPILOG,
-                                    show_filtered_argument_or_group_from_help)
+from vllm.entrypoints.utils import VLLM_SUBCMD_PARSER_EPILOG
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import (FlexibleArgumentParser, decorate_logs, get_tcp_uri,

@@ -29,6 +28,14 @@

 logger = init_logger(__name__)

+DESCRIPTION = """Launch a local OpenAI-compatible API server to serve LLM
+completions via HTTP. Defaults to Qwen/Qwen3-0.6B if no model is specified.
+
+Search by using: `--help=<ConfigGroup>` to explore options by section (e.g.,
+--help=ModelConfig, --help=Frontend)
+Use `--help=all` to show all available flags at once.
+"""
+

 class ServeSubcommand(CLISubcommand):
     """The `serve` subcommand for the vLLM CLI. """

@@ -56,14 +63,13 @@ def subparser_init(
         self,
         subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
         serve_parser = subparsers.add_parser(
-            "serve",
-            help="Start the vLLM OpenAI Compatible API server.",
-            description="Start the vLLM OpenAI Compatible API server.",
+            self.name,
+            description=DESCRIPTION,
             usage="vllm serve [model_tag] [options]")

         serve_parser = make_arg_parser(serve_parser)
-        show_filtered_argument_or_group_from_help(serve_parser, ["serve"])
-        serve_parser.epilog = VLLM_SUBCMD_PARSER_EPILOG
+        serve_parser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format(
+            subcmd=self.name)
         return serve_parser

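One side effect worth noting: `argparse` only gives a subcommand a summary line in the parent's help when `help=` is passed to `add_parser`, while `description=` shows only on the subcommand's own help page. Dropping `help=` here therefore changes what `vllm --help` prints for `serve`. A small self-contained demonstration (the prog and command names are illustrative):

```python
import argparse

parent = argparse.ArgumentParser(prog="demo")
subs = parent.add_subparsers(dest="cmd")

subs.add_parser("with-help", help="Shown in the parent's command list.",
                description="Shown on `demo with-help --help`.")
subs.add_parser("no-help",
                description="Only shown on `demo no-help --help`.")

parent.print_help()
# positional arguments:
#   {with-help,no-help}
#     with-help          Shown in the parent's command list.
# ("no-help" appears in the braces of choices but gets no summary line)
```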
vllm/entrypoints/utils.py (113 changes: 9 additions & 104 deletions)

@@ -1,13 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

-import argparse
 import asyncio
 import dataclasses
 import functools
 import os
-import subprocess
-import sys
+from argparse import Namespace
 from typing import Any, Optional, Union

 from fastapi import Request

@@ -25,13 +23,10 @@
 logger = init_logger(__name__)

 VLLM_SUBCMD_PARSER_EPILOG = (
-    "Tip: Use `vllm [serve|run-batch|bench <bench_type>] "
-    "--help=<keyword>` to explore arguments from help.\n"
-    "   - To view a argument group:    --help=ModelConfig\n"
-    "   - To view a single argument:   --help=max-num-seqs\n"
-    "   - To search by keyword:        --help=max\n"
-    "   - To list all groups:          --help=listgroup\n"
-    "   - To view help with pager:     --help=page")
+    "For full list: vllm {subcmd} --help=all\n"
+    "For a section: vllm {subcmd} --help=ModelConfig (case-insensitive)\n"  # noqa: E501
+    "For a flag: vllm {subcmd} --help=max-model-len (_ or - accepted)\n"  # noqa: E501
+    "Documentation: https://docs.vllm.ai\n")


 async def listen_for_disconnect(request: Request) -> None:

@@ -196,96 +191,6 @@ def _validate_truncation_size(
     return truncate_prompt_tokens


-def _output_with_pager(text: str):
-    """Output text using scrolling view if available and appropriate."""
-
-    pagers = ['less -R', 'more']
-    for pager_cmd in pagers:
-        try:
-            proc = subprocess.Popen(pager_cmd.split(),
-                                    stdin=subprocess.PIPE,
-                                    text=True)
-            proc.communicate(input=text)
-            return
-        except (subprocess.SubprocessError, OSError, FileNotFoundError):
-            continue
-
-    # No pager worked, fall back to normal print
-    print(text)
-
-
-def show_filtered_argument_or_group_from_help(parser: argparse.ArgumentParser,
-                                              subcommand_name: list[str]):
-
-    # Only handle --help=<keyword> for the current subcommand.
-    # Since subparser_init() runs for all subcommands during CLI setup,
-    # we skip processing if the subcommand name is not in sys.argv.
-    # sys.argv[0] is the program name. The subcommand follows.
-    # e.g., for `vllm bench latency`,
-    # sys.argv is `['vllm', 'bench', 'latency', ...]`
-    # and subcommand_name is "bench latency".
-    if len(sys.argv) <= len(subcommand_name) or sys.argv[
-            1:1 + len(subcommand_name)] != subcommand_name:
-        return
-
-    for arg in sys.argv:
-        if arg.startswith('--help='):
-            search_keyword = arg.split('=', 1)[1]
-
-            # Enable paged view for full help
-            if search_keyword == 'page':
-                help_text = parser.format_help()
-                _output_with_pager(help_text)
-                sys.exit(0)
-
-            # List available groups
-            if search_keyword == 'listgroup':
-                output_lines = ["\nAvailable argument groups:"]
-                for group in parser._action_groups:
-                    if group.title and not group.title.startswith(
-                            "positional arguments"):
-                        output_lines.append(f"  - {group.title}")
-                        if group.description:
-                            output_lines.append("    " +
-                                                group.description.strip())
-                output_lines.append("")
-                _output_with_pager("\n".join(output_lines))
-                sys.exit(0)
-
-            # For group search
-            formatter = parser._get_formatter()
-            for group in parser._action_groups:
-                if group.title and group.title.lower() == search_keyword.lower(
-                ):
-                    formatter.start_section(group.title)
-                    formatter.add_text(group.description)
-                    formatter.add_arguments(group._group_actions)
-                    formatter.end_section()
-                    _output_with_pager(formatter.format_help())
-                    sys.exit(0)
-
-            # For single arg
-            matched_actions = []
-
-            for group in parser._action_groups:
-                for action in group._group_actions:
-                    # search option name
-                    if any(search_keyword.lower() in opt.lower()
-                           for opt in action.option_strings):
-                        matched_actions.append(action)
-
-            if matched_actions:
-                header = f"\nParameters matching '{search_keyword}':\n"
-                formatter = parser._get_formatter()
-                formatter.add_arguments(matched_actions)
-                _output_with_pager(header + formatter.format_help())
-                sys.exit(0)
-
-            print(f"\nNo group or parameter matching '{search_keyword}'")
-            print("Tip: use `--help=listgroup` to view all groups.")
-            sys.exit(1)
-
-
 def get_max_tokens(max_model_len: int, request: Union[ChatCompletionRequest,
                                                       CompletionRequest],
                    input_length: int, default_sampling_params: dict) -> int:

@@ -301,11 +206,11 @@ def get_max_tokens(max_model_len: int, request: Union[ChatCompletionRequest,
                if val is not None)


-def log_non_default_args(args: Union[argparse.Namespace, EngineArgs]):
+def log_non_default_args(args: Union[Namespace, EngineArgs]):
     non_default_args = {}

-    # Handle argparse.Namespace
-    if isinstance(args, argparse.Namespace):
+    # Handle Namespace
+    if isinstance(args, Namespace):
         parser = make_arg_parser(FlexibleArgumentParser())
         for arg, default in vars(parser.parse_args([])).items():
             if default != getattr(args, arg):

@@ -323,6 +228,6 @@ def log_non_default_args(args: Union[argparse.Namespace, EngineArgs]):
         non_default_args["model"] = default_args.model
     else:
         raise TypeError("Unsupported argument type. " \
-                        "Must be argparse.Namespace or EngineArgs instance.")
+                        "Must be Namespace or EngineArgs instance.")

     logger.info("non-default args: %s", non_default_args)