def _needs_help(argv: list[str]) -> bool:
    """Decide whether this process was started only to render help text.

    Help mode is detected in two situations:

    * a vLLM command line carrying ``--help`` or ``--help=<keyword>``
      (``vllm SUBCOMMAND --help``);
    * the process being mkdocs itself (``mkdocs SUBCOMMAND`` or
      ``python -m mkdocs SUBCOMMAND``), recognised from ``argv[0]``.

    Args:
        argv: The process argument vector, normally ``sys.argv``.

    Returns:
        ``True`` when help output is being generated, ``False`` otherwise.
    """
    # Match the flag itself rather than any argument that merely contains
    # the text "--help" (e.g. "--helpful" or a path like "no--help.jinja").
    if any(arg == "--help" or arg.startswith("--help=") for arg in argv):
        return True

    # An empty argv is possible in embedded interpreters; there is no
    # program name to inspect then.
    if not argv:
        return False

    # Normalise Windows separators so "...\\mkdocs\\__main__.py" is detected.
    argv0 = argv[0].replace("\\", "/")
    return argv0.endswith(("mkdocs", "mkdocs.exe", "mkdocs/__main__.py"))


NEEDS_HELP = _needs_help(sys.argv)
usage="vllm bench [options]") + usage=f"vllm {self.name} [options]") bench_subparsers = bench_parser.add_subparsers(required=True, dest="bench_type") @@ -44,13 +42,12 @@ def subparser_init( cmd_cls.name, help=cmd_cls.help, description=cmd_cls.help, - usage=f"vllm bench {cmd_cls.name} [options]", + usage=f"vllm {self.name} {cmd_cls.name} [options]", ) cmd_subparser.set_defaults(dispatch_function=cmd_cls.cmd) cmd_cls.add_cli_args(cmd_subparser) - show_filtered_argument_or_group_from_help(cmd_subparser, - ["bench", cmd_cls.name]) - cmd_subparser.epilog = VLLM_SUBCMD_PARSER_EPILOG + cmd_subparser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format( + subcmd=f"{self.name} {cmd_cls.name}") return bench_parser diff --git a/vllm/entrypoints/cli/main.py b/vllm/entrypoints/cli/main.py index fed3ea650405..f1bcbc8262bd 100644 --- a/vllm/entrypoints/cli/main.py +++ b/vllm/entrypoints/cli/main.py @@ -30,7 +30,7 @@ def main(): parser = FlexibleArgumentParser( description="vLLM CLI", - epilog=VLLM_SUBCMD_PARSER_EPILOG, + epilog=VLLM_SUBCMD_PARSER_EPILOG.format(subcmd="[subcommand]"), ) parser.add_argument( '-v', diff --git a/vllm/entrypoints/cli/run_batch.py b/vllm/entrypoints/cli/run_batch.py index 86491678d7d2..e669464bff83 100644 --- a/vllm/entrypoints/cli/run_batch.py +++ b/vllm/entrypoints/cli/run_batch.py @@ -9,8 +9,7 @@ import typing from vllm.entrypoints.cli.types import CLISubcommand -from vllm.entrypoints.utils import (VLLM_SUBCMD_PARSER_EPILOG, - show_filtered_argument_or_group_from_help) +from vllm.entrypoints.utils import VLLM_SUBCMD_PARSER_EPILOG from vllm.logger import init_logger if typing.TYPE_CHECKING: @@ -50,7 +49,7 @@ def subparser_init( from vllm.entrypoints.openai.run_batch import make_arg_parser run_batch_parser = subparsers.add_parser( - "run-batch", + self.name, help="Run batch prompts and write results to file.", description=( "Run batch prompts using vLLM's OpenAI-compatible API.\n" @@ -59,9 +58,8 @@ def subparser_init( "vllm run-batch -i INPUT.jsonl -o 
OUTPUT.jsonl --model ", ) run_batch_parser = make_arg_parser(run_batch_parser) - show_filtered_argument_or_group_from_help(run_batch_parser, - ["run-batch"]) - run_batch_parser.epilog = VLLM_SUBCMD_PARSER_EPILOG + run_batch_parser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format( + subcmd=self.name) return run_batch_parser diff --git a/vllm/entrypoints/cli/serve.py b/vllm/entrypoints/cli/serve.py index de47bf00932e..0a5547144800 100644 --- a/vllm/entrypoints/cli/serve.py +++ b/vllm/entrypoints/cli/serve.py @@ -14,8 +14,7 @@ setup_server) from vllm.entrypoints.openai.cli_args import (make_arg_parser, validate_parsed_serve_args) -from vllm.entrypoints.utils import (VLLM_SUBCMD_PARSER_EPILOG, - show_filtered_argument_or_group_from_help) +from vllm.entrypoints.utils import VLLM_SUBCMD_PARSER_EPILOG from vllm.logger import init_logger from vllm.usage.usage_lib import UsageContext from vllm.utils import (FlexibleArgumentParser, decorate_logs, get_tcp_uri, @@ -29,6 +28,14 @@ logger = init_logger(__name__) +DESCRIPTION = """Launch a local OpenAI-compatible API server to serve LLM +completions via HTTP. Defaults to Qwen/Qwen3-0.6B if no model is specified. + +Search by using: `--help=` to explore options by section (e.g., +--help=ModelConfig, --help=Frontend) + Use `--help=all` to show all available flags at once. +""" + class ServeSubcommand(CLISubcommand): """The `serve` subcommand for the vLLM CLI. 
""" @@ -56,14 +63,13 @@ def subparser_init( self, subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser: serve_parser = subparsers.add_parser( - "serve", - help="Start the vLLM OpenAI Compatible API server.", - description="Start the vLLM OpenAI Compatible API server.", + self.name, + description=DESCRIPTION, usage="vllm serve [model_tag] [options]") serve_parser = make_arg_parser(serve_parser) - show_filtered_argument_or_group_from_help(serve_parser, ["serve"]) - serve_parser.epilog = VLLM_SUBCMD_PARSER_EPILOG + serve_parser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format( + subcmd=self.name) return serve_parser diff --git a/vllm/entrypoints/utils.py b/vllm/entrypoints/utils.py index d2d7dba3ae46..4a90fe094ae2 100644 --- a/vllm/entrypoints/utils.py +++ b/vllm/entrypoints/utils.py @@ -1,13 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import argparse import asyncio import dataclasses import functools import os -import subprocess -import sys +from argparse import Namespace from typing import Any, Optional, Union from fastapi import Request @@ -25,13 +23,10 @@ logger = init_logger(__name__) VLLM_SUBCMD_PARSER_EPILOG = ( - "Tip: Use `vllm [serve|run-batch|bench ] " - "--help=` to explore arguments from help.\n" - " - To view a argument group: --help=ModelConfig\n" - " - To view a single argument: --help=max-num-seqs\n" - " - To search by keyword: --help=max\n" - " - To list all groups: --help=listgroup\n" - " - To view help with pager: --help=page") + "For full list: vllm {subcmd} --help=all\n" + "For a section: vllm {subcmd} --help=ModelConfig (case-insensitive)\n" # noqa: E501 + "For a flag: vllm {subcmd} --help=max-model-len (_ or - accepted)\n" # noqa: E501 + "Documentation: https://docs.vllm.ai\n") async def listen_for_disconnect(request: Request) -> None: @@ -196,96 +191,6 @@ def _validate_truncation_size( return truncate_prompt_tokens -def _output_with_pager(text: str): - """Output 
text using scrolling view if available and appropriate.""" - - pagers = ['less -R', 'more'] - for pager_cmd in pagers: - try: - proc = subprocess.Popen(pager_cmd.split(), - stdin=subprocess.PIPE, - text=True) - proc.communicate(input=text) - return - except (subprocess.SubprocessError, OSError, FileNotFoundError): - continue - - # No pager worked, fall back to normal print - print(text) - - -def show_filtered_argument_or_group_from_help(parser: argparse.ArgumentParser, - subcommand_name: list[str]): - - # Only handle --help= for the current subcommand. - # Since subparser_init() runs for all subcommands during CLI setup, - # we skip processing if the subcommand name is not in sys.argv. - # sys.argv[0] is the program name. The subcommand follows. - # e.g., for `vllm bench latency`, - # sys.argv is `['vllm', 'bench', 'latency', ...]` - # and subcommand_name is "bench latency". - if len(sys.argv) <= len(subcommand_name) or sys.argv[ - 1:1 + len(subcommand_name)] != subcommand_name: - return - - for arg in sys.argv: - if arg.startswith('--help='): - search_keyword = arg.split('=', 1)[1] - - # Enable paged view for full help - if search_keyword == 'page': - help_text = parser.format_help() - _output_with_pager(help_text) - sys.exit(0) - - # List available groups - if search_keyword == 'listgroup': - output_lines = ["\nAvailable argument groups:"] - for group in parser._action_groups: - if group.title and not group.title.startswith( - "positional arguments"): - output_lines.append(f" - {group.title}") - if group.description: - output_lines.append(" " + - group.description.strip()) - output_lines.append("") - _output_with_pager("\n".join(output_lines)) - sys.exit(0) - - # For group search - formatter = parser._get_formatter() - for group in parser._action_groups: - if group.title and group.title.lower() == search_keyword.lower( - ): - formatter.start_section(group.title) - formatter.add_text(group.description) - formatter.add_arguments(group._group_actions) - 
formatter.end_section() - _output_with_pager(formatter.format_help()) - sys.exit(0) - - # For single arg - matched_actions = [] - - for group in parser._action_groups: - for action in group._group_actions: - # search option name - if any(search_keyword.lower() in opt.lower() - for opt in action.option_strings): - matched_actions.append(action) - - if matched_actions: - header = f"\nParameters matching '{search_keyword}':\n" - formatter = parser._get_formatter() - formatter.add_arguments(matched_actions) - _output_with_pager(header + formatter.format_help()) - sys.exit(0) - - print(f"\nNo group or parameter matching '{search_keyword}'") - print("Tip: use `--help=listgroup` to view all groups.") - sys.exit(1) - - def get_max_tokens(max_model_len: int, request: Union[ChatCompletionRequest, CompletionRequest], input_length: int, default_sampling_params: dict) -> int: @@ -301,11 +206,11 @@ def get_max_tokens(max_model_len: int, request: Union[ChatCompletionRequest, if val is not None) -def log_non_default_args(args: Union[argparse.Namespace, EngineArgs]): +def log_non_default_args(args: Union[Namespace, EngineArgs]): non_default_args = {} - # Handle argparse.Namespace - if isinstance(args, argparse.Namespace): + # Handle Namespace + if isinstance(args, Namespace): parser = make_arg_parser(FlexibleArgumentParser()) for arg, default in vars(parser.parse_args([])).items(): if default != getattr(args, arg): @@ -323,6 +228,6 @@ def log_non_default_args(args: Union[argparse.Namespace, EngineArgs]): non_default_args["model"] = default_args.model else: raise TypeError("Unsupported argument type. 
def format_help(self):
    """Render help text, optionally filtered by a ``--help=<keyword>`` search.

    Parsers that own subparsers use the stock argparse output. Bottom-level
    parsers behave according to ``self._search_keyword``:

    * ``None``: usage, description, a "Config Groups" listing of every
      titled, non-empty argument group, and the epilog;
    * ``"all"``: the full argparse help, with the JSON tip as epilog;
    * a group title (case-insensitive, ``_`` and ``-`` interchangeable):
      that group's arguments only;
    * anything else: every argument whose option string contains the
      keyword, or a "no match" message.

    Returns:
        The formatted help string.
    """
    # Only use custom help formatting for bottom level parsers
    if self._subparsers is not None:
        return super().format_help()

    def normalise(text):
        # Lower-case and treat "_" and "-" as the same character.
        return text.lower().replace("_", "-")

    formatter = self._get_formatter()

    # The implementation this method replaces only showed the JSON tip
    # when ``add_json_tip`` was set; keep honouring that switch.
    # NOTE(review): defaults to True if the attribute is absent -- confirm
    # against ``__init__``.
    json_tip = self._json_tip if getattr(self, "add_json_tip", True) else None

    if self._search_keyword is not None:
        search_keyword = normalise(self._search_keyword)

        # Return full help if searching for 'all'
        if search_keyword == "all":
            # Swap the epilog only for this render so the parser's
            # configured epilog is not lost for later calls.
            configured_epilog = self.epilog
            self.epilog = json_tip
            try:
                return super().format_help()
            finally:
                self.epilog = configured_epilog

        # Return group help if searching for a group title
        for group in self._action_groups:
            if group.title and normalise(group.title) == search_keyword:
                formatter.start_section(group.title)
                formatter.add_text(group.description)
                formatter.add_arguments(group._group_actions)
                formatter.end_section()
                formatter.add_text(json_tip)
                return formatter.format_help()

        # Return matched args if searching for an arg name
        matched_actions = [
            action for group in self._action_groups
            for action in group._group_actions
            if any(search_keyword in normalise(opt)
                   for opt in action.option_strings)
        ]
        if matched_actions:
            formatter.start_section(f"Arguments matching '{search_keyword}'")
            formatter.add_arguments(matched_actions)
            formatter.end_section()
            formatter.add_text(json_tip)
            return formatter.format_help()

        # No match found
        formatter.add_text(
            f"No group or arguments matching '{search_keyword}'.\n"
            "Use '--help' to see available groups or "
            "'--help=all' to see all available parameters.")
        return formatter.format_help()

    # usage
    formatter.add_usage(self.usage, self._actions,
                        self._mutually_exclusive_groups)

    # description
    formatter.add_text(self.description)

    # One row per searchable group. Untitled groups are skipped: they
    # cannot be looked up by title and a ``None`` title cannot be padded.
    formatter.start_section("Config Groups")
    rows = [
        f"{group.title: <24}{group.description or ''}\n"
        for group in self._action_groups
        if group._group_actions and group.title
    ]
    formatter.add_text("".join(rows))
    formatter.end_section()

    # epilog
    formatter.add_text(self.epilog)

    # determine help from format above
    return formatter.format_help()