Merged
5 changes: 3 additions & 2 deletions vllm/compilation/compiler_interface.py

@@ -551,8 +551,9 @@ def set_inductor_config(config, runtime_shape):
     if isinstance(runtime_shape, int):
         # for a specific batchsize, tuning triton kernel parameters
         # can be beneficial
-        config["max_autotune"] = True
-        config["coordinate_descent_tuning"] = True
+        config["max_autotune"] = envs.VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE
+        config["coordinate_descent_tuning"] = (
+            envs.VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING)


 class EagerAdaptor(CompilerInterface):
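With this change, both Inductor tunings remain on by default but can now be switched off per process. A minimal sketch of opting out (hypothetical usage; it assumes the variables are set before vLLM first reads them, which holds because vllm/envs.py resolves each variable lazily via the lambdas shown in the next file):

    import os

    # Opt out of both Inductor tunings for this process. Setting these
    # before vLLM triggers compilation is sufficient, since vllm/envs.py
    # looks each variable up lazily on attribute access.
    os.environ["VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE"] = "0"
    os.environ["VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING"] = "0"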
15 changes: 15 additions & 0 deletions vllm/envs.py

@@ -193,6 +193,8 @@
     VLLM_DBO_COMM_SMS: int = 20
     GPT_OSS_SYSTEM_TOOL_MCP_LABELS: list[str] = []
     VLLM_PATTERN_MATCH_DEBUG: Optional[str] = None
+    VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE: bool = True
+    VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING: bool = True
     VLLM_USE_NCCL_SYMM_MEM: bool = False
     VLLM_NCCL_INCLUDE_PATH: Optional[str] = None

@@ -1413,6 +1415,17 @@ def get_vllm_port() -> Optional[int]:
         "code_interpreter",
         "web_search_preview"]),

+    # Enable max_autotune & coordinate_descent_tuning in inductor_config
+    # to compile static shapes passed from compile_sizes in compilation_config
+    # If set to 1, enable max_autotune; By default, this is enabled (1)
+    "VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE":
+    lambda: bool(int(os.getenv("VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE", "1"))),
+    # If set to 1, enable coordinate_descent_tuning;
+    # By default, this is enabled (1)
+    "VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING":
+    lambda: bool(int(os.getenv("VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING",
+                               "1"))),
+
     # Flag to enable NCCL symmetric memory allocation and registration
     "VLLM_USE_NCCL_SYMM_MEM":
     lambda: bool(int(os.getenv("VLLM_USE_NCCL_SYMM_MEM", "0"))),
Comment on lines +1421 to +1427

Contributor (severity: high)

The current implementation bool(int(os.getenv(...))) for parsing these boolean environment variables is not robust: it will raise a ValueError if a user sets the variable to a non-integer string like "true" or "false". To improve user experience and prevent runtime crashes from misconfiguration, it is better to adopt the more resilient parsing pattern that is already used elsewhere in this file.

Suggested change:

-    "VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE":
-    lambda: bool(int(os.getenv("VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE", "1"))),
-    # If set to 1, enable coordinate_descent_tuning;
-    # By default, this is enabled (1)
-    "VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING":
-    lambda: bool(int(os.getenv("VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING",
-                               "1"))),
+    "VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE":
+    lambda: os.getenv("VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE", "1").lower() in ("1", "true"),
+    # If set to 1, enable coordinate_descent_tuning;
+    # By default, this is enabled (1)
+    "VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING":
+    lambda: (os.getenv("VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING",
+                       "1").lower() in ("1", "true")),
@@ -1513,6 +1526,8 @@ def compute_hash() -> str:
         "VLLM_ROCM_QUICK_REDUCE_CAST_BF16_TO_FP16",
         "VLLM_ROCM_QUICK_REDUCE_MAX_SIZE_BYTES_MB",
         "VLLM_ROCM_FP8_MFMA_PAGE_ATTN",
+        "VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE",
+        "VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING",
     ]
     for key in environment_variables_to_hash:
         # if this goes out of sync with environment_variables,
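Adding the two names to environment_variables_to_hash means that flipping either flag changes the hash vLLM folds into its compilation cache key, so artifacts tuned under one setting are not silently reused under the other. A toy illustration of that idea (standalone sketch, not vLLM's actual hashing code):

    import hashlib
    import os

    def compute_hash(var_names: list[str]) -> str:
        # Hash the values of cache-relevant env vars: a different flag
        # value yields a different key and forces recompilation.
        payload = "|".join(f"{n}={os.getenv(n, '')}" for n in var_names)
        return hashlib.sha256(payload.encode()).hexdigest()[:16]

    names = ["VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE",
             "VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING"]
    os.environ[names[0]] = "1"
    key_on = compute_hash(names)
    os.environ[names[0]] = "0"
    key_off = compute_hash(names)
    assert key_on != key_off  # flipping a flag invalidates the cache key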