From 1cb9353ccf1135a1934a3d089bba2971c07728c9 Mon Sep 17 00:00:00 2001
From: Aleksei Tsvetkov
Date: Mon, 13 Oct 2025 10:43:01 +0000
Subject: [PATCH] [Misc] Feature flag to hide verbose prompt logging to debug
 level

Signed-off-by: Aleksei Tsvetkov
---
 vllm/entrypoints/logger.py | 32 ++++++++++++++++++++++++++------
 vllm/envs.py               |  8 ++++++++
 2 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/vllm/entrypoints/logger.py b/vllm/entrypoints/logger.py
index c43d5ddf82fb..b7021e6c0581 100644
--- a/vllm/entrypoints/logger.py
+++ b/vllm/entrypoints/logger.py
@@ -5,6 +5,7 @@
 
 import torch
 
+from vllm import envs
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.pooling_params import PoolingParams
@@ -34,17 +35,36 @@ def log_inputs(
         if prompt_token_ids is not None:
             prompt_token_ids = prompt_token_ids[:max_log_len]
 
+        if not envs.VLLM_DEBUG_LOG_API_SERVER_REQUEST_PROMPT:
+            # Original logging behavior
+            logger.info(
+                "Received request %s: prompt: %r, "
+                "params: %s, prompt_token_ids: %s, "
+                "prompt_embeds shape: %s, "
+                "lora_request: %s.",
+                request_id,
+                prompt,
+                params,
+                prompt_token_ids,
+                prompt_embeds.shape if prompt_embeds is not None else None,
+                lora_request,
+            )
+            return
+
+        # Split logging: basic info at INFO level, prompt details at DEBUG level
         logger.info(
-            "Received request %s: prompt: %r, "
-            "params: %s, prompt_token_ids: %s, "
-            "prompt_embeds shape: %s, "
-            "lora_request: %s.",
+            "Received request %s: params: %s, lora_request: %s.",
             request_id,
-            prompt,
             params,
+            lora_request,
+        )
+        logger.debug(
+            "Request %s prompt details: prompt: %r, prompt_token_ids: %s, "
+            "prompt_embeds shape: %s",
+            request_id,
+            prompt,
             prompt_token_ids,
             prompt_embeds.shape if prompt_embeds is not None else None,
-            lora_request,
         )
 
     def log_outputs(
diff --git a/vllm/envs.py b/vllm/envs.py
index 97076bec11b8..3a523264fd14 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -24,6 +24,7 @@
     CUDA_VISIBLE_DEVICES: str | None = None
     VLLM_ENGINE_ITERATION_TIMEOUT_S: int = 60
     VLLM_API_KEY: str | None = None
+    VLLM_DEBUG_LOG_API_SERVER_REQUEST_PROMPT: bool = False
     S3_ACCESS_KEY_ID: str | None = None
     S3_SECRET_ACCESS_KEY: str | None = None
     S3_ENDPOINT_URL: str | None = None
@@ -528,6 +529,13 @@ def get_vllm_port() -> int | None:
         "VLLM_DEBUG_LOG_API_SERVER_RESPONSE", "False"
     ).lower()
     == "true",
+    # Whether to enable debug logging for API server request prompts
+    # When enabled, splits request logging: basic info at INFO level,
+    # prompt details (prompt, prompt_token_ids, prompt_embeds) at DEBUG level
+    "VLLM_DEBUG_LOG_API_SERVER_REQUEST_PROMPT": lambda: os.environ.get(
+        "VLLM_DEBUG_LOG_API_SERVER_REQUEST_PROMPT", "False"
+    ).lower()
+    == "true",
     # S3 access information, used for tensorizer to load model from S3
     "S3_ACCESS_KEY_ID": lambda: os.environ.get("S3_ACCESS_KEY_ID", None),
     "S3_SECRET_ACCESS_KEY": lambda: os.environ.get("S3_SECRET_ACCESS_KEY", None),
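
Note (not part of the patch): below is a minimal, self-contained sketch of the behavior this change introduces, using only the standard library. The environment-variable name matches the patch; the simplified log_inputs signature and the SPLIT_PROMPT_LOGGING constant are illustrative only and not vLLM's actual API.

import logging
import os

logger = logging.getLogger("request_logger_sketch")
logging.basicConfig(level=logging.INFO)

# Mirrors the patch's envs.py entry: the flag defaults to "False".
SPLIT_PROMPT_LOGGING = (
    os.environ.get("VLLM_DEBUG_LOG_API_SERVER_REQUEST_PROMPT", "False").lower()
    == "true"
)


def log_inputs(request_id: str, prompt: str, params: dict) -> None:
    if not SPLIT_PROMPT_LOGGING:
        # Flag off: original behavior, the prompt is part of the INFO line.
        logger.info(
            "Received request %s: prompt: %r, params: %s", request_id, prompt, params
        )
        return
    # Flag on: basic info at INFO, verbose prompt details only at DEBUG.
    logger.info("Received request %s: params: %s", request_id, params)
    logger.debug("Request %s prompt details: prompt: %r", request_id, prompt)


log_inputs("req-1", "What is the capital of France?", {"max_tokens": 16})

With the flag set to "true" and the logger left at INFO, the prompt text no longer appears in the log output; raising the logging level to DEBUG restores the full per-request detail.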