vllm/entrypoints/logger.py (32 changes: 26 additions & 6 deletions)
@@ -5,6 +5,7 @@
 
 import torch
 
+from vllm import envs
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.pooling_params import PoolingParams
@@ -34,17 +35,36 @@ def log_inputs(
         if prompt_token_ids is not None:
             prompt_token_ids = prompt_token_ids[:max_log_len]
 
+        if not envs.VLLM_DEBUG_LOG_API_SERVER_REQUEST_PROMPT:
+            # Original logging behavior
+            logger.info(
+                "Received request %s: prompt: %r, "
+                "params: %s, prompt_token_ids: %s, "
+                "prompt_embeds shape: %s, "
+                "lora_request: %s.",
+                request_id,
+                prompt,
+                params,
+                prompt_token_ids,
+                prompt_embeds.shape if prompt_embeds is not None else None,
+                lora_request,
+            )
+            return
+
+        # Split logging: basic info at INFO level, prompt details at DEBUG level
         logger.info(
-            "Received request %s: prompt: %r, "
-            "params: %s, prompt_token_ids: %s, "
-            "prompt_embeds shape: %s, "
-            "lora_request: %s.",
+            "Received request %s: params: %s, lora_request: %s.",
             request_id,
-            prompt,
             params,
+            lora_request,
+        )
+        logger.debug(
+            "Request %s prompt details: prompt: %r, prompt_token_ids: %s, "
+            "prompt_embeds shape: %s",
+            request_id,
+            prompt,
             prompt_token_ids,
             prompt_embeds.shape if prompt_embeds is not None else None,
-            lora_request,
         )
Comment on lines +38 to +68
Contributor (severity: high):

The current implementation duplicates the logging logic. The original logger.info call is copied into the if not envs.VLLM_DEBUG_LOG_API_SERVER_REQUEST_PROMPT: block. This code duplication can lead to maintenance issues, as future changes to the log message might not be applied in both places.

To improve maintainability and avoid redundancy, I suggest refactoring this to use dictionary-based string formatting. This approach centralizes the log data and constructs the log messages conditionally, making the code cleaner and easier to maintain. I've also slightly modified prompt_embeds shape to prompt_embeds_shape in the log message to make it a valid identifier for dictionary-based formatting.

        log_data = {
            "request_id": request_id,
            "prompt": prompt,
            "params": params,
            "prompt_token_ids": prompt_token_ids,
            "prompt_embeds_shape": prompt_embeds.shape if prompt_embeds is not None else None,
            "lora_request": lora_request,
        }

        if not envs.VLLM_DEBUG_LOG_API_SERVER_REQUEST_PROMPT:
            # Original logging behavior
            logger.info(
                "Received request %(request_id)s: prompt: %(prompt)r, "
                "params: %(params)s, prompt_token_ids: %(prompt_token_ids)s, "
                "prompt_embeds_shape: %(prompt_embeds_shape)s, "
                "lora_request: %(lora_request)s.",
                log_data,
            )
            return

        # Split logging: basic info at INFO level, prompt details at DEBUG level
        logger.info(
            "Received request %(request_id)s: params: %(params)s, lora_request: %(lora_request)s.",
            log_data,
        )
        logger.debug(
            "Request %(request_id)s prompt details: prompt: %(prompt)r, "
            "prompt_token_ids: %(prompt_token_ids)s, "
            "prompt_embeds_shape: %(prompt_embeds_shape)s",
            log_data,
        )
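
For reference, the dictionary-based style in this suggestion relies on a standard feature of Python's logging module: when the only argument after the format string is a mapping, that mapping is used for %(name)s substitution. A minimal standalone sketch of the mechanism (illustrative names only, not vLLM code):

    import logging

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger("demo")

    # A single dict argument feeds the %(name)s placeholders, so one data
    # structure can back several differently shaped log messages.
    log_data = {"request_id": "req-1", "params": "SamplingParams(...)"}
    logger.info("Received request %(request_id)s: params: %(params)s.", log_data)
    logger.debug("Request %(request_id)s details: params: %(params)s", log_data)

Unused keys in the mapping are simply ignored, which is what lets the INFO and DEBUG messages select different subsets of the same log_data dict.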


def log_outputs(
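Stepping back from the diff itself, the behavior being added can be summarized with a small self-contained sketch: an environment flag selects between the original single INFO-level line and a split where only request metadata stays at INFO and the prompt details drop to DEBUG. This is an illustration with hypothetical names, not the actual RequestLogger.log_inputs implementation:

    import logging
    import os

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger("request_logger_sketch")

    # Parsed the same way vllm/envs.py parses the flag: only the string
    # "true" (any casing) enables the split behavior.
    SPLIT_PROMPT_LOGGING = os.environ.get(
        "VLLM_DEBUG_LOG_API_SERVER_REQUEST_PROMPT", "False"
    ).lower() == "true"


    def log_request(request_id: str, prompt: str, params: str) -> None:
        if not SPLIT_PROMPT_LOGGING:
            # Original behavior: everything, including the prompt, at INFO.
            logger.info("Received request %s: prompt: %r, params: %s.",
                        request_id, prompt, params)
            return
        # Split behavior: metadata at INFO, prompt details at DEBUG.
        logger.info("Received request %s: params: %s.", request_id, params)
        logger.debug("Request %s prompt details: prompt: %r", request_id, prompt)


    log_request("req-1", "Hello, world", "SamplingParams(...)")

With the flag unset, this prints one INFO line containing the prompt; with it set to true, the prompt only appears when the logger is configured at DEBUG level.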
vllm/envs.py (8 changes: 8 additions & 0 deletions)
@@ -24,6 +24,7 @@
 CUDA_VISIBLE_DEVICES: str | None = None
 VLLM_ENGINE_ITERATION_TIMEOUT_S: int = 60
 VLLM_API_KEY: str | None = None
+VLLM_DEBUG_LOG_API_SERVER_REQUEST_PROMPT: bool = False
 S3_ACCESS_KEY_ID: str | None = None
 S3_SECRET_ACCESS_KEY: str | None = None
 S3_ENDPOINT_URL: str | None = None
@@ -528,6 +529,13 @@ def get_vllm_port() -> int | None:
         "VLLM_DEBUG_LOG_API_SERVER_RESPONSE", "False"
     ).lower()
     == "true",
+    # Whether to enable debug logging for API server request prompts
+    # When enabled, splits request logging: basic info at INFO level,
+    # prompt details (prompt, prompt_token_ids, prompt_embeds) at DEBUG level
+    "VLLM_DEBUG_LOG_API_SERVER_REQUEST_PROMPT": lambda: os.environ.get(
+        "VLLM_DEBUG_LOG_API_SERVER_REQUEST_PROMPT", "False"
+    ).lower()
+    == "true",
     # S3 access information, used for tensorizer to load model from S3
     "S3_ACCESS_KEY_ID": lambda: os.environ.get("S3_ACCESS_KEY_ID", None),
     "S3_SECRET_ACCESS_KEY": lambda: os.environ.get("S3_SECRET_ACCESS_KEY", None),
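As the parsing lambda shows, the new flag is read from the environment and compared case-insensitively against "true"; anything else, including leaving it unset, keeps the original single INFO-level log. A hedged sketch of enabling it from Python before vLLM reads its environment (VLLM_LOGGING_LEVEL is assumed here to be vLLM's existing log-level variable, which would need to be DEBUG for the prompt-detail messages to be visible):

    import os

    # Enable split prompt logging; any casing of "true" is accepted.
    os.environ["VLLM_DEBUG_LOG_API_SERVER_REQUEST_PROMPT"] = "true"
    # Assumed existing variable controlling vLLM's log level; the prompt
    # details are emitted at DEBUG, so the default INFO level would hide them.
    os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"

    # Mirrors the envs.py parsing above.
    enabled = os.environ.get(
        "VLLM_DEBUG_LOG_API_SERVER_REQUEST_PROMPT", "False"
    ).lower() == "true"
    assert enabled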