From da8557b0983b2f3701600582c05bf100c1585599 Mon Sep 17 00:00:00 2001 From: ApsarasX Date: Mon, 13 Oct 2025 08:52:47 +0000 Subject: [PATCH] [Frontend] Add request arrival log Signed-off-by: ApsarasX --- vllm/entrypoints/logger.py | 3 +++ vllm/entrypoints/openai/serving_chat.py | 17 +++++++++-------- .../openai/serving_classification.py | 1 + vllm/entrypoints/openai/serving_completion.py | 2 ++ vllm/entrypoints/openai/serving_embedding.py | 1 + vllm/entrypoints/openai/serving_engine.py | 5 +++++ vllm/entrypoints/openai/serving_pooling.py | 2 ++ vllm/entrypoints/openai/serving_score.py | 4 ++++ vllm/entrypoints/openai/serving_tokenization.py | 2 ++ vllm/entrypoints/openai/speech_to_text.py | 1 + 10 files changed, 30 insertions(+), 8 deletions(-) diff --git a/vllm/entrypoints/logger.py b/vllm/entrypoints/logger.py index c43d5ddf82fb..4c06e62145aa 100644 --- a/vllm/entrypoints/logger.py +++ b/vllm/entrypoints/logger.py @@ -17,6 +17,9 @@ class RequestLogger: def __init__(self, *, max_log_len: int | None) -> None: self.max_log_len = max_log_len + def log_arrival(self, request_id: str) -> None: + logger.info("Arrived request %s", request_id) + def log_inputs( self, request_id: str, diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 96525f206859..54981df9703f 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -179,6 +179,15 @@ async def create_chat_completion( if self.engine_client.errored: raise self.engine_client.dead_error + request_id = ( + f"chatcmpl-{self._base_request_id(raw_request, request.request_id)}" + ) + self._log_arrival(request_id) + + request_metadata = RequestResponseMetadata(request_id=request_id) + if raw_request: + raw_request.state.request_metadata = request_metadata + try: lora_request = self._maybe_get_adapters( request, supports_default_mm_loras=True @@ -257,14 +266,6 @@ async def create_chat_completion( logger.exception("Error in preprocessing prompt inputs") return self.create_error_response(f"{e} {e.__cause__}") - request_id = ( - f"chatcmpl-{self._base_request_id(raw_request, request.request_id)}" - ) - - request_metadata = RequestResponseMetadata(request_id=request_id) - if raw_request: - raw_request.state.request_metadata = request_metadata - # Schedule the request and get the result generator. generators: list[AsyncGenerator[RequestOutput, None]] = [] try: diff --git a/vllm/entrypoints/openai/serving_classification.py b/vllm/entrypoints/openai/serving_classification.py index 45bbe732a680..7c1bd7f6ff15 100644 --- a/vllm/entrypoints/openai/serving_classification.py +++ b/vllm/entrypoints/openai/serving_classification.py @@ -146,6 +146,7 @@ async def create_classify( ) -> ClassificationResponse | ErrorResponse: model_name = self.models.model_name() request_id = f"{self.request_id_prefix}-{self._base_request_id(raw_request)}" + self._log_arrival(request_id) ctx = ClassificationServeContext( request=request, diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index 7af64306023a..151a6603fd70 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -107,6 +107,8 @@ async def create_completion( ) request_id = f"cmpl-{self._base_request_id(raw_request, request.request_id)}" + self._log_arrival(request_id) + created_time = int(time.time()) request_metadata = RequestResponseMetadata(request_id=request_id) diff --git a/vllm/entrypoints/openai/serving_embedding.py b/vllm/entrypoints/openai/serving_embedding.py index 8f1df9a5aea6..b9297934f77c 100644 --- a/vllm/entrypoints/openai/serving_embedding.py +++ b/vllm/entrypoints/openai/serving_embedding.py @@ -629,6 +629,7 @@ async def create_embedding( f"{self.request_id_prefix}-" f"{self._base_request_id(raw_request, request.request_id)}" ) + self._log_arrival(request_id) ctx = EmbeddingServeContext( request=request, diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index a041950ffd20..6a464f9758dc 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -1255,6 +1255,11 @@ def _get_prompt_components( return get_prompt_components(prompt) # type: ignore[arg-type] + def _log_arrival(self, request_id: str) -> None: + if self.request_logger is None: + return + self.request_logger.log_arrival(request_id) + def _log_inputs( self, request_id: str, diff --git a/vllm/entrypoints/openai/serving_pooling.py b/vllm/entrypoints/openai/serving_pooling.py index 39cc539c1187..8df00ad1ce72 100644 --- a/vllm/entrypoints/openai/serving_pooling.py +++ b/vllm/entrypoints/openai/serving_pooling.py @@ -93,6 +93,8 @@ async def create_pooling( model_name = self.models.model_name() request_id = f"pool-{self._base_request_id(raw_request)}" + self._log_arrival(request_id) + created_time = int(time.time()) is_io_processor_request = isinstance(request, IOProcessorRequest) diff --git a/vllm/entrypoints/openai/serving_score.py b/vllm/entrypoints/openai/serving_score.py index 7506e17fe585..cca44bc6a5c7 100644 --- a/vllm/entrypoints/openai/serving_score.py +++ b/vllm/entrypoints/openai/serving_score.py @@ -351,6 +351,8 @@ async def create_score( return error_check_ret request_id = f"score-{self._base_request_id(raw_request)}" + self._log_arrival(request_id) + created_time = int(time.time()) try: @@ -393,6 +395,8 @@ async def do_rerank( return error_check_ret request_id = f"rerank-{self._base_request_id(raw_request)}" + self._log_arrival(request_id) + documents = request.documents top_n = ( request.top_n diff --git a/vllm/entrypoints/openai/serving_tokenization.py b/vllm/entrypoints/openai/serving_tokenization.py index 39aae0cd0495..eafcda2203c9 100644 --- a/vllm/entrypoints/openai/serving_tokenization.py +++ b/vllm/entrypoints/openai/serving_tokenization.py @@ -60,6 +60,7 @@ async def create_tokenize( return error_check_ret request_id = f"tokn-{self._base_request_id(raw_request)}" + self._log_arrival(request_id) try: lora_request = self._maybe_get_adapters(request) @@ -135,6 +136,7 @@ async def create_detokenize( return error_check_ret request_id = f"tokn-{self._base_request_id(raw_request)}" + self._log_arrival(request_id) lora_request = self._maybe_get_adapters(request) diff --git a/vllm/entrypoints/openai/speech_to_text.py b/vllm/entrypoints/openai/speech_to_text.py index fa6e962a1dd7..9052fb1f8ac1 100644 --- a/vllm/entrypoints/openai/speech_to_text.py +++ b/vllm/entrypoints/openai/speech_to_text.py @@ -159,6 +159,7 @@ async def _create_speech_to_text( ) request_id = f"{self.task_type}-{self._base_request_id(raw_request)}" + self._log_arrival(request_id) request_metadata = RequestResponseMetadata(request_id=request_id) if raw_request: