From 687b52fd2ff96ee643220bbb4cc9aea12edd5646 Mon Sep 17 00:00:00 2001
From: Mark Kurtz <mark.kurtz@neuralmagic.com>
Date: Tue, 14 Oct 2025 15:35:29 -0400
Subject: [PATCH] Updates from review for multi modal data

---
 src/guidellm/backends/response_handlers.py | 67 +++++++++++-----------
 src/guidellm/benchmark/entrypoints.py      |  2 +-
 2 files changed, 34 insertions(+), 35 deletions(-)

diff --git a/src/guidellm/backends/response_handlers.py b/src/guidellm/backends/response_handlers.py
index 44c949e6..b7bd06ad 100644
--- a/src/guidellm/backends/response_handlers.py
+++ b/src/guidellm/backends/response_handlers.py
@@ -1,11 +1,10 @@
 """
 Response handlers for processing API responses from different generation backends.
 
-This module provides a pluggable system for handling responses from various language
-model backends, supporting both streaming and non-streaming responses. Each handler
-implements the GenerationResponseHandler protocol to parse API responses, extract
-usage metrics, and convert them into standardized GenerationResponse objects for the
-benchmark system.
+Provides a pluggable system for handling responses from language model backends,
+supporting both streaming and non-streaming responses. Each handler implements the
+GenerationResponseHandler protocol to parse API responses, extract usage metrics,
+and convert them into standardized GenerationResponse objects.
 """
 
 from __future__ import annotations
@@ -26,11 +25,11 @@
 
 class GenerationResponseHandler(Protocol):
     """
-    Protocol defining the interface for handling generation API responses.
+    Protocol for handling generation API responses.
 
-    Response handlers implement this protocol to process both streaming and
-    non-streaming responses from different backend APIs, converting them into
-    standardized GenerationResponse objects with consistent metrics extraction.
+    Defines the interface for processing both streaming and non-streaming responses
+    from backend APIs, converting them into standardized GenerationResponse objects
+    with consistent metrics extraction.
     """
 
     def compile_non_streaming(
@@ -39,7 +38,7 @@ def compile_non_streaming(
         """
         Process a complete non-streaming API response.
 
-        :param request: The original generation request
+        :param request: Original generation request
         :param response: Raw API response data from the backend
         :return: Standardized GenerationResponse with extracted metrics
         """
@@ -58,7 +57,7 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
         """
         Compile accumulated streaming data into a final response.
 
-        :param request: The original generation request
+        :param request: Original generation request
         :return: Standardized GenerationResponse with extracted metrics
         """
         ...
@@ -68,9 +67,9 @@ class GenerationResponseHandlerFactory(RegistryMixin[type[GenerationResponseHand
     """
     Factory for registering and creating response handlers by backend type.
 
-    Provides a registry-based system for associating handler classes with specific
-    backend API types, enabling automatic selection of the appropriate handler
-    for processing responses from different generation services.
+    Registry-based system for associating handler classes with specific backend API
+    types, enabling automatic selection of the appropriate handler for processing
+    responses from different generation services.
     """
 
 
@@ -79,9 +78,9 @@ class TextCompletionsResponseHandler(GenerationResponseHandler):
     """
     Response handler for OpenAI-style text completion endpoints.
 
-    Processes responses from text completion APIs that return generated text
-    in the 'choices' array with 'text' fields. Handles both streaming and
-    non-streaming responses, extracting usage metrics for input and output tokens.
+    Processes responses from text completion APIs that return generated text in the
+    'choices' array with 'text' fields. Handles both streaming and non-streaming
+    responses, extracting usage metrics for input and output tokens.
 
     Example:
     ::
@@ -105,7 +104,7 @@ def compile_non_streaming(
         """
         Process a complete text completion response.
 
-        :param request: The original generation request
+        :param request: Original generation request
         :param response: Complete API response containing choices and usage data
         :return: Standardized GenerationResponse with extracted text and metrics
         """
@@ -151,7 +150,7 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
         """
         Compile accumulated streaming text chunks into a final response.
 
-        :param request: The original generation request
+        :param request: Original generation request
         :return: Standardized GenerationResponse with concatenated text and metrics
         """
         input_metrics, output_metrics = self.extract_metrics(self.streaming_usage)
@@ -171,7 +170,7 @@ def extract_line_data(self, line: str) -> dict[str, Any] | None:
         Extract JSON data from a streaming response line.
 
         :param line: Raw line from the streaming response
-        :return: Parsed JSON data as a dictionary, or None if line is invalid
+        :return: Parsed JSON data as a dictionary, or None if line indicates completion
         """
         if line == "data: [DONE]":
             return None
@@ -190,7 +189,7 @@ def extract_choices_and_usage(
         Extract choices and usage data from the API response.
 
         :param response: Complete API response containing choices and usage data
-        :return: Tuple of (choices list, usage dictionary)
+        :return: Tuple of choices list and usage dictionary
         """
         return response.get("choices", []), response.get("usage", {})
 
@@ -201,7 +200,7 @@ def extract_metrics(
         Extract input and output usage metrics from API response usage data.
 
         :param usage: Usage data dictionary from API response
-        :return: Tuple of (input_metrics, output_metrics) as UsageMetrics objects
+        :return: Tuple of input_metrics and output_metrics as UsageMetrics objects
         """
         if not usage:
             return UsageMetrics(), UsageMetrics()
@@ -236,9 +235,9 @@ class ChatCompletionsResponseHandler(TextCompletionsResponseHandler):
     """
     Response handler for OpenAI-style chat completion endpoints.
 
-    Extends TextCompletionsResponseHandler to handle chat completion responses
-    where generated text is nested within message objects in the choices array.
-    Processes both streaming and non-streaming chat completion responses.
+    Extends TextCompletionsResponseHandler to handle chat completion responses where
+    generated text is nested within message objects in the choices array. Processes
+    both streaming and non-streaming chat completion responses.
     """
 
     def compile_non_streaming(
@@ -247,10 +246,10 @@ def compile_non_streaming(
         """
         Process a complete chat completion response.
 
-        Extracts content from the message object within choices, handling the
-        nested structure specific to chat completion endpoints.
+        Extracts content from the message object within choices, handling the nested
+        structure specific to chat completion endpoints.
 
-        :param request: The original generation request
+        :param request: Original generation request
         :param response: Complete API response containing choices and usage data
         :return: Standardized GenerationResponse with extracted content and metrics
         """
@@ -271,8 +270,8 @@ def add_streaming_line(self, line: str) -> int | None:
         """
         Process a single line from a chat completion streaming response.
 
-        Handles the chat completion specific delta structure where content
-        is nested within delta objects in the streaming response chunks.
+        Handles the chat completion specific delta structure where content is nested
+        within delta objects in the streaming response chunks.
 
         :param line: Raw SSE line from the streaming response
         :return: 1 if content was extracted, 0 if line ignored, None if done
@@ -296,7 +295,7 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
         """
         Compile accumulated streaming chat completion content into a final response.
 
-        :param request: The original generation request
+        :param request: Original generation request
         :return: Standardized GenerationResponse with concatenated content and metrics
         """
         input_metrics, output_metrics = self.extract_metrics(self.streaming_usage)
@@ -349,7 +348,7 @@ def compile_non_streaming(
         Extracts transcribed or translated text and audio-specific usage metrics
         including processing duration and token counts for audio content.
 
-        :param request: The original generation request
+        :param request: Original generation request
         :param response: Complete API response containing text and usage data
         :return: Standardized GenerationResponse with extracted text and metrics
         """
@@ -412,7 +411,7 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
         """
         Compile accumulated streaming audio text into a final response.
 
-        :param request: The original generation request
+        :param request: Original generation request
         :return: Standardized GenerationResponse with concatenated text and metrics
         """
         input_metrics, output_metrics = self.extract_metrics(self.streaming_usage)
@@ -437,7 +436,7 @@ def extract_metrics(
         in addition to standard text token counts.
 
         :param usage: Usage data dictionary from audio API response
-        :return: Tuple of (input_metrics, output_metrics) as UsageMetrics objects
+        :return: Tuple of input_metrics and output_metrics as UsageMetrics objects
         """
         if not usage:
             return UsageMetrics(), UsageMetrics()
diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py
index 18768216..61dfa680 100644
--- a/src/guidellm/benchmark/entrypoints.py
+++ b/src/guidellm/benchmark/entrypoints.py
@@ -412,7 +412,7 @@ async def reimport_benchmarks_report(
 ) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]:
     """
     The command-line entry point for re-importing and displaying an
-    existing benchmarks report. Can also specify
+    existing benchmarks report. Can also specify an output format.
     Assumes the file provided exists.
     """
     console = Console()