From 4c1236e0874c637d598e7ef03d3c55e2770d870e Mon Sep 17 00:00:00 2001
From: DarkLight1337 <tlleungac@connect.ust.hk>
Date: Thu, 25 Sep 2025 09:36:58 +0000
Subject: [PATCH 1/2] [mypy] Fix wrong type annotations related to tuple

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 benchmarks/kernels/benchmark_lora.py                 |  8 ++++----
 tests/engine/test_arg_utils.py                       |  3 +++
 tests/kernels/core/test_pos_encoding.py              |  2 +-
 tests/kernels/test_onednn.py                         |  4 ++--
 .../models/multimodal/generation/vlm_utils/types.py  | 12 ++++++------
 tests/v1/sample/test_sampler.py                      |  8 +++++---
 tests/v1/spec_decode/test_eagle.py                   |  2 +-
 .../device_communicators/ray_communicator.py         |  2 +-
 vllm/logits_process.py                               |  4 ++--
 9 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/benchmarks/kernels/benchmark_lora.py b/benchmarks/kernels/benchmark_lora.py
index debb29744bfa..799b16999873 100644
--- a/benchmarks/kernels/benchmark_lora.py
+++ b/benchmarks/kernels/benchmark_lora.py
@@ -79,9 +79,9 @@ def make_rand_lora_weight_tensor(
 
 
 def make_rand_tensors(
-    a_shape: tuple[int],
-    b_shape: tuple[int],
-    c_shape: tuple[int],
+    a_shape: tuple[int, ...],
+    b_shape: tuple[int, ...],
+    c_shape: tuple[int, ...],
     a_dtype: torch.dtype,
     b_dtype: torch.dtype,
     c_dtype: torch.dtype,
@@ -243,7 +243,7 @@ def matmul_shapes(
         lora_rank: int,
         num_loras: int,
         num_slices: int,
-    ) -> tuple[tuple[int], tuple[int], tuple[int]]:
+    ) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...]]:
         """
         Given num_slices, return the shapes of the A, B, and C matrices
         in A x B = C, for the op_type
diff --git a/tests/engine/test_arg_utils.py b/tests/engine/test_arg_utils.py
index b82e83963804..33888f008f04 100644
--- a/tests/engine/test_arg_utils.py
+++ b/tests/engine/test_arg_utils.py
@@ -50,8 +50,11 @@ def test_is_type(type_hint, type, expected):
 
 @pytest.mark.parametrize(("type_hints", "type", "expected"), [
     ({float, int}, int, True),
+    ({int, tuple}, int, True),
     ({int, tuple[int]}, int, True),
+    ({int, tuple[int, ...]}, int, True),
     ({int, tuple[int]}, float, False),
+    ({int, tuple[int, ...]}, float, False),
     ({str, Literal["x", "y"]}, Literal, True),
 ])
 def test_contains_type(type_hints, type, expected):
diff --git a/tests/kernels/core/test_pos_encoding.py b/tests/kernels/core/test_pos_encoding.py
index bf9b1d9b4401..1235e3222a78 100644
--- a/tests/kernels/core/test_pos_encoding.py
+++ b/tests/kernels/core/test_pos_encoding.py
@@ -60,7 +60,7 @@ def _get_batch_tensor_shape(batch_size: int, seq_len: int, num_heads: int,
 @torch.inference_mode()
 def test_rotary_embedding(
     is_neox_style: bool,
-    tensor_shape_fn: Callable[[int, int, int, int], tuple[int]],
+    tensor_shape_fn: Callable[[int, int, int, int], tuple[int, ...]],
     batch_size: int,
     seq_len: int,
     num_heads: int,
diff --git a/tests/kernels/test_onednn.py b/tests/kernels/test_onednn.py
index 37772464a209..198a8fdf0c33 100644
--- a/tests/kernels/test_onednn.py
+++ b/tests/kernels/test_onednn.py
@@ -165,7 +165,7 @@ def onednn_gemm_test_helper(primitive_cache_size: int,
 def test_onednn_int8_scaled_gemm(
     n: int,
     k: int,
-    m_list: tuple[int],
+    m_list: tuple[int, ...],
     per_tensor_a_scale: bool,
     per_tensor_b_scale: bool,
     use_bias: bool,
@@ -196,7 +196,7 @@ def test_onednn_int8_scaled_gemm(
 def test_onednn_gemm(
     n: int,
     k: int,
-    m_list: tuple[int],
+    m_list: tuple[int, ...],
     use_bias: bool,
     use_stride: bool,
     dtype: torch.dtype,
diff --git a/tests/models/multimodal/generation/vlm_utils/types.py b/tests/models/multimodal/generation/vlm_utils/types.py
index e39ca40fbbf5..6a82bdfc4cf2 100644
--- a/tests/models/multimodal/generation/vlm_utils/types.py
+++ b/tests/models/multimodal/generation/vlm_utils/types.py
@@ -101,7 +101,7 @@ class VLMTestInfo(NamedTuple):
     # Function for converting ImageAssets to image embeddings;
     # We need to define this explicitly for embedding tests
     convert_assets_to_embeddings: Optional[Callable[[ImageTestAssets],
-                                                    torch.Tensor]] = None
+                                                    list[torch.Tensor]]] = None
 
     # Exposed options for vLLM runner; we change these in a several tests,
     # but the defaults are derived from VllmRunner & the engine defaults
@@ -137,12 +137,12 @@ class VLMTestInfo(NamedTuple):
     # Default expandable params per test; these defaults can be overridden in
     # instances of this object; the complete set of test cases for the model
     # is all combinations of .models + all fields below
-    max_tokens: Union[int, tuple[int]] = 128
-    num_logprobs: Union[int, tuple[int]] = 5
-    dtype: Union[str, Union[list[str], tuple[str, ...]]] = "auto"
-    distributed_executor_backend: Optional[Union[str, Iterable[str]]] = None
+    max_tokens: int = 128
+    num_logprobs: int = 5
+    dtype: str = "auto"
+    distributed_executor_backend: Optional[str] = None
     # Only expanded in video tests
-    num_video_frames: Union[int, tuple[int]] = 16
+    num_video_frames: int = 16
 
     # Fixed image sizes / image size factors; most tests use image_size_factors
     # The values provided for these two fields will be stacked and expanded
diff --git a/tests/v1/sample/test_sampler.py b/tests/v1/sample/test_sampler.py
index 53215f88bb27..025ba5c8dcac 100644
--- a/tests/v1/sample/test_sampler.py
+++ b/tests/v1/sample/test_sampler.py
@@ -72,8 +72,10 @@ def _create_allowed_token_ids(
 
 
 def _create_bad_words_token_ids(
-        batch_size: int, vocab_size: int,
-        bad_words_lengths: list[tuple[int]]) -> dict[int, list[list[int]]]:
+    batch_size: int,
+    vocab_size: int,
+    bad_words_lengths: list[tuple[int, ...]],
+) -> dict[int, list[list[int]]]:
     bad_words_token_ids = {}
     for batch_idx in range(batch_size):
         token_ids_single_batch = []
@@ -402,7 +404,7 @@ def test_sampler_allowed_token_ids(device: str, batch_size: int,
 @pytest.mark.parametrize("batch_size", [1, 2, 32])
 @pytest.mark.parametrize("bad_words_lengths", [(1, ), (1, 3), (2, 2)])
 def test_sampler_bad_words(device: str, batch_size: int,
-                           bad_words_lengths: list[tuple[int]]):
+                           bad_words_lengths: list[tuple[int, ...]]):
     """
     Test to verify that when the bad words restriction is present, tokens
     are penalized based on their match with the bad words.
diff --git a/tests/v1/spec_decode/test_eagle.py b/tests/v1/spec_decode/test_eagle.py
index 5096f9fd647b..0b28365ed599 100644
--- a/tests/v1/spec_decode/test_eagle.py
+++ b/tests/v1/spec_decode/test_eagle.py
@@ -30,7 +30,7 @@
 def _create_proposer(
     method: str,
     num_speculative_tokens: int,
-    speculative_token_tree: Optional[list[tuple[int]]] = None,
+    speculative_token_tree: Optional[list[tuple[int, ...]]] = None,
 ) -> EagleProposer:
     model_config = ModelConfig(model=model_dir,
                                runner="generate",
diff --git a/vllm/distributed/device_communicators/ray_communicator.py b/vllm/distributed/device_communicators/ray_communicator.py
index 8cd8c459a9e5..69efc8b45270 100644
--- a/vllm/distributed/device_communicators/ray_communicator.py
+++ b/vllm/distributed/device_communicators/ray_communicator.py
@@ -178,7 +178,7 @@ def send(self, buf: "torch.Tensor", peer_rank: int) -> None:
 
     def recv(
         self,
-        shape: tuple[int],
+        shape: tuple[int, ...],
         dtype: "torch.dtype",
         peer_rank: int,
         allocator: TorchTensorAllocator,
diff --git a/vllm/logits_process.py b/vllm/logits_process.py
index 5967d0836bd4..48f7e7495b17 100644
--- a/vllm/logits_process.py
+++ b/vllm/logits_process.py
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
+from collections.abc import Sequence
 from typing import Callable, Union
 
 import torch
@@ -55,7 +55,7 @@ def __init__(self, bad_words_ids: list[list[int]]):
 
     def __call__(
         self,
-        past_tokens_ids: Union[list[int], tuple[int]],
+        past_tokens_ids: Sequence[int],
         logits: torch.FloatTensor,
     ) -> torch.Tensor:
         if self.word_bias is None:

From a335ff0ec3b7a589897e6a4ea2ff6e4859b74d3b Mon Sep 17 00:00:00 2001
From: DarkLight1337 <tlleungac@connect.ust.hk>
Date: Thu, 25 Sep 2025 09:42:07 +0000
Subject: [PATCH 2/2] Address comments: annotate bad_words_lengths as tuple[int, ...]

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 tests/v1/sample/test_sampler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/v1/sample/test_sampler.py b/tests/v1/sample/test_sampler.py
index 025ba5c8dcac..6ff000043265 100644
--- a/tests/v1/sample/test_sampler.py
+++ b/tests/v1/sample/test_sampler.py
@@ -74,7 +74,7 @@ def _create_allowed_token_ids(
 def _create_bad_words_token_ids(
     batch_size: int,
     vocab_size: int,
-    bad_words_lengths: list[tuple[int, ...]],
+    bad_words_lengths: tuple[int, ...],
 ) -> dict[int, list[list[int]]]:
     bad_words_token_ids = {}
     for batch_idx in range(batch_size):
@@ -404,7 +404,7 @@ def test_sampler_allowed_token_ids(device: str, batch_size: int,
 @pytest.mark.parametrize("batch_size", [1, 2, 32])
 @pytest.mark.parametrize("bad_words_lengths", [(1, ), (1, 3), (2, 2)])
 def test_sampler_bad_words(device: str, batch_size: int,
-                           bad_words_lengths: list[tuple[int, ...]]):
+                           bad_words_lengths: tuple[int, ...]):
     """
     Test to verify that when the bad words restriction is present, tokens
     are penalized based on their match with the bad words.