Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions benchmarks/kernels/benchmark_lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ def make_rand_lora_weight_tensor(


def make_rand_tensors(
a_shape: tuple[int],
b_shape: tuple[int],
c_shape: tuple[int],
a_shape: tuple[int, ...],
b_shape: tuple[int, ...],
c_shape: tuple[int, ...],
a_dtype: torch.dtype,
b_dtype: torch.dtype,
c_dtype: torch.dtype,
Expand Down Expand Up @@ -243,7 +243,7 @@ def matmul_shapes(
lora_rank: int,
num_loras: int,
num_slices: int,
) -> tuple[tuple[int], tuple[int], tuple[int]]:
) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...]]:
"""
Given num_slices, return the shapes of the A, B, and C matrices
in A x B = C, for the op_type
Expand Down
3 changes: 3 additions & 0 deletions tests/engine/test_arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,11 @@ def test_is_type(type_hint, type, expected):

@pytest.mark.parametrize(("type_hints", "type", "expected"), [
({float, int}, int, True),
({int, tuple}, int, True),
({int, tuple[int]}, int, True),
({int, tuple[int, ...]}, int, True),
({int, tuple[int]}, float, False),
({int, tuple[int, ...]}, float, False),
({str, Literal["x", "y"]}, Literal, True),
])
def test_contains_type(type_hints, type, expected):
Expand Down
2 changes: 1 addition & 1 deletion tests/kernels/core/test_pos_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def _get_batch_tensor_shape(batch_size: int, seq_len: int, num_heads: int,
@torch.inference_mode()
def test_rotary_embedding(
is_neox_style: bool,
tensor_shape_fn: Callable[[int, int, int, int], tuple[int]],
tensor_shape_fn: Callable[[int, int, int, int], tuple[int, ...]],
batch_size: int,
seq_len: int,
num_heads: int,
Expand Down
4 changes: 2 additions & 2 deletions tests/kernels/test_onednn.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def onednn_gemm_test_helper(primitive_cache_size: int,
def test_onednn_int8_scaled_gemm(
n: int,
k: int,
m_list: tuple[int],
m_list: tuple[int, ...],
per_tensor_a_scale: bool,
per_tensor_b_scale: bool,
use_bias: bool,
Expand Down Expand Up @@ -196,7 +196,7 @@ def test_onednn_int8_scaled_gemm(
def test_onednn_gemm(
n: int,
k: int,
m_list: tuple[int],
m_list: tuple[int, ...],
use_bias: bool,
use_stride: bool,
dtype: torch.dtype,
Expand Down
12 changes: 6 additions & 6 deletions tests/models/multimodal/generation/vlm_utils/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class VLMTestInfo(NamedTuple):
# Function for converting ImageAssets to image embeddings;
# We need to define this explicitly for embedding tests
convert_assets_to_embeddings: Optional[Callable[[ImageTestAssets],
torch.Tensor]] = None
list[torch.Tensor]]] = None

# Exposed options for vLLM runner; we change these in several tests,
# but the defaults are derived from VllmRunner & the engine defaults
Expand Down Expand Up @@ -137,12 +137,12 @@ class VLMTestInfo(NamedTuple):
# Default expandable params per test; these defaults can be overridden in
# instances of this object; the complete set of test cases for the model
# is all combinations of .models + all fields below
max_tokens: Union[int, tuple[int]] = 128
num_logprobs: Union[int, tuple[int]] = 5
dtype: Union[str, Union[list[str], tuple[str, ...]]] = "auto"
distributed_executor_backend: Optional[Union[str, Iterable[str]]] = None
max_tokens: int = 128
num_logprobs: int = 5
dtype: str = "auto"
distributed_executor_backend: Optional[str] = None
# Only expanded in video tests
num_video_frames: Union[int, tuple[int]] = 16
num_video_frames: int = 16

# Fixed image sizes / image size factors; most tests use image_size_factors
# The values provided for these two fields will be stacked and expanded
Expand Down
8 changes: 5 additions & 3 deletions tests/v1/sample/test_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,10 @@ def _create_allowed_token_ids(


def _create_bad_words_token_ids(
batch_size: int, vocab_size: int,
bad_words_lengths: list[tuple[int]]) -> dict[int, list[list[int]]]:
batch_size: int,
vocab_size: int,
bad_words_lengths: tuple[int, ...],
) -> dict[int, list[list[int]]]:
bad_words_token_ids = {}
for batch_idx in range(batch_size):
token_ids_single_batch = []
Expand Down Expand Up @@ -402,7 +404,7 @@ def test_sampler_allowed_token_ids(device: str, batch_size: int,
@pytest.mark.parametrize("batch_size", [1, 2, 32])
@pytest.mark.parametrize("bad_words_lengths", [(1, ), (1, 3), (2, 2)])
def test_sampler_bad_words(device: str, batch_size: int,
bad_words_lengths: list[tuple[int]]):
bad_words_lengths: tuple[int, ...]):
"""
Test to verify that when the bad words restriction is present, tokens
are penalized based on their match with the bad words.
Expand Down
2 changes: 1 addition & 1 deletion tests/v1/spec_decode/test_eagle.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
def _create_proposer(
method: str,
num_speculative_tokens: int,
speculative_token_tree: Optional[list[tuple[int]]] = None,
speculative_token_tree: Optional[list[tuple[int, ...]]] = None,
) -> EagleProposer:
model_config = ModelConfig(model=model_dir,
runner="generate",
Expand Down
2 changes: 1 addition & 1 deletion vllm/distributed/device_communicators/ray_communicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def send(self, buf: "torch.Tensor", peer_rank: int) -> None:

def recv(
self,
shape: tuple[int],
shape: tuple[int, ...],
dtype: "torch.dtype",
peer_rank: int,
allocator: TorchTensorAllocator,
Expand Down
4 changes: 2 additions & 2 deletions vllm/logits_process.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from collections.abc import Sequence
from typing import Callable, Union

import torch
Expand Down Expand Up @@ -55,7 +55,7 @@ def __init__(self, bad_words_ids: list[list[int]]):

def __call__(
self,
past_tokens_ids: Union[list[int], tuple[int]],
past_tokens_ids: Sequence[int],
logits: torch.FloatTensor,
) -> torch.Tensor:
if self.word_bias is None:
Expand Down