From a3a54cdce01cf0ab6ea4cc9fab4173d8e017b181 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Date: Wed, 17 Sep 2025 17:51:05 -0700
Subject: [PATCH] [V0 Deprecation] Remove misc V0 tests

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
---
 tests/model_executor/test_logits_processor.py | 98 -------------------
 tests/test_cache_block_hashing.py             | 92 -----------------
 2 files changed, 190 deletions(-)
 delete mode 100644 tests/model_executor/test_logits_processor.py
 delete mode 100644 tests/test_cache_block_hashing.py

diff --git a/tests/model_executor/test_logits_processor.py b/tests/model_executor/test_logits_processor.py
deleted file mode 100644
index 532ebba038d3..000000000000
--- a/tests/model_executor/test_logits_processor.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import random
-from unittest.mock import patch
-
-import pytest
-import torch
-
-from vllm.model_executor.layers.logits_processor import LogitsProcessor
-from vllm.model_executor.sampling_metadata import SamplingMetadata
-from vllm.model_executor.utils import set_random_seed
-from vllm.sequence import SamplingParams, SequenceData, SequenceGroupMetadata
-from vllm.utils import is_pin_memory_available
-
-
-class MockLogitsProcessor(LogitsProcessor):
-
-    def __init__(self, vocab_size: int, scale: float,
-                 fake_logits: torch.Tensor):
-        super().__init__(vocab_size=vocab_size, scale=scale)
-        self.fake_logits = fake_logits.clone()
-
-    def forward(self, *args, **kwargs):
-        with patch(
-                "vllm.model_executor.layers.logits_processor._prune_hidden_states",
-                lambda x, y: x
-        ), patch(
-                "vllm.model_executor.layers.logits_processor.LogitsProcessor._get_logits",
-                lambda *args, **kwargs: self.fake_logits):
-            return super().forward(*args, **kwargs)
-
-
-def _prepare_test(
-        batch_size: int
-) -> tuple[torch.Tensor, torch.Tensor, MockLogitsProcessor]:
-    vocab_size = 32000
-    input_tensor = torch.rand((batch_size, 1024), dtype=torch.float16)
-    fake_logits = torch.full((batch_size, vocab_size),
-                             1e-2,
-                             dtype=input_tensor.dtype)
-    logits_processor = MockLogitsProcessor(32000, 0.5, fake_logits)
-    return input_tensor, fake_logits, logits_processor
-
-
-RANDOM_SEEDS = list(range(128))
-CUDA_DEVICES = [
-    f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)
-]
-
-
-@pytest.mark.parametrize("seed", RANDOM_SEEDS)
-@pytest.mark.parametrize("device", CUDA_DEVICES)
-def test_logits_processors(seed: int, device: str):
-    set_random_seed(seed)
-    torch.set_default_device(device)
-    batch_size = random.randint(1, 256)
-    input_tensor, fake_logits, logits_processor = _prepare_test(batch_size)
-
-    # This sample logits processor gives infinite score to the i-th token,
-    # where i is the length of the input sequence.
-    # We therefore expect the output token sequence to be [0, 1, 2, ...]
-    def pick_ith(token_ids, logits):
-        logits[len(token_ids)] = float("inf")
-        return logits
-
-    seq_group_metadata_list = []
-    seq_lens = []
-    for i in range(batch_size):
-        seq_group_metadata_list.append(
-            SequenceGroupMetadata(
-                request_id=f"test_{i}",
-                is_prompt=True,
-                seq_data={0: SequenceData.from_seqs([1, 2, 3])},
-                sampling_params=SamplingParams(temperature=0,
-                                               logits_processors=[pick_ith]),
-                block_tables={0: [1]},
-            ))
-        seq_lens.append(seq_group_metadata_list[-1].seq_data[0].get_len())
-
-    sampling_metadata = SamplingMetadata.prepare(
-        seq_group_metadata_list,
-        seq_lens,
-        query_lens=seq_lens,
-        device=device,
-        pin_memory=is_pin_memory_available())
-    logits_processor_output = logits_processor(
-        lm_head=None,
-        hidden_states=input_tensor,
-        sampling_metadata=sampling_metadata)
-
-    assert torch.isinf(logits_processor_output[:, 0]).all()
-
-    fake_logits *= logits_processor.scale
-    torch.testing.assert_close(logits_processor_output[:, 1],
-                               fake_logits[:, 1],
-                               rtol=1e-4,
-                               atol=0.0)
diff --git a/tests/test_cache_block_hashing.py b/tests/test_cache_block_hashing.py
deleted file mode 100644
index 1dba0fd0fb3d..000000000000
--- a/tests/test_cache_block_hashing.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""Test hashing of cache blocks.
-
-Run `pytest tests/test_cache_block_hashing.py`.
-"""
-from typing import Optional
-
-import pytest
-
-from vllm.inputs import token_inputs
-from vllm.lora.request import LoRARequest
-from vllm.sequence import Sequence
-from vllm.transformers_utils.tokenizer import get_tokenizer
-
-# Make two prefixes with different first blocks.
-prefix_start = [("You are an expert"), ("You are a")]
-prefix_common = (
-    " school principal, skilled in effectively managing "
-    "faculty and staff. Draft 10-15 questions for a potential first grade "
-    "Head Teacher for my K-12, all-girls', independent school that emphasizes "
-    "community, joyful discovery, and life-long learning. The candidate is "
-    "coming in for a first-round panel interview for a 8th grade Math "
-    "teaching role. They have 5 years of previous teaching experience "
-    "as an assistant teacher at a co-ed, public school with experience "
-    "in middle school math teaching. Based on this, fulfill "
-    "the following: ")
-prefixes = [start + prefix_common for start in prefix_start]
-
-# Sample prompts.
-sample_prompts = [
-    "Hello, my name is", "The president of the United States is",
-    "The capital of France is", "The future of AI is"
-]
-
-
-# Helper function.
-def flatten_2d(li):
-    return [lss for ls in li for lss in ls]
-
-
-@pytest.mark.parametrize("model", ["facebook/opt-125m"])
-@pytest.mark.parametrize("block_size", [16])
-@pytest.mark.parametrize("max_num_seqs", [256])
-@pytest.mark.parametrize("concurrent_lora_int_ids",
-                         [[None], [1], [None, 1], [None, 1, 2], [1, 2]])
-def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int,
-                             concurrent_lora_int_ids: list[Optional[int]]):
-
-    tokenizer = get_tokenizer("facebook/opt-125m")
-
-    hashes: list[list[list[int]]] = []
-
-    for prefix in prefixes:
-        for lora_int_id in concurrent_lora_int_ids:
-            lora_request = None
-
-            if lora_int_id is not None:
-                lora_request = LoRARequest(
-                    f"example_lora_{lora_int_id}",
-                    lora_int_id,
-                    f"example/path/to/lora_{lora_int_id}",
-                )
-
-            hashes.append([])
-            prompts = [prefix + prompt for prompt in sample_prompts]
-            for seq_id, prompt in enumerate(prompts):
-                hashes[-1].append([])
-                prompt_token_ids = tokenizer.encode(prompt)
-                seq = Sequence(seq_id,
-                               inputs=token_inputs(prompt_token_ids,
-                                                   prompt=prompt),
-                               block_size=block_size,
-                               eos_token_id=tokenizer.eos_token_id,
-                               lora_request=lora_request)
-
-                num_blocks = len(prompt_token_ids) // block_size
-                for idx in range(num_blocks):
-                    hashes[-1][-1].append(seq.hash_of_block(idx))
-
-    # Check that hashes made with two prefixes with different first blocks are
-    # different everywhere.
-    for hash0, hash1 in zip(flatten_2d(hashes[0]), flatten_2d(hashes[1])):
-        assert (hash0 != hash1)
-
-    # Check that hashes of different prompts made with the same prefix are the
-    # same until the hashes that contain the prompt.
-    for hash_pref in hashes:
-        same_hashes = [tuple(h[:-1]) for h in hash_pref]
-        different_hashes = [h[-1] for h in hash_pref]
-        assert (len(set(same_hashes)) == 1)
-        assert (len(set(different_hashes)) == len(different_hashes))