From 34e82a6b5309689873b8f16bb50ffadae4b26c86 Mon Sep 17 00:00:00 2001
From: Isotr0py <mozf@mail2.sysu.edu.cn>
Date: Tue, 23 Sep 2025 00:50:56 +0800
Subject: [PATCH 1/2] Remove supports_v1/default_v1 platform hooks and their call sites

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
---
 tests/v1/test_async_llm_dp.py |  5 -----
 vllm/engine/arg_utils.py      | 12 ------------
 vllm/platforms/cpu.py         | 17 -----------------
 vllm/platforms/cuda.py        |  4 ----
 vllm/platforms/interface.py   | 14 --------------
 vllm/platforms/rocm.py        |  5 -----
 vllm/platforms/tpu.py         |  5 -----
 vllm/platforms/xpu.py         |  4 ----
 8 files changed, 66 deletions(-)

diff --git a/tests/v1/test_async_llm_dp.py b/tests/v1/test_async_llm_dp.py
index 32da58011be9..cef0f362cff8 100644
--- a/tests/v1/test_async_llm_dp.py
+++ b/tests/v1/test_async_llm_dp.py
@@ -13,7 +13,6 @@
 from vllm.config import VllmConfig
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.inputs import PromptType
-from vllm.platforms import current_platform
 from vllm.sampling_params import RequestOutputKind
 from vllm.v1.engine.async_llm import AsyncLLM
 from vllm.v1.engine.core_client import DPAsyncMPClient
@@ -29,10 +28,6 @@
     data_parallel_size=DP_SIZE,
 )
 
-if not current_platform.supports_v1(engine_args.create_model_config()):
-    pytest.skip(reason="Requires V1-supporting platform.",
-                allow_module_level=True)
-
 
 async def generate(
         engine: AsyncLLM,
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index d4d801b155e1..741db531cd73 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1502,12 +1502,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
             _raise_or_fallback(feature_name=name, recommend_to_remove=True)
             return False
 
-        # Platforms must decide if they can support v1 for this model
-        if not current_platform.supports_v1(model_config=model_config):
-            _raise_or_fallback(
-                feature_name=f"device type={current_platform.device_type}",
-                recommend_to_remove=False)
-            return False
         #############################################################
         # Experimental Features - allow users to opt in.
 
@@ -1524,12 +1518,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
                                    recommend_to_remove=False)
                 return False
 
-        # The platform may be supported on V1, but off by default for now.
-        if not current_platform.default_v1(  # noqa: SIM103
-                model_config=model_config) and _warn_or_fallback(
-                    current_platform.device_name):
-            return False
-
         if (current_platform.is_cpu()
                 and model_config.get_sliding_window() is not None):
             _raise_or_fallback(feature_name="sliding window (CPU backend)",
diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py
index cd41832bc2ea..1e15dc6a91aa 100644
--- a/vllm/platforms/cpu.py
+++ b/vllm/platforms/cpu.py
@@ -328,23 +328,6 @@ def get_device_communicator_cls(cls) -> str:
     def supports_structured_output(cls) -> bool:
         return True
 
-    @classmethod
-    def supports_v1(cls, model_config) -> bool:
-        """Returns whether the current platform can support v1 for the supplied
-        model configuration.
-        """
-        return True
-
-    @classmethod
-    def default_v1(cls, model_config) -> bool:
-        """Returns whether the current platform can use v1 by default for the
-        supplied model configuration.
-        """
-        arch = cls.get_cpu_architecture()
-        return (cls.supports_v1(model_config)
-                and arch in (CpuArchEnum.X86, CpuArchEnum.POWERPC,
-                             CpuArchEnum.ARM, CpuArchEnum.S390X))
-
     @classmethod
     def opaque_attention_op(cls) -> bool:
         return True
diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index 7baa5a9742f4..2e14ad8ea016 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -380,10 +380,6 @@ def get_device_communicator_cls(cls) -> str:
     def supports_fp8(cls) -> bool:
         return cls.has_device_capability(89)
 
-    @classmethod
-    def supports_v1(cls, model_config: "ModelConfig") -> bool:
-        return True
-
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         return True
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index cad04ea14c01..3f13ae72fe4d 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -482,20 +482,6 @@ def use_all_gather(cls) -> bool:
                 or parallel_config.distributed_executor_backend
                 == "external_launcher")
 
-    @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
-        """Returns whether the current platform can support v1 for the supplied
-        model configuration.
-        """
-        return False
-
-    @classmethod
-    def default_v1(cls, model_config: ModelConfig) -> bool:
-        """
-        Returns whether the current platform supports v1 by default.
-        """
-        return cls.supports_v1(model_config)
-
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         """
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 6a49bd4a3386..878718489fa8 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -396,11 +396,6 @@ def fp8_dtype(cls) -> torch.dtype:
         else:
             return torch.float8_e4m3fn
 
-    @classmethod
-    def supports_v1(cls, model_config: "ModelConfig") -> bool:
-        # V1 support on AMD gpus is experimental
-        return True
-
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         # We only enable custom allreduce for MI300 series
diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py
index 9852d948bc4b..e4c73b1bae6f 100644
--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -174,11 +174,6 @@ def get_device_communicator_cls(cls) -> str:
     def use_all_gather(cls) -> bool:
         return True
 
-    @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
-        # V1 support on TPU is experimental
-        return True
-
     @classmethod
     def validate_request(
         cls,
diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py
index eb591ae4454e..034e039006c4 100644
--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -194,10 +194,6 @@ def is_data_center_gpu(cls) -> bool:
     def get_device_communicator_cls(cls) -> str:
         return "vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator"  # noqa
 
-    @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
-        return True
-
     @classmethod
     def device_count(cls) -> int:
         return torch.xpu.device_count()

From 71e31d48971f17d58eeb74e9c03435f064d69711 Mon Sep 17 00:00:00 2001
From: Isotr0py <mozf@mail2.sysu.edu.cn>
Date: Tue, 23 Sep 2025 01:14:48 +0800
Subject: [PATCH 2/2] Remove _warn_or_fallback helper from arg_utils

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
---
 vllm/engine/arg_utils.py | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 741db531cd73..17df82c081ae 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1784,21 +1784,6 @@ def _raise_or_fallback(feature_name: str, recommend_to_remove: bool):
     logger.warning(msg)
 
 
-def _warn_or_fallback(feature_name: str) -> bool:
-    if envs.is_set("VLLM_USE_V1") and envs.VLLM_USE_V1:
-        logger.warning(
-            "Detected VLLM_USE_V1=1 with %s. Usage should "
-            "be considered experimental. Please report any "
-            "issues on Github.", feature_name)
-        should_exit = False
-    else:
-        logger.info(
-            "%s is experimental on VLLM_USE_V1=1. "
-            "Falling back to V0 Engine.", feature_name)
-        should_exit = True
-    return should_exit
-
-
 def human_readable_int(value):
     """Parse human-readable integers like '1k', '2M', etc.
     Including decimal values with decimal multipliers.