5 changes: 0 additions & 5 deletions tests/v1/test_async_llm_dp.py
@@ -13,7 +13,6 @@
 from vllm.config import VllmConfig
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.inputs import PromptType
-from vllm.platforms import current_platform
 from vllm.sampling_params import RequestOutputKind
 from vllm.v1.engine.async_llm import AsyncLLM
 from vllm.v1.engine.core_client import DPAsyncMPClient
@@ -29,10 +28,6 @@
     data_parallel_size=DP_SIZE,
 )
 
-if not current_platform.supports_v1(engine_args.create_model_config()):
-    pytest.skip(reason="Requires V1-supporting platform.",
-                allow_module_level=True)
-
 
 async def generate(
     engine: AsyncLLM,
27 changes: 0 additions & 27 deletions vllm/engine/arg_utils.py
@@ -1502,12 +1502,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
                 _raise_or_fallback(feature_name=name, recommend_to_remove=True)
                 return False
 
-        # Platforms must decide if they can support v1 for this model
-        if not current_platform.supports_v1(model_config=model_config):
-            _raise_or_fallback(
-                feature_name=f"device type={current_platform.device_type}",
-                recommend_to_remove=False)
-            return False
         #############################################################
         # Experimental Features - allow users to opt in.
 
@@ -1524,12 +1518,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
                                recommend_to_remove=False)
             return False
 
-        # The platform may be supported on V1, but off by default for now.
-        if not current_platform.default_v1(  # noqa: SIM103
-                model_config=model_config) and _warn_or_fallback(
-                    current_platform.device_name):
-            return False
-
         if (current_platform.is_cpu()
                 and model_config.get_sliding_window() is not None):
             _raise_or_fallback(feature_name="sliding window (CPU backend)",
@@ -1796,21 +1784,6 @@ def _raise_or_fallback(feature_name: str, recommend_to_remove: bool):
     logger.warning(msg)
 
 
-def _warn_or_fallback(feature_name: str) -> bool:
-    if envs.is_set("VLLM_USE_V1") and envs.VLLM_USE_V1:
-        logger.warning(
-            "Detected VLLM_USE_V1=1 with %s. Usage should "
-            "be considered experimental. Please report any "
-            "issues on Github.", feature_name)
-        should_exit = False
-    else:
-        logger.info(
-            "%s is experimental on VLLM_USE_V1=1. "
-            "Falling back to V0 Engine.", feature_name)
-        should_exit = True
-    return should_exit
-
-
 def human_readable_int(value):
     """Parse human-readable integers like '1k', '2M', etc.
     Including decimal values with decimal multipliers.
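
For context, the hunks above delete the platform gate that chose between the V0 and V1 engines: a hard gate (supports_v1) and a soft, experimental gate (default_v1 combined with _warn_or_fallback, overridable by setting VLLM_USE_V1=1). Below is a minimal sketch of that pre-change decision flow, simplified to plain booleans and os.environ; the real code read the flag through vLLM's envs module and could also raise instead of falling back.

# Sketch of the V0/V1 gating this diff removes (simplified, illustrative).
import os


def v1_allowed(platform_supports_v1: bool, platform_default_v1: bool) -> bool:
    """Mirror the removed oracle logic: hard gate, then soft gate."""
    forced = os.environ.get("VLLM_USE_V1") == "1"
    if not platform_supports_v1:
        # supports_v1() == False: the platform cannot run V1 at all.
        return False
    if not platform_default_v1 and not forced:
        # default_v1() == False: supported but experimental, so fall
        # back to the V0 engine unless the user opted in explicitly.
        return False
    return True


assert v1_allowed(True, True)        # stable platform: V1 runs
assert not v1_allowed(True, False)   # experimental platform: V0 fallback
os.environ["VLLM_USE_V1"] = "1"
assert v1_allowed(True, False)       # explicit opt-in overrides the fallback
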
17 changes: 0 additions & 17 deletions vllm/platforms/cpu.py
@@ -328,23 +328,6 @@ def get_device_communicator_cls(cls) -> str:
     def supports_structured_output(cls) -> bool:
         return True
 
-    @classmethod
-    def supports_v1(cls, model_config) -> bool:
-        """Returns whether the current platform can support v1 for the supplied
-        model configuration.
-        """
-        return True
-
-    @classmethod
-    def default_v1(cls, model_config) -> bool:
-        """Returns whether the current platform can use v1 by default for the
-        supplied model configuration.
-        """
-        arch = cls.get_cpu_architecture()
-        return (cls.supports_v1(model_config)
-                and arch in (CpuArchEnum.X86, CpuArchEnum.POWERPC,
-                             CpuArchEnum.ARM, CpuArchEnum.S390X))
-
     @classmethod
     def opaque_attention_op(cls) -> bool:
         return True
4 changes: 0 additions & 4 deletions vllm/platforms/cuda.py
@@ -380,10 +380,6 @@ def get_device_communicator_cls(cls) -> str:
     def supports_fp8(cls) -> bool:
         return cls.has_device_capability(89)
 
-    @classmethod
-    def supports_v1(cls, model_config: "ModelConfig") -> bool:
-        return True
-
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         return True
14 changes: 0 additions & 14 deletions vllm/platforms/interface.py
@@ -482,20 +482,6 @@ def use_all_gather(cls) -> bool:
                 or parallel_config.distributed_executor_backend
                 == "external_launcher")
 
-    @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
-        """Returns whether the current platform can support v1 for the supplied
-        model configuration.
-        """
-        return False
-
-    @classmethod
-    def default_v1(cls, model_config: ModelConfig) -> bool:
-        """
-        Returns whether the current platform supports v1 by default.
-        """
-        return cls.supports_v1(model_config)
-
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         """
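
The interface.py hunk above removes the base-class defaults (supports_v1 returning False, default_v1 deferring to supports_v1) that each in-tree platform overrode, as the neighboring files show. As a hedged illustration of how an out-of-tree platform plugin would have used the same extension point before this removal, here is a hypothetical subclass; MyPlatform is illustrative only and not part of vLLM.

# Hypothetical platform plugin built on the now-removed hooks.
from vllm.platforms.interface import Platform


class MyPlatform(Platform):

    @classmethod
    def supports_v1(cls, model_config) -> bool:
        # Opt in: this platform can run the V1 engine for any model.
        return True

    @classmethod
    def default_v1(cls, model_config) -> bool:
        # Keep V1 opt-in only while support is still experimental.
        return False
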
5 changes: 0 additions & 5 deletions vllm/platforms/rocm.py
@@ -396,11 +396,6 @@ def fp8_dtype(cls) -> torch.dtype:
         else:
             return torch.float8_e4m3fn
 
-    @classmethod
-    def supports_v1(cls, model_config: "ModelConfig") -> bool:
-        # V1 support on AMD gpus is experimental
-        return True
-
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         # We only enable custom allreduce for MI300 series
5 changes: 0 additions & 5 deletions vllm/platforms/tpu.py
@@ -174,11 +174,6 @@ def get_device_communicator_cls(cls) -> str:
     def use_all_gather(cls) -> bool:
         return True
 
-    @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
-        # V1 support on TPU is experimental
-        return True
-
     @classmethod
     def validate_request(
         cls,
4 changes: 0 additions & 4 deletions vllm/platforms/xpu.py
@@ -194,10 +194,6 @@ def is_data_center_gpu(cls) -> bool:
     def get_device_communicator_cls(cls) -> str:
         return "vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator"  # noqa
 
-    @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
-        return True
-
     @classmethod
     def device_count(cls) -> int:
         return torch.xpu.device_count()