From 34e82a6b5309689873b8f16bb50ffadae4b26c86 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Tue, 23 Sep 2025 00:50:56 +0800 Subject: [PATCH 1/2] remove v1 flags in platform Signed-off-by: Isotr0py --- tests/v1/test_async_llm_dp.py | 5 ----- vllm/engine/arg_utils.py | 12 ------------ vllm/platforms/cpu.py | 17 ----------------- vllm/platforms/cuda.py | 4 ---- vllm/platforms/interface.py | 14 -------------- vllm/platforms/rocm.py | 5 ----- vllm/platforms/tpu.py | 5 ----- vllm/platforms/xpu.py | 4 ---- 8 files changed, 66 deletions(-) diff --git a/tests/v1/test_async_llm_dp.py b/tests/v1/test_async_llm_dp.py index 32da58011be9..cef0f362cff8 100644 --- a/tests/v1/test_async_llm_dp.py +++ b/tests/v1/test_async_llm_dp.py @@ -13,7 +13,6 @@ from vllm.config import VllmConfig from vllm.engine.arg_utils import AsyncEngineArgs from vllm.inputs import PromptType -from vllm.platforms import current_platform from vllm.sampling_params import RequestOutputKind from vllm.v1.engine.async_llm import AsyncLLM from vllm.v1.engine.core_client import DPAsyncMPClient @@ -29,10 +28,6 @@ data_parallel_size=DP_SIZE, ) -if not current_platform.supports_v1(engine_args.create_model_config()): - pytest.skip(reason="Requires V1-supporting platform.", - allow_module_level=True) - async def generate( engine: AsyncLLM, diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index d4d801b155e1..741db531cd73 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1502,12 +1502,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool: _raise_or_fallback(feature_name=name, recommend_to_remove=True) return False - # Platforms must decide if they can support v1 for this model - if not current_platform.supports_v1(model_config=model_config): - _raise_or_fallback( - feature_name=f"device type={current_platform.device_type}", - recommend_to_remove=False) - return False ############################################################# # Experimental Features - allow users to opt in. @@ -1524,12 +1518,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool: recommend_to_remove=False) return False - # The platform may be supported on V1, but off by default for now. - if not current_platform.default_v1( # noqa: SIM103 - model_config=model_config) and _warn_or_fallback( - current_platform.device_name): - return False - if (current_platform.is_cpu() and model_config.get_sliding_window() is not None): _raise_or_fallback(feature_name="sliding window (CPU backend)", diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py index cd41832bc2ea..1e15dc6a91aa 100644 --- a/vllm/platforms/cpu.py +++ b/vllm/platforms/cpu.py @@ -328,23 +328,6 @@ def get_device_communicator_cls(cls) -> str: def supports_structured_output(cls) -> bool: return True - @classmethod - def supports_v1(cls, model_config) -> bool: - """Returns whether the current platform can support v1 for the supplied - model configuration. - """ - return True - - @classmethod - def default_v1(cls, model_config) -> bool: - """Returns whether the current platform can use v1 by default for the - supplied model configuration. - """ - arch = cls.get_cpu_architecture() - return (cls.supports_v1(model_config) - and arch in (CpuArchEnum.X86, CpuArchEnum.POWERPC, - CpuArchEnum.ARM, CpuArchEnum.S390X)) - @classmethod def opaque_attention_op(cls) -> bool: return True diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 7baa5a9742f4..2e14ad8ea016 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -380,10 +380,6 @@ def get_device_communicator_cls(cls) -> str: def supports_fp8(cls) -> bool: return cls.has_device_capability(89) - @classmethod - def supports_v1(cls, model_config: "ModelConfig") -> bool: - return True - @classmethod def use_custom_allreduce(cls) -> bool: return True diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index cad04ea14c01..3f13ae72fe4d 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -482,20 +482,6 @@ def use_all_gather(cls) -> bool: or parallel_config.distributed_executor_backend == "external_launcher") - @classmethod - def supports_v1(cls, model_config: ModelConfig) -> bool: - """Returns whether the current platform can support v1 for the supplied - model configuration. - """ - return False - - @classmethod - def default_v1(cls, model_config: ModelConfig) -> bool: - """ - Returns whether the current platform supports v1 by default. - """ - return cls.supports_v1(model_config) - @classmethod def use_custom_allreduce(cls) -> bool: """ diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 6a49bd4a3386..878718489fa8 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -396,11 +396,6 @@ def fp8_dtype(cls) -> torch.dtype: else: return torch.float8_e4m3fn - @classmethod - def supports_v1(cls, model_config: "ModelConfig") -> bool: - # V1 support on AMD gpus is experimental - return True - @classmethod def use_custom_allreduce(cls) -> bool: # We only enable custom allreduce for MI300 series diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py index 9852d948bc4b..e4c73b1bae6f 100644 --- a/vllm/platforms/tpu.py +++ b/vllm/platforms/tpu.py @@ -174,11 +174,6 @@ def get_device_communicator_cls(cls) -> str: def use_all_gather(cls) -> bool: return True - @classmethod - def supports_v1(cls, model_config: ModelConfig) -> bool: - # V1 support on TPU is experimental - return True - @classmethod def validate_request( cls, diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py index eb591ae4454e..034e039006c4 100644 --- a/vllm/platforms/xpu.py +++ b/vllm/platforms/xpu.py @@ -194,10 +194,6 @@ def is_data_center_gpu(cls) -> bool: def get_device_communicator_cls(cls) -> str: return "vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator" # noqa - @classmethod - def supports_v1(cls, model_config: ModelConfig) -> bool: - return True - @classmethod def device_count(cls) -> int: return torch.xpu.device_count() From 71e31d48971f17d58eeb74e9c03435f064d69711 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Tue, 23 Sep 2025 01:14:48 +0800 Subject: [PATCH 2/2] remove warn and fallback Signed-off-by: Isotr0py --- vllm/engine/arg_utils.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 741db531cd73..17df82c081ae 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1784,21 +1784,6 @@ def _raise_or_fallback(feature_name: str, recommend_to_remove: bool): logger.warning(msg) -def _warn_or_fallback(feature_name: str) -> bool: - if envs.is_set("VLLM_USE_V1") and envs.VLLM_USE_V1: - logger.warning( - "Detected VLLM_USE_V1=1 with %s. Usage should " - "be considered experimental. Please report any " - "issues on Github.", feature_name) - should_exit = False - else: - logger.info( - "%s is experimental on VLLM_USE_V1=1. " - "Falling back to V0 Engine.", feature_name) - should_exit = True - return should_exit - - def human_readable_int(value): """Parse human-readable integers like '1k', '2M', etc. Including decimal values with decimal multipliers.