5 changes: 0 additions & 5 deletions tests/v1/test_async_llm_dp.py
@@ -13,7 +13,6 @@
 from vllm.config import VllmConfig
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.inputs import PromptType
-from vllm.platforms import current_platform
 from vllm.sampling_params import RequestOutputKind
 from vllm.v1.engine.async_llm import AsyncLLM
 from vllm.v1.engine.core_client import DPAsyncMPClient
@@ -29,10 +28,6 @@
     data_parallel_size=DP_SIZE,
 )
 
-if not current_platform.supports_v1(engine_args.create_model_config()):
-    pytest.skip(reason="Requires V1-supporting platform.",
-                allow_module_level=True)
-
 
 async def generate(
     engine: AsyncLLM,
27 changes: 0 additions & 27 deletions vllm/engine/arg_utils.py
@@ -1502,12 +1502,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
                 _raise_or_fallback(feature_name=name, recommend_to_remove=True)
                 return False
 
-        # Platforms must decide if they can support v1 for this model
-        if not current_platform.supports_v1(model_config=model_config):
-            _raise_or_fallback(
-                feature_name=f"device type={current_platform.device_type}",
-                recommend_to_remove=False)
-            return False
         #############################################################
         # Experimental Features - allow users to opt in.
 
@@ -1524,12 +1518,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
                                recommend_to_remove=False)
             return False
 
-        # The platform may be supported on V1, but off by default for now.
-        if not current_platform.default_v1(  # noqa: SIM103
-                model_config=model_config) and _warn_or_fallback(
-                    current_platform.device_name):
-            return False
-
         if (current_platform.is_cpu()
                 and model_config.get_sliding_window() is not None):
             _raise_or_fallback(feature_name="sliding window (CPU backend)",
@@ -1796,21 +1784,6 @@ def _raise_or_fallback(feature_name: str, recommend_to_remove: bool):
     logger.warning(msg)
 
 
-def _warn_or_fallback(feature_name: str) -> bool:
-    if envs.is_set("VLLM_USE_V1") and envs.VLLM_USE_V1:
-        logger.warning(
-            "Detected VLLM_USE_V1=1 with %s. Usage should "
-            "be considered experimental. Please report any "
-            "issues on Github.", feature_name)
-        should_exit = False
-    else:
-        logger.info(
-            "%s is experimental on VLLM_USE_V1=1. "
-            "Falling back to V0 Engine.", feature_name)
-        should_exit = True
-    return should_exit
-
-
 def human_readable_int(value):
     """Parse human-readable integers like '1k', '2M', etc.
     Including decimal values with decimal multipliers.
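
For context, the hunks above delete the platform gate that chose between the V0 and V1 engines: a hard gate (supports_v1) and a soft, experimental gate (default_v1 combined with _warn_or_fallback, overridable by setting VLLM_USE_V1=1). Below is a minimal sketch of that pre-change decision flow, simplified to plain booleans and os.environ; the real code read the flag through vLLM's envs module and could also raise instead of falling back.

# Sketch of the V0/V1 gating this diff removes (simplified, illustrative).
import os


def v1_allowed(platform_supports_v1: bool, platform_default_v1: bool) -> bool:
    """Mirror the removed oracle logic: hard gate, then soft gate."""
    forced = os.environ.get("VLLM_USE_V1") == "1"
    if not platform_supports_v1:
        # supports_v1() == False: the platform cannot run V1 at all.
        return False
    if not platform_default_v1 and not forced:
        # default_v1() == False: supported but experimental, so fall
        # back to the V0 engine unless the user opted in explicitly.
        return False
    return True


assert v1_allowed(True, True)        # stable platform: V1 runs
assert not v1_allowed(True, False)   # experimental platform: V0 fallback
os.environ["VLLM_USE_V1"] = "1"
assert v1_allowed(True, False)       # explicit opt-in overrides the fallback
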
17 changes: 0 additions & 17 deletions vllm/platforms/cpu.py
@@ -328,23 +328,6 @@ def get_device_communicator_cls(cls) -> str:
     def supports_structured_output(cls) -> bool:
         return True
 
-    @classmethod
-    def supports_v1(cls, model_config) -> bool:
-        """Returns whether the current platform can support v1 for the supplied
-        model configuration.
-        """
-        return True
-
-    @classmethod
-    def default_v1(cls, model_config) -> bool:
-        """Returns whether the current platform can use v1 by default for the
-        supplied model configuration.
-        """
-        arch = cls.get_cpu_architecture()
-        return (cls.supports_v1(model_config)
-                and arch in (CpuArchEnum.X86, CpuArchEnum.POWERPC,
-                             CpuArchEnum.ARM, CpuArchEnum.S390X))
-
     @classmethod
     def opaque_attention_op(cls) -> bool:
         return True
4 changes: 0 additions & 4 deletions vllm/platforms/cuda.py
@@ -380,10 +380,6 @@ def get_device_communicator_cls(cls) -> str:
     def supports_fp8(cls) -> bool:
         return cls.has_device_capability(89)
 
-    @classmethod
-    def supports_v1(cls, model_config: "ModelConfig") -> bool:
-        return True
-
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         return True
14 changes: 0 additions & 14 deletions vllm/platforms/interface.py
@@ -482,20 +482,6 @@ def use_all_gather(cls) -> bool:
                 or parallel_config.distributed_executor_backend
                 == "external_launcher")
 
-    @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
-        """Returns whether the current platform can support v1 for the supplied
-        model configuration.
-        """
-        return False
-
-    @classmethod
-    def default_v1(cls, model_config: ModelConfig) -> bool:
-        """
-        Returns whether the current platform supports v1 by default.
-        """
-        return cls.supports_v1(model_config)
-
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         """
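
The interface.py hunk above removes the base-class defaults (supports_v1 returning False, default_v1 deferring to supports_v1) that each in-tree platform overrode, as the neighboring files show. As a hedged illustration of how an out-of-tree platform plugin would have used the same extension point before this removal, here is a hypothetical subclass; MyPlatform is illustrative only and not part of vLLM.

# Hypothetical platform plugin built on the now-removed hooks.
from vllm.platforms.interface import Platform


class MyPlatform(Platform):

    @classmethod
    def supports_v1(cls, model_config) -> bool:
        # Opt in: this platform can run the V1 engine for any model.
        return True

    @classmethod
    def default_v1(cls, model_config) -> bool:
        # Keep V1 opt-in only while support is still experimental.
        return False
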
5 changes: 0 additions & 5 deletions vllm/platforms/rocm.py
@@ -396,11 +396,6 @@ def fp8_dtype(cls) -> torch.dtype:
         else:
             return torch.float8_e4m3fn
 
-    @classmethod
-    def supports_v1(cls, model_config: "ModelConfig") -> bool:
-        # V1 support on AMD gpus is experimental
-        return True
-
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         # We only enable custom allreduce for MI300 series
5 changes: 0 additions & 5 deletions vllm/platforms/tpu.py
@@ -174,11 +174,6 @@ def get_device_communicator_cls(cls) -> str:
     def use_all_gather(cls) -> bool:
         return True
 
-    @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
-        # V1 support on TPU is experimental
-        return True
-
     @classmethod
     def validate_request(
         cls,
4 changes: 0 additions & 4 deletions vllm/platforms/xpu.py
@@ -194,10 +194,6 @@ def is_data_center_gpu(cls) -> bool:
     def get_device_communicator_cls(cls) -> str:
         return "vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator"  # noqa
 
-    @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
-        return True
-
     @classmethod
     def device_count(cls) -> int:
         return torch.xpu.device_count()