From 8a673cf61f5877fe5d9e6794bb07943a07f25165 Mon Sep 17 00:00:00 2001
From: Boyuan Feng
Date: Fri, 19 Sep 2025 22:54:29 -0700
Subject: [PATCH 1/4] gate cudagraph_unsafe tag for torch-2.9

Signed-off-by: Boyuan Feng
---
 tests/compile/silly_attention.py | 9 +++++++--
 vllm/attention/layer.py          | 8 +++++---
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/tests/compile/silly_attention.py b/tests/compile/silly_attention.py
index baedafbae99f..2b422ae305ab 100644
--- a/tests/compile/silly_attention.py
+++ b/tests/compile/silly_attention.py
@@ -8,7 +8,12 @@
 import torch
 from torch.library import Library
 
-from vllm.utils import direct_register_custom_op
+from vllm.utils import direct_register_custom_op, is_torch_equal_or_newer
+
+if is_torch_equal_or_newer("2.9.0.dev"):
+    tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, )
+else:
+    tag_cudagraph_unsafe = ()  # type: ignore[assignment]
 
 # Shared library for all compilation test operations
 # Using "silly" namespace to match existing test expectations
@@ -60,5 +65,5 @@ def silly_attention_fake(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
     mutates_args=["out"],
     fake_impl=silly_attention_fake,
     target_lib=silly_lib,
-    tags=(torch._C.Tag.cudagraph_unsafe, ),
+    tags=tag_cudagraph_unsafe,
 )
diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index 544a72052442..eaaa5a4dbe04 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -25,13 +25,15 @@ from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
 from vllm.model_executor.models.vision import get_vit_attn_backend
 from vllm.platforms import _Backend, current_platform
-from vllm.utils import GiB_bytes, direct_register_custom_op
+from vllm.utils import (GiB_bytes, direct_register_custom_op,
+                        is_torch_equal_or_newer)
 
 logger = init_logger(__name__)
 
 USE_XFORMERS_OPS = None
-try:
+
+if is_torch_equal_or_newer("2.9.0.dev"):
     tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, )
-except AttributeError:
+else:
     tag_cudagraph_unsafe = ()  # type: ignore[assignment]
 
 

From 68ed7cced9ccefc9ae0c29423e1b0e315a38645a Mon Sep 17 00:00:00 2001
From: Boyuan Feng
Date: Fri, 19 Sep 2025 22:58:46 -0700
Subject: [PATCH 2/4] nit

Signed-off-by: Boyuan Feng
---
 tests/compile/silly_attention.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tests/compile/silly_attention.py b/tests/compile/silly_attention.py
index 2b422ae305ab..db4dd4061a63 100644
--- a/tests/compile/silly_attention.py
+++ b/tests/compile/silly_attention.py
@@ -8,12 +8,8 @@
 import torch
 from torch.library import Library
 
-from vllm.utils import direct_register_custom_op, is_torch_equal_or_newer
-
-if is_torch_equal_or_newer("2.9.0.dev"):
-    tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, )
-else:
-    tag_cudagraph_unsafe = ()  # type: ignore[assignment]
+from vllm.attention.layer import tag_cudagraph_unsafe
+from vllm.utils import direct_register_custom_op
 
 # Shared library for all compilation test operations
 # Using "silly" namespace to match existing test expectations

From 56d5f48d586e373162ac27dc3084fe68070060e5 Mon Sep 17 00:00:00 2001
From: Boyuan Feng
Date: Fri, 19 Sep 2025 23:04:44 -0700
Subject: [PATCH 3/4] move to vllm.utils.__init__

Signed-off-by: Boyuan Feng
---
 tests/compile/silly_attention.py | 3 +--
 vllm/attention/layer.py          | 7 +------
 vllm/utils/__init__.py           | 6 ++++++
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/compile/silly_attention.py b/tests/compile/silly_attention.py
index db4dd4061a63..e69888b737e9 100644
--- a/tests/compile/silly_attention.py
+++ b/tests/compile/silly_attention.py
@@ -8,8 +8,7 @@
 import torch
 from torch.library import Library
 
-from vllm.attention.layer import tag_cudagraph_unsafe
-from vllm.utils import direct_register_custom_op
+from vllm.utils import direct_register_custom_op, tag_cudagraph_unsafe
 
 # Shared library for all compilation test operations
 # Using "silly" namespace to match existing test expectations
diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index eaaa5a4dbe04..7ff064c0b968 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -26,16 +26,11 @@ from vllm.model_executor.models.vision import get_vit_attn_backend
 from vllm.platforms import _Backend, current_platform
 from vllm.utils import (GiB_bytes, direct_register_custom_op,
-                        is_torch_equal_or_newer)
+                        tag_cudagraph_unsafe)
 
 logger = init_logger(__name__)
 
 USE_XFORMERS_OPS = None
 
-if is_torch_equal_or_newer("2.9.0.dev"):
-    tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, )
-else:
-    tag_cudagraph_unsafe = ()  # type: ignore[assignment]
-
 
 def check_xformers_availability():
     global USE_XFORMERS_OPS
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index fd1c0af31269..187667a59329 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -3472,3 +3472,9 @@ def length_from_prompt_token_ids_or_embeds(
                 f" prompt_token_ids={prompt_token_len}"
                 f" prompt_embeds={prompt_embeds_len}")
     return prompt_token_len
+
+
+if is_torch_equal_or_newer("2.9.0.dev"):
+    tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, )
+else:
+    tag_cudagraph_unsafe = ()  # type: ignore[assignment]

From 99aee9710507e6b243ee5715306b1291b328be75 Mon Sep 17 00:00:00 2001
From: Boyuan Feng
Date: Fri, 19 Sep 2025 23:31:16 -0700
Subject: [PATCH 4/4] gate by is_cuda_alike

Signed-off-by: Boyuan Feng
---
 vllm/utils/__init__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index 187667a59329..f225b396b45a 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -3475,6 +3475,8 @@ def length_from_prompt_token_ids_or_embeds(
 
 
 if is_torch_equal_or_newer("2.9.0.dev"):
-    tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, )
+    from vllm.platforms import current_platform
+    tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe,
+                            ) if current_platform.is_cuda_alike() else ()
 else:
     tag_cudagraph_unsafe = ()  # type: ignore[assignment]
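
For reference, a minimal standalone sketch of the gating the series converges on: the
cudagraph_unsafe tag is attached only when torch >= 2.9.0.dev exposes it and the platform
is CUDA-alike. This is an illustration, not the exact module code from PATCH 4/4: it
flattens the nested check in vllm/utils/__init__.py into a single condition and imports
current_platform eagerly, whereas the patch imports it lazily inside the branch.

    import torch

    from vllm.platforms import current_platform
    from vllm.utils import is_torch_equal_or_newer

    # Equivalent to the PATCH 4/4 logic: use the cudagraph_unsafe tag only when
    # torch >= 2.9.0.dev provides it and the current platform is CUDA-alike;
    # otherwise fall back to an empty tag tuple.
    if is_torch_equal_or_newer("2.9.0.dev") and current_platform.is_cuda_alike():
        tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, )
    else:
        tag_cudagraph_unsafe = ()

    # Callers pass the tuple through unchanged, e.g.
    # direct_register_custom_op(..., tags=tag_cudagraph_unsafe), so on older
    # torch or non-CUDA platforms the op is simply registered without the tag.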