From d2a0be6b19cc7ae526788900740fe3a49b2bae9e Mon Sep 17 00:00:00 2001
From: Chendi Xue
Date: Sat, 20 Sep 2025 02:14:42 +0000
Subject: [PATCH 1/3] quick fix

Signed-off-by: Chendi Xue
---
 vllm/attention/layer.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index 3d1269c0ecea..3e682bbea1c1 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -29,6 +29,8 @@
 
 logger = init_logger(__name__)
 USE_XFORMERS_OPS = None
+tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe,
+                        ) if current_platform.is_cuda_alike() else ()
 
 
 def check_xformers_availability():
@@ -577,7 +579,7 @@ def unified_attention_fake(
     mutates_args=[],
     fake_impl=unified_attention_fake,
     dispatch_key=current_platform.dispatch_key,
-    tags=(torch._C.Tag.cudagraph_unsafe, ),
+    tags=tag_cudagraph_unsafe,
 )
 
 
@@ -628,5 +630,5 @@ def unified_attention_with_output_fake(
     mutates_args=["output", "output_block_scale"],
     fake_impl=unified_attention_with_output_fake,
     dispatch_key=current_platform.dispatch_key,
-    tags=(torch._C.Tag.cudagraph_unsafe, ),
+    tags=tag_cudagraph_unsafe,
 )

From 4b71b098c4b519e17672fb6c5d5e3f3d97152b78 Mon Sep 17 00:00:00 2001
From: Chendi Xue
Date: Sat, 20 Sep 2025 02:21:32 +0000
Subject: [PATCH 2/3] Take advice from gemini

Signed-off-by: Chendi Xue
---
 vllm/attention/layer.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index 3e682bbea1c1..36a41ff51219 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -29,8 +29,10 @@
 
 logger = init_logger(__name__)
 USE_XFORMERS_OPS = None
-tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe,
-                        ) if current_platform.is_cuda_alike() else ()
+try:
+    tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, )
+except AttributeError:
+    tag_cudagraph_unsafe = ()
 
 
 def check_xformers_availability():

From f057dcaeadca5503b430ed0d5f68a741a19bb158 Mon Sep 17 00:00:00 2001
From: Chendi Xue
Date: Sat, 20 Sep 2025 02:35:48 +0000
Subject: [PATCH 3/3] Fix for mypy

Signed-off-by: Chendi Xue
---
 vllm/attention/layer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index 36a41ff51219..544a72052442 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -32,7 +32,7 @@
 try:
     tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, )
 except AttributeError:
-    tag_cudagraph_unsafe = ()
+    tag_cudagraph_unsafe = ()  # type: ignore[assignment]
 
 
 def check_xformers_availability():
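
For reference, below is a minimal standalone sketch of the fallback these three patches converge on: probe torch._C.Tag.cudagraph_unsafe once at import time and fall back to an empty tuple where the tag does not exist, so op registration still works on older torch builds and out-of-tree platforms. It assumes only that torch is importable; the print statement and the commented registration call are illustrative stand-ins, not vllm code.

import torch

try:
    # Present on recent torch builds; the whole attribute chain is inside the
    # try, so a missing Tag class is handled the same way as a missing member.
    tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, )
except AttributeError:
    # Older torch or platforms without the tag: register ops with no extra tags.
    # The tuple element type differs between branches, hence the mypy override
    # added in PATCH 3/3.
    tag_cudagraph_unsafe = ()  # type: ignore[assignment]

# The tuple is then passed straight through to the registration helper, e.g.
# (shape of the call as in the diff, shown here only as a comment):
#   direct_register_custom_op(..., tags=tag_cudagraph_unsafe)
print("cudagraph_unsafe tag available:", bool(tag_cudagraph_unsafe))
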