From 0b4bc8d69e7bf454c2d1f5a814440c41b5bffd05 Mon Sep 17 00:00:00 2001
From: Sugar-zsg <952242923@qq.com>
Date: Mon, 15 Sep 2025 16:35:32 +0800
Subject: [PATCH 1/6] Directly get max encoder len from VLLM config

Improves performance by getting the max encoder length directly from the initialized `vllm_config.scheduler_config`. This avoids the expensive lookup and re-computation previously done by `MULTIMODAL_REGISTRY.get_encdec_max_encoder_len`.

Signed-off-by: Sugar-zsg <952242923@qq.com>
---
 vllm/attention/layers/cross_attention.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/vllm/attention/layers/cross_attention.py b/vllm/attention/layers/cross_attention.py
index c24fa4e15f67..db714fb9ff9e 100644
--- a/vllm/attention/layers/cross_attention.py
+++ b/vllm/attention/layers/cross_attention.py
@@ -22,10 +22,15 @@
 
 logger = init_logger(__name__)
 
-
-def _get_max_encoder_len(vllm_config: VllmConfig) -> int:
-    return MULTIMODAL_REGISTRY.get_encdec_max_encoder_len(
-        vllm_config.model_config)
+def _get_max_encoder_len(vllm_config: "VllmConfig") -> int:
+    """Gets the max number of encoder tokens from the config."""
+    max_len = getattr(
+        getattr(vllm_config, "scheduler_config", None),
+        "max_num_encoder_input_tokens",
+        0,
+    )
+    return (max_len if isinstance(max_len, int) and
+            max_len > 0 else 0)
 
 
 def _get_cross_slot_mapping(encoder_seq_lens: np.ndarray,

From 5a5c11f16808cf6e82f410ca5b9a5668cd2d9d3e Mon Sep 17 00:00:00 2001
From: Sugar-zsg <952242923@qq.com>
Date: Mon, 15 Sep 2025 17:18:58 +0800
Subject: [PATCH 2/6] Collapse the return expression onto a single line

Signed-off-by: Sugar-zsg <952242923@qq.com>
---
 vllm/attention/layers/cross_attention.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/attention/layers/cross_attention.py b/vllm/attention/layers/cross_attention.py
index db714fb9ff9e..4623fb879b2a 100644
--- a/vllm/attention/layers/cross_attention.py
+++ b/vllm/attention/layers/cross_attention.py
@@ -29,8 +29,7 @@ def _get_max_encoder_len(vllm_config: "VllmConfig") -> int:
         "max_num_encoder_input_tokens",
         0,
     )
-    return (max_len if isinstance(max_len, int) and
-            max_len > 0 else 0)
+    return (max_len if isinstance(max_len, int) and max_len > 0 else 0)
 
 
 def _get_cross_slot_mapping(encoder_seq_lens: np.ndarray,

From 135fdd886c07c667443bc17f3f2068bde6651438 Mon Sep 17 00:00:00 2001
From: Sugar-zsg <952242923@qq.com>
Date: Mon, 15 Sep 2025 17:40:07 +0800
Subject: [PATCH 3/6] Remove the MULTIMODAL_REGISTRY import

Signed-off-by: Sugar-zsg <952242923@qq.com>
---
 vllm/attention/layers/cross_attention.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/vllm/attention/layers/cross_attention.py b/vllm/attention/layers/cross_attention.py
index 4623fb879b2a..c4553afc18b2 100644
--- a/vllm/attention/layers/cross_attention.py
+++ b/vllm/attention/layers/cross_attention.py
@@ -14,7 +14,6 @@
 from vllm.attention.selector import get_attn_backend
 from vllm.config import CacheConfig, VllmConfig
 from vllm.logger import init_logger
-from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.utils import cdiv
 from vllm.v1.attention.backends.utils import (CommonAttentionMetadata,
                                               subclass_attention_backend)

From f319d511b014d5f02b978cd76d987c053e2cdcee Mon Sep 17 00:00:00 2001
From: Sugar-zsg <952242923@qq.com>
Date: Mon, 15 Sep 2025 17:49:21 +0800
Subject: [PATCH 4/6] Access scheduler_config directly instead of via getattr

Signed-off-by: Sugar-zsg <952242923@qq.com>
---
 vllm/attention/layers/cross_attention.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/vllm/attention/layers/cross_attention.py b/vllm/attention/layers/cross_attention.py
index c4553afc18b2..16326f35686b 100644
--- a/vllm/attention/layers/cross_attention.py
+++ b/vllm/attention/layers/cross_attention.py
@@ -22,13 +22,14 @@
 logger = init_logger(__name__)
 
 def _get_max_encoder_len(vllm_config: "VllmConfig") -> int:
-    """Gets the max number of encoder tokens from the config."""
-    max_len = getattr(
-        getattr(vllm_config, "scheduler_config", None),
-        "max_num_encoder_input_tokens",
-        0,
-    )
-    return (max_len if isinstance(max_len, int) and max_len > 0 else 0)
+    """Gets the max number of encoder input tokens from the config.
+    """
+    max_len = vllm_config.scheduler_config.max_num_encoder_input_tokens
+
+    if isinstance(max_len, int) and max_len > 0:
+        return max_len
+
+    return 0
 
 
 def _get_cross_slot_mapping(encoder_seq_lens: np.ndarray,

From 85a885455f18a2741310ccb2568a4fc042332124 Mon Sep 17 00:00:00 2001
From: Sugar-zsg <952242923@qq.com>
Date: Mon, 15 Sep 2025 18:03:12 +0800
Subject: [PATCH 5/6] Add second blank line before _get_max_encoder_len

Signed-off-by: Sugar-zsg <952242923@qq.com>
---
 vllm/attention/layers/cross_attention.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/attention/layers/cross_attention.py b/vllm/attention/layers/cross_attention.py
index 16326f35686b..d490fcb4e05f 100644
--- a/vllm/attention/layers/cross_attention.py
+++ b/vllm/attention/layers/cross_attention.py
@@ -21,6 +21,7 @@
 
 logger = init_logger(__name__)
 
+
 def _get_max_encoder_len(vllm_config: "VllmConfig") -> int:
     """Gets the max number of encoder input tokens from the config.
     """

From d992f221531729bde8961e32edcdf945966edb45 Mon Sep 17 00:00:00 2001
From: Sugar-zsg <64777228+sugar-zsg@users.noreply.github.com>
Date: Mon, 15 Sep 2025 18:03:12 +0800
Subject: [PATCH 6/6] Assert max_num_encoder_input_tokens is an int instead of returning 0

Signed-off-by: Sugar-zsg <952242923@qq.com>
---
 vllm/attention/layers/cross_attention.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/vllm/attention/layers/cross_attention.py b/vllm/attention/layers/cross_attention.py
index d490fcb4e05f..9400c5bffa38 100644
--- a/vllm/attention/layers/cross_attention.py
+++ b/vllm/attention/layers/cross_attention.py
@@ -25,12 +25,10 @@
 def _get_max_encoder_len(vllm_config: "VllmConfig") -> int:
     """Gets the max number of encoder input tokens from the config.
     """
-    max_len = vllm_config.scheduler_config.max_num_encoder_input_tokens
-
-    if isinstance(max_len, int) and max_len > 0:
-        return max_len
-
-    return 0
+    sc = vllm_config.scheduler_config
+    assert sc and isinstance(sc.max_num_encoder_input_tokens, int), \
+        "max_num_encoder_input_tokens must be int for enc-dec models"
+    return sc.max_num_encoder_input_tokens
 
 
 def _get_cross_slot_mapping(encoder_seq_lens: np.ndarray,