From 4dfc5d40fb1c3d87cfcb626450ff40c887b86ae5 Mon Sep 17 00:00:00 2001
From: piood <2477084691@qq.com>
Date: Mon, 27 Oct 2025 10:39:27 +0000
Subject: [PATCH 1/5] [Model] SigLIP2 Model Support
Signed-off-by: piood <2477084691@qq.com>
---
docs/models/supported_models.md | 2 +-
tests/models/multimodal/pooling/test_siglip.py | 2 +-
vllm/model_executor/models/siglip.py | 2 +-
vllm/transformers_utils/config.py | 12 +++++++++++-
4 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index 9039c0480547..f503c880fa7d 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -774,7 +774,7 @@ The following table lists those that are tested in vLLM.
| `CLIPModel` | CLIP | T / I | `openai/clip-vit-base-patch32`, `openai/clip-vit-large-patch14`, etc. | | |
| `LlavaNextForConditionalGeneration`C | LLaVA-NeXT-based | T / I | `royokong/e5-v` | | ✅︎ |
| `Phi3VForCausalLM`C | Phi-3-Vision-based | T + I | `TIGER-Lab/VLM2Vec-Full` | | ✅︎ |
-| `SiglipModel` | SigLIP | T / I | `google/siglip-base-patch16-224` | | |
+| `SiglipModel` | SigLIP | T / I | `google/siglip-base-patch16-224`,`google/siglip2-base-patch16-224` | | |
| `*ForConditionalGeneration`C, `*ForCausalLM`C, etc. | Generative models | \* | N/A | \* | \* |
C Automatically converted into an embedding model via `--convert embed`. ([details](./pooling_models.md#model-conversion))
diff --git a/tests/models/multimodal/pooling/test_siglip.py b/tests/models/multimodal/pooling/test_siglip.py
index f681b4787b69..3345b10c099a 100644
--- a/tests/models/multimodal/pooling/test_siglip.py
+++ b/tests/models/multimodal/pooling/test_siglip.py
@@ -19,7 +19,7 @@
}
)
-MODELS = ["google/siglip-base-patch16-224"]
+MODELS = ["google/siglip-base-patch16-224", "google/siglip2-base-patch16-224"]
def _run_test(
diff --git a/vllm/model_executor/models/siglip.py b/vllm/model_executor/models/siglip.py
index 694e06f9fc81..4498b9391f7b 100644
--- a/vllm/model_executor/models/siglip.py
+++ b/vllm/model_executor/models/siglip.py
@@ -174,7 +174,7 @@ class SiglipMultiModalProcessor(BaseMultiModalProcessor[SiglipProcessingInfo]):
@cached_property
def image_token_id(self) -> int:
tokenizer = self.info.get_tokenizer()
- dummy_token_id = 0
+ dummy_token_id = 6
assert dummy_token_id not in tokenizer.all_special_ids
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 7802cece6075..3d7c5835aca9 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -26,7 +26,10 @@
)
from transformers import GenerationConfig, PretrainedConfig
from transformers.models.auto.image_processing_auto import get_image_processor_config
-from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
+from transformers.models.auto.modeling_auto import (
+ MODEL_FOR_CAUSAL_LM_MAPPING_NAMES,
+ MODEL_MAPPING_NAMES,
+)
from transformers.models.auto.tokenization_auto import get_tokenizer_config
from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME
@@ -616,6 +619,13 @@ def get_config(
model_type = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES[config.model_type]
config.update({"architectures": [model_type]})
+ # Architecture mapping for models without explicit architectures field
+ if config.architectures is None:
+ if config.model_type not in MODEL_MAPPING_NAMES:
+ raise ValueError(f"Model type {config.model_type} not supported")
+ model_type = MODEL_MAPPING_NAMES[config.model_type]
+ config.update({"architectures": [model_type]})
+
# ModelOpt 0.31.0 and after saves the quantization config in the model
# config file.
quantization_config = config_dict.get("quantization_config", None)
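Note on the config.py hunk above: it adds a fallback for checkpoints whose config declares no `architectures` field (SigLIP2 checkpoints are one example). The model type is looked up in transformers' generic MODEL_MAPPING_NAMES table, which maps e.g. "siglip2" to "Siglip2Model". A minimal sketch of that lookup, assuming a transformers release that ships SigLIP2:

    from transformers import AutoConfig
    from transformers.models.auto.modeling_auto import MODEL_MAPPING_NAMES

    # Build a config in memory; `architectures` is unset, as in a bare checkpoint.
    config = AutoConfig.for_model("siglip2")

    # Mirror the fallback in get_config(): resolve the architecture from the
    # model type when the checkpoint does not spell it out.
    if not getattr(config, "architectures", None):
        if config.model_type not in MODEL_MAPPING_NAMES:
            raise ValueError(f"Model type {config.model_type} not supported")
        config.update({"architectures": [MODEL_MAPPING_NAMES[config.model_type]]})

    print(config.architectures)  # expected: ['Siglip2Model']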
From 5a15ab9e6be0d22e6414297982de1d22d41b75b2 Mon Sep 17 00:00:00 2001
From: piood <2477084691@qq.com>
Date: Mon, 27 Oct 2025 11:03:01 +0000
Subject: [PATCH 2/5] Mention SigLIP2 in the supported models table
Signed-off-by: piood <2477084691@qq.com>
---
docs/models/supported_models.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index f503c880fa7d..395a8a6c70ca 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -774,7 +774,7 @@ The following table lists those that are tested in vLLM.
| `CLIPModel` | CLIP | T / I | `openai/clip-vit-base-patch32`, `openai/clip-vit-large-patch14`, etc. | | |
| `LlavaNextForConditionalGeneration`C | LLaVA-NeXT-based | T / I | `royokong/e5-v` | | ✅︎ |
| `Phi3VForCausalLM`C | Phi-3-Vision-based | T + I | `TIGER-Lab/VLM2Vec-Full` | | ✅︎ |
-| `SiglipModel` | SigLIP | T / I | `google/siglip-base-patch16-224`,`google/siglip2-base-patch16-224` | | |
+| `SiglipModel` | SigLIP / SigLIP2 | T / I | `google/siglip-base-patch16-224`,`google/siglip2-base-patch16-224` | | |
| `*ForConditionalGeneration`C, `*ForCausalLM`C, etc. | Generative models | \* | N/A | \* | \* |
C Automatically converted into an embedding model via `--convert embed`. ([details](./pooling_models.md#model-conversion))
From 1970105c585a17b2b05d847a7ba90aebde1bc29e Mon Sep 17 00:00:00 2001
From: piood <2477084691@qq.com>
Date: Mon, 27 Oct 2025 11:08:38 +0000
Subject: [PATCH 3/5] Handle empty architectures list in config fallback
Signed-off-by: piood <2477084691@qq.com>
---
vllm/transformers_utils/config.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 3d7c5835aca9..e7b509457d1e 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -620,7 +620,7 @@ def get_config(
config.update({"architectures": [model_type]})
# Architecture mapping for models without explicit architectures field
- if config.architectures is None:
+ if not config.architectures:
if config.model_type not in MODEL_MAPPING_NAMES:
raise ValueError(f"Model type {config.model_type} not supported")
model_type = MODEL_MAPPING_NAMES[config.model_type]
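For reference, the truthiness check above covers both the missing-field and the empty-list case, which the previous `is None` test did not. A tiny illustration with hypothetical `architectures` values:

    # Hypothetical values a checkpoint config might carry.
    for archs in (None, [], ["SiglipModel"]):
        needs_fallback_old = archs is None   # misses the empty-list case
        needs_fallback_new = not archs       # catches both None and []
        print(f"{archs!r}: old={needs_fallback_old}, new={needs_fallback_new}")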
From 463c27f8acc2f46975111376b7b11a5f832f7366 Mon Sep 17 00:00:00 2001
From: piood <2477084691@qq.com>
Date: Mon, 27 Oct 2025 11:16:47 +0000
Subject: [PATCH 4/5] Polish supported models table and fallback error message
Signed-off-by: piood <2477084691@qq.com>
---
docs/models/supported_models.md | 2 +-
vllm/transformers_utils/config.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index 395a8a6c70ca..47fd0c43f6d8 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -774,7 +774,7 @@ The following table lists those that are tested in vLLM.
| `CLIPModel` | CLIP | T / I | `openai/clip-vit-base-patch32`, `openai/clip-vit-large-patch14`, etc. | | |
| `LlavaNextForConditionalGeneration`C | LLaVA-NeXT-based | T / I | `royokong/e5-v` | | ✅︎ |
| `Phi3VForCausalLM`C | Phi-3-Vision-based | T + I | `TIGER-Lab/VLM2Vec-Full` | | ✅︎ |
-| `SiglipModel` | SigLIP / SigLIP2 | T / I | `google/siglip-base-patch16-224`,`google/siglip2-base-patch16-224` | | |
+| `SiglipModel` | SigLIP, SigLIP2 | T / I | `google/siglip-base-patch16-224`, `google/siglip2-base-patch16-224` | | |
| `*ForConditionalGeneration`C, `*ForCausalLM`C, etc. | Generative models | \* | N/A | \* | \* |
C Automatically converted into an embedding model via `--convert embed`. ([details](./pooling_models.md#model-conversion))
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index e7b509457d1e..13de5939356e 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -622,7 +622,7 @@ def get_config(
# Architecture mapping for models without explicit architectures field
if not config.architectures:
if config.model_type not in MODEL_MAPPING_NAMES:
- raise ValueError(f"Model type {config.model_type} not supported")
+ raise ValueError(f"Cannot find architecture name for {config.model_type}")
model_type = MODEL_MAPPING_NAMES[config.model_type]
config.update({"architectures": [model_type]})
From 39ee47a0447e7de72cf0ed71ee47f69393f7afec Mon Sep 17 00:00:00 2001
From: piood <2477084691@qq.com>
Date: Mon, 27 Oct 2025 11:29:18 +0000
Subject: [PATCH 5/5] Derive image_token_id from first non-special token
Signed-off-by: piood <2477084691@qq.com>
---
vllm/model_executor/models/siglip.py | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/vllm/model_executor/models/siglip.py b/vllm/model_executor/models/siglip.py
index 4498b9391f7b..e363be523dcc 100644
--- a/vllm/model_executor/models/siglip.py
+++ b/vllm/model_executor/models/siglip.py
@@ -174,9 +174,11 @@ class SiglipMultiModalProcessor(BaseMultiModalProcessor[SiglipProcessingInfo]):
@cached_property
def image_token_id(self) -> int:
tokenizer = self.info.get_tokenizer()
- dummy_token_id = 6
-
- assert dummy_token_id not in tokenizer.all_special_ids
+ dummy_token_id = next(
+ token_id
+ for token_id in range(tokenizer.vocab_size)
+ if token_id not in tokenizer.all_special_ids
+ )
return dummy_token_id
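The final version of `image_token_id` above no longer hardcodes a dummy id; it picks the first token id that is not registered as a special token, so it stays valid regardless of which ids a given tokenizer reserves. A standalone sketch of the same selection, assuming the checkpoint's tokenizer loads via `AutoTokenizer` and exposes `vocab_size` and `all_special_ids`:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("google/siglip2-base-patch16-224")

    # Smallest token id that is not a special token; used as a placeholder id
    # for the image slot in the multimodal processor.
    dummy_token_id = next(
        token_id
        for token_id in range(tokenizer.vocab_size)
        if token_id not in tokenizer.all_special_ids
    )

    assert dummy_token_id not in tokenizer.all_special_ids
    print(dummy_token_id)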