From 8974ab9c515c007b18572c8aa23aafc174b558ae Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 17 Sep 2025 19:21:59 +0200 Subject: [PATCH 1/2] Get `sliding_window` from the text config in Gemma3 MM Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/model_executor/models/gemma3_mm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/models/gemma3_mm.py b/vllm/model_executor/models/gemma3_mm.py index e652ba2f1c7f..bee9fbd2c084 100644 --- a/vllm/model_executor/models/gemma3_mm.py +++ b/vllm/model_executor/models/gemma3_mm.py @@ -688,7 +688,8 @@ def prepare_attn_masks( global_attn_mask = torch.where(img_mask == 2, 0, global_attn_mask) global_attn_masks.append(global_attn_mask) - if (sliding_window := self.config.sliding_window) is not None: + sliding_window = self.config.text_config.sliding_window + if sliding_window is not None: # Create a local causal mask with sliding window (1024). local_attn_mask = torch.ones_like(global_attn_mask) local_attn_mask = torch.tril(local_attn_mask, From 473ad540c2e64536b49c64d36378f90432d30b65 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 17 Sep 2025 19:23:09 +0200 Subject: [PATCH 2/2] Remove reference to removed attr that snuck into Gemma3n MM Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/model_executor/models/gemma3n_mm.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/vllm/model_executor/models/gemma3n_mm.py b/vllm/model_executor/models/gemma3n_mm.py index 663d4da7cec2..8d3079aee0df 100644 --- a/vllm/model_executor/models/gemma3n_mm.py +++ b/vllm/model_executor/models/gemma3n_mm.py @@ -461,9 +461,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.multimodal_config = multimodal_config self.vocab_size = config.text_config.vocab_size - self.sliding_window = getattr(config.text_config, - "interleaved_sliding_window", None) - self.vision_tower = AutoModel.from_config(config=config.vision_config) self.audio_tower = AutoModel.from_config(config=config.audio_config) self.embed_vision = Gemma3nMultimodalEmbedder(config.vision_config,