diff --git a/vllm/model_executor/models/qwen3_vl_moe.py b/vllm/model_executor/models/qwen3_vl_moe.py index 625f94cf7ad7..7912cf3ea52b 100644 --- a/vllm/model_executor/models/qwen3_vl_moe.py +++ b/vllm/model_executor/models/qwen3_vl_moe.py @@ -122,9 +122,10 @@ def forward( def load_fused_expert_weights(self, name: str, params_dict: dict, loaded_weight: torch.Tensor, shard_id: str, - num_experts: int): + num_experts: int) -> bool: param = params_dict[name] weight_loader = typing.cast(Callable[..., bool], param.weight_loader) + loaded_local_expert = False for expert_id in range(num_experts): curr_expert_weight = loaded_weight[expert_id] success = weight_loader(param, @@ -133,9 +134,10 @@ def load_fused_expert_weights(self, name: str, params_dict: dict, shard_id, expert_id, return_success=True) - if not success: - return False - return True + if success: + loaded_local_expert = True + + return loaded_local_expert def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: @@ -345,4 +347,4 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): for _ in range(self.deepstack_num_level) ] if self.use_deepstack else None self.visual_dim = config.vision_config.out_hidden_size - self.multiscale_dim = self.visual_dim * self.deepstack_num_level \ No newline at end of file + self.multiscale_dim = self.visual_dim * self.deepstack_num_level