vllm-project · hmellor · Sep 25, 2025 · Sep 18, 2025 · Sep 25, 2025 · Sep 25, 2025
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
@@ -1465,12 +1465,18 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
             return False
 
         # V1 supports N-gram, Medusa, and Eagle speculative decoding.
-        if (self.speculative_config is not None
-                and self.speculative_config.get("method") == "draft_model"):
-            raise NotImplementedError(
-                "Speculative decoding with draft model is not supported yet. "
-                "Please consider using other speculative decoding methods "
-                "such as ngram, medusa, eagle, or deepseek_mtp.")
+        if self.speculative_config is not None:
+            # speculative_config could still be a dict at this point
+            if isinstance(self.speculative_config, dict):
+                method = self.speculative_config.get("method", None)
+            else:
+                method = self.speculative_config.method
+
+            if method == "draft_model":
+                raise NotImplementedError(
+                    "Draft model speculative decoding is not supported yet. "
+                    "Please consider using other speculative decoding methods "
+                    "such as ngram, medusa, eagle, or deepseek_mtp.")
 
         V1_BACKENDS = [
             "FLASH_ATTN_VLLM_V1",