diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index d3e23890469e..3d48d2a0b22d 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1465,12 +1465,18 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
             return False
 
         # V1 supports N-gram, Medusa, and Eagle speculative decoding.
-        if (self.speculative_config is not None
-                and self.speculative_config.get("method") == "draft_model"):
-            raise NotImplementedError(
-                "Speculative decoding with draft model is not supported yet. "
-                "Please consider using other speculative decoding methods "
-                "such as ngram, medusa, eagle, or deepseek_mtp.")
+        if self.speculative_config is not None:
+            # speculative_config could still be a dict at this point
+            if isinstance(self.speculative_config, dict):
+                method = self.speculative_config.get("method", None)
+            else:
+                method = self.speculative_config.method
+
+            if method == "draft_model":
+                raise NotImplementedError(
+                    "Draft model speculative decoding is not supported yet. "
+                    "Please consider using other speculative decoding methods "
+                    "such as ngram, medusa, eagle, or deepseek_mtp.")
 
         V1_BACKENDS = [
             "FLASH_ATTN_VLLM_V1",