diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 64be2f38c6a3..631618d427d4 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -3029,6 +3029,18 @@ def try_verify_and_update_config(self): SequenceClassificationConfig) SequenceClassificationConfig.verify_and_update_config(self) + if hasattr(self.model_config, "model_weights") and is_runai_obj_uri( + self.model_config.model_weights): + if self.load_config.load_format == "auto": + logger.info("Detected Run:ai model config. " + "Overriding `load_format` to 'runai_streamer'") + self.load_config.load_format = "runai_streamer" + elif self.load_config.load_format != "runai_streamer": + raise ValueError(f"To load a model from S3, 'load_format' " + f"must be 'runai_streamer', " + f"but got '{self.load_config.load_format}'. " + f"Model: {self.model_config.model}") + def __str__(self): return ( f"model={self.model_config.model!r}, " diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 4831cb5348c7..e2a1ec68e6f5 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -959,7 +959,6 @@ def create_model_config(self) -> ModelConfig: if (not isinstance(self, AsyncEngineArgs) and envs.VLLM_CI_USE_S3 and self.model in MODELS_ON_S3 and self.load_format == "auto"): self.model = f"{MODEL_WEIGHTS_S3_BUCKET}/{self.model}" - self.load_format = "runai_streamer" if self.disable_mm_preprocessor_cache: logger.warning(