[Bugfix] S3 models cannot use the default load_format (#24435)

Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io>
Co-authored-by: 22quinn <33176974+22quinn@users.noreply.github.com>
Author: rongfu.leng
Date: 2025-09-18 15:47:43 +08:00 (committed by GitHub)
Parent: f4cd80f944
Commit: 350c94deb3
2 changed files with 12 additions and 1 deletion


@@ -3029,6 +3029,18 @@ class VllmConfig:
                SequenceClassificationConfig)
            SequenceClassificationConfig.verify_and_update_config(self)
        if hasattr(self.model_config, "model_weights") and is_runai_obj_uri(
                self.model_config.model_weights):
            if self.load_config.load_format == "auto":
                logger.info("Detected Run:ai model config. "
                            "Overriding `load_format` to 'runai_streamer'")
                self.load_config.load_format = "runai_streamer"
            elif self.load_config.load_format != "runai_streamer":
                raise ValueError(f"To load a model from S3, 'load_format' "
                                 f"must be 'runai_streamer', "
                                 f"but got '{self.load_config.load_format}'. "
                                 f"Model: {self.model_config.model}")

    def __str__(self):
        return (
            f"model={self.model_config.model!r}, "


@@ -959,7 +959,6 @@ class EngineArgs:
        if (not isinstance(self, AsyncEngineArgs) and envs.VLLM_CI_USE_S3
                and self.model in MODELS_ON_S3 and self.load_format == "auto"):
            self.model = f"{MODEL_WEIGHTS_S3_BUCKET}/{self.model}"
            self.load_format = "runai_streamer"

        if self.disable_mm_preprocessor_cache:
            logger.warning(
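
The single deletion in this hunk is the explicit self.load_format = "runai_streamer" assignment in the CI S3 shortcut: the path rewrite stays, and the format is now resolved by the VllmConfig check in the first file. A rough sketch of the resulting flow, with hypothetical placeholder values standing in for vLLM's real MODEL_WEIGHTS_S3_BUCKET and MODELS_ON_S3 constants:

    # Rough sketch of the CI redirect after this change; bucket URI and model
    # list are hypothetical placeholders, not vLLM's actual values.
    MODEL_WEIGHTS_S3_BUCKET = "s3://example-ci-model-weights"
    MODELS_ON_S3 = {"facebook/opt-125m"}

    def rewrite_ci_model(model: str, load_format: str,
                         use_s3_ci: bool) -> tuple[str, str]:
        """Point CI runs at the S3 mirror without pinning a load_format."""
        if use_s3_ci and model in MODELS_ON_S3 and load_format == "auto":
            # Only the model path is rewritten; load_format stays "auto" and
            # is later resolved to "runai_streamer" by the VllmConfig check.
            model = f"{MODEL_WEIGHTS_S3_BUCKET}/{model}"
        return model, load_format

    if __name__ == "__main__":
        print(rewrite_ci_model("facebook/opt-125m", "auto", use_s3_ci=True))

Keeping the override in one place means non-CI users who point directly at an S3 URI get the same behavior as CI runs, which is the bug this commit fixes.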