diff --git a/vllm/config.py b/vllm/config.py
index d643daa5c5..d8318ab229 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -44,7 +44,8 @@ from vllm.transformers_utils.config import (
     ConfigFormat, get_config, get_hf_image_processor_config,
     get_hf_text_config, get_pooling_config,
     get_sentence_transformer_tokenizer_config, is_encoder_decoder,
-    try_get_generation_config, try_get_safetensors_metadata, uses_mrope)
+    try_get_generation_config, try_get_safetensors_metadata,
+    try_get_tokenizer_config, uses_mrope)
 from vllm.transformers_utils.s3_utils import S3Model
 from vllm.transformers_utils.utils import is_s3, maybe_model_redirect
 from vllm.utils import (DEFAULT_MAX_NUM_BATCHED_TOKENS,
@@ -1427,6 +1428,18 @@ class ModelConfig:
             sliding_window_len=self.get_hf_config_sliding_window(),
             spec_target_max_model_len=self.spec_target_max_model_len,
             encoder_config=self.encoder_config)
+
+        tokenizer_config = try_get_tokenizer_config(
+            self.tokenizer,
+            trust_remote_code=self.trust_remote_code,
+            revision=self.tokenizer_revision)
+
+        if tokenizer_config is None:
+            return max_model_len
+
+        model_max_length = tokenizer_config.get("model_max_length",
+                                                max_model_len)
+        max_model_len = min(max_model_len, model_max_length)
 
         return max_model_len
 
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 9bc3b8e09a..d66e296fdb 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -23,6 +23,7 @@ from transformers.models.auto.image_processing_auto import (
     get_image_processor_config)
 from transformers.models.auto.modeling_auto import (
     MODEL_FOR_CAUSAL_LM_MAPPING_NAMES)
+from transformers.models.auto.tokenization_auto import get_tokenizer_config
 from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME
 
 from vllm import envs
@@ -867,3 +868,18 @@ def try_get_safetensors_metadata(
                           "Error retrieving safetensors")
     except Exception:
         return None
+
+
+def try_get_tokenizer_config(
+    pretrained_model_name_or_path: Union[str, os.PathLike],
+    trust_remote_code: bool,
+    revision: Optional[str] = None,
+) -> Optional[dict[str, Any]]:
+    try:
+        return get_tokenizer_config(
+            pretrained_model_name_or_path,
+            trust_remote_code=trust_remote_code,
+            revision=revision,
+        )
+    except Exception:
+        return None
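
Below is a minimal standalone sketch (not part of the patch) of how the new clamping behaves end to end. `get_tokenizer_config` is the real `transformers` helper the patch imports; the `"gpt2"` checkpoint and the starting limit of 4096 are illustrative assumptions only.

```python
# Standalone sketch of the clamping logic this patch adds; assumes HF Hub
# access and uses "gpt2" plus a made-up starting limit for illustration.
import os
from typing import Any, Optional, Union

from transformers.models.auto.tokenization_auto import get_tokenizer_config


def try_get_tokenizer_config(
    pretrained_model_name_or_path: Union[str, os.PathLike],
    trust_remote_code: bool,
    revision: Optional[str] = None,
) -> Optional[dict[str, Any]]:
    # Same shape as the helper in the diff: return the parsed
    # tokenizer_config.json, or None on any failure (missing file,
    # network error, bad revision).
    try:
        return get_tokenizer_config(
            pretrained_model_name_or_path,
            trust_remote_code=trust_remote_code,
            revision=revision,
        )
    except Exception:
        return None


max_model_len = 4096  # stand-in for the value _get_and_verify_max_len() produced
tokenizer_config = try_get_tokenizer_config("gpt2", trust_remote_code=False)
if tokenizer_config is not None:
    # Clamp to the tokenizer's declared limit when one is present; if
    # "model_max_length" is absent, the min() is a no-op.
    model_max_length = tokenizer_config.get("model_max_length", max_model_len)
    max_model_len = min(max_model_len, model_max_length)
print(max_model_len)  # 1024 for the gpt2 checkpoint at the time of writing
```

Because the helper swallows every exception and the caller returns the unclamped value when it gets `None`, a missing or unreachable `tokenizer_config.json` degrades to the previous behavior instead of failing engine startup.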