mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 23:03:52 +08:00
[Bugfix] model_max_length should consider max_model_len in tokenizer_config (#19201)
This commit is contained in:
@ -44,7 +44,8 @@ from vllm.transformers_utils.config import (
|
||||
ConfigFormat, get_config, get_hf_image_processor_config,
|
||||
get_hf_text_config, get_pooling_config,
|
||||
get_sentence_transformer_tokenizer_config, is_encoder_decoder,
|
||||
try_get_generation_config, try_get_safetensors_metadata, uses_mrope)
|
||||
try_get_generation_config, try_get_safetensors_metadata,
|
||||
try_get_tokenizer_config, uses_mrope)
|
||||
from vllm.transformers_utils.s3_utils import S3Model
|
||||
from vllm.transformers_utils.utils import is_s3, maybe_model_redirect
|
||||
from vllm.utils import (DEFAULT_MAX_NUM_BATCHED_TOKENS,
|
||||
@ -1427,6 +1428,18 @@ class ModelConfig:
|
||||
sliding_window_len=self.get_hf_config_sliding_window(),
|
||||
spec_target_max_model_len=self.spec_target_max_model_len,
|
||||
encoder_config=self.encoder_config)
|
||||
|
||||
tokenizer_config = try_get_tokenizer_config(
|
||||
self.tokenizer,
|
||||
trust_remote_code=self.trust_remote_code,
|
||||
revision=self.tokenizer_revision)
|
||||
|
||||
if tokenizer_config is None:
|
||||
return max_model_len
|
||||
|
||||
model_max_length = tokenizer_config.get("model_max_length",
|
||||
max_model_len)
|
||||
max_model_len = min(max_model_len, model_max_length)
|
||||
return max_model_len
|
||||
|
||||
|
||||
|
@ -23,6 +23,7 @@ from transformers.models.auto.image_processing_auto import (
|
||||
get_image_processor_config)
|
||||
from transformers.models.auto.modeling_auto import (
|
||||
MODEL_FOR_CAUSAL_LM_MAPPING_NAMES)
|
||||
from transformers.models.auto.tokenization_auto import get_tokenizer_config
|
||||
from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME
|
||||
|
||||
from vllm import envs
|
||||
@ -867,3 +868,18 @@ def try_get_safetensors_metadata(
|
||||
"Error retrieving safetensors")
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def try_get_tokenizer_config(
    pretrained_model_name_or_path: Union[str, os.PathLike],
    trust_remote_code: bool,
    revision: Optional[str] = None,
) -> Optional[dict[str, Any]]:
    """Best-effort fetch of a model's ``tokenizer_config.json`` contents.

    Wraps :func:`transformers.models.auto.tokenization_auto.get_tokenizer_config`
    and deliberately swallows every failure (missing file, network error,
    bad revision, ...), returning ``None`` instead — callers treat an
    absent tokenizer config as "no extra constraints".

    Args:
        pretrained_model_name_or_path: HF hub id or local path of the model.
        trust_remote_code: Forwarded to the HF loader.
        revision: Optional HF hub revision (branch, tag, or commit hash).

    Returns:
        The tokenizer config as a dict, or ``None`` if it could not be
        retrieved for any reason.
    """
    try:
        config = get_tokenizer_config(
            pretrained_model_name_or_path,
            trust_remote_code=trust_remote_code,
            revision=revision,
        )
    except Exception:
        # Best effort by design: any retrieval problem degrades to None.
        return None
    return config
|
||||
|
Reference in New Issue
Block a user