Generate _ModelInfo properties file when loading to improve loading speed (#23558)
Signed-off-by: Manoel Marques <manoel.marques@ibm.com>
Signed-off-by: Manoel Marques <manoelmrqs@gmail.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
vllm/logging_utils/__init__.py
@@ -2,7 +2,9 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from vllm.logging_utils.formatter import NewLineFormatter
+from vllm.logging_utils.log_time import logtime
 
 __all__ = [
     "NewLineFormatter",
+    "logtime",
 ]
vllm/logging_utils/log_time.py (new file, 32 lines)
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Provides a timeslice logging decorator
+"""
+
+import functools
+import time
+
+
+def logtime(logger, msg=None):
+    """
+    Logs the execution time of the decorated function.
+    Always place it beneath other decorators.
+    """
+
+    def _inner(func):
+
+        @functools.wraps(func)
+        def _wrapper(*args, **kwargs):
+            start = time.perf_counter()
+            result = func(*args, **kwargs)
+            elapsed = time.perf_counter() - start
+
+            prefix = f"Function '{func.__module__}.{func.__qualname__}'" \
+                if msg is None else msg
+            logger.debug("%s: Elapsed time %.7f secs", prefix, elapsed)
+            return result
+
+        return _wrapper
+
+    return _inner
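Usage note (not part of the diff): a minimal sketch of how the new decorator is applied. The logger setup and the decorated function below are illustrative, not from this commit; the import path is the one this commit exposes via vllm/logging_utils/__init__.py.

import logging

from vllm.logging_utils import logtime

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("demo")

@logtime(logger=logger, msg="slow_step")
def slow_step(n: int) -> int:
    # Busy work so there is a measurable elapsed time to report.
    return sum(i * i for i in range(n))

slow_step(1_000_000)
# Logs something like: slow_step: Elapsed time 0.0412345 secs

Because logtime times whatever callable it wraps, placing it beneath other decorators (closest to the function) measures only the function body, not the other decorators' overhead.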
vllm/model_executor/model_loader/weight_utils.py
@@ -11,6 +11,7 @@ import tempfile
 import time
 from collections import defaultdict
 from collections.abc import Generator
+from contextlib import contextmanager
 from pathlib import Path
 from typing import Any, Callable, Optional, Union
 
@@ -98,6 +99,49 @@ def get_lock(model_name_or_path: Union[str, Path],
     return lock
 
 
+@contextmanager
+def atomic_writer(filepath: Union[str, Path],
+                  mode: str = 'w',
+                  encoding: Optional[str] = None):
+    """
+    Context manager that provides an atomic file writing routine.
+
+    The context manager writes to a temporary file and, if successful,
+    atomically replaces the original file.
+
+    Args:
+        filepath (str or Path): The path to the file to write.
+        mode (str): The file mode for the temporary file (e.g., 'w', 'wb').
+        encoding (str): The encoding for text mode.
+
+    Yields:
+        file object: A handle to the temporary file.
+    """
+    # Create a temporary file in the same directory as the target file
+    # to ensure it's on the same filesystem for an atomic replace.
+    temp_dir = os.path.dirname(filepath)
+    temp_fd, temp_path = tempfile.mkstemp(dir=temp_dir)
+
+    try:
+        # Open the temporary file for writing
+        with os.fdopen(temp_fd, mode=mode, encoding=encoding) as temp_file:
+            yield temp_file
+
+        # If the 'with' block completes successfully,
+        # perform the atomic replace.
+        os.replace(temp_path, filepath)
+
+    except Exception:
+        logger.exception(
+            "Error during atomic write. Original file '%s' not modified",
+            filepath)
+        raise
+    finally:
+        # Clean up the temporary file if it still exists.
+        if os.path.exists(temp_path):
+            os.remove(temp_path)
+
+
 def maybe_download_from_modelscope(
     model: str,
     revision: Optional[str] = None,
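Usage note (not part of the diff): a minimal sketch of the new helper; the target filename and payload are illustrative. The import path is the one the registry change below uses.

import json

from vllm.model_executor.model_loader.weight_utils import atomic_writer

# Because the temporary file is created in the same directory (same
# filesystem), os.replace() swaps it in atomically: concurrent readers
# see either the old complete file or the new one, never a partial write.
with atomic_writer("metadata.json", encoding="utf-8") as f:
    json.dump({"status": "ok"}, f)

If the body of the `with` block raises, the replace never happens and the original file is left untouched; the `finally` clause removes the orphaned temporary file either way.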
vllm/model_executor/models/registry.py
@@ -4,7 +4,9 @@
 Whenever you add an architecture to this page, please also update
 `tests/models/registry.py` with example HuggingFace models for it.
 """
+import hashlib
 import importlib
+import json
 import os
 import pickle
 import subprocess
@@ -12,16 +14,19 @@ import sys
 import tempfile
 from abc import ABC, abstractmethod
 from collections.abc import Set
-from dataclasses import dataclass, field
+from dataclasses import asdict, dataclass, field
 from functools import lru_cache
+from pathlib import Path
 from typing import Callable, Optional, TypeVar, Union
 
 import torch.nn as nn
 import transformers
 
+from vllm import envs
 from vllm.config import (ModelConfig, iter_architecture_defaults,
                          try_match_architecture_defaults)
 from vllm.logger import init_logger
+from vllm.logging_utils import logtime
 from vllm.transformers_utils.dynamic_module import (
     try_get_class_from_dynamic_module)
@@ -421,10 +426,91 @@ class _LazyRegisteredModel(_BaseRegisteredModel):
     module_name: str
     class_name: str
 
-    # Performed in another process to avoid initializing CUDA
+    @staticmethod
+    def _get_cache_dir() -> Path:
+        return Path(envs.VLLM_CACHE_ROOT) / "modelinfos"
+
+    def _get_cache_filename(self) -> str:
+        cls_name = f"{self.module_name}-{self.class_name}".replace(".", "-")
+        return f"{cls_name}.json"
+
+    def _load_modelinfo_from_cache(self,
+                                   module_hash: str) -> _ModelInfo | None:
+        try:
+            try:
+                modelinfo_path = self._get_cache_dir(
+                ) / self._get_cache_filename()
+                with open(modelinfo_path, encoding="utf-8") as file:
+                    mi_dict = json.load(file)
+            except FileNotFoundError:
+                logger.debug(("Cached model info file "
+                              "for class %s.%s not found"), self.module_name,
+                             self.class_name)
+                return None
+
+            if mi_dict["hash"] != module_hash:
+                logger.debug(("Cached model info file "
+                              "for class %s.%s is stale"), self.module_name,
+                             self.class_name)
+                return None
+
+            # file not changed, use cached _ModelInfo properties
+            return _ModelInfo(**mi_dict["modelinfo"])
+        except Exception:
+            logger.exception(("Cached model info "
+                              "for class %s.%s error. "), self.module_name,
+                             self.class_name)
+            return None
+
+    def _save_modelinfo_to_cache(self, mi: _ModelInfo,
+                                 module_hash: str) -> None:
+        """save dictionary json file to cache"""
+        from vllm.model_executor.model_loader.weight_utils import atomic_writer
+        try:
+            modelinfo_dict = {
+                "hash": module_hash,
+                "modelinfo": asdict(mi),
+            }
+            cache_dir = self._get_cache_dir()
+            cache_dir.mkdir(parents=True, exist_ok=True)
+            modelinfo_path = cache_dir / self._get_cache_filename()
+            with atomic_writer(modelinfo_path, encoding='utf-8') as f:
+                json.dump(modelinfo_dict, f, indent=2)
+        except Exception:
+            logger.exception("Error saving model info cache.")
+
+    @logtime(logger=logger, msg="Registry inspect model class")
     def inspect_model_cls(self) -> _ModelInfo:
-        return _run_in_subprocess(
+        model_path = Path(
+            __file__).parent / f"{self.module_name.split('.')[-1]}.py"
+
+        assert model_path.exists(), \
+            f"Model {self.module_name} expected to be on path {model_path}"
+        with open(model_path, "rb") as f:
+            module_hash = hashlib.md5(f.read()).hexdigest()
+
+        mi = self._load_modelinfo_from_cache(module_hash)
+        if mi is not None:
+            logger.debug(("Loaded model info "
+                          "for class %s.%s from cache"), self.module_name,
+                         self.class_name)
+            return mi
+        else:
+            logger.debug(("Cache model info "
+                          "for class %s.%s miss. "
+                          "Loading model instead."), self.module_name,
+                         self.class_name)
+
+        # Performed in another process to avoid initializing CUDA
+        mi = _run_in_subprocess(
             lambda: _ModelInfo.from_model_cls(self.load_model_cls()))
+        logger.debug("Loaded model info for class %s.%s", self.module_name,
+                     self.class_name)
+
+        # save cache file
+        self._save_modelinfo_to_cache(mi, module_hash)
+
+        return mi
 
     def load_model_cls(self) -> type[nn.Module]:
         mod = importlib.import_module(self.module_name)
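For reference (not part of the diff): the cache entry written by _save_modelinfo_to_cache is a JSON file under VLLM_CACHE_ROOT/modelinfos/, named from the module and class with dots replaced by dashes (e.g. vllm-model_executor-models-llama-LlamaForCausalLM.json, to take a hypothetical Llama entry). A sketch of its shape follows; the keys inside "modelinfo" come from the _ModelInfo dataclass via asdict(), and since that dataclass is not shown in this diff the field names below are hypothetical.

# Hypothetical illustration of one cache entry; actual "modelinfo" keys
# are whatever fields the _ModelInfo dataclass defines.
example_cache_entry = {
    # md5 hex digest of the model module's source file; inspect_model_cls
    # recomputes it on every load, so an edited module makes the entry
    # stale and the model info is regenerated in a subprocess.
    "hash": "3f2c9a1e0b8d4c6f7a5e9d0b1c2a3f4e",
    "modelinfo": {
        "architecture": "LlamaForCausalLM",  # hypothetical field
        "supports_multimodal": False,        # hypothetical field
    },
}

This is what makes the speed-up in the commit title safe: the expensive subprocess inspection runs only on a cache miss or a hash mismatch, and the JSON is written through atomic_writer so a crash mid-write cannot leave a corrupt entry.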