97 lines
3.6 KiB
Python
97 lines
3.6 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""
|
|
|
|
# version.py should be an independent library, and we always import the
# version library first. This assumption is critical for some customizations.
|
|
from .version import __version__, __version_tuple__ # isort:skip
|
|
|
|
import typing
|
|
|
|
# The environment variables override should be imported before any other
|
|
# modules to ensure that the environment variables are set before any
|
|
# other modules are imported.
|
|
import vllm.env_override # noqa: F401
|
|
|
|
# Lookup table for the package's lazily resolved public names.
#
# Each key is a name exposed on the package; each value is a
# "<relative module>:<attribute>" locator that the module-level
# ``__getattr__`` splits on ":" and imports relative to this package (hence
# the leading dot). Every public name is identical to the attribute it points
# at, so the table is generated from (module, names) groups instead of
# spelling out each locator by hand. Insertion order follows the groups below.
MODULE_ATTRS = {
    public_name: f"{relative_module}:{public_name}"
    for relative_module, public_names in (
        (".engine.arg_utils", ("AsyncEngineArgs", "EngineArgs")),
        (".engine.async_llm_engine", ("AsyncLLMEngine",)),
        (".engine.llm_engine", ("LLMEngine",)),
        (".entrypoints.llm", ("LLM",)),
        (".executor.ray_utils", ("initialize_ray_cluster",)),
        (".inputs", ("PromptType", "TextPrompt", "TokensPrompt")),
        (".model_executor.models", ("ModelRegistry",)),
        (".sampling_params", ("SamplingParams",)),
        (".pooling_params", ("PoolingParams",)),
        (".outputs", (
            "ClassificationOutput",
            "ClassificationRequestOutput",
            "CompletionOutput",
            "EmbeddingOutput",
            "EmbeddingRequestOutput",
            "PoolingOutput",
            "PoolingRequestOutput",
            "RequestOutput",
            "ScoringOutput",
            "ScoringRequestOutput",
        )),
    )
    for public_name in public_names
}
|
|
|
|
if typing.TYPE_CHECKING:
    # Static analysers only: these imports are never executed at runtime.
    # They give type checkers and IDEs concrete definitions for the same
    # names that the runtime branch below resolves on demand.
    from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
    from vllm.engine.async_llm_engine import AsyncLLMEngine
    from vllm.engine.llm_engine import LLMEngine
    from vllm.entrypoints.llm import LLM
    from vllm.executor.ray_utils import initialize_ray_cluster
    from vllm.inputs import PromptType, TextPrompt, TokensPrompt
    from vllm.model_executor.models import ModelRegistry
    from vllm.outputs import (
        ClassificationOutput,
        ClassificationRequestOutput,
        CompletionOutput,
        EmbeddingOutput,
        EmbeddingRequestOutput,
        PoolingOutput,
        PoolingRequestOutput,
        RequestOutput,
        ScoringOutput,
        ScoringRequestOutput,
    )
    from vllm.pooling_params import PoolingParams
    from vllm.sampling_params import SamplingParams
else:

    def __getattr__(name: str) -> typing.Any:
        """Resolve a lazily exported package attribute.

        Python calls this module-level hook when ``name`` is not found
        through normal attribute lookup on the package. Names listed in
        ``MODULE_ATTRS`` are located by importing the recorded submodule
        relative to ``__package__`` and reading the recorded attribute
        from it.

        Args:
            name: The attribute being looked up on the package.

        Returns:
            The object that the ``MODULE_ATTRS`` entry for ``name`` points at.

        Raises:
            AttributeError: If ``name`` has no entry in ``MODULE_ATTRS``.
        """
        import importlib

        # Guard clause: anything outside the table is a genuine miss.
        if name not in MODULE_ATTRS:
            raise AttributeError(
                f'module {__package__} has no attribute {name}')

        # Locator format is "<relative module>:<attribute>".
        module_path, symbol = MODULE_ATTRS[name].split(":")
        loaded = importlib.import_module(module_path, __package__)
        return getattr(loaded, symbol)
|
|
|
|
|
|
# Public API of the package: the names bound by ``from vllm import *``.
# Kept as a plain literal list; the entries are the two version attributes
# followed by the names listed in MODULE_ATTRS.
__all__ = [
    # Version metadata.
    "__version__", "__version_tuple__",
    # Entry point, model registry, prompt inputs and sampling parameters.
    "LLM", "ModelRegistry",
    "PromptType", "TextPrompt", "TokensPrompt",
    "SamplingParams",
    # Output types.
    "RequestOutput", "CompletionOutput",
    "PoolingOutput", "PoolingRequestOutput",
    "EmbeddingOutput", "EmbeddingRequestOutput",
    "ClassificationOutput", "ClassificationRequestOutput",
    "ScoringOutput", "ScoringRequestOutput",
    # Engines and their argument containers.
    "LLMEngine", "EngineArgs",
    "AsyncLLMEngine", "AsyncEngineArgs",
    # Remaining exports.
    "initialize_ray_cluster",
    "PoolingParams",
]
|