[serve] guard imports (#39825)

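Guard the optional imports in `serving.py`: `PIL.Image` is now imported only when `is_vision_available()` is true, and `ContinuousBatchingManager` / `RequestStatus` are imported inside the existing `is_torch_available()` block. The `PreTrainedModel` annotations are quoted so they are not evaluated when the functions are defined.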
2025-10-20 17:13:56 +08:00 · 2025-08-18 16:28:10 +01:00
parent 7a0ba0d7d8
commit d6fad86d23
1 changed file with 11 additions and 5 deletions
--- a/src/transformers/commands/serving.py
+++ b/src/transformers/commands/serving.py
@@ -32,7 +32,6 @@ from typing import Optional, Union

 from huggingface_hub import model_info
 from huggingface_hub.constants import HF_HUB_OFFLINE
-from PIL import Image

 import transformers
 from transformers.models.auto.modeling_auto import (
@@ -45,6 +44,7 @@ from transformers.utils.import_utils import (
    is_openai_available,
    is_pydantic_available,
    is_uvicorn_available,
+    is_vision_available,
 )

 from .. import (
@@ -54,7 +54,6 @@ from .. import (
    ProcessorMixin,
    TextIteratorStreamer,
 )
-from ..generation.continuous_batching import ContinuousBatchingManager, RequestStatus
 from ..utils import is_torch_available, logging
 from . import BaseTransformersCLICommand

@@ -69,9 +68,14 @@ if is_torch_available():
        PreTrainedModel,
    )

+    from ..generation.continuous_batching import ContinuousBatchingManager, RequestStatus
+
 if is_librosa_available():
    import librosa

+if is_vision_available():
+    from PIL import Image
+
 serve_dependencies_available = (
    is_pydantic_available() and is_fastapi_available() and is_uvicorn_available() and is_openai_available()
 )
@@ -811,7 +815,7 @@ class ServeCommand(BaseTransformersCLICommand):
        return stream_chat_completion(inputs[0])

    @staticmethod
-    def get_model_modality(model: PreTrainedModel) -> Modality:
+    def get_model_modality(model: "PreTrainedModel") -> Modality:
        model_classname = model.__class__.__name__
        if model_classname in MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES.values():
            modality = Modality.VLM
@@ -1545,7 +1549,9 @@ class ServeCommand(BaseTransformersCLICommand):
        logger.info(f"Loaded model {model_id_and_revision}")
        return model, data_processor

-    def load_model_and_processor(self, model_id_and_revision: str) -> tuple[PreTrainedModel, PreTrainedTokenizerFast]:
+    def load_model_and_processor(
+        self, model_id_and_revision: str
+    ) -> tuple["PreTrainedModel", PreTrainedTokenizerFast]:
        """
        Loads the text model and processor from the given model ID and revision into the ServeCommand instance.

@@ -1570,7 +1576,7 @@ class ServeCommand(BaseTransformersCLICommand):

        return model, processor

-    def load_audio_model_and_processor(self, model_id_and_revision: str) -> tuple[PreTrainedModel, ProcessorMixin]:
+    def load_audio_model_and_processor(self, model_id_and_revision: str) -> tuple["PreTrainedModel", ProcessorMixin]:
        """
        Loads the audio model and processor from the given model ID and revision into the ServeCommand instance.