[Misc] Small: Fix video loader return type annotations. (#20389)

Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
2025-10-20 14:53:52 +08:00 · 2025-07-02 20:10:39 -07:00
parent 2e25bb12a8
commit b616f6a53d
3 changed files with 14 additions and 11 deletions
--- a/tests/multimodal/test_utils.py
+++ b/tests/multimodal/test_utils.py
@@ -172,9 +172,10 @@ async def test_fetch_video_http(video_url: str, num_frames: int):
            "num_frames": num_frames,
        }})

-    video_sync = connector.fetch_video(video_url)
-    video_async = await connector.fetch_video_async(video_url)
-    assert np.array_equal(video_sync[0], video_async[0])
+    video_sync, metadata_sync = connector.fetch_video(video_url)
+    video_async, metadata_async = await connector.fetch_video_async(video_url)
+    assert np.array_equal(video_sync, video_async)
+    assert metadata_sync == metadata_async


 # Used for the next two tests related to `merge_and_sort_multimodal_metadata`.
--- a/vllm/multimodal/utils.py
+++ b/vllm/multimodal/utils.py
@@ -228,7 +228,7 @@ class MediaConnector:
        video_url: str,
        *,
        image_mode: str = "RGB",
-    ) -> npt.NDArray:
+    ) -> tuple[npt.NDArray, dict[str, Any]]:
        """
        Load video from a HTTP or base64 data URL.
        """
@@ -248,7 +248,7 @@ class MediaConnector:
        video_url: str,
        *,
        image_mode: str = "RGB",
-    ) -> npt.NDArray:
+    ) -> tuple[npt.NDArray, dict[str, Any]]:
        """
        Asynchronously load video from a HTTP or base64 data URL.

--- a/vllm/multimodal/video.py
+++ b/vllm/multimodal/video.py
@@ -6,6 +6,7 @@ from abc import abstractmethod
 from functools import partial
 from io import BytesIO
 from pathlib import Path
+from typing import Any

 import numpy as np
 import numpy.typing as npt
@@ -57,7 +58,7 @@ class VideoLoader:
    def load_bytes(cls,
                   data: bytes,
                   num_frames: int = -1,
-                   **kwargs) -> npt.NDArray:
+                   **kwargs) -> tuple[npt.NDArray, dict[str, Any]]:
        raise NotImplementedError


@@ -106,7 +107,7 @@ class OpenCVVideoBackend(VideoLoader):
    def load_bytes(cls,
                   data: bytes,
                   num_frames: int = -1,
-                   **kwargs) -> npt.NDArray:
+                   **kwargs) -> tuple[npt.NDArray, dict[str, Any]]:
        import cv2

        backend = cls().get_cv2_video_api()
@@ -179,12 +180,13 @@ class VideoMediaIO(MediaIO[npt.NDArray]):
        video_loader_backend = envs.VLLM_VIDEO_LOADER_BACKEND
        self.video_loader = VIDEO_LOADER_REGISTRY.load(video_loader_backend)

-    def load_bytes(self, data: bytes) -> npt.NDArray:
+    def load_bytes(self, data: bytes) -> tuple[npt.NDArray, dict[str, Any]]:
        return self.video_loader.load_bytes(data,
                                            num_frames=self.num_frames,
                                            **self.kwargs)

-    def load_base64(self, media_type: str, data: str) -> npt.NDArray:
+    def load_base64(self, media_type: str,
+                    data: str) -> tuple[npt.NDArray, dict[str, Any]]:
        if media_type.lower() == "video/jpeg":
            load_frame = partial(
                self.image_io.load_base64,
@@ -194,11 +196,11 @@ class VideoMediaIO(MediaIO[npt.NDArray]):
            return np.stack([
                np.asarray(load_frame(frame_data))
                for frame_data in data.split(",")
-            ])
+            ]), {}

        return self.load_bytes(base64.b64decode(data))

-    def load_file(self, filepath: Path) -> npt.NDArray:
+    def load_file(self, filepath: Path) -> tuple[npt.NDArray, dict[str, Any]]:
        with filepath.open("rb") as f:
            data = f.read()