interface

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2025-10-20 14:53:52 +08:00 · 2025-03-12 00:00:03 -07:00
parent 8d46d5d11d
commit 06e22ba44c
2 changed files with 71 additions and 1 deletions
--- a/vllm/v1/core/sched/interface.py
+++ b/vllm/v1/core/sched/interface.py
@ -0,0 +1,69 @@
+# SPDX-License-Identifier: Apache-2.0
+from abc import ABC, abstractmethod
+from collections.abc import Iterable
+from typing import TYPE_CHECKING, Optional, Union
+
+if TYPE_CHECKING:
+    from vllm.v1.core.sched.output import SchedulerOutput
+    from vllm.v1.engine import EngineCoreOutputs
+    from vllm.v1.metrics.stats import SchedulerStats
+    from vllm.v1.outputs import ModelRunnerOutput
+    from vllm.v1.request import Request, RequestStatus
+
+
+class SchedulerInterface(ABC):
+
+    @abstractmethod
+    def schedule(self) -> "SchedulerOutput":
+        raise NotImplementedError
+
+    @abstractmethod
+    def update_from_output(
+        self,
+        scheduler_output: "SchedulerOutput",
+        model_runner_output: "ModelRunnerOutput",
+    ) -> "EngineCoreOutputs":
+        raise NotImplementedError
+
+    @abstractmethod
+    def add_request(self, request: "Request") -> None:
+        raise NotImplementedError
+
+    @abstractmethod
+    def finish_requests(
+        self,
+        request_ids: Union[str, Iterable[str]],
+        finished_status: "RequestStatus",
+    ) -> None:
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_num_unfinished_requests(self) -> int:
+        raise NotImplementedError
+
+    @abstractmethod
+    def has_unfinished_requests(self) -> bool:
+        raise NotImplementedError
+
+    @abstractmethod
+    def has_finished_requests(self) -> bool:
+        raise NotImplementedError
+
+    @abstractmethod
+    def has_requests(self) -> bool:
+        """Returns True if there are unfinished requests, or finished requests
+        not yet returned in SchedulerOutputs."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_num_unscheduled_requests(self) -> int:
+        """Number of requests that are not being processed by the executor."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def reset_prefix_cache(self) -> bool:
+        raise NotImplementedError
+
+    @abstractmethod
+    def make_stats(self) -> Optional["SchedulerStats"]:
+        raise NotImplementedError
--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@ -13,6 +13,7 @@ from vllm.logger import init_logger
 from vllm.v1.core.encoder_cache_manager import (EncoderCacheManager,
                                                compute_encoder_budget)
 from vllm.v1.core.kv_cache_manager import KVCacheManager
+from vllm.v1.core.sched.interface import SchedulerInterface
 from vllm.v1.core.sched.output import (CachedRequestData, NewRequestData,
                                       SchedulerOutput)
 from vllm.v1.core.sched.utils import check_stop
@ -26,7 +27,7 @@ from vllm.v1.structured_output import StructuredOutputManager
 logger = init_logger(__name__)


-class Scheduler:
+class Scheduler(SchedulerInterface):

    def __init__(
        self,