Mirror of https://github.com/vllm-project/vllm.git
[Misc] Update lmcache connector with the latest connector apis (#19441)
Signed-off-by: YaoJiayi <120040070@link.cuhk.edu.cn>
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any, Optional

 import torch
 from lmcache.integration.vllm.vllm_v1_adapter import LMCacheConnectorV1Impl
@@ -87,6 +87,22 @@ class LMCacheConnectorV1(KVConnectorBase_V1):
         """
         self._lmcache_engine.wait_for_save()

+    def get_finished(
+        self, finished_req_ids: set[str]
+    ) -> tuple[Optional[set[str]], Optional[set[str]]]:
+        """
+        Notifies worker-side connector ids of requests that have
+        finished generating tokens.
+
+        Returns:
+            ids of requests that have finished asynchronous transfer
+            (requests that previously returned True from request_finished()),
+            tuple of (sending/saving ids, recving/loading ids).
+            The finished saves/sends req ids must belong to a set provided in a
+            call to this method (this call or a prior one).
+        """
+        return self._lmcache_engine.get_finished(finished_req_ids)
+
     # ==============================
     # Scheduler-side methods
     # ==============================
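
For orientation, the worker-side get_finished() hook added above is meant to be polled after each engine step so that block freeing can be deferred until asynchronous KV transfers complete. A minimal sketch of such a polling loop, assuming a connector instance and a set of request ids that finished generating this step (both stand-ins for engine state, not part of this diff):

def drain_async_transfers(connector, step_finished_ids: set[str]) -> None:
    # get_finished() returns (sending/saving ids, recving/loading ids);
    # either element may be None when nothing completed this step.
    done_sending, done_recving = connector.get_finished(step_finished_ids)
    for req_id in (done_sending or set()) | (done_recving or set()):
        # Only now is it safe for the engine to free this request's blocks.
        print(f"KV transfer complete for request {req_id}")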
@@ -132,3 +148,20 @@ class LMCacheConnectorV1(KVConnectorBase_V1):
             scheduler_output (SchedulerOutput): the scheduler output object.
         """
         return self._lmcache_engine.build_connector_meta(scheduler_output)
+
+    def request_finished(
+        self,
+        request: "Request",
+        block_ids: list[int],
+    ) -> tuple[bool, Optional[dict[str, Any]]]:
+        """
+        Called when a request has finished, before its blocks are freed.
+
+        Returns:
+            True if the request is being saved/sent asynchronously and blocks
+            should not be freed until the request_id is returned from
+            get_finished().
+            Optional KVTransferParams to be included in the request outputs
+            returned by the engine.
+        """
+        return self._lmcache_engine.request_finished(request, block_ids)
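
To make the new API surface concrete, here is a minimal no-op sketch of the two hooks this commit forwards to LMCacheConnectorV1Impl. This is not the LMCache implementation, only an illustration of the contract implied by the docstrings above:

from typing import Any, Optional

class NoOpConnectorSketch:
    def get_finished(
        self, finished_req_ids: set[str]
    ) -> tuple[Optional[set[str]], Optional[set[str]]]:
        # Nothing transfers asynchronously here, so every request that has
        # finished generating is immediately done saving; nothing is loading.
        return set(finished_req_ids), None

    def request_finished(
        self, request: Any, block_ids: list[int]
    ) -> tuple[bool, Optional[dict[str, Any]]]:
        # False: blocks may be freed immediately; no KV transfer params
        # to attach to the request outputs.
        return False, None

A real connector would instead return True from request_finished() for requests whose KV blocks are still being saved, and later report those request ids through get_finished() once the transfer completes.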