[Misc] Update lmcache connector with the latest connector apis (#19441)

Signed-off-by: YaoJiayi <120040070@link.cuhk.edu.cn>
Jiayi Yao authored on 2025-06-17 12:57:54 -07:00, committed by GitHub
parent bf57ccc5c2
commit cda92307c1


@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any, Optional
 
 import torch
 from lmcache.integration.vllm.vllm_v1_adapter import LMCacheConnectorV1Impl
@@ -87,6 +87,22 @@ class LMCacheConnectorV1(KVConnectorBase_V1):
         """
         self._lmcache_engine.wait_for_save()
 
+    def get_finished(
+        self, finished_req_ids: set[str]
+    ) -> tuple[Optional[set[str]], Optional[set[str]]]:
+        """
+        Notifies worker-side connector ids of requests that have
+        finished generating tokens.
+
+        Returns:
+            ids of requests that have finished asynchronous transfer
+            (requests that previously returned True from request_finished()),
+            tuple of (sending/saving ids, recving/loading ids).
+            The finished saves/sends req ids must belong to a set provided in a
+            call to this method (this call or a prior one).
+        """
+        return self._lmcache_engine.get_finished(finished_req_ids)
+
     # ==============================
     # Scheduler-side methods
     # ==============================
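For orientation (not part of the commit itself): a minimal sketch of how a worker-side caller could drive the new get_finished() hook after a model step. The connector instance and the finished_req_ids set are assumed to be provided by the surrounding engine code, and the helper name poll_async_transfers is hypothetical.

# Hypothetical worker-side polling helper; `connector` is assumed to be an
# already-initialized LMCacheConnectorV1 and `finished_req_ids` the ids of
# requests that finished generating tokens in this step.
def poll_async_transfers(
    connector, finished_req_ids: set[str]
) -> tuple[set[str], set[str]]:
    # Ask the connector which previously-reported requests have completed
    # their asynchronous KV save/send or load/recv.
    done_sending, done_recving = connector.get_finished(finished_req_ids)
    # Either entry may be None when nothing finished on that side.
    return done_sending or set(), done_recving or set()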
@@ -132,3 +148,20 @@ class LMCacheConnectorV1(KVConnectorBase_V1):
             scheduler_output (SchedulerOutput): the scheduler output object.
         """
         return self._lmcache_engine.build_connector_meta(scheduler_output)
+
+    def request_finished(
+        self,
+        request: "Request",
+        block_ids: list[int],
+    ) -> tuple[bool, Optional[dict[str, Any]]]:
+        """
+        Called when a request has finished, before its blocks are freed.
+
+        Returns:
+            True if the request is being saved/sent asynchronously and blocks
+            should not be freed until the request_id is returned from
+            get_finished().
+            Optional KVTransferParams to be included in the request outputs
+            returned by the engine.
+        """
+        return self._lmcache_engine.request_finished(request, block_ids)
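
Similarly, a rough scheduler-side sketch (again illustrative only, with hypothetical names) of how request_finished() could be consulted before a finished request's KV blocks are freed:

# Hypothetical scheduler-side handling of a finished request; `connector`,
# `request`, and `block_ids` are assumed to be supplied by the engine.
from typing import Any, Optional

def on_request_finished(
    connector, request, block_ids: list[int]
) -> Optional[dict[str, Any]]:
    # The connector reports whether it still needs the blocks for an async
    # save/send, and optionally returns KV transfer params.
    delay_free, kv_xfer_params = connector.request_finished(request, block_ids)
    if delay_free:
        # Keep the blocks allocated until get_finished() later reports this
        # request id as done sending/saving.
        pass
    else:
        # Safe to free the request's KV blocks right away.
        pass
    # kv_xfer_params (if any) would be attached to the request outputs.
    return kv_xfer_params

Together, the two hooks let the connector defer block reuse until an asynchronous KV transfer has actually completed, as described in the docstrings above.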