[V1] Hybrid allocator without prefix caching (#20661)

Signed-off-by: nopperl <54780682+nopperl@users.noreply.github.com>
This commit is contained in:
nopperl
2025-07-14 01:55:14 +09:00
committed by GitHub
parent 80d38b8ac8
commit 4bbfc36b16

View File

@ -171,6 +171,35 @@ class KVCacheCoordinator(ABC):
pass
class KVCacheCoordinatorNoPrefixCache(KVCacheCoordinator):
    """
    Coordinator used when prefix caching is turned off or not supported.

    Unlike UnitaryKVCacheCoordinator and HybridKVCacheCoordinator, this
    coordinator works with any number of KV cache groups, including zero.
    None of the prefix-caching features are implemented: the prefix-related
    queries below always report that nothing is cached.
    """

    def __init__(
        self,
        kv_cache_config: KVCacheConfig,
        max_model_len: int,
        use_eagle: bool,
        caching_hash_fn: Callable,
        enable_kv_cache_events: bool,
    ):
        """Initialise the base coordinator and record the manager count."""
        # The literal False is the fourth positional argument of the base
        # constructor -- presumably its enable_caching flag (confirm against
        # KVCacheCoordinator.__init__).
        super().__init__(
            kv_cache_config,
            max_model_len,
            use_eagle,
            False,
            caching_hash_fn,
            enable_kv_cache_events,
        )
        # Stored once; both query methods size their results from it.
        manager_count = len(self.single_type_managers)
        self.num_single_type_manager = manager_count

    def get_num_common_prefix_blocks(
        self,
        request_id: str,
        num_running_requests: int,
    ) -> list[int]:
        """Return one zero per single-type manager; the arguments are unused."""
        return [0 for _ in range(self.num_single_type_manager)]

    def find_longest_cache_hit(
        self,
        block_hashes: list[BlockHash],
        max_cache_hit_length: int,
    ) -> tuple[tuple[list[KVCacheBlock], ...], int]:
        """
        Report a cache miss without inspecting the inputs.

        Returns:
            A pair of (a tuple holding one new empty block list per
            single-type manager, a hit length of 0).
        """
        empty_hits: list[list[KVCacheBlock]] = []
        for _ in range(self.num_single_type_manager):
            # A distinct list object per manager, never a shared one.
            empty_hits.append([])
        return tuple(empty_hits), 0
class UnitaryKVCacheCoordinator(KVCacheCoordinator):
"""
KV cache coordinator for models with only one KV cache group. This is the
@ -359,6 +388,10 @@ def get_kv_cache_coordinator(
kv_cache_config: KVCacheConfig, max_model_len: int, use_eagle: bool,
enable_caching: bool, caching_hash_fn: Callable,
enable_kv_cache_events: bool) -> KVCacheCoordinator:
if not enable_caching:
return KVCacheCoordinatorNoPrefixCache(kv_cache_config, max_model_len,
use_eagle, caching_hash_fn,
enable_kv_cache_events)
if len(kv_cache_config.kv_cache_groups) == 1:
return UnitaryKVCacheCoordinator(kv_cache_config, max_model_len,
use_eagle, enable_caching,