Fix error when tp > 1 (#2644)

Co-authored-by: zhaoyang-star <zhao.yang16@zte.com.cn>
2025-10-20 14:53:52 +08:00 · 2024-01-29 14:47:39 +08:00
parent 9090bf02e7
commit b72af8f1ed
1 changed files with 2 additions and 3 deletions
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -236,7 +236,6 @@ class LLMEngine:
        model_config = copy.deepcopy(self.model_config)
        parallel_config = copy.deepcopy(self.parallel_config)
        scheduler_config = copy.deepcopy(self.scheduler_config)
-        cache_config = copy.deepcopy(self.cache_config)

        for rank, (worker, (node_id,
                            _)) in enumerate(zip(self.workers,
@@ -252,7 +251,7 @@ class LLMEngine:
                    rank,
                    distributed_init_method,
                    lora_config=self.lora_config,
-                    cache_config=cache_config,
+                    kv_cache_dtype=self.cache_config.cache_dtype,
                ))

        driver_rank = 0
@@ -265,7 +264,7 @@ class LLMEngine:
            driver_rank,
            distributed_init_method,
            lora_config=self.lora_config,
-            cache_config=cache_config,
+            kv_cache_dtype=self.cache_config.cache_dtype,
            is_driver_worker=True,
        )