diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index ca8734d28b..ffea9bb355 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -389,7 +389,7 @@ class Worker(WorkerBase): f"utilize gpu memory. Current kv cache memory in use is " f"{int(self.available_kv_cache_memory_bytes)} bytes.") - logger.info(msg) + logger.debug(msg) # Warm up sampler and preallocate memory buffer for logits and other # sampling related tensors of max possible shape to avoid memory