Files
vllm/tests/entrypoints/openai/test_shutdown.py
2025-10-19 03:06:32 -07:00

94 lines
2.5 KiB
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio
import signal
import subprocess
import sys
import time

import openai
import pytest

from vllm.utils.network_utils import get_open_port
# Model identifier given to the server's --model flag and named in client requests.
MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"
def _stop_server(proc, grace_period=5.0):
    """Ensure the server subprocess has exited and its pipes are closed.

    Sends SIGTERM first so the server can shut down on its own, and only
    escalates to SIGKILL if it is still alive after ``grace_period``.

    Args:
        proc: ``subprocess.Popen`` handle of the server process.
        grace_period: Seconds to wait after ``terminate()`` before ``kill()``.
    """
    if proc.poll() is None:
        proc.terminate()
        try:
            proc.wait(timeout=grace_period)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
    # close() on an already-closed pipe is a no-op, so this is safe even
    # after communicate() has consumed and closed the streams.
    for pipe in (proc.stdout, proc.stderr):
        if pipe is not None:
            pipe.close()


@pytest.mark.asyncio
async def test_shutdown_on_engine_failure():
    """Verify that API returns connection error when server process is killed.

    Starts a vLLM server, kills it to simulate a crash, then verifies that
    subsequent API calls fail appropriately.

    The server subprocess is always cleaned up on exit, including when an
    assertion fails or a wait times out, so a failing run does not leave a
    stray server behind.
    """
    port = get_open_port()

    # NOTE(review): stdout/stderr are piped but only read if the server dies
    # during startup; a very chatty server could fill the pipe buffer and
    # block. Confirm log volume stays small or redirect to a file.
    proc = subprocess.Popen(
        [
            # dtype, max-len etc set so that this can run in CI
            sys.executable,
            "-m",
            "vllm.entrypoints.openai.api_server",
            "--model",
            MODEL_NAME,
            "--dtype",
            "bfloat16",
            "--max-model-len",
            "128",
            "--enforce-eager",
            "--port",
            str(port),
            "--gpu-memory-utilization",
            "0.05",
            "--max-num-seqs",
            "2",
            "--disable-frontend-multiprocessing",
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        # Runs in the child before exec: the server ignores SIGINT.
        preexec_fn=lambda: signal.signal(signal.SIGINT, signal.SIG_IGN),
    )

    try:
        client = openai.AsyncOpenAI(
            base_url=f"http://localhost:{port}/v1",
            api_key="dummy",
            max_retries=0,
            timeout=10,
        )

        # Poll until server is ready. A monotonic clock keeps the deadline
        # immune to wall-clock adjustments.
        deadline = time.monotonic() + 30
        while time.monotonic() < deadline:
            try:
                await client.completions.create(
                    model=MODEL_NAME, prompt="Hello", max_tokens=1
                )
                break
            except Exception:
                # Any failure here just means "not ready yet"; back off
                # without blocking the event loop, then check for a crash.
                await asyncio.sleep(0.5)
                if proc.poll() is not None:
                    stdout, stderr = proc.communicate(timeout=1)
                    pytest.fail(
                        f"Server died during startup. stdout: {stdout}, "
                        f"stderr: {stderr}"
                    )
        else:
            # Loop ran out without a successful request; the finally block
            # below takes care of stopping the server.
            pytest.fail("Server failed to start in 30 seconds")

        # Kill server to simulate crash
        proc.terminate()
        await asyncio.sleep(1)

        # Verify API calls now fail
        with pytest.raises((openai.APIConnectionError, openai.APIStatusError)):
            await client.completions.create(
                model=MODEL_NAME, prompt="This should fail", max_tokens=1
            )

        # wait() raises TimeoutExpired if the server has not exited in time,
        # which is the effective check that shutdown completed.
        return_code = proc.wait(timeout=5)
        assert return_code is not None
    finally:
        _stop_server(proc)