vllm/tests/entrypoints/llm/test_collective_rpc.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import torch

from vllm import LLM

from ...utils import create_new_process_for_each_test


@pytest.mark.parametrize("tp_size", [1, 2])
@pytest.mark.parametrize("backend", ["mp", "ray"])
@create_new_process_for_each_test()
def test_collective_rpc(tp_size, backend, monkeypatch):
    if torch.cuda.device_count() < tp_size:
        pytest.skip(f"Not enough GPUs for tensor parallelism {tp_size}")
    if tp_size == 1 and backend == "ray":
        pytest.skip("Skip duplicate test case")
    if tp_size == 1:
        backend = None

    # intentionally define the function inside the test body,
    # to check that it can be serialized and sent to the workers
    def echo_rank(self):
        return self.rank

    # pickle-based serialization is required to ship the locally defined
    # function to the workers
    monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
    llm = LLM(
        model="hmellor/tiny-random-LlamaForCausalLM",
        enforce_eager=True,
        load_format="dummy",  # random weights instead of loading a checkpoint
        tensor_parallel_size=tp_size,
        distributed_executor_backend=backend,
    )
    # collective_rpc runs the callable on every worker and gathers the
    # results in rank order
    assert llm.collective_rpc(echo_rank) == list(range(tp_size))
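
    # A minimal sketch beyond the original assertion, assuming
    # LLM.collective_rpc forwards extra positional arguments to the
    # callable via `args=` (as in recent vLLM versions); `scale` is a
    # hypothetical parameter used only for illustration.
    def scaled_rank(self, scale: int):
        return self.rank * scale

    assert llm.collective_rpc(scaled_rank, args=(2,)) == [2 * r for r in range(tp_size)]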