mirror of https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00

enable more tests (#161192)

Enable more vLLM tests against PyTorch main, and add a schedule to run the tests every 12 hours.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/161192
Approved by: https://github.com/huydhn

Committed by: PyTorch MergeBot
Parent: 36ac916929
Commit: 6443ea337d
@@ -3,7 +3,7 @@ from typing import Any
 from cli.lib.common.git_helper import clone_external_repo
 from cli.lib.common.pip_helper import pip_install_packages
-from cli.lib.common.utils import run_command, working_directory
+from cli.lib.common.utils import run_command, temp_environ, working_directory


 logger = logging.getLogger(__name__)
@@ -20,8 +20,10 @@ def sample_vllm_test_library():
         "vllm_basic_correctness_test": {
             "title": "Basic Correctness Test",
             "id": "vllm_basic_correctness_test",
+            "env_vars": {
+                "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
+            },
             "steps": [
-                "export VLLM_WORKER_MULTIPROC_METHOD=spawn",
                 "pytest -v -s basic_correctness/test_cumem.py",
                 "pytest -v -s basic_correctness/test_basic_correctness.py",
                 "pytest -v -s basic_correctness/test_cpu_offload.py",
@@ -42,8 +44,10 @@ def sample_vllm_test_library():
         "vllm_entrypoints_test": {
             "title": "Entrypoints Test ",
             "id": "vllm_entrypoints_test",
+            "env_vars": {
+                "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
+            },
             "steps": [
-                "export VLLM_WORKER_MULTIPROC_METHOD=spawn",
                 " ".join(
                     [
                         "pytest",
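The two hunks above stop exporting VLLM_WORKER_MULTIPROC_METHOD inside the step commands and instead declare it in an env_vars block, which the reworked run_test_plan (further down in this diff) applies through temp_environ from cli.lib.common.utils. That helper is not part of this commit, so the snippet below is only a minimal sketch of the kind of context manager it is assumed to be: set the variables on entry, restore whatever was there before on exit.

import contextlib
import os


@contextlib.contextmanager
def temp_environ_sketch(env_vars: dict[str, str]):
    # Hypothetical stand-in for cli.lib.common.utils.temp_environ.
    saved = {key: os.environ.get(key) for key in env_vars}
    os.environ.update(env_vars)
    try:
        yield
    finally:
        for key, old in saved.items():
            if old is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = old


# Example: the env_vars block from "vllm_basic_correctness_test" above.
with temp_environ_sketch({"VLLM_WORKER_MULTIPROC_METHOD": "spawn"}):
    pass  # the pytest steps would run here with the variable set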
@@ -70,10 +74,105 @@ def sample_vllm_test_library():
                 "pytest -v -s test_regression.py",
             ],
         },
+        "vllm_lora_tp_test_distributed": {
+            "title": "LoRA TP Test (Distributed)",
+            "id": "vllm_lora_tp_test_distributed",
+            "env_vars": {
+                "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
+            },
+            "num_gpus": 4,
+            "steps": [
+                "pytest -v -s -x lora/test_chatglm3_tp.py",
+                "echo $VLLM_WORKER_MULTIPROC_METHOD",
+                "pytest -v -s -x lora/test_llama_tp.py",
+                "pytest -v -s -x lora/test_multi_loras_with_tp.py",
+            ],
+        },
+        "vllm_lora_280_failure_test": {
+            "title": "LoRA 280 failure test",
+            "id": "vllm_lora_280_failure_test",
+            "steps": ["pytest -v lora/test_quant_model.py"],
+        },
+        "vllm_multi_model_processor_test": {
+            "title": "Multi-Modal Processor Test",
+            "id": "vllm_multi_model_processor_test",
+            "package_install": ["git+https://github.com/TIGER-AI-Lab/Mantis.git"],
+            "steps": [
+                "pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py",
+            ],
+        },
+        "vllm_pytorch_compilation_unit_tests": {
+            "title": "PyTorch Compilation Unit Tests",
+            "id": "vllm_pytorch_compilation_unit_tests",
+            "steps": [
+                "pytest -v -s compile/test_pass_manager.py",
+                "pytest -v -s compile/test_fusion.py",
+                "pytest -v -s compile/test_fusion_attn.py",
+                "pytest -v -s compile/test_silu_mul_quant_fusion.py",
+                "pytest -v -s compile/test_sequence_parallelism.py",
+                "pytest -v -s compile/test_async_tp.py",
+                "pytest -v -s compile/test_fusion_all_reduce.py",
+                "pytest -v -s compile/test_decorator.py",
+            ],
+        },
+        # TODO(elainewy):need to add g6 with 4 gpus to run this test
+        "vllm_lora_test": {
+            "title": "LoRA Test %N",
+            "id": "lora_test",
+            "parallelism": 4,
+            "steps": [
+                "echo '[checking] list sharded lora tests:'",
+                " ".join(
+                    [
+                        "pytest -q --collect-only lora",
+                        "--shard-id=$$BUILDKITE_PARALLEL_JOB",
+                        "--num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT",
+                        "--ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py",
+                    ]
+                ),
+                "echo '[checking] Done. list lora tests'",
+                " ".join(
+                    [
+                        "pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB",
+                        "--num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT",
+                        "--ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py",
+                    ]
+                ),
+            ],
+        },
     }


-def run_test_plan(test_plan: str, test_target: str, tests_map: dict[str, Any]):
+def check_parallelism(tests: Any, title: str, shard_id: int = 0, num_shards: int = 0):
+    """
+    a method to check if the test plan is parallelism or not.
+    """
+    parallelism = int(tests.get("parallelism", "0"))
+    is_parallel = parallelism and parallelism > 1
+
+    if not is_parallel:
+        return False
+
+    if shard_id > num_shards:
+        raise RuntimeError(
+            f"Test {title} expects {num_shards} shards, but invalid {shard_id} is provided"
+        )
+
+    if num_shards != parallelism:
+        raise RuntimeError(
+            f"Test {title} expects {parallelism} shards, but invalid {num_shards} is provided"
+        )
+
+    return True
+
+
+def run_test_plan(
+    test_plan: str,
+    test_target: str,
+    tests_map: dict[str, Any],
+    shard_id: int = 0,
+    num_shards: int = 0,
+):
     """
     a method to run list of tests based on the test plan.
     """
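As a quick illustration of the helper introduced above (using check_parallelism exactly as defined in this hunk, with simplified dict arguments): entries that declare no parallelism are treated as non-parallel, a matching shard configuration passes, and a mismatched one raises.

# "vllm_basic_correctness_test" declares no parallelism, so the check is a no-op.
check_parallelism({"steps": []}, "Basic Correctness Test")              # returns False

# "vllm_lora_test" declares parallelism=4, so the shard arguments must line up.
lora = {"parallelism": 4}
check_parallelism(lora, "LoRA Test %N", shard_id=2, num_shards=4)       # returns True
check_parallelism(lora, "LoRA Test %N", shard_id=2, num_shards=2)       # raises RuntimeError (2 != 4)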
@@ -83,17 +182,31 @@ def run_test_plan(test_plan: str, test_target: str, tests_map: dict[str, Any]):
             f"test {test_plan} not found, please add it to test plan pool"
         )
     tests = tests_map[test_plan]
-    logger.info("Running tests: %s", tests["title"])
     pkgs = tests.get("package_install", [])
+    title = tests.get("title", "unknown test")
+
+    is_parallel = check_parallelism(tests, title, shard_id, num_shards)
+    if is_parallel:
+        title = title.replace("%N", f"{shard_id}/{num_shards}")
+
+    logger.info("Running tests: %s", title)
     if pkgs:
         logger.info("Installing packages: %s", pkgs)
         pip_install_packages(packages=pkgs, prefer_uv=True)
-    with working_directory(tests.get("working_directory", "tests")):
+    with (
+        working_directory(tests.get("working_directory", "tests")),
+        temp_environ(tests.get("env_vars", {})),
+    ):
         failures = []
         for step in tests["steps"]:
             logger.info("Running step: %s", step)
+            if is_parallel:
+                step = replace_buildkite_placeholders(step, shard_id, num_shards)
+                logger.info("Running parallel step: %s", step)
             code = run_command(cmd=step, check=False, use_shell=True)
             if code != 0:
                 failures.append(step)
             logger.info("Finish running step: %s", step)
         if failures:
             logger.error("Failed tests: %s", failures)
             raise RuntimeError(f"{len(failures)} pytest runs failed: {failures}")
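One behavioural detail worth noting in the rewritten loop: each step runs with check=False, failing steps are collected, and a single RuntimeError is raised after the whole plan has run, so an early failure does not mask later ones. A reduced, self-contained sketch of that pattern (with a hypothetical step list) looks like this:

import subprocess

failures = []
for step in ["pytest -q suite_a", "pytest -q suite_b"]:  # hypothetical steps
    # mirrors run_command(cmd=step, check=False, use_shell=True): do not raise per step
    code = subprocess.run(step, shell=True).returncode
    if code != 0:
        failures.append(step)

if failures:
    # report every failing step at once
    raise RuntimeError(f"{len(failures)} pytest runs failed: {failures}")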
@@ -107,3 +220,13 @@ def clone_vllm(dst: str = "vllm"):
         dst=dst,
         update_submodules=True,
     )
+
+
+def replace_buildkite_placeholders(step: str, shard_id: int, num_shards: int) -> str:
+    mapping = {
+        "$$BUILDKITE_PARALLEL_JOB_COUNT": str(num_shards),
+        "$$BUILDKITE_PARALLEL_JOB": str(shard_id),
+    }
+    for k in sorted(mapping, key=len, reverse=True):
+        step = step.replace(k, mapping[k])
+    return step
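Combined with the sharded "vllm_lora_test" steps defined earlier in this file, the helper resolves the Buildkite-style placeholders into concrete shard numbers; it substitutes the longer key ($$BUILDKITE_PARALLEL_JOB_COUNT) first so the shorter one cannot clobber it. For example, assuming replace_buildkite_placeholders as defined above is in scope:

step = " ".join(
    [
        "pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB",
        "--num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT",
        "--ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py",
    ]
)
print(replace_buildkite_placeholders(step, shard_id=2, num_shards=4))
# pytest -v -s lora --shard-id=2 --num-shards=4 --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py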
@@ -61,6 +61,9 @@ class VllmTestRunner(BaseRunner):
         self.test_plan = ""
         self.test_type = TestInpuType.UNKNOWN

+        self.shard_id = args.shard_id
+        self.num_shards = args.num_shards
+
         if args.test_plan:
             self.test_plan = args.test_plan
             self.test_type = TestInpuType.TEST_PLAN

@@ -103,7 +106,16 @@ class VllmTestRunner(BaseRunner):
         self.prepare()
         with working_directory(self.work_directory):
             if self.test_type == TestInpuType.TEST_PLAN:
-                run_test_plan(self.test_plan, "vllm", sample_vllm_test_library())
+                if self.num_shards > 1:
+                    run_test_plan(
+                        self.test_plan,
+                        "vllm",
+                        sample_vllm_test_library(),
+                        self.shard_id,
+                        self.num_shards,
+                    )
+                else:
+                    run_test_plan(self.test_plan, "vllm", sample_vllm_test_library())
             else:
                 raise ValueError(f"Unknown test type {self.test_type}")

@@ -22,6 +22,18 @@ def common_args(parser: argparse.ArgumentParser) -> None:
     """
     Add common CLI arguments to the given parser.
     """
+    parser.add_argument(
+        "--shard-id",
+        type=int,
+        default=1,
+        help="a shard id to run, e.g. '0,1,2,3'",
+    )
+    parser.add_argument(
+        "--num-shards",
+        type=int,
+        default=1,
+        help="a number of shards to run, e.g. '4'",
+    )
     group = parser.add_mutually_exclusive_group(required=True)
     group.add_argument(
         "-tp",
@@ -29,7 +41,6 @@ def common_args(parser: argparse.ArgumentParser) -> None:
         type=str,
         help="a pre-defined test plan to run, e.g. 'basic_correctness_test'",
     )
     # TODO(elainewy):add another common option that user can trigger a specific test with test config


 def register_test_commands(subparsers: argparse._SubParsersAction) -> None:
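A small, self-contained sketch of how the new --shard-id/--num-shards options combine with the existing --test-plan option. The parser below is hypothetical and simplified (the real common_args puts -tp inside a required mutually exclusive group), but the argument definitions match the defaults added above:

import argparse

parser = argparse.ArgumentParser("vllm-test-sketch")  # hypothetical name
parser.add_argument("--shard-id", type=int, default=1, help="a shard id to run, e.g. '0,1,2,3'")
parser.add_argument("--num-shards", type=int, default=1, help="a number of shards to run, e.g. '4'")
parser.add_argument("-tp", "--test-plan", type=str, required=True,
                    help="a pre-defined test plan to run, e.g. 'basic_correctness_test'")

args = parser.parse_args(["--test-plan", "vllm_lora_test", "--shard-id", "2", "--num-shards", "4"])
# argparse maps the dashes to underscores: args.shard_id == 2, args.num_shards == 4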
@@ -45,6 +45,10 @@ def patch_module(monkeypatch):
         workdir_calls.append(path)
         return nullcontext()

+    def fake_temp_env(map: dict[str, str]):
+        temp_calls.append(map)
+        return nullcontext()
+
     logger = SimpleNamespace(
         info=MagicMock(name="logger.info"),
         error=MagicMock(name="logger.error"),

@@ -58,6 +62,7 @@ def patch_module(monkeypatch):
     monkeypatch.setattr(
         module, "working_directory", fake_working_directory, raising=True
     )
+    monkeypatch.setattr(module, "temp_environ", fake_temp_env, raising=True)
     monkeypatch.setattr(module, "logger", logger, raising=True)

     return SimpleNamespace(

@@ -79,8 +84,8 @@ def test_success_runs_all_steps_and_uses_env_and_workdir(monkeypatch, patch_module):
         "title": "Basic suite",
         "package_install": [],
         "working_directory": "tests",
+        "env_vars": {"GLOBAL_FLAG": "1"},
         "steps": [
-            "export GLOBAL_FLAG=1",
            "export A=x && pytest -q",
            "export B=y && pytest -q tests/unit",
         ],

@@ -97,14 +102,13 @@ def test_success_runs_all_steps_and_uses_env_and_workdir(monkeypatch, patch_module):
     checks = [_get_check(c) for c in calls]

     assert cmds == [
-        "export GLOBAL_FLAG=1",
         "export A=x && pytest -q",
         "export B=y && pytest -q tests/unit",
     ]
     assert all(chk is False for chk in checks)

-    # No temp_env assertions anymore
     assert patch_module.workdir_calls == ["tests"]
+    assert patch_module.temp_calls == [{"GLOBAL_FLAG": "1"}]


 def test_installs_packages_when_present(monkeypatch, patch_module):
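The fake_temp_env added above follows the same recording-fake pattern as the existing fake_working_directory: swap the real context manager for one that records its argument and returns a nullcontext, then assert on the recorded calls. A generic, stand-alone version of that pattern (hypothetical names, not the repo's test code) is:

from contextlib import nullcontext
from types import SimpleNamespace

def make_recording_fake():
    calls = []
    def fake(arg):
        calls.append(arg)       # record what the code under test passed in
        return nullcontext()    # behave like a no-op context manager
    return SimpleNamespace(fake=fake, calls=calls)

rec = make_recording_fake()
with rec.fake({"GLOBAL_FLAG": "1"}):
    pass
assert rec.calls == [{"GLOBAL_FLAG": "1"}]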
@@ -1636,7 +1636,7 @@ elif [[ "$TEST_CONFIG" == *vllm* ]]; then
   fi
   echo "VLLM CI TORCH_CUDA_ARCH_LIST: $TORCH_CUDA_ARCH_LIST"
   (cd .ci/lumen_cli && python -m pip install -e .)
-  python -m cli.run test external vllm --test-plan "$TEST_CONFIG"
+  python -m cli.run test external vllm --test-plan "$TEST_CONFIG" --shard-id "$SHARD_NUMBER" --num-shards "$NUM_TEST_SHARDS"
 elif [[ "${TEST_CONFIG}" == *executorch* ]]; then
   test_executorch
 elif [[ "$TEST_CONFIG" == 'jit_legacy' ]]; then
.github/ci_configs/vllm/Dockerfile.tmp_vllm (vendored, 1 change)
@@ -177,7 +177,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # track the nightly torch version used in the build, when we set up runtime environment we can make sure the version is the same
 RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio' > torch_build_versions.txt
 RUN cat torch_build_versions.txt

-RUN pip freeze | grep -E 'torch|xformers|torchvision|torchaudio'

 #################### BASE BUILD IMAGE ####################
.github/workflows/vllm.yml (vendored, 13 changes)
@@ -5,6 +5,9 @@ on:
   tags:
     - ciflow/vllm/*
   workflow_dispatch:
+  schedule:
+    # Every 12 hours starting at 00:00 UTC (00:00 and 12:00)
+    - cron: '0 0,12 * * *'

 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}

@@ -41,8 +44,16 @@ jobs:
         { include: [
           { config: "vllm_basic_correctness_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
           { config: "vllm_basic_models_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
-          { config: "vllm_regression_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
           { config: "vllm_entrypoints_test", shard: 1, num_shards: 1,runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_regression_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_lora_280_failure_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_multi_model_processor_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_pytorch_compilation_unit_tests", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_lora_test", shard: 0, num_shards: 4, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_lora_test", shard: 1, num_shards: 4, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_lora_test", shard: 2, num_shards: 4, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_lora_test", shard: 3, num_shards: 4, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_lora_tp_test_distributed", shard: 1, num_shards: 1, runner: "linux.aws.h100.4"},
         ]}
     secrets: inherit