From 6443ea337df843681bc558d99efa84a3e5559b7f Mon Sep 17 00:00:00 2001
From: Yang Wang
Date: Fri, 22 Aug 2025 17:07:30 -0700
Subject: [PATCH] enable more tests (#161192)

Enable more vllm tests against pytorch main, and add a schedule to run
the tests every 12 hours.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/161192
Approved by: https://github.com/huydhn
---
 .ci/lumen_cli/cli/lib/core/vllm/lib.py       | 135 ++++++++++++++++++-
 .ci/lumen_cli/cli/lib/core/vllm/vllm_test.py |  14 +-
 .ci/lumen_cli/cli/test_cli/register_test.py  |  13 +-
 .ci/lumen_cli/tests/test_run_plan.py         |  10 +-
 .ci/pytorch/test.sh                          |   2 +-
 .github/ci_configs/vllm/Dockerfile.tmp_vllm  |   1 -
 .github/workflows/vllm.yml                   |  13 +-
 7 files changed, 174 insertions(+), 14 deletions(-)

diff --git a/.ci/lumen_cli/cli/lib/core/vllm/lib.py b/.ci/lumen_cli/cli/lib/core/vllm/lib.py
index 2fa2618a27d8..7f3a930b2cc6 100644
--- a/.ci/lumen_cli/cli/lib/core/vllm/lib.py
+++ b/.ci/lumen_cli/cli/lib/core/vllm/lib.py
@@ -3,7 +3,7 @@ from typing import Any
 
 from cli.lib.common.git_helper import clone_external_repo
 from cli.lib.common.pip_helper import pip_install_packages
-from cli.lib.common.utils import run_command, working_directory
+from cli.lib.common.utils import run_command, temp_environ, working_directory
 
 
 logger = logging.getLogger(__name__)
@@ -20,8 +20,10 @@ def sample_vllm_test_library():
         "vllm_basic_correctness_test": {
             "title": "Basic Correctness Test",
             "id": "vllm_basic_correctness_test",
+            "env_vars": {
+                "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
+            },
             "steps": [
-                "export VLLM_WORKER_MULTIPROC_METHOD=spawn",
                 "pytest -v -s basic_correctness/test_cumem.py",
                 "pytest -v -s basic_correctness/test_basic_correctness.py",
                 "pytest -v -s basic_correctness/test_cpu_offload.py",
@@ -42,8 +44,10 @@ def sample_vllm_test_library():
         "vllm_entrypoints_test": {
             "title": "Entrypoints Test ",
             "id": "vllm_entrypoints_test",
+            "env_vars": {
+                "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
+            },
             "steps": [
-                "export VLLM_WORKER_MULTIPROC_METHOD=spawn",
                 " ".join(
                     [
                         "pytest",
@@ -70,10 +74,105 @@ def sample_vllm_test_library():
                 "pytest -v -s test_regression.py",
             ],
         },
+        "vllm_lora_tp_test_distributed": {
+            "title": "LoRA TP Test (Distributed)",
+            "id": "vllm_lora_tp_test_distributed",
+            "env_vars": {
+                "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
+            },
+            "num_gpus": 4,
+            "steps": [
+                "pytest -v -s -x lora/test_chatglm3_tp.py",
+                "echo $VLLM_WORKER_MULTIPROC_METHOD",
+                "pytest -v -s -x lora/test_llama_tp.py",
+                "pytest -v -s -x lora/test_multi_loras_with_tp.py",
+            ],
+        },
+        "vllm_lora_280_failure_test": {
+            "title": "LoRA 280 failure test",
+            "id": "vllm_lora_280_failure_test",
+            "steps": ["pytest -v lora/test_quant_model.py"],
+        },
+        "vllm_multi_model_processor_test": {
+            "title": "Multi-Modal Processor Test",
+            "id": "vllm_multi_model_processor_test",
+            "package_install": ["git+https://github.com/TIGER-AI-Lab/Mantis.git"],
+            "steps": [
+                "pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py",
+            ],
+        },
+        "vllm_pytorch_compilation_unit_tests": {
+            "title": "PyTorch Compilation Unit Tests",
+            "id": "vllm_pytorch_compilation_unit_tests",
+            "steps": [
+                "pytest -v -s compile/test_pass_manager.py",
+                "pytest -v -s compile/test_fusion.py",
+                "pytest -v -s compile/test_fusion_attn.py",
+                "pytest -v -s compile/test_silu_mul_quant_fusion.py",
+                "pytest -v -s compile/test_sequence_parallelism.py",
+                "pytest -v -s compile/test_async_tp.py",
+                "pytest -v -s compile/test_fusion_all_reduce.py",
+                "pytest -v -s compile/test_decorator.py",
+            ],
+        },
+        # TODO(elainewy):need to add g6 with 4 gpus to run this test
+        "vllm_lora_test": {
+            "title": "LoRA Test %N",
+            "id": "lora_test",
+            "parallelism": 4,
+            "steps": [
+                "echo '[checking] list sharded lora tests:'",
+                " ".join(
+                    [
+                        "pytest -q --collect-only lora",
+                        "--shard-id=$$BUILDKITE_PARALLEL_JOB",
+                        "--num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT",
+                        "--ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py",
+                    ]
+                ),
+                "echo '[checking] Done. list lora tests'",
+                " ".join(
+                    [
+                        "pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB",
+                        "--num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT",
+                        "--ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py",
+                    ]
+                ),
+            ],
+        },
     }
 
 
-def run_test_plan(test_plan: str, test_target: str, tests_map: dict[str, Any]):
+def check_parallelism(tests: Any, title: str, shard_id: int = 0, num_shards: int = 0):
+    """
+    a method to check if the test plan is parallelism or not.
+    """
+    parallelism = int(tests.get("parallelism", "0"))
+    is_parallel = parallelism and parallelism > 1
+
+    if not is_parallel:
+        return False
+
+    if shard_id > num_shards:
+        raise RuntimeError(
+            f"Test {title} expects {num_shards} shards, but invalid {shard_id} is provided"
+        )
+
+    if num_shards != parallelism:
+        raise RuntimeError(
+            f"Test {title} expects {parallelism} shards, but invalid {num_shards} is provided"
+        )
+
+    return True
+
+
+def run_test_plan(
+    test_plan: str,
+    test_target: str,
+    tests_map: dict[str, Any],
+    shard_id: int = 0,
+    num_shards: int = 0,
+):
     """
     a method to run list of tests based on the test plan.
     """
@@ -83,17 +182,31 @@ def run_test_plan(test_plan: str, test_target: str, tests_map: dict[str, Any]):
             f"test {test_plan} not found, please add it to test plan pool"
         )
     tests = tests_map[test_plan]
-    logger.info("Running tests: %s", tests["title"])
     pkgs = tests.get("package_install", [])
+    title = tests.get("title", "unknown test")
+
+    is_parallel = check_parallelism(tests, title, shard_id, num_shards)
+    if is_parallel:
+        title = title.replace("%N", f"{shard_id}/{num_shards}")
+
+    logger.info("Running tests: %s", title)
     if pkgs:
         logger.info("Installing packages: %s", pkgs)
         pip_install_packages(packages=pkgs, prefer_uv=True)
-    with working_directory(tests.get("working_directory", "tests")):
+    with (
+        working_directory(tests.get("working_directory", "tests")),
+        temp_environ(tests.get("env_vars", {})),
+    ):
         failures = []
         for step in tests["steps"]:
+            logger.info("Running step: %s", step)
+            if is_parallel:
+                step = replace_buildkite_placeholders(step, shard_id, num_shards)
+                logger.info("Running parallel step: %s", step)
             code = run_command(cmd=step, check=False, use_shell=True)
             if code != 0:
                 failures.append(step)
+            logger.info("Finish running step: %s", step)
         if failures:
             logger.error("Failed tests: %s", failures)
             raise RuntimeError(f"{len(failures)} pytest runs failed: {failures}")
@@ -107,3 +220,13 @@ def clone_vllm(dst: str = "vllm"):
         dst=dst,
         update_submodules=True,
     )
+
+
+def replace_buildkite_placeholders(step: str, shard_id: int, num_shards: int) -> str:
+    mapping = {
+        "$$BUILDKITE_PARALLEL_JOB_COUNT": str(num_shards),
+        "$$BUILDKITE_PARALLEL_JOB": str(shard_id),
+    }
+    for k in sorted(mapping, key=len, reverse=True):
+        step = step.replace(k, mapping[k])
+    return step
diff --git a/.ci/lumen_cli/cli/lib/core/vllm/vllm_test.py b/.ci/lumen_cli/cli/lib/core/vllm/vllm_test.py
index e4a3a932bc57..2be8e246486e 100644
--- a/.ci/lumen_cli/cli/lib/core/vllm/vllm_test.py
+++ b/.ci/lumen_cli/cli/lib/core/vllm/vllm_test.py
@@ -61,6 +61,9 @@ class VllmTestRunner(BaseRunner):
         self.test_plan = ""
         self.test_type = TestInpuType.UNKNOWN
 
+        self.shard_id = args.shard_id
+        self.num_shards = args.num_shards
+
         if args.test_plan:
             self.test_plan = args.test_plan
             self.test_type = TestInpuType.TEST_PLAN
@@ -103,7 +106,16 @@ class VllmTestRunner(BaseRunner):
         self.prepare()
         with working_directory(self.work_directory):
             if self.test_type == TestInpuType.TEST_PLAN:
-                run_test_plan(self.test_plan, "vllm", sample_vllm_test_library())
+                if self.num_shards > 1:
+                    run_test_plan(
+                        self.test_plan,
+                        "vllm",
+                        sample_vllm_test_library(),
+                        self.shard_id,
+                        self.num_shards,
+                    )
+                else:
+                    run_test_plan(self.test_plan, "vllm", sample_vllm_test_library())
             else:
                 raise ValueError(f"Unknown test type {self.test_type}")
 
diff --git a/.ci/lumen_cli/cli/test_cli/register_test.py b/.ci/lumen_cli/cli/test_cli/register_test.py
index 20132b6d5554..2973341b83ed 100644
--- a/.ci/lumen_cli/cli/test_cli/register_test.py
+++ b/.ci/lumen_cli/cli/test_cli/register_test.py
@@ -22,6 +22,18 @@ def common_args(parser: argparse.ArgumentParser) -> None:
     """
     Add common CLI arguments to the given parser.
     """
+    parser.add_argument(
+        "--shard-id",
+        type=int,
+        default=1,
+        help="a shard id to run, e.g. '0,1,2,3'",
+    )
+    parser.add_argument(
+        "--num-shards",
+        type=int,
+        default=1,
+        help="a number of shards to run, e.g. '4'",
+    )
     group = parser.add_mutually_exclusive_group(required=True)
     group.add_argument(
         "-tp",
@@ -29,7 +41,6 @@ def common_args(parser: argparse.ArgumentParser) -> None:
         type=str,
         help="a pre-defined test plan to run, e.g. 'basic_correctness_test'",
     )
-    # TODO(elainewy):add another common option that user can trigger a specific test with test config
 
 
 def register_test_commands(subparsers: argparse._SubParsersAction) -> None:
diff --git a/.ci/lumen_cli/tests/test_run_plan.py b/.ci/lumen_cli/tests/test_run_plan.py
index 2d07827a1f69..a85ed2e3986f 100644
--- a/.ci/lumen_cli/tests/test_run_plan.py
+++ b/.ci/lumen_cli/tests/test_run_plan.py
@@ -45,6 +45,10 @@ def patch_module(monkeypatch):
         workdir_calls.append(path)
         return nullcontext()
 
+    def fake_temp_env(map: dict[str, str]):
+        temp_calls.append(map)
+        return nullcontext()
+
     logger = SimpleNamespace(
         info=MagicMock(name="logger.info"),
         error=MagicMock(name="logger.error"),
@@ -58,6 +62,7 @@ def patch_module(monkeypatch):
     monkeypatch.setattr(
         module, "working_directory", fake_working_directory, raising=True
    )
+    monkeypatch.setattr(module, "temp_environ", fake_temp_env, raising=True)
     monkeypatch.setattr(module, "logger", logger, raising=True)
 
     return SimpleNamespace(
@@ -79,8 +84,8 @@ def test_success_runs_all_steps_and_uses_env_and_workdir(monkeypatch, patch_modu
         "title": "Basic suite",
         "package_install": [],
         "working_directory": "tests",
+        "env_vars": {"GLOBAL_FLAG": "1"},
         "steps": [
-            "export GLOBAL_FLAG=1",
             "export A=x && pytest -q",
             "export B=y && pytest -q tests/unit",
         ],
@@ -97,14 +102,13 @@ def test_success_runs_all_steps_and_uses_env_and_workdir(monkeypatch, patch_modu
     checks = [_get_check(c) for c in calls]
 
     assert cmds == [
-        "export GLOBAL_FLAG=1",
         "export A=x && pytest -q",
         "export B=y && pytest -q tests/unit",
     ]
     assert all(chk is False for chk in checks)
 
-    # No temp_env assertions anymore
     assert patch_module.workdir_calls == ["tests"]
+    assert patch_module.temp_calls == [{"GLOBAL_FLAG": "1"}]
 
 
 def test_installs_packages_when_present(monkeypatch, patch_module):
diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
index d27516ec9266..5a82ec2fa85e 100755
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@@ -1636,7 +1636,7 @@ elif [[ "$TEST_CONFIG" == *vllm* ]]; then
   fi
   echo "VLLM CI TORCH_CUDA_ARCH_LIST: $TORCH_CUDA_ARCH_LIST"
   (cd .ci/lumen_cli && python -m pip install -e .)
-  python -m cli.run test external vllm --test-plan "$TEST_CONFIG"
+  python -m cli.run test external vllm --test-plan "$TEST_CONFIG" --shard-id "$SHARD_NUMBER" --num-shards "$NUM_TEST_SHARDS"
 elif [[ "${TEST_CONFIG}" == *executorch* ]]; then
   test_executorch
 elif [[ "$TEST_CONFIG" == 'jit_legacy' ]]; then
diff --git a/.github/ci_configs/vllm/Dockerfile.tmp_vllm b/.github/ci_configs/vllm/Dockerfile.tmp_vllm
index 54eb415d8b67..330a78424fee 100644
--- a/.github/ci_configs/vllm/Dockerfile.tmp_vllm
+++ b/.github/ci_configs/vllm/Dockerfile.tmp_vllm
@@ -177,7 +177,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # track the nightly torch version used in the build, when we set up runtime environment we can make sure the version is the same
 RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio' > torch_build_versions.txt
 RUN cat torch_build_versions.txt
-
 RUN pip freeze | grep -E 'torch|xformers|torchvision|torchaudio'
 
 #################### BASE BUILD IMAGE ####################
diff --git a/.github/workflows/vllm.yml b/.github/workflows/vllm.yml
index f58dacda84cd..14524069ab5a 100644
--- a/.github/workflows/vllm.yml
+++ b/.github/workflows/vllm.yml
@@ -5,6 +5,9 @@ on:
     tags:
       - ciflow/vllm/*
   workflow_dispatch:
+  schedule:
+    # Every 12 hours starting at 00:00 UTC (00:00 and 12:00)
+    - cron: '0 0,12 * * *'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
@@ -41,8 +44,16 @@ jobs:
         { include: [
           { config: "vllm_basic_correctness_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
           { config: "vllm_basic_models_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
-          { config: "vllm_regression_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
           { config: "vllm_entrypoints_test", shard: 1, num_shards: 1,runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_regression_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_lora_280_failure_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_multi_model_processor_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_pytorch_compilation_unit_tests", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_lora_test", shard: 0, num_shards: 4, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_lora_test", shard: 1, num_shards: 4, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_lora_test", shard: 2, num_shards: 4, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_lora_test", shard: 3, num_shards: 4, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+          { config: "vllm_lora_tp_test_distributed", shard: 1, num_shards: 1, runner: "linux.aws.h100.4"},
         ]}
 
     secrets: inherit
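
Reviewer note (illustration only, not part of the patch): the sharding flow above rests on two helpers added to lib.py. check_parallelism() validates the --shard-id/--num-shards arguments against a test entry's "parallelism" field, and replace_buildkite_placeholders() expands the $$BUILDKITE_PARALLEL_JOB / $$BUILDKITE_PARALLEL_JOB_COUNT placeholders inside each step before it runs. The self-contained sketch below restates those two helpers so the expansion for a sharded plan such as vllm_lora_test can be checked locally; the sample entry and the shard values (2 of 4) are made up for the example.

# Standalone sketch mirroring the helpers introduced in lib.py above.
# Not part of the change; the sample entry and shard values are illustrative.
from typing import Any


def check_parallelism(tests: Any, title: str, shard_id: int = 0, num_shards: int = 0) -> bool:
    # A plan is parallel when it declares "parallelism" greater than 1.
    parallelism = int(tests.get("parallelism", "0"))
    if not (parallelism and parallelism > 1):
        return False
    if shard_id > num_shards:
        raise RuntimeError(
            f"Test {title} expects {num_shards} shards, but invalid {shard_id} is provided"
        )
    if num_shards != parallelism:
        raise RuntimeError(
            f"Test {title} expects {parallelism} shards, but invalid {num_shards} is provided"
        )
    return True


def replace_buildkite_placeholders(step: str, shard_id: int, num_shards: int) -> str:
    # Substitute the Buildkite-style placeholders with the CLI shard arguments.
    mapping = {
        "$$BUILDKITE_PARALLEL_JOB_COUNT": str(num_shards),
        "$$BUILDKITE_PARALLEL_JOB": str(shard_id),
    }
    # Longest key first so JOB_COUNT is not clobbered by the shorter JOB key.
    for k in sorted(mapping, key=len, reverse=True):
        step = step.replace(k, mapping[k])
    return step


if __name__ == "__main__":
    lora_entry = {"title": "LoRA Test %N", "parallelism": 4}
    if check_parallelism(lora_entry, lora_entry["title"], shard_id=2, num_shards=4):
        step = (
            "pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB "
            "--num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT"
        )
        print(replace_buildkite_placeholders(step, 2, 4))
        # -> pytest -v -s lora --shard-id=2 --num-shards=4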