Compare commits

...

3 Commits

Author SHA1 Message Date
a0801ef6be add workflow to dispatch 2025-08-26 18:20:49 -07:00
735b375db4 echo variables
ghstack-source-id: 3c8f54e83cad9760fb06b39366bea2f31a39342f
Pull-Request: https://github.com/pytorch/pytorch/pull/161565
2025-08-26 17:10:56 -07:00
011155aea3 echo variables
ghstack-source-id: bd39100f9f9c99a5c45b85a48020375ac5f95da6
Pull-Request: https://github.com/pytorch/pytorch/pull/161537
2025-08-26 17:10:55 -07:00
7 changed files with 298 additions and 11 deletions

View File

@ -0,0 +1,223 @@
from __future__ import annotations
import os
from pathlib import Path
from typing import Iterable, Mapping, Optional
import logging
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Iterable, Tuple
logger = logging.getLogger(__name__)
def gh_summary_path() -> Path | None:
"""Return the Path to the GitHub step summary file, or None if not set."""
p = os.environ.get("GITHUB_STEP_SUMMARY")
return Path(p) if p else None
def write_gh_step_summary(md: str, *, append_content: bool = True) -> bool:
    """
    Write Markdown content to the GitHub Step Summary file if GITHUB_STEP_SUMMARY is set.

    Args:
        md: Markdown text to write (trailing whitespace is stripped, one newline appended).
        append_content: default True; append to the end of the file, else overwrite it.

    Returns:
        True if written successfully (in GitHub Actions environment),
        False if skipped (e.g., running locally where the variable is not set).
    """
    summary_file = gh_summary_path()
    if summary_file is None:
        # When running locally, just log to console instead of failing.
        logger.info("[gh-summary] GITHUB_STEP_SUMMARY not set, skipping write.")
        return False
    summary_file.parent.mkdir(parents=True, exist_ok=True)
    open_mode = "a" if append_content else "w"
    with summary_file.open(open_mode, encoding="utf-8") as fh:
        fh.write(md.rstrip() + "\n")
    return True
def md_heading(text: str, level: int = 2) -> str:
    """Generate a Markdown heading string; *level* is clamped to the valid 1-6 range."""
    clamped = min(max(level, 1), 6)
    return "#" * clamped + f" {text}\n"
def md_kv_table(rows: Iterable[Mapping[str, str | int | float]]) -> str:
    """
    Render a list of dictionaries as a Markdown table.

    The header is the union of all keys across rows, in first-seen order, so
    the rendered table is deterministic (the previous set-based collection
    produced an arbitrary column order on every run). Missing keys render as
    empty cells.

    Example:
        # Suppose you want to summarize benchmark results
        rows = [
            {"name": "transformer-small", "p50": 12.3, "p90(ms)": 18.4},
            {"name": "transformer-large", "p50": 45.1, "p90(ms)": 60.7},
        ]
        content = []
        content.append(md_heading("Benchmark Results", level=2))
        content.append(md_kv_table(rows))
        content.append(md_details("Raw logs", "```\n[INFO] benchmark log ...\n```"))
        # Join the pieces into one Markdown block
        markdown = '\n'.join(content)
        # Write to GitHub Actions summary (or log locally if not in CI)
        write_gh_step_summary(markdown, append_content=True)
    """
    rows = list(rows)
    if not rows:
        return "_(no data)_\n"
    # dict.fromkeys de-duplicates while preserving insertion (first-seen) order.
    cols = list(dict.fromkeys(k for r in rows for k in r))
    header = "| " + " | ".join(cols) + " |\n"
    sep = "|" + "|".join(" --- " for _ in cols) + "|\n"
    body = "".join(
        "| " + " | ".join(str(r.get(c, "")) for c in cols) + " |\n" for r in rows
    )
    return header + sep + body + "\n"
def md_details(summary: str, content: str) -> str:
    """Generate a collapsible <details> block with a summary and inner content."""
    pieces = (
        "<details>",
        f"<summary>{summary}</summary>",
        "",
        content,
        "",
        "</details>",
    )
    return "\n".join(pieces) + "\n"
# ---- helper test to generate a summary for list of pytest failures ------#
def summarize_failures_by_test_command(
    xml_and_labels: Iterable[Tuple[str | Path, str]],
    *,
    title: str = "Pytest Failures by Test Command",
    dedupe_within_command: bool = True,
) -> None:
    """
    Summarize pytest failures, grouped per test command, in the GH step summary.

    Args:
        xml_and_labels: list of (xml_path, label) pairs.
            Each XML corresponds to one pytest subprocess (one test command).
        title: heading written once above all sections.
        dedupe_within_command: if True, de-duplicate (and sort) failures
            within each command before listing them.

    Behavior:
        - Writes a section per test command if it has failures.
        - Each failed test is listed as 'path/to/test.py:test_name'.

    Example:
        xmls = [
            ("reports/junit_cmd0.xml", "pytest -v -s tests/unit"),
            ("reports/junit_cmd1.xml", "pytest -v -s tests/integration"),
            ("reports/junit_cmd2.xml", "pytest -v -s tests/entrypoints"),
        ]
        summarize_failures_by_test_command(
            xmls,
            title="Consolidated Pytest Failures",
        )
    """
    write_gh_step_summary(md_heading(title, level=2))
    for xml_path, label in xml_and_labels:
        failed = _parse_failed_simple(Path(xml_path))
        if dedupe_within_command:
            failed = sorted(set(failed))
        if not failed:
            continue  # skip commands with no failures
        write_gh_step_summary(md_heading(f"Test Command: {label}", level=3))
        lines = "\n".join(f"- {item}" for item in failed)
        write_gh_step_summary(lines + "\n")
def _to_simple_name_from_testcase(tc: ET.Element) -> str:
"""
Convert a <testcase> into 'path/to/test.py:test_name' format.
Prefer the 'file' attribute if available, else fall back to classname.
"""
name = tc.attrib.get("name", "")
file_attr = tc.attrib.get("file")
if file_attr:
return f"{file_attr}:{name}"
classname = tc.attrib.get("classname", "")
parts = classname.split(".") if classname else []
if len(parts) >= 1:
# drop last part if it's a class, treat rest as module path
mod_parts = parts[:-1] if len(parts) >= 2 else parts
mod_path = "/".join(mod_parts) + ".py" if mod_parts else "unknown.py"
return f"{mod_path}:{name}"
return f"unknown.py:{name or 'unknown_test'}"
def _parse_failed_simple(xml_path: Path) -> list[str]:
    """
    Parse one junit XML and return failures as ['tests/a_test.py:test_x', ...].

    A testcase counts as failed when it has a <failure> or <error> child;
    a missing XML file yields an empty list.
    """
    if not xml_path.exists():
        return []
    root = ET.parse(xml_path).getroot()
    bad_tags = {"failure", "error"}
    return [
        _to_simple_name_from_testcase(case)
        for case in root.iter("testcase")
        if any(child.tag in bad_tags for child in case)
    ]
def summarize_content_from_file(
    output_dir: Path,
    freeze_file: str,
    title: str = "Wheels (pip freeze)",
    code_lang: str = "",  # e.g. "text" or "ini"
) -> bool:
    """
    Read a text file from output_dir/freeze_file and append it to
    the GitHub Step Summary as a Markdown code block.

    Returns True if something was written, False otherwise.
    """
    source = Path(output_dir) / freeze_file
    if not source.exists():
        return False
    body = source.read_text(encoding="utf-8").strip()
    if not body:
        return False
    fence_open = f"```{code_lang}".rstrip()
    block = "\n".join([md_heading(title, 2), fence_open, body, "```"])
    return write_gh_step_summary(block + "\n")
def summarize_wheels(
    output_dir: Path,
    title: str = "Wheels",
    max_depth: Optional[int] = None,  # None = unlimited
) -> bool:
    """
    Walk output_dir up to max_depth and list all *.whl files in the GH summary.
    Grouped as 'package: relative/path/filename.whl'.

    Args:
        output_dir: base directory to search (str or Path accepted)
        title: section title in GH summary
        max_depth: maximum folder depth relative to output_dir (0 = only top-level)

    Returns:
        True if at least one wheel was found and the summary was written,
        False otherwise.
    """
    # Convert before .exists() so a plain str argument also works.
    root = Path(output_dir)
    if not root.exists():
        return False
    lines = [md_heading(title, 2)]
    for dirpath, dirnames, filenames in os.walk(root):
        depth = len(Path(dirpath).relative_to(root).parts)
        if max_depth is not None:
            if depth > max_depth:
                continue  # defensive; pruning below normally prevents this
            if depth == max_depth:
                # Prune: children would exceed max_depth, so don't walk them.
                dirnames[:] = []
        for fname in sorted(filenames):
            if not fname.endswith(".whl"):
                continue
            # Wheel filenames are '<package>-<version>-...' per PEP 427.
            pkg = fname.split("-")[0]
            # Path-aware relative path instead of fragile str.replace().
            relpath = (Path(dirpath) / fname).relative_to(root)
            lines.append(f"- {pkg}: {relpath}")
    if len(lines) > 1:
        return write_gh_step_summary("\n".join(lines) + "\n")
    return False

View File

@ -45,7 +45,7 @@ def clone_external_repo(target: str, repo: str, dst: str = "", update_submodules
# Checkout pinned commit
commit = get_post_build_pinned_commit(target)
logger.info("Checking out pinned commit %s", commit)
logger.info("Checking out pinned %s commit %s", target, commit)
r.git.checkout(commit)
# Update submodules if requested
@ -55,7 +55,7 @@ def clone_external_repo(target: str, repo: str, dst: str = "", update_submodules
sm.update(init=True, recursive=True, progress=PrintProgress())
logger.info("Successfully cloned %s", target)
return r
return r, commit
except GitCommandError as e:
logger.error("Git operation failed: %s", e)

View File

@ -1,10 +1,12 @@
import logging
from pathlib import Path
import re
from typing import Any
from cli.lib.common.git_helper import clone_external_repo
from cli.lib.common.pip_helper import pip_install_packages
from cli.lib.common.utils import run_command, temp_environ, working_directory
from cli.lib.common.gh_summary import md_heading, write_gh_step_summary
logger = logging.getLogger(__name__)
@ -214,12 +216,13 @@ def run_test_plan(
def clone_vllm(dst: str = "vllm"):
clone_external_repo(
_, commit = clone_external_repo(
target="vllm",
repo="https://github.com/vllm-project/vllm.git",
dst=dst,
update_submodules=True,
)
return commit
def replace_buildkite_placeholders(step: str, shard_id: int, num_shards: int) -> str:

View File

@ -4,7 +4,6 @@ import textwrap
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
from cli.lib.common.cli_helper import BaseRunner
from cli.lib.common.docker_helper import local_image_exists
from cli.lib.common.envs_helper import (
@ -21,7 +20,14 @@ from cli.lib.common.path_helper import (
is_path_exist,
)
from cli.lib.common.utils import run_command
from cli.lib.core.vllm.lib import clone_vllm
from cli.lib.core.vllm.lib import clone_vllm, write_gh_step_summary
from cli.lib.common.gh_summary import (
summarize_content_from_file,
summarize_wheels,
gh_summary_path,
)
import torch
from torch import torch_version
logger = logging.getLogger(__name__)
@ -153,18 +159,60 @@ class VllmBuildRunner(BaseRunner):
"""
inputs = VllmBuildParameters()
logger.info("Running vllm build with inputs: %s", inputs)
clone_vllm()
vllm_commit = clone_vllm()
vllm_sha_url = f"${vllm_commit}](https://github.com/vllm-project/vllm/commit/${vllm_commit})"
write_gh_step_summary(
f"""
## Commit Info
- **Vllm Commit**: `{vllm_sha_url}`
- **Torch Version**: `{torch_version}`
"""
)
self.cp_dockerfile_if_exist(inputs)
# cp torch wheels from root direct to vllm workspace if exist
self.cp_torch_whls_if_exist(inputs)
ensure_dir_exists(inputs.output_dir)
# make sure the output dir to store the build artifacts exist
ensure_dir_exists(Path(inputs.output_dir))
cmd = self._generate_docker_build_cmd(inputs)
logger.info("Running docker build: \n %s", cmd)
run_command(cmd, cwd="vllm", env=os.environ.copy())
try:
run_command(cmd, cwd="vllm", env=os.environ.copy())
finally:
self.genearte_vllm_build_summary(vllm_commit, inputs)
def genearte_vllm_build_summary(
    self, vllm_commit: str, inputs: VllmBuildParameters
):
    """
    Write a build summary (vLLM commit, PyTorch commit, wheel listings) to
    the GitHub step summary. No-op outside GitHub Actions.

    Args:
        vllm_commit: the vLLM commit SHA the build was pinned to.
        inputs: build parameters providing output_dir and torch_whls_path.
    """
    if not gh_summary_path():
        logger.info("Skipping, not detect GH Summary env var....")
        return
    logger.info("Generate GH Summary ...")
    vllm_sha_url = (
        f"[{vllm_commit}](https://github.com/vllm-project/vllm/commit/{vllm_commit})"
    )
    write_gh_step_summary(
        f"""
## Build vllm against Pytorch CI
**Vllm Commit**: `{vllm_sha_url}`
"""
    )
    torch_sha = os.getenv("GITHUB_SHA")
    if torch_sha:  # only can grab this in github action
        # Fixed: the original f-string had a stray trailing ']' that broke
        # the rendered Markdown link.
        torch_sha_url = (
            f"[{torch_sha}](https://github.com/pytorch/pytorch/commit/{torch_sha})"
        )
        write_gh_step_summary(
            f"""
**Pytorch Commit**: `{torch_sha_url}`
"""
        )
    vllm_artifact_dir = inputs.output_dir / "wheels"
    summarize_content_from_file(
        vllm_artifact_dir, "build_summary.txt", title="Vllm build package summary"
    )
    summarize_wheels(
        inputs.torch_whls_path, max_depth=3, title="Torch Wheels Artifacts"
    )
    summarize_wheels(vllm_artifact_dir, max_depth=3, title="Vllm Wheels Artifacts")
def cp_torch_whls_if_exist(self, inputs: VllmBuildParameters) -> str:
if not inputs.use_torch_whl:

View File

@ -215,11 +215,14 @@ def preprocess_test_in(
"torchaudio",
"xformers",
"mamba_ssm",
"pybind11",
] + additional_package_to_move
# Read current requirements
target_path = Path(target_file)
lines = target_path.read_text().splitlines()
pkgs_to_add = []
# Remove lines starting with the package names (==, @, >=) — case-insensitive
pattern = re.compile(rf"^({'|'.join(pkgs_to_remove)})\s*(==|@|>=)", re.IGNORECASE)
kept_lines = [line for line in lines if not pattern.match(line)]
@ -236,7 +239,11 @@ def preprocess_test_in(
]
# Write back: header_lines + blank + kept_lines
out = "\n".join(header_lines + [""] + kept_lines) + "\n"
out_lines = header_lines + [""] + kept_lines
if pkgs_to_add:
out_lines += [""] + pkgs_to_add
out = "\n".join(out_lines) + "\n"
target_path.write_text(out)
logger.info("[INFO] Updated %s", target_file)

View File

@ -48,6 +48,7 @@ runs:
BASE_IMAGE: ${{ inputs.docker-image }}
BUILD_TARGETS: ${{ inputs.build-targets }}
PARENT_OUTPUT_DIR: ${{ inputs.output-dir}}
shell: bash
run: |
set -euo pipefail
@ -61,6 +62,8 @@ runs:
MAX_JOBS="$(nproc --ignore=6)"
export MAX_JOBS
echo "$GITHUB_STEP_SUMMARY"
# Split the comma-separated list and build each target
IFS=',' read -ra TARGETS <<< "$BUILD_TARGETS"
for target in "${TARGETS[@]}"; do

View File

@ -176,6 +176,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# Build can take a long time, and the torch nightly version fetched from url can be different in next docker stage.
# track the nightly torch version used in the build, when we set up runtime environment we can make sure the version is the same
RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio' > torch_build_versions.txt
RUN cat torch_build_versions.txt
RUN pip freeze | grep -E 'torch|xformers|torchvision|torchaudio'
@ -358,7 +359,7 @@ ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
# Build flashinfer for torch nightly from source around 10 mins
ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
# Keep this in sync with https://github.com/vllm-project/vllm/blob/main/requirements/cuda.txt
ARG FLASHINFER_GIT_REF="v0.2.9rc2"
ARG FLASHINFER_GIT_REF="v0.2.14.post1"
RUN --mount=type=cache,target=/root/.cache/uv \
git clone --depth 1 --recursive --shallow-submodules \
--branch ${FLASHINFER_GIT_REF} \
@ -376,6 +377,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# Logging to confirm the torch versions
RUN pip freeze | grep -E 'torch|xformers|vllm|flashinfer'
RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio\|^xformers\|^vllm\|^flashinfer' > build_summary.txt
################### VLLM INSTALLED IMAGE ####################
@ -433,4 +435,5 @@ FROM scratch as export-wheels
# Just copy the wheels we prepared in previous stages
COPY --from=base /workspace/xformers-dist /wheels/xformers
COPY --from=build /workspace/vllm-dist /wheels/vllm
COPY --from=vllm-base /workspace/build_summary.txt /wheels/build_summary.txt
COPY --from=vllm-base /workspace/wheels/flashinfer /wheels/flashinfer-python