Compare commits

...

3 Commits

Author SHA1 Message Date
a0801ef6be add workflow to dispatch 2025-08-26 18:20:49 -07:00
735b375db4 echo variables
ghstack-source-id: 3c8f54e83cad9760fb06b39366bea2f31a39342f
Pull-Request: https://github.com/pytorch/pytorch/pull/161565
2025-08-26 17:10:56 -07:00
011155aea3 echo variables
ghstack-source-id: bd39100f9f9c99a5c45b85a48020375ac5f95da6
Pull-Request: https://github.com/pytorch/pytorch/pull/161537
2025-08-26 17:10:55 -07:00
7 changed files with 298 additions and 11 deletions

View File

@ -0,0 +1,223 @@
from __future__ import annotations
import os
from pathlib import Path
from typing import Iterable, Mapping, Optional
import logging
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Iterable, Tuple
logger = logging.getLogger(__name__)
def gh_summary_path() -> Path | None:
"""Return the Path to the GitHub step summary file, or None if not set."""
p = os.environ.get("GITHUB_STEP_SUMMARY")
return Path(p) if p else None
def write_gh_step_summary(md: str, *, append_content: bool = True) -> bool:
    """
    Write Markdown content to the GitHub Step Summary file if GITHUB_STEP_SUMMARY is set.

    Args:
        md: Markdown text to write (trailing whitespace is stripped, one newline appended).
        append_content: default True; append to the end of the file, else overwrite it.

    Returns:
        True if written successfully (in GitHub Actions environment),
        False if skipped (e.g., running locally where the variable is not set).
    """
    summary_file = gh_summary_path()
    if summary_file is None:
        # When running locally, just log to console instead of failing.
        logger.info("[gh-summary] GITHUB_STEP_SUMMARY not set, skipping write.")
        return False
    summary_file.parent.mkdir(parents=True, exist_ok=True)
    open_mode = "a" if append_content else "w"
    with summary_file.open(open_mode, encoding="utf-8") as fh:
        fh.write(md.rstrip() + "\n")
    return True
def md_heading(text: str, level: int = 2) -> str:
    """Generate a Markdown heading string; *level* is clamped to the valid 1-6 range."""
    clamped = min(max(level, 1), 6)
    return "#" * clamped + f" {text}\n"
def md_kv_table(rows: Iterable[Mapping[str, str | int | float]]) -> str:
    """
    Render a list of dictionaries as a Markdown table.

    The header is the union of all keys across rows, in first-seen order, so
    the rendered table is deterministic (the previous set-based collection
    produced an arbitrary column order on every run). Missing keys render as
    empty cells.

    Example:
        # Suppose you want to summarize benchmark results
        rows = [
            {"name": "transformer-small", "p50": 12.3, "p90(ms)": 18.4},
            {"name": "transformer-large", "p50": 45.1, "p90(ms)": 60.7},
        ]
        content = []
        content.append(md_heading("Benchmark Results", level=2))
        content.append(md_kv_table(rows))
        content.append(md_details("Raw logs", "```\n[INFO] benchmark log ...\n```"))
        # Join the pieces into one Markdown block
        markdown = '\n'.join(content)
        # Write to GitHub Actions summary (or log locally if not in CI)
        write_gh_step_summary(markdown, append_content=True)
    """
    rows = list(rows)
    if not rows:
        return "_(no data)_\n"
    # dict.fromkeys de-duplicates while preserving insertion (first-seen) order.
    cols = list(dict.fromkeys(k for r in rows for k in r))
    header = "| " + " | ".join(cols) + " |\n"
    sep = "|" + "|".join(" --- " for _ in cols) + "|\n"
    body = "".join(
        "| " + " | ".join(str(r.get(c, "")) for c in cols) + " |\n" for r in rows
    )
    return header + sep + body + "\n"
def md_details(summary: str, content: str) -> str:
    """Generate a collapsible <details> block with a summary and inner content."""
    pieces = (
        "<details>",
        f"<summary>{summary}</summary>",
        "",
        content,
        "",
        "</details>",
    )
    return "\n".join(pieces) + "\n"
# ---- helper test to generate a summary for list of pytest failures ------#
def summarize_failures_by_test_command(
    xml_and_labels: Iterable[Tuple[str | Path, str]],
    *,
    title: str = "Pytest Failures by Test Command",
    dedupe_within_command: bool = True,
) -> None:
    """
    Summarize pytest failures, grouped per test command, in the GH step summary.

    Args:
        xml_and_labels: list of (xml_path, label) pairs.
            Each XML corresponds to one pytest subprocess (one test command).
        title: heading written once above all sections.
        dedupe_within_command: if True, de-duplicate (and sort) failures
            within each command before listing them.

    Behavior:
        - Writes a section per test command if it has failures.
        - Each failed test is listed as 'path/to/test.py:test_name'.

    Example:
        xmls = [
            ("reports/junit_cmd0.xml", "pytest -v -s tests/unit"),
            ("reports/junit_cmd1.xml", "pytest -v -s tests/integration"),
            ("reports/junit_cmd2.xml", "pytest -v -s tests/entrypoints"),
        ]
        summarize_failures_by_test_command(
            xmls,
            title="Consolidated Pytest Failures",
        )
    """
    write_gh_step_summary(md_heading(title, level=2))
    for xml_path, label in xml_and_labels:
        failed = _parse_failed_simple(Path(xml_path))
        if dedupe_within_command:
            failed = sorted(set(failed))
        if not failed:
            continue  # skip commands with no failures
        write_gh_step_summary(md_heading(f"Test Command: {label}", level=3))
        lines = "\n".join(f"- {item}" for item in failed)
        write_gh_step_summary(lines + "\n")
def _to_simple_name_from_testcase(tc: ET.Element) -> str:
"""
Convert a <testcase> into 'path/to/test.py:test_name' format.
Prefer the 'file' attribute if available, else fall back to classname.
"""
name = tc.attrib.get("name", "")
file_attr = tc.attrib.get("file")
if file_attr:
return f"{file_attr}:{name}"
classname = tc.attrib.get("classname", "")
parts = classname.split(".") if classname else []
if len(parts) >= 1:
# drop last part if it's a class, treat rest as module path
mod_parts = parts[:-1] if len(parts) >= 2 else parts
mod_path = "/".join(mod_parts) + ".py" if mod_parts else "unknown.py"
return f"{mod_path}:{name}"
return f"unknown.py:{name or 'unknown_test'}"
def _parse_failed_simple(xml_path: Path) -> list[str]:
    """
    Parse one junit XML and return failures as ['tests/a_test.py:test_x', ...].

    A testcase counts as failed when it has a <failure> or <error> child;
    a missing XML file yields an empty list.
    """
    if not xml_path.exists():
        return []
    root = ET.parse(xml_path).getroot()
    bad_tags = {"failure", "error"}
    return [
        _to_simple_name_from_testcase(case)
        for case in root.iter("testcase")
        if any(child.tag in bad_tags for child in case)
    ]
def summarize_content_from_file(
    output_dir: Path,
    freeze_file: str,
    title: str = "Wheels (pip freeze)",
    code_lang: str = "",  # e.g. "text" or "ini"
) -> bool:
    """
    Read a text file from output_dir/freeze_file and append it to
    the GitHub Step Summary as a Markdown code block.

    Returns True if something was written, False otherwise.
    """
    source = Path(output_dir) / freeze_file
    if not source.exists():
        return False
    body = source.read_text(encoding="utf-8").strip()
    if not body:
        return False
    fence_open = f"```{code_lang}".rstrip()
    block = "\n".join([md_heading(title, 2), fence_open, body, "```"])
    return write_gh_step_summary(block + "\n")
def summarize_wheels(
    output_dir: Path,
    title: str = "Wheels",
    max_depth: Optional[int] = None,  # None = unlimited
) -> bool:
    """
    Walk output_dir up to max_depth and list all *.whl files in the GH summary.
    Grouped as 'package: relative/path/filename.whl'.

    Args:
        output_dir: base directory to search (str or Path accepted)
        title: section title in GH summary
        max_depth: maximum folder depth relative to output_dir (0 = only top-level)

    Returns:
        True if at least one wheel was found and the summary was written,
        False otherwise.
    """
    # Convert before .exists() so a plain str argument also works.
    root = Path(output_dir)
    if not root.exists():
        return False
    lines = [md_heading(title, 2)]
    for dirpath, dirnames, filenames in os.walk(root):
        depth = len(Path(dirpath).relative_to(root).parts)
        if max_depth is not None:
            if depth > max_depth:
                continue  # defensive; pruning below normally prevents this
            if depth == max_depth:
                # Prune: children would exceed max_depth, so don't walk them.
                dirnames[:] = []
        for fname in sorted(filenames):
            if not fname.endswith(".whl"):
                continue
            # Wheel filenames are '<package>-<version>-...' per PEP 427.
            pkg = fname.split("-")[0]
            # Path-aware relative path instead of fragile str.replace().
            relpath = (Path(dirpath) / fname).relative_to(root)
            lines.append(f"- {pkg}: {relpath}")
    if len(lines) > 1:
        return write_gh_step_summary("\n".join(lines) + "\n")
    return False

View File

@ -45,7 +45,7 @@ def clone_external_repo(target: str, repo: str, dst: str = "", update_submodules
# Checkout pinned commit
commit = get_post_build_pinned_commit(target)
logger.info("Checking out pinned commit %s", commit)
logger.info("Checking out pinned %s commit %s", target, commit)
r.git.checkout(commit)
# Update submodules if requested
@ -55,7 +55,7 @@ def clone_external_repo(target: str, repo: str, dst: str = "", update_submodules
sm.update(init=True, recursive=True, progress=PrintProgress())
logger.info("Successfully cloned %s", target)
return r
return r, commit
except GitCommandError as e:
logger.error("Git operation failed: %s", e)

View File

@ -1,10 +1,12 @@
import logging
from pathlib import Path
import re
from typing import Any
from cli.lib.common.git_helper import clone_external_repo
from cli.lib.common.pip_helper import pip_install_packages
from cli.lib.common.utils import run_command, temp_environ, working_directory
from cli.lib.common.gh_summary import md_heading, write_gh_step_summary
logger = logging.getLogger(__name__)
@ -214,12 +216,13 @@ def run_test_plan(
def clone_vllm(dst: str = "vllm"):
clone_external_repo(
_, commit = clone_external_repo(
target="vllm",
repo="https://github.com/vllm-project/vllm.git",
dst=dst,
update_submodules=True,
)
return commit
def replace_buildkite_placeholders(step: str, shard_id: int, num_shards: int) -> str:

View File

@ -4,7 +4,6 @@ import textwrap
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
from cli.lib.common.cli_helper import BaseRunner
from cli.lib.common.docker_helper import local_image_exists
from cli.lib.common.envs_helper import (
@ -21,7 +20,14 @@ from cli.lib.common.path_helper import (
is_path_exist,
)
from cli.lib.common.utils import run_command
from cli.lib.core.vllm.lib import clone_vllm
from cli.lib.core.vllm.lib import clone_vllm, write_gh_step_summary
from cli.lib.common.gh_summary import (
summarize_content_from_file,
summarize_wheels,
gh_summary_path,
)
import torch
from torch import torch_version
logger = logging.getLogger(__name__)
@ -153,18 +159,60 @@ class VllmBuildRunner(BaseRunner):
"""
inputs = VllmBuildParameters()
logger.info("Running vllm build with inputs: %s", inputs)
clone_vllm()
vllm_commit = clone_vllm()
vllm_sha_url = f"${vllm_commit}](https://github.com/vllm-project/vllm/commit/${vllm_commit})"
write_gh_step_summary(
f"""
## Commit Info
- **Vllm Commit**: `{vllm_sha_url}`
- **Torch Version**: `{torch_version}`
"""
)
self.cp_dockerfile_if_exist(inputs)
# cp torch wheels from root direct to vllm workspace if exist
self.cp_torch_whls_if_exist(inputs)
ensure_dir_exists(inputs.output_dir)
# make sure the output dir to store the build artifacts exist
ensure_dir_exists(Path(inputs.output_dir))
cmd = self._generate_docker_build_cmd(inputs)
logger.info("Running docker build: \n %s", cmd)
run_command(cmd, cwd="vllm", env=os.environ.copy())
try:
run_command(cmd, cwd="vllm", env=os.environ.copy())
finally:
self.genearte_vllm_build_summary(vllm_commit, inputs)
def genearte_vllm_build_summary(
    self, vllm_commit: str, inputs: VllmBuildParameters
):
    """
    Write a build summary (vLLM commit, PyTorch commit, wheel listings) to
    the GitHub step summary. No-op outside GitHub Actions.

    Args:
        vllm_commit: the vLLM commit SHA the build was pinned to.
        inputs: build parameters providing output_dir and torch_whls_path.
    """
    if not gh_summary_path():
        logger.info("Skipping, not detect GH Summary env var....")
        return
    logger.info("Generate GH Summary ...")
    vllm_sha_url = (
        f"[{vllm_commit}](https://github.com/vllm-project/vllm/commit/{vllm_commit})"
    )
    write_gh_step_summary(
        f"""
## Build vllm against Pytorch CI
**Vllm Commit**: `{vllm_sha_url}`
"""
    )
    torch_sha = os.getenv("GITHUB_SHA")
    if torch_sha:  # only can grab this in github action
        # Fixed: the original f-string had a stray trailing ']' that broke
        # the rendered Markdown link.
        torch_sha_url = (
            f"[{torch_sha}](https://github.com/pytorch/pytorch/commit/{torch_sha})"
        )
        write_gh_step_summary(
            f"""
**Pytorch Commit**: `{torch_sha_url}`
"""
        )
    vllm_artifact_dir = inputs.output_dir / "wheels"
    summarize_content_from_file(
        vllm_artifact_dir, "build_summary.txt", title="Vllm build package summary"
    )
    summarize_wheels(
        inputs.torch_whls_path, max_depth=3, title="Torch Wheels Artifacts"
    )
    summarize_wheels(vllm_artifact_dir, max_depth=3, title="Vllm Wheels Artifacts")
def cp_torch_whls_if_exist(self, inputs: VllmBuildParameters) -> str:
if not inputs.use_torch_whl:

View File

@ -215,11 +215,14 @@ def preprocess_test_in(
"torchaudio",
"xformers",
"mamba_ssm",
"pybind11",
] + additional_package_to_move
# Read current requirements
target_path = Path(target_file)
lines = target_path.read_text().splitlines()
pkgs_to_add = []
# Remove lines starting with the package names (==, @, >=) — case-insensitive
pattern = re.compile(rf"^({'|'.join(pkgs_to_remove)})\s*(==|@|>=)", re.IGNORECASE)
kept_lines = [line for line in lines if not pattern.match(line)]
@ -236,7 +239,11 @@ def preprocess_test_in(
]
# Write back: header_lines + blank + kept_lines
out = "\n".join(header_lines + [""] + kept_lines) + "\n"
out_lines = header_lines + [""] + kept_lines
if pkgs_to_add:
out_lines += [""] + pkgs_to_add
out = "\n".join(out_lines) + "\n"
target_path.write_text(out)
logger.info("[INFO] Updated %s", target_file)

View File

@ -48,6 +48,7 @@ runs:
BASE_IMAGE: ${{ inputs.docker-image }}
BUILD_TARGETS: ${{ inputs.build-targets }}
PARENT_OUTPUT_DIR: ${{ inputs.output-dir}}
shell: bash
run: |
set -euo pipefail
@ -61,6 +62,8 @@ runs:
MAX_JOBS="$(nproc --ignore=6)"
export MAX_JOBS
echo "$GITHUB_STEP_SUMMARY"
# Split the comma-separated list and build each target
IFS=',' read -ra TARGETS <<< "$BUILD_TARGETS"
for target in "${TARGETS[@]}"; do

View File

@ -176,6 +176,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# Build can take a long time, and the torch nightly version fetched from url can be different in next docker stage.
# track the nightly torch version used in the build, when we set up runtime environment we can make sure the version is the same
RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio' > torch_build_versions.txt
RUN cat torch_build_versions.txt
RUN pip freeze | grep -E 'torch|xformers|torchvision|torchaudio'
@ -358,7 +359,7 @@ ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
# Build flashinfer for torch nightly from source around 10 mins
ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
# Keep this in sync with https://github.com/vllm-project/vllm/blob/main/requirements/cuda.txt
ARG FLASHINFER_GIT_REF="v0.2.9rc2"
ARG FLASHINFER_GIT_REF="v0.2.14.post1"
RUN --mount=type=cache,target=/root/.cache/uv \
git clone --depth 1 --recursive --shallow-submodules \
--branch ${FLASHINFER_GIT_REF} \
@ -376,6 +377,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# Logging to confirm the torch versions
RUN pip freeze | grep -E 'torch|xformers|vllm|flashinfer'
RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio\|^xformers\|^vllm\|^flashinfer' > build_summary.txt
################### VLLM INSTALLED IMAGE ####################
@ -433,4 +435,5 @@ FROM scratch as export-wheels
# Just copy the wheels we prepared in previous stages
COPY --from=base /workspace/xformers-dist /wheels/xformers
COPY --from=build /workspace/vllm-dist /wheels/vllm
COPY --from=vllm-base /workspace/build_summary.txt /wheels/build_summary.txt
COPY --from=vllm-base /workspace/wheels/flashinfer /wheels/flashinfer-python